In this post we are going to use VS Code, Spring Boot, and Java to create a PDF document reader.
Install the Spring Boot Extension Pack
Install the Spring Initializr Extension
- CMD – Shift – X
- spring-initializr
- Install
Run the Spring Initializr
- CMD – Shift – P
- Spring Initializr: Create Maven Project
- 3.2.1 (Spring Boot Version)
- Java (Project Language)
- com.skills421.examples (Group Id)
- docreader (Artifact Id)
- Jar (Packaging Type; we are going to run it locally)
- 17 (Java Version)
- 0 dependencies selected (none are needed at this point)
Edit DocreaderApplication.java
package com.skills421.examples.docreader;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.core.io.Resource;
import org.springframework.core.io.ResourceLoader;
/**
 * Spring Boot entry point that prints a text file from the classpath to standard output.
 *
 * <p>The file is read from the constructor, so the output is produced while this class is
 * being instantiated rather than after {@link SpringApplication#run} has returned.
 */
@SpringBootApplication
public class DocreaderApplication {

    /** Resolves {@code classpath:} locations into readable resources. */
    private final ResourceLoader resourceLoader;

    /**
     * Stores the resource loader and immediately prints {@code sample.txt}.
     *
     * @param resourceLoader loader used to resolve classpath resources
     */
    public DocreaderApplication(ResourceLoader resourceLoader) {
        this.resourceLoader = resourceLoader;
        // Only the text file is read at this step: this version of the class defines no
        // PDF reader, so calling one here would not compile.
        readTextFile("sample.txt");
    }

    /**
     * Starts the Spring Boot application.
     *
     * @param args command-line arguments, passed through to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(DocreaderApplication.class, args);
    }

    /**
     * Prints every line of a classpath text file to standard output.
     *
     * <p>If the resource does not exist, a message is written to standard error instead.
     * An {@link IOException} raised while reading is not propagated; its stack trace is printed.
     *
     * @param fileName resource name relative to the classpath root, e.g. {@code sample.txt}
     */
    public void readTextFile(String fileName) {
        Resource resource = resourceLoader.getResource("classpath:" + fileName);
        if (!resource.exists()) {
            System.err.println("File not found on the classpath: " + fileName);
            return;
        }

        // try-with-resources closes the reader (and the underlying stream) even when
        // readLine() throws. The charset is explicit because the platform default is not
        // guaranteed to be UTF-8 on Java 17.
        try (BufferedReader bufferedReader = new BufferedReader(
                new InputStreamReader(resource.getInputStream(),
                        java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                System.out.println(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Create src/main/resources/sample.txt
This is line 1 of sample.txt
This is line 2 of sample.txt
Run the Code
- right click on DocreaderApplication.java
- Run Java
Use Pages to generate src/main/resources/sample.pdf with the following content:
This is line 1 of sample.pdf
This is line 2 of sample.pdf
Add Apache PDFBox to the pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven build descriptor for the docreader example project. -->
<modelVersion>4.0.0</modelVersion>
<!-- This project inherits from the Spring Boot starter parent POM, version 3.2.1. -->
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>3.2.1</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<!-- Coordinates of this project. -->
<groupId>com.skills421.examples</groupId>
<artifactId>docreader</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>docreader</name>
<description>Demo project for Spring Boot</description>
<!-- pdfbox.version is referenced by the PDFBox dependency below, so the version is set in one place. -->
<properties>
<java.version>17</java.version>
<pdfbox.version>2.0.28</pdfbox.version>
</properties>
<dependencies>
<!-- No version is declared for the Spring Boot artifacts here; presumably it is supplied by the parent POM - confirm. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
</dependency>
<!-- Apache PDFBox: the newly added dependency; its version comes from the pdfbox.version property -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>${pdfbox.version}</version>
</dependency>
<!-- Declared with test scope, so it is only available to test code. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Spring Boot Maven plugin. NOTE(review): presumably used to package and run the application - confirm against the plugin documentation. -->
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>
Edit DocreaderApplication.java
package com.skills421.examples.docreader;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.core.io.Resource;
import org.springframework.core.io.ResourceLoader;
/**
 * Spring Boot entry point that prints a text file and the extracted text of a PDF file,
 * both loaded from the classpath, to standard output.
 *
 * <p>Both files are read from the constructor, so the output is produced while this class is
 * being instantiated rather than after {@link SpringApplication#run} has returned.
 */
@SpringBootApplication
public class DocreaderApplication {

    /** Resolves {@code classpath:} locations into readable resources. */
    private final ResourceLoader resourceLoader;

    /**
     * Stores the resource loader and immediately prints {@code sample.txt} followed by the
     * text of {@code sample.pdf}.
     *
     * @param resourceLoader loader used to resolve classpath resources
     */
    public DocreaderApplication(ResourceLoader resourceLoader) {
        this.resourceLoader = resourceLoader;
        readTextFile("sample.txt");
        readPDFFile("sample.pdf");
    }

    /**
     * Starts the Spring Boot application.
     *
     * @param args command-line arguments, passed through to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(DocreaderApplication.class, args);
    }

    /**
     * Prints every line of a classpath text file to standard output.
     *
     * <p>If the resource does not exist, a message is written to standard error instead.
     * An {@link IOException} raised while reading is not propagated; its stack trace is printed.
     *
     * @param fileName resource name relative to the classpath root, e.g. {@code sample.txt}
     */
    public void readTextFile(String fileName) {
        Resource resource = resourceLoader.getResource("classpath:" + fileName);
        if (!resource.exists()) {
            System.err.println("File not found on the classpath: " + fileName);
            return;
        }

        // try-with-resources closes the reader (and the underlying stream) even when
        // readLine() throws. The charset is explicit because the platform default is not
        // guaranteed to be UTF-8 on Java 17.
        try (BufferedReader bufferedReader = new BufferedReader(
                new InputStreamReader(resource.getInputStream(),
                        java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                System.out.println(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Extracts the text of a classpath PDF file with Apache PDFBox and prints it to standard
     * output, preceded by the line {@code Extracted Text:}.
     *
     * <p>If the resource does not exist, a message is written to standard error, matching
     * {@link #readTextFile(String)}. An {@link IOException} raised while loading or parsing
     * the document is not propagated; its stack trace is printed.
     *
     * @param fileName resource name relative to the classpath root, e.g. {@code sample.pdf}
     */
    public void readPDFFile(String fileName) {
        Resource resource = resourceLoader.getResource("classpath:" + fileName);
        if (!resource.exists()) {
            System.err.println("File not found on the classpath: " + fileName);
            return;
        }

        // Both the raw stream and the parsed document are closed automatically, in reverse
        // order of declaration, even if loading or text extraction fails part-way through.
        try (var pdfStream = resource.getInputStream();
                PDDocument document = PDDocument.load(pdfStream)) {
            PDFTextStripper textStripper = new PDFTextStripper();
            // Extract text from the PDF document
            String text = textStripper.getText(document);
            System.out.println("Extracted Text:");
            System.out.println(text);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Run the code
- right click on DocreaderApplication.java
- Run Java