Create a Spring Boot PDF document reader


In this post we are going to use VS Code, Spring Boot and Java to create a PDF document reader.

Install the Spring Boot Extension Pack

Install Spring Initializr Extension

  • CMD – Shift – X
  • spring-initializr
  • Install

Run the Spring Initializr

  • CMD – Shift – P
  • Spring Initializr: Create Maven Project
    • 3.2.1 (Spring Boot Version)
    • Java (Project Language)
    • com.skills421.examples
    • docreader
    • Jar (packaging type – we are going to run it locally)
    • 17 (Java Version)
    • 0 (dependencies)

Edit DocreaderApplication.java

package com.skills421.examples.docreader;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.core.io.Resource;
import org.springframework.core.io.ResourceLoader;

/**
 * Spring Boot entry point that prints a text file from the classpath to
 * standard output when the application bean is constructed.
 */
@SpringBootApplication
public class DocreaderApplication {

	private final ResourceLoader resourceLoader;

	/**
	 * Stores the resource loader and immediately prints {@code sample.txt}.
	 *
	 * @param resourceLoader loader used to resolve {@code classpath:} resources
	 */
	public DocreaderApplication(ResourceLoader resourceLoader) {
		this.resourceLoader = resourceLoader;

		// readPDFFile is not defined in this version of the class, so only
		// the text file is read here.
		readTextFile("sample.txt");
	}

	/**
	 * Starts the Spring application.
	 *
	 * @param args command-line arguments passed through to Spring Boot
	 */
	public static void main(String[] args) {
		SpringApplication.run(DocreaderApplication.class, args);
	}

	/**
	 * Prints every line of a classpath text file to standard output.
	 * If the file does not exist a message is written to standard error;
	 * I/O failures are reported via their stack trace.
	 *
	 * @param fileName name of the file, relative to the classpath root
	 */
	public void readTextFile(String fileName) {
		Resource resource = resourceLoader.getResource("classpath:" + fileName);

		if (!resource.exists()) {
			System.err.println("File not found on the classpath: " + fileName);
			return;
		}

		// try-with-resources closes the reader (and the underlying stream)
		// even when readLine throws; the charset is explicit so the result
		// does not depend on the platform default.
		try (BufferedReader bufferedReader = new BufferedReader(
				new InputStreamReader(resource.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
			String line;
			while ((line = bufferedReader.readLine()) != null) {
				System.out.println(line);
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}

Create src/main/resources/sample.txt

This is line 1 of sample.txt
This is line 2 of sample.txt

Run the Code

  • right click on DocreaderApplication.java
  • Run Java

Use Pages to generate src/main/resources/sample.pdf

This is line 1 of sample.pdf
This is line 2 of sample.pdf

Add Apache PDFBox to the pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>
	<!-- Inherits from the Spring Boot starter parent, version 3.2.1 -->
	<parent>
		<groupId>org.springframework.boot</groupId>
		<artifactId>spring-boot-starter-parent</artifactId>
		<version>3.2.1</version>
		<relativePath/> <!-- lookup parent from repository -->
	</parent>

	<!-- Coordinates of this project -->
	<groupId>com.skills421.examples</groupId>
	<artifactId>docreader</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<name>docreader</name>
	<description>Demo project for Spring Boot</description>

	<properties>
		<java.version>17</java.version>
		<!-- Referenced as ${pdfbox.version} by the PDFBox dependency below -->
		<pdfbox.version>2.0.28</pdfbox.version>
	</properties>
	<dependencies>
		<!-- No explicit version here; presumably supplied by the parent POM -->
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter</artifactId>
		</dependency>

		<!-- Apache PDFBox: supplies the org.apache.pdfbox classes (PDDocument, PDFTextStripper) used to read the PDF -->
		<dependency>
			<groupId>org.apache.pdfbox</groupId>
			<artifactId>pdfbox</artifactId>
			<version>${pdfbox.version}</version>
		  </dependency>

		<!-- Test-scoped dependency (scope = test) -->
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-test</artifactId>
			<scope>test</scope>
		</dependency>
	</dependencies>

	<build>
		<plugins>
			<!-- Spring Boot Maven plugin -->
			<plugin>
				<groupId>org.springframework.boot</groupId>
				<artifactId>spring-boot-maven-plugin</artifactId>
			</plugin>
		</plugins>
	</build>

</project>

Edit DocreaderApplication.java

package com.skills421.examples.docreader;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.core.io.Resource;
import org.springframework.core.io.ResourceLoader;

/**
 * Spring Boot entry point that prints a text file and the extracted text of
 * a PDF file, both loaded from the classpath, when the application bean is
 * constructed.
 */
@SpringBootApplication
public class DocreaderApplication {

	private final ResourceLoader resourceLoader;

	/**
	 * Stores the resource loader and immediately prints {@code sample.txt}
	 * followed by the text of {@code sample.pdf}.
	 *
	 * @param resourceLoader loader used to resolve {@code classpath:} resources
	 */
	public DocreaderApplication(ResourceLoader resourceLoader) {
		this.resourceLoader = resourceLoader;

		readTextFile("sample.txt");
		readPDFFile("sample.pdf");
	}

	/**
	 * Starts the Spring application.
	 *
	 * @param args command-line arguments passed through to Spring Boot
	 */
	public static void main(String[] args) {
		SpringApplication.run(DocreaderApplication.class, args);
	}

	/**
	 * Prints every line of a classpath text file to standard output.
	 * If the file does not exist a message is written to standard error;
	 * I/O failures are reported via their stack trace.
	 *
	 * @param fileName name of the file, relative to the classpath root
	 */
	public void readTextFile(String fileName) {
		Resource resource = resourceLoader.getResource("classpath:" + fileName);

		if (!resource.exists()) {
			System.err.println("File not found on the classpath: " + fileName);
			return;
		}

		// try-with-resources closes the reader (and the underlying stream)
		// even when readLine throws; the charset is explicit so the result
		// does not depend on the platform default.
		try (BufferedReader bufferedReader = new BufferedReader(
				new InputStreamReader(resource.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
			String line;
			while ((line = bufferedReader.readLine()) != null) {
				System.out.println(line);
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Extracts the text of a classpath PDF file with PDFBox and prints it to
	 * standard output, preceded by the line {@code Extracted Text:}.
	 * If the file does not exist a message is written to standard error;
	 * I/O failures are reported via their stack trace.
	 *
	 * @param fileName name of the PDF file, relative to the classpath root
	 */
	public void readPDFFile(String fileName) {
		Resource resource = resourceLoader.getResource("classpath:" + fileName);

		if (!resource.exists()) {
			// Same reporting as readTextFile, instead of failing silently.
			System.err.println("File not found on the classpath: " + fileName);
			return;
		}

		// Both the input stream and the document are closed even if loading
		// or text extraction throws.
		try (java.io.InputStream inputStream = resource.getInputStream();
				PDDocument document = PDDocument.load(inputStream)) {
			PDFTextStripper textStripper = new PDFTextStripper();

			// Extract text from the PDF document
			String text = textStripper.getText(document);

			System.out.println("Extracted Text:");
			System.out.println(text);
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}

Run the code

  • right click on DocreaderApplication.java
  • Run Java

References

Leave a comment