Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@
<org.apache.maven.project.info.reports.version>3.0.0</org.apache.maven.project.info.reports.version>
<org.apache.maven.site.version>3.7.1</org.apache.maven.site.version>
<org.apache.maven.surfire.version>3.2.5</org.apache.maven.surfire.version>
<org.apache.pdfbox.version>2.0.31</org.apache.pdfbox.version>
<org.apache.pdfbox.version>3.0.5</org.apache.pdfbox.version>
<org.junit.jupiter.version>5.8.2</org.junit.jupiter.version>
<org.junit.pioneer.version>1.9.1</org.junit.pioneer.version>
<org.junit.platform.version>1.8.2</org.junit.platform.version>
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/com/mindee/extraction/PDFExtractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import java.util.List;
import java.util.stream.Collectors;
import javax.imageio.ImageIO;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
Expand Down Expand Up @@ -46,7 +47,7 @@ public PDFExtractor(String filePath) throws IOException {
public PDFExtractor(LocalInputSource source) throws IOException {
this.filename = source.getFilename();
if (source.isPdf()) {
this.sourcePdf = PDDocument.load(source.getFile());
this.sourcePdf = Loader.loadPDF(source.getFile());
} else {
PDDocument document = new PDDocument();
PDPage page = new PDPage();
Expand Down Expand Up @@ -104,7 +105,7 @@ public List<ExtractedPDF> extractSubDocuments(List<List<Integer>> pageIndexes)
+ String.format("%3s", pageIndexElement.get(pageIndexElement.size() - 1) + 1)
.replace(" ", "0") + "." + splitName[1];
extractedPDFs.add(
new ExtractedPDF(PDDocument.load(mergePdfPages(this.sourcePdf, pageIndexElement, false)),
new ExtractedPDF(Loader.loadPDF(mergePdfPages(this.sourcePdf, pageIndexElement, false)),
fieldFilename));
}
return extractedPDFs;
Expand Down
6 changes: 4 additions & 2 deletions src/main/java/com/mindee/input/InputSourceUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import java.io.IOException;
import java.net.URL;
import javax.imageio.ImageIO;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

Expand Down Expand Up @@ -70,7 +72,7 @@ public static String[] splitNameStrict(String filename) throws MindeeException {
* Returns true if the file is a PDF.
*/
public static boolean isPdf(byte[] fileBytes) {
try (PDDocument document = PDDocument.load(new ByteArrayInputStream(fileBytes))) {
try (PDDocument document = Loader.loadPDF(new RandomAccessReadBuffer(new ByteArrayInputStream(fileBytes)))) {
return true;
} catch (IOException e) {
return false;
Expand All @@ -95,7 +97,7 @@ public static void validateUrl(URL inputUrl) {
*/
public static boolean hasSourceText(byte[] fileBytes) {
try {
PDDocument document = PDDocument.load(new ByteArrayInputStream(fileBytes));
PDDocument document = Loader.loadPDF(new RandomAccessReadBuffer(new ByteArrayInputStream(fileBytes)));
PDFTextStripper stripper = new PDFTextStripper();

for (int i = 0; i < document.getNumberOfPages(); i++) {
Expand Down
14 changes: 8 additions & 6 deletions src/main/java/com/mindee/pdf/PDFUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
Expand All @@ -16,6 +17,7 @@
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.rendering.ImageType;
Expand All @@ -37,7 +39,7 @@ private PDFUtils() {
* @param inputSource The PDF file.
*/
public static int getNumberOfPages(LocalInputSource inputSource) throws IOException {
PDDocument document = PDDocument.load(inputSource.getFile());
PDDocument document = Loader.loadPDF(inputSource.getFile());
int pageCount = document.getNumberOfPages();
document.close();
return pageCount;
Expand Down Expand Up @@ -81,7 +83,7 @@ private static byte[] createPdfFromExistingPdf(
* @param pageNumbers List of page numbers to merge together.
*/
public static byte[] mergePdfPages(File file, List<Integer> pageNumbers) throws IOException {
PDDocument document = PDDocument.load(file);
PDDocument document = Loader.loadPDF(file);
return createPdfFromExistingPdf(document, pageNumbers, true);
}

Expand All @@ -100,7 +102,7 @@ public static byte[] mergePdfPages(


public static boolean isPdfEmpty(File file) throws IOException {
return checkIfPdfIsEmpty(PDDocument.load(file));
return checkIfPdfIsEmpty(Loader.loadPDF(file));
}

private static boolean checkIfPdfIsEmpty(PDDocument document) throws IOException {
Expand Down Expand Up @@ -143,7 +145,7 @@ public static List<PdfPageImage> pdfToImages(String filePath) throws IOException
* @return List of all pages as images.
*/
public static List<PdfPageImage> pdfToImages(LocalInputSource source) throws IOException {
PDDocument document = PDDocument.load(source.getFile());
PDDocument document = Loader.loadPDF(source.getFile());
PDFRenderer pdfRenderer = new PDFRenderer(document);
List<PdfPageImage> pdfPageImages = new ArrayList<>();
for (int i = 0; i < document.getNumberOfPages(); i++) {
Expand Down Expand Up @@ -182,7 +184,7 @@ public static PdfPageImage pdfPageToImage(
int pageNumber
) throws IOException {
int index = pageNumber - 1;
PDDocument document = PDDocument.load(source.getFile());
PDDocument document = Loader.loadPDF(source.getFile());
PDFRenderer pdfRenderer = new PDFRenderer(document);
BufferedImage imageBuffer = pdfPageToImageBuffer(index, document, pdfRenderer);
document.close();
Expand Down Expand Up @@ -242,7 +244,7 @@ protected void writeString(String text, List<TextPosition> textPositions) throws
try {
contentStream.showText(text);
} catch (IllegalArgumentException | UnsupportedOperationException e) {
contentStream.setFont(PDType1Font.HELVETICA, fontSize);
contentStream.setFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA), fontSize);
contentStream.showText(text);
}
contentStream.endText();
Expand Down
7 changes: 4 additions & 3 deletions src/main/java/com/mindee/pdf/PdfBoxApi.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;

/**
Expand All @@ -26,7 +27,7 @@ public SplitPdf split(SplitQuery splitQuery) throws IOException {
throw new MindeeException("This document cannot be open and cannot be split.");
}

try (PDDocument originalDocument = PDDocument.load(splitQuery.getFile())) {
try (PDDocument originalDocument = Loader.loadPDF(splitQuery.getFile())) {
try (PDDocument splitDocument = new PDDocument()) {
int totalOriginalPages = countPages(splitQuery.getFile());

Expand Down Expand Up @@ -73,7 +74,7 @@ private List<Integer> getPageRanges(PageOptions pageOptions, Integer numberOfPag
private boolean checkPdfOpen(byte[] documentFile) {
boolean opens = false;
try {
PDDocument.load(documentFile).close();
Loader.loadPDF(documentFile).close();
opens = true;
} catch (IOException e) {
e.printStackTrace();
Expand All @@ -82,7 +83,7 @@ private boolean checkPdfOpen(byte[] documentFile) {
}

private int countPages(byte[] documentFile) throws IOException {
PDDocument document = PDDocument.load(documentFile);
PDDocument document = Loader.loadPDF(documentFile);
int pageCount = document.getNumberOfPages();
document.close();
return pageCount;
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/com/mindee/pdf/PdfCompressor.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import java.awt.image.BufferedImage;
import java.io.IOException;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
Expand Down Expand Up @@ -38,7 +39,7 @@ public static byte[] compressPdf(
"MINDEE WARNING: Found text inside of the provided PDF file. Compression operation aborted.");
return pdfData;
}
try (PDDocument inputDoc = PDDocument.load(pdfData);
try (PDDocument inputDoc = Loader.loadPDF(pdfData);
PDDocument outputDoc = new PDDocument()) {

PDFRenderer pdfRenderer = new PDFRenderer(inputDoc);
Expand Down
5 changes: 3 additions & 2 deletions src/test/java/com/mindee/input/LocalInputSourceTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import java.util.stream.Collectors;
import javax.imageio.ImageIO;
import org.apache.commons.codec.binary.Base64;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.junit.jupiter.api.Assertions;
Expand Down Expand Up @@ -338,8 +339,8 @@ public void testPdfResizeWithTextKeepsText() throws IOException {
byte[] compressedWithText =
PdfCompressor.compressPdf(initialWithText.getFile(), 100, true, false);

PDDocument originalDoc = PDDocument.load(initialWithText.getFile());
PDDocument compressedDoc = PDDocument.load(compressedWithText);
PDDocument originalDoc = Loader.loadPDF(initialWithText.getFile());
PDDocument compressedDoc = Loader.loadPDF(compressedWithText);

Assertions.assertEquals(originalDoc.getNumberOfPages(), compressedDoc.getNumberOfPages());
Assertions.assertNotEquals(originalDoc.hashCode(), compressedDoc.hashCode());
Expand Down
4 changes: 3 additions & 1 deletion src/test/java/com/mindee/pdf/PDFUtilsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import java.util.Arrays;
import java.util.List;
import java.util.Random;

import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.junit.jupiter.api.Assertions;
Expand Down Expand Up @@ -43,7 +45,7 @@ public void givenADocumentAndListOfPages_whenMerged_thenReturnsCorrectDocument()
File file = new File("src/test/resources/output/fileToTest.pdf");
List<Integer> pageList = Arrays.asList(0, 2, 3, 1, 10, 2, 1);
byte[] newPdf = PDFUtils.mergePdfPages(file, pageList);
PDDocument document = PDDocument.load(newPdf);
PDDocument document = Loader.loadPDF(newPdf);

Assertions.assertEquals(7, document.getNumberOfPages());
document.close();
Expand Down
Loading