diff --git a/pom.xml b/pom.xml index c27cba030..1f09ce13a 100644 --- a/pom.xml +++ b/pom.xml @@ -415,7 +415,7 @@ 3.0.0 3.7.1 3.2.5 - 2.0.31 + 3.0.5 5.8.2 1.9.1 1.8.2 diff --git a/src/main/java/com/mindee/extraction/PDFExtractor.java b/src/main/java/com/mindee/extraction/PDFExtractor.java index c1a494e40..33d71519e 100644 --- a/src/main/java/com/mindee/extraction/PDFExtractor.java +++ b/src/main/java/com/mindee/extraction/PDFExtractor.java @@ -14,6 +14,7 @@ import java.util.List; import java.util.stream.Collectors; import javax.imageio.ImageIO; +import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; @@ -46,7 +47,7 @@ public PDFExtractor(String filePath) throws IOException { public PDFExtractor(LocalInputSource source) throws IOException { this.filename = source.getFilename(); if (source.isPdf()) { - this.sourcePdf = PDDocument.load(source.getFile()); + this.sourcePdf = Loader.loadPDF(source.getFile()); } else { PDDocument document = new PDDocument(); PDPage page = new PDPage(); @@ -104,7 +105,7 @@ public List extractSubDocuments(List> pageIndexes) + String.format("%3s", pageIndexElement.get(pageIndexElement.size() - 1) + 1) .replace(" ", "0") + "." + splitName[1]; extractedPDFs.add( - new ExtractedPDF(PDDocument.load(mergePdfPages(this.sourcePdf, pageIndexElement, false)), + new ExtractedPDF(Loader.loadPDF(mergePdfPages(this.sourcePdf, pageIndexElement, false)), fieldFilename)); } return extractedPDFs; diff --git a/src/main/java/com/mindee/input/InputSourceUtils.java b/src/main/java/com/mindee/input/InputSourceUtils.java index 6a68851da..24a26e594 100644 --- a/src/main/java/com/mindee/input/InputSourceUtils.java +++ b/src/main/java/com/mindee/input/InputSourceUtils.java @@ -5,6 +5,8 @@ import java.io.IOException; import java.net.URL; import javax.imageio.ImageIO; +import org.apache.pdfbox.Loader; +import org.apache.pdfbox.io.RandomAccessReadBuffer; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.text.PDFTextStripper; @@ -70,7 +72,7 @@ public static String[] splitNameStrict(String filename) throws MindeeException { * Returns true if the file is a PDF. */ public static boolean isPdf(byte[] fileBytes) { - try (PDDocument document = PDDocument.load(new ByteArrayInputStream(fileBytes))) { + try (PDDocument document = Loader.loadPDF(new RandomAccessReadBuffer(new ByteArrayInputStream(fileBytes)))) { return true; } catch (IOException e) { return false; @@ -95,7 +97,7 @@ public static void validateUrl(URL inputUrl) { */ public static boolean hasSourceText(byte[] fileBytes) { try { - PDDocument document = PDDocument.load(new ByteArrayInputStream(fileBytes)); + PDDocument document = Loader.loadPDF(new RandomAccessReadBuffer(new ByteArrayInputStream(fileBytes))); PDFTextStripper stripper = new PDFTextStripper(); for (int i = 0; i < document.getNumberOfPages(); i++) { diff --git a/src/main/java/com/mindee/pdf/PDFUtils.java b/src/main/java/com/mindee/pdf/PDFUtils.java index e5d6f819d..f21fa0f7f 100644 --- a/src/main/java/com/mindee/pdf/PDFUtils.java +++ b/src/main/java/com/mindee/pdf/PDFUtils.java @@ -8,6 +8,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import org.apache.pdfbox.Loader; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.PDDocument; @@ -16,6 +17,7 @@ import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.font.Standard14Fonts; import org.apache.pdfbox.pdmodel.graphics.color.PDColor; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; import org.apache.pdfbox.rendering.ImageType; @@ -37,7 +39,7 @@ private PDFUtils() { * @param inputSource The PDF file. */ public static int getNumberOfPages(LocalInputSource inputSource) throws IOException { - PDDocument document = PDDocument.load(inputSource.getFile()); + PDDocument document = Loader.loadPDF(inputSource.getFile()); int pageCount = document.getNumberOfPages(); document.close(); return pageCount; @@ -81,7 +83,7 @@ private static byte[] createPdfFromExistingPdf( * @param pageNumbers Lit of page numbers to merge together. */ public static byte[] mergePdfPages(File file, List pageNumbers) throws IOException { - PDDocument document = PDDocument.load(file); + PDDocument document = Loader.loadPDF(file); return createPdfFromExistingPdf(document, pageNumbers, true); } @@ -100,7 +102,7 @@ public static byte[] mergePdfPages( public static boolean isPdfEmpty(File file) throws IOException { - return checkIfPdfIsEmpty(PDDocument.load(file)); + return checkIfPdfIsEmpty(Loader.loadPDF(file)); } private static boolean checkIfPdfIsEmpty(PDDocument document) throws IOException { @@ -143,7 +145,7 @@ public static List pdfToImages(String filePath) throws IOException * @return List of all pages as images. */ public static List pdfToImages(LocalInputSource source) throws IOException { - PDDocument document = PDDocument.load(source.getFile()); + PDDocument document = Loader.loadPDF(source.getFile()); PDFRenderer pdfRenderer = new PDFRenderer(document); List pdfPageImages = new ArrayList<>(); for (int i = 0; i < document.getNumberOfPages(); i++) { @@ -182,7 +184,7 @@ public static PdfPageImage pdfPageToImage( int pageNumber ) throws IOException { int index = pageNumber - 1; - PDDocument document = PDDocument.load(source.getFile()); + PDDocument document = Loader.loadPDF(source.getFile()); PDFRenderer pdfRenderer = new PDFRenderer(document); BufferedImage imageBuffer = pdfPageToImageBuffer(index, document, pdfRenderer); document.close(); @@ -242,7 +244,7 @@ protected void writeString(String text, List textPositions) throws try { contentStream.showText(text); } catch (IllegalArgumentException | UnsupportedOperationException e) { - contentStream.setFont(PDType1Font.HELVETICA, fontSize); + contentStream.setFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA), fontSize); contentStream.showText(text); } contentStream.endText(); diff --git a/src/main/java/com/mindee/pdf/PdfBoxApi.java b/src/main/java/com/mindee/pdf/PdfBoxApi.java index a1bc64078..8e9749bc5 100644 --- a/src/main/java/com/mindee/pdf/PdfBoxApi.java +++ b/src/main/java/com/mindee/pdf/PdfBoxApi.java @@ -12,6 +12,7 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.Stream; +import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; /** @@ -26,7 +27,7 @@ public SplitPdf split(SplitQuery splitQuery) throws IOException { throw new MindeeException("This document cannot be open and cannot be split."); } - try (PDDocument originalDocument = PDDocument.load(splitQuery.getFile())) { + try (PDDocument originalDocument = Loader.loadPDF(splitQuery.getFile())) { try (PDDocument splitDocument = new PDDocument()) { int totalOriginalPages = countPages(splitQuery.getFile()); @@ -73,7 +74,7 @@ private List getPageRanges(PageOptions pageOptions, Integer numberOfPag private boolean checkPdfOpen(byte[] documentFile) { boolean opens = false; try { - PDDocument.load(documentFile).close(); + Loader.loadPDF(documentFile).close(); opens = true; } catch (IOException e) { e.printStackTrace(); @@ -82,7 +83,7 @@ private boolean checkPdfOpen(byte[] documentFile) { } private int countPages(byte[] documentFile) throws IOException { - PDDocument document = PDDocument.load(documentFile); + PDDocument document = Loader.loadPDF(documentFile); int pageCount = document.getNumberOfPages(); document.close(); return pageCount; diff --git a/src/main/java/com/mindee/pdf/PdfCompressor.java b/src/main/java/com/mindee/pdf/PdfCompressor.java index b7f2c73fa..a73244eb0 100644 --- a/src/main/java/com/mindee/pdf/PdfCompressor.java +++ b/src/main/java/com/mindee/pdf/PdfCompressor.java @@ -5,6 +5,7 @@ import java.awt.image.BufferedImage; import java.io.IOException; +import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; @@ -38,7 +39,7 @@ public static byte[] compressPdf( "MINDEE WARNING: Found text inside of the provided PDF file. Compression operation aborted."); return pdfData; } - try (PDDocument inputDoc = PDDocument.load(pdfData); + try (PDDocument inputDoc = Loader.loadPDF(pdfData); PDDocument outputDoc = new PDDocument()) { PDFRenderer pdfRenderer = new PDFRenderer(inputDoc); diff --git a/src/test/java/com/mindee/input/LocalInputSourceTest.java b/src/test/java/com/mindee/input/LocalInputSourceTest.java index 637e1e987..e9689839d 100644 --- a/src/test/java/com/mindee/input/LocalInputSourceTest.java +++ b/src/test/java/com/mindee/input/LocalInputSourceTest.java @@ -14,6 +14,7 @@ import java.util.stream.Collectors; import javax.imageio.ImageIO; import org.apache.commons.codec.binary.Base64; +import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.text.PDFTextStripper; import org.junit.jupiter.api.Assertions; @@ -338,8 +339,8 @@ public void testPdfResizeWithTextKeepsText() throws IOException { byte[] compressedWithText = PdfCompressor.compressPdf(initialWithText.getFile(), 100, true, false); - PDDocument originalDoc = PDDocument.load(initialWithText.getFile()); - PDDocument compressedDoc = PDDocument.load(compressedWithText); + PDDocument originalDoc = Loader.loadPDF(initialWithText.getFile()); + PDDocument compressedDoc = Loader.loadPDF(compressedWithText); Assertions.assertEquals(originalDoc.getNumberOfPages(), compressedDoc.getNumberOfPages()); Assertions.assertNotEquals(originalDoc.hashCode(), compressedDoc.hashCode()); diff --git a/src/test/java/com/mindee/pdf/PDFUtilsTest.java b/src/test/java/com/mindee/pdf/PDFUtilsTest.java index a218baa70..62f52f397 100644 --- a/src/test/java/com/mindee/pdf/PDFUtilsTest.java +++ b/src/test/java/com/mindee/pdf/PDFUtilsTest.java @@ -10,6 +10,8 @@ import java.util.Arrays; import java.util.List; import java.util.Random; + +import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.junit.jupiter.api.Assertions; @@ -43,7 +45,7 @@ public void givenADocumentAndListOfPages_whenMerged_thenReturnsCorrectDocument() File file = new File("src/test/resources/output/fileToTest.pdf"); List pageList = Arrays.asList(0, 2, 3, 1, 10, 2, 1); byte[] newPdf = PDFUtils.mergePdfPages(file, pageList); - PDDocument document = PDDocument.load(newPdf); + PDDocument document = Loader.loadPDF(newPdf); Assertions.assertEquals(7, document.getNumberOfPages()); document.close();