Skip to content

Commit 97040e7

Browse files
committed
bump pdfbox
1 parent d2d4069 commit 97040e7

File tree

6 files changed

+19
-14
lines changed

6 files changed

+19
-14
lines changed

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,7 @@
415415
<org.apache.maven.project.info.reports.version>3.0.0</org.apache.maven.project.info.reports.version>
416416
<org.apache.maven.site.version>3.7.1</org.apache.maven.site.version>
417417
<org.apache.maven.surfire.version>3.2.5</org.apache.maven.surfire.version>
418-
<org.apache.pdfbox.version>2.0.31</org.apache.pdfbox.version>
418+
<org.apache.pdfbox.version>3.0.5</org.apache.pdfbox.version>
419419
<org.junit.jupiter.version>5.8.2</org.junit.jupiter.version>
420420
<org.junit.pioneer.version>1.9.1</org.junit.pioneer.version>
421421
<org.junit.platform.version>1.8.2</org.junit.platform.version>

src/main/java/com/mindee/extraction/PDFExtractor.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import java.util.List;
1515
import java.util.stream.Collectors;
1616
import javax.imageio.ImageIO;
17+
import org.apache.pdfbox.Loader;
1718
import org.apache.pdfbox.pdmodel.PDDocument;
1819
import org.apache.pdfbox.pdmodel.PDPage;
1920
import org.apache.pdfbox.pdmodel.PDPageContentStream;
@@ -46,7 +47,7 @@ public PDFExtractor(String filePath) throws IOException {
4647
public PDFExtractor(LocalInputSource source) throws IOException {
4748
this.filename = source.getFilename();
4849
if (source.isPdf()) {
49-
this.sourcePdf = PDDocument.load(source.getFile());
50+
this.sourcePdf = Loader.loadPDF(source.getFile());
5051
} else {
5152
PDDocument document = new PDDocument();
5253
PDPage page = new PDPage();
@@ -104,7 +105,7 @@ public List<ExtractedPDF> extractSubDocuments(List<List<Integer>> pageIndexes)
104105
+ String.format("%3s", pageIndexElement.get(pageIndexElement.size() - 1) + 1)
105106
.replace(" ", "0") + "." + splitName[1];
106107
extractedPDFs.add(
107-
new ExtractedPDF(PDDocument.load(mergePdfPages(this.sourcePdf, pageIndexElement, false)),
108+
new ExtractedPDF(Loader.loadPDF(mergePdfPages(this.sourcePdf, pageIndexElement, false)),
108109
fieldFilename));
109110
}
110111
return extractedPDFs;

src/main/java/com/mindee/input/InputSourceUtils.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import java.io.IOException;
66
import java.net.URL;
77
import javax.imageio.ImageIO;
8+
import org.apache.pdfbox.Loader;
89
import org.apache.pdfbox.pdmodel.PDDocument;
910
import org.apache.pdfbox.text.PDFTextStripper;
1011

@@ -70,7 +71,7 @@ public static String[] splitNameStrict(String filename) throws MindeeException {
7071
* Returns true if the file is a PDF.
7172
*/
7273
public static boolean isPdf(byte[] fileBytes) {
73-
try (PDDocument document = PDDocument.load(new ByteArrayInputStream(fileBytes))) {
74+
try (PDDocument document = Loader.loadPDF(new ByteArrayInputStream(fileBytes))) {
7475
return true;
7576
} catch (IOException e) {
7677
return false;
@@ -95,7 +96,7 @@ public static void validateUrl(URL inputUrl) {
9596
*/
9697
public static boolean hasSourceText(byte[] fileBytes) {
9798
try {
98-
PDDocument document = PDDocument.load(new ByteArrayInputStream(fileBytes));
99+
PDDocument document = Loader.loadPDF(new ByteArrayInputStream(fileBytes));
99100
PDFTextStripper stripper = new PDFTextStripper();
100101

101102
for (int i = 0; i < document.getNumberOfPages(); i++) {

src/main/java/com/mindee/pdf/PDFUtils.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import java.io.IOException;
99
import java.util.ArrayList;
1010
import java.util.List;
11+
import org.apache.pdfbox.Loader;
1112
import org.apache.pdfbox.cos.COSDictionary;
1213
import org.apache.pdfbox.cos.COSName;
1314
import org.apache.pdfbox.pdmodel.PDDocument;
@@ -37,7 +38,7 @@ private PDFUtils() {
3738
* @param inputSource The PDF file.
3839
*/
3940
public static int getNumberOfPages(LocalInputSource inputSource) throws IOException {
40-
PDDocument document = PDDocument.load(inputSource.getFile());
41+
PDDocument document = Loader.loadPDF(inputSource.getFile());
4142
int pageCount = document.getNumberOfPages();
4243
document.close();
4344
return pageCount;
@@ -81,7 +82,7 @@ private static byte[] createPdfFromExistingPdf(
8182
* @param pageNumbers List of page numbers to merge together.
8283
*/
8384
public static byte[] mergePdfPages(File file, List<Integer> pageNumbers) throws IOException {
84-
PDDocument document = PDDocument.load(file);
85+
PDDocument document = Loader.loadPDF(file);
8586
return createPdfFromExistingPdf(document, pageNumbers, true);
8687
}
8788

@@ -100,7 +101,7 @@ public static byte[] mergePdfPages(
100101

101102

102103
public static boolean isPdfEmpty(File file) throws IOException {
103-
return checkIfPdfIsEmpty(PDDocument.load(file));
104+
return checkIfPdfIsEmpty(Loader.loadPDF(file));
104105
}
105106

106107
private static boolean checkIfPdfIsEmpty(PDDocument document) throws IOException {
@@ -143,7 +144,7 @@ public static List<PdfPageImage> pdfToImages(String filePath) throws IOException
143144
* @return List of all pages as images.
144145
*/
145146
public static List<PdfPageImage> pdfToImages(LocalInputSource source) throws IOException {
146-
PDDocument document = PDDocument.load(source.getFile());
147+
PDDocument document = Loader.loadPDF(source.getFile());
147148
PDFRenderer pdfRenderer = new PDFRenderer(document);
148149
List<PdfPageImage> pdfPageImages = new ArrayList<>();
149150
for (int i = 0; i < document.getNumberOfPages(); i++) {
@@ -182,7 +183,7 @@ public static PdfPageImage pdfPageToImage(
182183
int pageNumber
183184
) throws IOException {
184185
int index = pageNumber - 1;
185-
PDDocument document = PDDocument.load(source.getFile());
186+
PDDocument document = Loader.loadPDF(source.getFile());
186187
PDFRenderer pdfRenderer = new PDFRenderer(document);
187188
BufferedImage imageBuffer = pdfPageToImageBuffer(index, document, pdfRenderer);
188189
document.close();

src/main/java/com/mindee/pdf/PdfBoxApi.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import java.util.stream.Collectors;
1313
import java.util.stream.IntStream;
1414
import java.util.stream.Stream;
15+
import org.apache.pdfbox.Loader;
1516
import org.apache.pdfbox.pdmodel.PDDocument;
1617

1718
/**
@@ -26,7 +27,7 @@ public SplitPdf split(SplitQuery splitQuery) throws IOException {
2627
throw new MindeeException("This document cannot be open and cannot be split.");
2728
}
2829

29-
try (PDDocument originalDocument = PDDocument.load(splitQuery.getFile())) {
30+
try (PDDocument originalDocument = Loader.loadPDF(splitQuery.getFile())) {
3031
try (PDDocument splitDocument = new PDDocument()) {
3132
int totalOriginalPages = countPages(splitQuery.getFile());
3233

@@ -73,7 +74,7 @@ private List<Integer> getPageRanges(PageOptions pageOptions, Integer numberOfPag
7374
private boolean checkPdfOpen(byte[] documentFile) {
7475
boolean opens = false;
7576
try {
76-
PDDocument.load(documentFile).close();
77+
Loader.loadPDF(documentFile).close();
7778
opens = true;
7879
} catch (IOException e) {
7980
e.printStackTrace();
@@ -82,7 +83,7 @@ private boolean checkPdfOpen(byte[] documentFile) {
8283
}
8384

8485
private int countPages(byte[] documentFile) throws IOException {
85-
PDDocument document = PDDocument.load(documentFile);
86+
PDDocument document = Loader.loadPDF(documentFile);
8687
int pageCount = document.getNumberOfPages();
8788
document.close();
8889
return pageCount;

src/main/java/com/mindee/pdf/PdfCompressor.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import java.awt.image.BufferedImage;
77
import java.io.IOException;
8+
import org.apache.pdfbox.Loader;
89
import org.apache.pdfbox.pdmodel.PDDocument;
910
import org.apache.pdfbox.pdmodel.PDPage;
1011
import org.apache.pdfbox.pdmodel.PDPageContentStream;
@@ -38,7 +39,7 @@ public static byte[] compressPdf(
3839
"MINDEE WARNING: Found text inside of the provided PDF file. Compression operation aborted.");
3940
return pdfData;
4041
}
41-
try (PDDocument inputDoc = PDDocument.load(pdfData);
42+
try (PDDocument inputDoc = Loader.loadPDF(pdfData);
4243
PDDocument outputDoc = new PDDocument()) {
4344

4445
PDFRenderer pdfRenderer = new PDFRenderer(inputDoc);

0 commit comments

Comments
 (0)