Skip to content

Commit 0700585

Browse files
Fix the incorrectly annotated PDF when an exception happens (#102)
1 parent 9ff9afb commit 0700585

File tree

6 files changed: 22 additions (+22) and 17 deletions (-17)

java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/html/HtmlGenerator.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@ public void writeToHtml(List<List<IObject>> contents) {
7878

7979
htmlWriter.write("\n</body>\n</html>");
8080
LOGGER.log(Level.INFO, "Created {0}", htmlFilePath);
81-
} catch (IOException e) {
81+
} catch (Exception e) {
8282
LOGGER.log(Level.WARNING, "Unable to create html output: " + e.getMessage());
8383
}
8484
}

java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/json/JsonWriter.java

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,8 +51,10 @@ public static void writeToJson(File inputPDF, String outputFolder, List<List<IOb
5151

5252
jsonGenerator.writeEndArray();
5353
jsonGenerator.writeEndObject();
54+
LOGGER.log(Level.INFO, "Created {0}", jsonFileName);
55+
} catch (Exception ex) {
56+
LOGGER.log(Level.WARNING, "Unable to create JSON output: " + ex.getMessage());
5457
}
55-
LOGGER.log(Level.INFO, "Created {0}", jsonFileName);
5658
}
5759

5860
private static void writeDocumentInfo(JsonGenerator generator, String pdfName) throws IOException {

java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/markdown/MarkdownGenerator.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,7 @@ public void writeToMarkdown(List<List<IObject>> contents) {
7070
}
7171

7272
LOGGER.log(Level.INFO, "Created {0}", markdownFileName);
73-
} catch (IOException e) {
73+
} catch (Exception e) {
7474
LOGGER.log(Level.WARNING, "Unable to create markdown output: " + e.getMessage());
7575
}
7676
}

java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/pdf/PDFWriter.java

Lines changed: 14 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -45,13 +45,13 @@
4545

4646
public class PDFWriter {
4747

48-
private static final Map<PDFLayer, PDOptionalContentGroup> optionalContents = new HashMap<>();
49-
50-
private static final List<List<PDAnnotation>> annotations = new ArrayList<>();
5148
private static final Logger LOGGER = Logger.getLogger(PDFWriter.class.getCanonicalName());
52-
private static final List<BoundingBox> pageBoundingBoxes = new ArrayList<>();
5349

54-
public static void updatePDF(File inputPDF, String password, String outputFolder, List<List<IObject>> contents) throws IOException {
50+
private final Map<PDFLayer, PDOptionalContentGroup> optionalContents = new HashMap<>();
51+
private final List<List<PDAnnotation>> annotations = new ArrayList<>();
52+
private final List<BoundingBox> pageBoundingBoxes = new ArrayList<>();
53+
54+
public void updatePDF(File inputPDF, String password, String outputFolder, List<List<IObject>> contents) throws IOException {
5555
try (PDDocument document = Loader.loadPDF(inputPDF, password)) {
5656
for (int pageNumber = 0; pageNumber < StaticContainers.getDocument().getNumberOfPages(); pageNumber++) {
5757
annotations.add(new ArrayList<>());
@@ -74,14 +74,16 @@ public static void updatePDF(File inputPDF, String password, String outputFolder
7474
inputPDF.getName().substring(0, inputPDF.getName().length() - 4) + "_annotated.pdf";
7575
document.save(outputFileName);
7676
LOGGER.log(Level.INFO, "Created {0}", outputFileName);
77+
} catch (Exception ex) {
78+
LOGGER.log(Level.WARNING, "Unable to create annotated PDF output: " + ex.getMessage());
7779
}
7880
}
7981

80-
private static void drawContent(IObject content, PDFLayer layer) throws IOException {
82+
private void drawContent(IObject content, PDFLayer layer) throws IOException {
8183
drawContent(content, layer, null);
8284
}
8385

84-
private static void drawContent(IObject content, PDFLayer layer, PDAnnotation linkedAnnot) throws IOException {
86+
private void drawContent(IObject content, PDFLayer layer, PDAnnotation linkedAnnot) throws IOException {
8587
if ((content instanceof LineChunk)) {
8688
return;
8789
}
@@ -98,7 +100,7 @@ private static void drawContent(IObject content, PDFLayer layer, PDAnnotation li
98100
}
99101
}
100102

101-
private static void drawTableCells(TableBorder table, PDAnnotation annot) throws IOException {
103+
private void drawTableCells(TableBorder table, PDAnnotation annot) throws IOException {
102104
if (table.isTextBlock()) {
103105
for (IObject content : table.getCell(0, 0).getContents()) {
104106
drawContent(content, PDFLayer.TEXT_BLOCK_CONTENT);
@@ -127,7 +129,7 @@ private static void drawTableCells(TableBorder table, PDAnnotation annot) throws
127129
}
128130
}
129131

130-
private static void drawListItems(PDFList list, PDAnnotation annot) throws IOException {
132+
private void drawListItems(PDFList list, PDAnnotation annot) throws IOException {
131133
for (ListItem listItem : list.getListItems()) {
132134
String contentValue = String.format("List item: text content \"%s\"", listItem.toString());
133135
draw(listItem.getBoundingBox(), getColor(SemanticType.LIST), contentValue, null, annot, listItem.getLevel(), PDFLayer.LIST_ITEMS);
@@ -137,7 +139,7 @@ private static void drawListItems(PDFList list, PDAnnotation annot) throws IOExc
137139
}
138140
}
139141

140-
public static PDAnnotation draw(BoundingBox boundingBox, float[] colorArray,
142+
public PDAnnotation draw(BoundingBox boundingBox, float[] colorArray,
141143
String contents, Long id, PDAnnotation linkedAnnot, String level, PDFLayer layerName) {
142144
if (!Objects.equals(boundingBox.getPageNumber(), boundingBox.getLastPageNumber())) {
143145
if (boundingBox instanceof MultiBoundingBox) {
@@ -273,7 +275,7 @@ public static float[] getColor(SemanticType semanticType) {
273275
return null;
274276
}
275277

276-
private static void createOptContentsForAnnotations(PDDocument document) {
278+
private void createOptContentsForAnnotations(PDDocument document) {
277279
if (optionalContents.isEmpty()) {
278280
return;
279281
}
@@ -291,7 +293,7 @@ private static void createOptContentsForAnnotations(PDDocument document) {
291293
optionalContents.clear();
292294
}
293295

294-
public static PDOptionalContentGroup getOptionalContent(PDFLayer layer) {
296+
public PDOptionalContentGroup getOptionalContent(PDFLayer layer) {
295297
PDOptionalContentGroup group = optionalContents.get(layer);
296298
if (group == null) {
297299
COSDictionary cosDictionary = new COSDictionary();

java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/processors/DocumentProcessor.java

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -88,7 +88,8 @@ private static void generateOutputs(String inputPdfName, List<List<IObject>> con
8888
File inputPDF = new File(inputPdfName);
8989
new File(config.getOutputFolder()).mkdirs();
9090
if (config.isGeneratePDF()) {
91-
PDFWriter.updatePDF(inputPDF, config.getPassword(), config.getOutputFolder(), contents);
91+
PDFWriter pdfWriter = new PDFWriter();
92+
pdfWriter.updatePDF(inputPDF, config.getPassword(), config.getOutputFolder(), contents);
9293
}
9394
if (config.isGenerateJSON()) {
9495
JsonWriter.writeToJson(inputPDF, config.getOutputFolder(), contents);

java/opendataloader-pdf-core/src/main/java/org/opendataloader/pdf/text/TextGenerator.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ public void writeToText(List<List<IObject>> contents) {
5757
}
5858
}
5959
LOGGER.log(Level.INFO, "Created {0}", textFileName);
60-
} catch (IOException e) {
60+
} catch (Exception e) {
6161
LOGGER.log(Level.WARNING, "Unable to create text output: " + e.getMessage());
6262
}
6363
}

0 commit comments

Comments
 (0)