Skip to content

Commit bd1a711

Browse files
♻️ update display & structure for invoice splitter v1
1 parent 475f688 commit bd1a711

File tree

11 files changed

+179
-122
lines changed

11 files changed

+179
-122
lines changed

docs/code_samples/invoice_splitter_v1_async.txt

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,9 @@
11
import com.mindee.MindeeClient;
22
import com.mindee.input.LocalInputSource;
33
import com.mindee.parsing.common.AsyncPredictResponse;
4-
import com.mindee.parsing.common.Job;
5-
import com.mindee.parsing.common.Document;
64
import com.mindee.product.invoicesplitter.InvoiceSplitterV1;
75
import java.io.File;
86
import java.io.IOException;
9-
import java.util.Optional;
107

118
public class SimpleMindeeClient {
129

@@ -40,4 +37,5 @@ public class SimpleMindeeClient {
4037
// page -> System.out.println(page.toString())
4138
// );
4239
}
40+
4341
}

docs/financial_document_v1.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ public class SimpleMindeeClient {
107107
########
108108
Document
109109
########
110-
:Mindee ID: b26161ce-35d0-4984-b1ff-886645e160e6
110+
:Mindee ID: f469a24d-3875-4a83-ad43-e0d5aa9da604
111111
:Filename: default_sample.jpg
112112
113113
Inference
@@ -124,8 +124,8 @@ Prediction
124124
:Document Number: INT-001
125125
:Reference Numbers: 2412/2019
126126
:Purchase Date: 2019-11-02
127-
:Due Date: 2019-02-26
128-
:Payment Date: 2019-02-26
127+
:Due Date: 2019-11-17
128+
:Payment Date: 2019-11-17
129129
:Total Net: 195.00
130130
:Total Amount: 204.75
131131
:Taxes:
@@ -176,8 +176,8 @@ Page 0
176176
:Document Number: INT-001
177177
:Reference Numbers: 2412/2019
178178
:Purchase Date: 2019-11-02
179-
:Due Date: 2019-02-26
180-
:Payment Date: 2019-02-26
179+
:Due Date: 2019-11-17
180+
:Payment Date: 2019-11-17
181181
:Total Net: 195.00
182182
:Total Amount: 204.75
183183
:Taxes:

docs/invoice_splitter_v1.md

Lines changed: 42 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,22 @@
11
---
2-
title: Invoice Splitter API Java
2+
title: Invoice Splitter OCR Java
33
category: 622b805aaec68102ea7fcbc2
44
slug: java-invoice-splitter-ocr
55
parentDoc: 631a062c3718850f3519b793
66
---
77
The Java OCR SDK supports the [Invoice Splitter API](https://platform.mindee.com/mindee/invoice_splitter).
88

9-
Using [this sample](https://github.com/mindee/client-lib-test-data/blob/main/products/invoice_splitter/default_sample.pdf), we are going to illustrate how to detect the pages of multiple invoices within the same document.
9+
Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/invoice_splitter/default_sample.pdf), we are going to illustrate how to extract the data that we want using the OCR SDK.
10+
![Invoice Splitter sample](https://github.com/mindee/client-lib-test-data/blob/main/products/invoice_splitter/default_sample.pdf?raw=true)
1011

1112
# Quick-Start
12-
13-
> **⚠️ Important:** This API only works **asynchronously**, which means that documents have to be sent and retrieved in a specific way:
14-
1513
```java
1614
import com.mindee.MindeeClient;
1715
import com.mindee.input.LocalInputSource;
1816
import com.mindee.parsing.common.AsyncPredictResponse;
19-
import com.mindee.parsing.common.Job;
20-
import com.mindee.parsing.common.Document;
2117
import com.mindee.product.invoicesplitter.InvoiceSplitterV1;
2218
import java.io.File;
2319
import java.io.IOException;
24-
import java.util.Optional;
2520

2621
public class SimpleMindeeClient {
2722

@@ -37,8 +32,8 @@ public class SimpleMindeeClient {
3732

3833
// Parse the file asynchronously
3934
AsyncPredictResponse<InvoiceSplitterV1> response = mindeeClient.enqueueAndParse(
40-
InvoiceSplitterV1.class,
41-
inputSource
35+
InvoiceSplitterV1.class,
36+
inputSource
4237
);
4338

4439
// Print a summary of the response
@@ -55,69 +50,82 @@ public class SimpleMindeeClient {
5550
// page -> System.out.println(page.toString())
5651
// );
5752
}
53+
5854
}
55+
5956
```
6057

6158
**Output (RST):**
62-
6359
```rst
6460
########
6561
Document
6662
########
67-
:Mindee ID: 8c25cc63-212b-4537-9c9b-3fbd3bd0ee20
68-
:Filename: default_sample.jpg
63+
:Mindee ID: 15ad7a19-7b75-43d0-b0c6-9a641a12b49b
64+
:Filename: default_sample.pdf
6965
7066
Inference
7167
#########
72-
:Product: mindee/carte_vitale v1.0
73-
:Rotation applied: Yes
68+
:Product: mindee/invoice_splitter v1.1
69+
:Rotation applied: No
7470
7571
Prediction
7672
==========
77-
:Given Name(s): NATHALIE
78-
:Surname: DURAND
79-
:Social Security Number: 269054958815780
80-
:Issuance Date: 2007-01-01
73+
:Invoice Page Groups:
74+
:Page indexes: 0
75+
:Page indexes: 1
8176
8277
Page Predictions
8378
================
8479
8580
Page 0
8681
------
87-
:Given Name(s): NATHALIE
88-
:Surname: DURAND
89-
:Social Security Number: 269054958815780
90-
:Issuance Date: 2007-01-01
82+
:Invoice Page Groups:
83+
84+
Page 1
85+
------
86+
:Invoice Page Groups:
9187
```
9288

9389
# Field Types
90+
## Standard Fields
91+
These fields are generic and used in several products.
9492

95-
## Specific Fields
93+
### BaseField
94+
Each prediction object contains a set of fields that inherit from the generic `BaseField` class.
95+
A typical `BaseField` object will have the following attributes:
9696

97-
### Page Indexes
97+
* **confidence** (`Double`): the confidence score of the field prediction.
98+
* **boundingBox** (`Polygon`): contains exactly 4 relative vertex (point) coordinates of a right rectangle containing the field in the document.
99+
* **polygon** (`Polygon`): contains the relative vertex coordinates (`polygon` extends `List<Point>`) of a polygon containing the field in the image.
100+
* **pageId** (`Integer`): the ID of the page, always `null` when at document-level.
98101

99-
List of page group indexes.
102+
> **Note:** A `Point` simply refers to a List of `Double`.
100103
101-
A `PageIndexes` implements the following attributes:
102104

103-
- **pageIndexes** (`List<Integer>`): List of indexes of the pages of a single invoice.
104-
- **confidence** (`Double`): The confidence of the prediction.
105+
Aside from the previous attributes, all basic fields have access to a custom `toString` method that can be used to print their value as a string.
105106

106-
# Attributes
107+
## Specific Fields
108+
Fields which are specific to this product; they are not used in any other product.
109+
110+
### Invoice Page Groups Field
111+
List of page groups. Each group represents a single invoice within a multi-invoice document.
112+
113+
An `InvoiceSplitterV1InvoicePageGroup` implements the following attributes:
107114

115+
* **pageIndexes** (`List<Integer>`): List of page indexes that belong to the same invoice (group).
116+
117+
# Attributes
108118
The following fields are extracted for Invoice Splitter V1:
109119

110120
## Invoice Page Groups
111-
112-
**invoicePageGroups** (`List<`[invoicePageGroups](#page-indexes)`>`): List of page indexes that belong to the same invoice in the PDF.
121+
**invoicePageGroups** (`List<`[InvoiceSplitterV1InvoicePageGroup](#invoice-page-groups-field)`>`): List of page groups. Each group represents a single invoice within a multi-invoice document.
113122

114123
```java
115124
for (invoicePageGroupsElem : result.getDocument().getInference().getPrediction().getInvoicePageGroups())
116125
{
117-
System.out.println(invoicePageGroupsElem);
126+
System.out.println(invoicePageGroupsElem.value);
118127
}
119128
```
120129

121130
# Questions?
122-
123131
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)

docs/invoices_v4.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ public class SimpleMindeeClient {
107107
########
108108
Document
109109
########
110-
:Mindee ID: a67b70ea-4b1e-4eac-ae75-dda47a7064ae
110+
:Mindee ID: 86b1833f-138b-4a01-8387-860204b0e631
111111
:Filename: default_sample.jpg
112112
113113
Inference
@@ -122,8 +122,8 @@ Prediction
122122
:Purchase Order Number: AD29094
123123
:Reference Numbers: AD29094
124124
:Purchase Date: 2018-09-25
125-
:Due Date: 2011-12-01
126-
:Payment Date: 2011-12-01
125+
:Due Date:
126+
:Payment Date:
127127
:Total Net: 2145.00
128128
:Total Amount: 2608.20
129129
:Total Tax: 193.20
@@ -168,8 +168,8 @@ Page 0
168168
:Purchase Order Number: AD29094
169169
:Reference Numbers: AD29094
170170
:Purchase Date: 2018-09-25
171-
:Due Date: 2011-12-01
172-
:Payment Date: 2011-12-01
171+
:Due Date:
172+
:Payment Date:
173173
:Total Net: 2145.00
174174
:Total Amount: 2608.20
175175
:Total Tax: 193.20

src/main/java/com/mindee/extraction/PDFExtractor.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import com.mindee.MindeeException;
66
import com.mindee.input.InputSourceUtils;
77
import com.mindee.input.LocalInputSource;
8-
import com.mindee.product.invoicesplitter.InvoiceSplitterV1Document;
8+
import com.mindee.product.invoicesplitter.InvoiceSplitterV1InvoicePageGroup;
99
import java.awt.image.BufferedImage;
1010
import java.io.ByteArrayInputStream;
1111
import java.io.IOException;
@@ -118,11 +118,11 @@ public List<ExtractedPDF> extractSubDocuments(List<List<Integer>> pageIndexes)
118118
* @return a list of extracted files.
119119
* @throws IOException Throws if the file can't be accessed.
120120
*/
121-
public List<ExtractedPDF> extractInvoices(List<InvoiceSplitterV1Document.PageIndexes> pageIndexes)
121+
public List<ExtractedPDF> extractInvoices(List<InvoiceSplitterV1InvoicePageGroup> pageIndexes)
122122
throws IOException {
123123

124124
List<List<Integer>> indexes =
125-
pageIndexes.stream().map(InvoiceSplitterV1Document.PageIndexes::getPageIndexes)
125+
pageIndexes.stream().map(InvoiceSplitterV1InvoicePageGroup::getPageIndexes)
126126
.collect(Collectors.toList());
127127

128128

@@ -138,17 +138,17 @@ public List<ExtractedPDF> extractInvoices(List<InvoiceSplitterV1Document.PageInd
138138
* @return a list of extracted files.
139139
* @throws IOException Throws if the file can't be accessed.
140140
*/
141-
public List<ExtractedPDF> extractInvoices(List<InvoiceSplitterV1Document.PageIndexes> pageIndexes,
141+
public List<ExtractedPDF> extractInvoices(List<InvoiceSplitterV1InvoicePageGroup> pageIndexes,
142142
boolean strict) throws IOException {
143143
List<List<Integer>> correctPageIndexes = new ArrayList<>();
144144
if (!strict) {
145145
return extractInvoices(pageIndexes);
146146
}
147-
Iterator<InvoiceSplitterV1Document.PageIndexes> iterator = pageIndexes.iterator();
147+
Iterator<InvoiceSplitterV1InvoicePageGroup> iterator = pageIndexes.iterator();
148148
List<Integer> currentList = new ArrayList<>();
149149
Double previousConfidence = null;
150150
while (iterator.hasNext()) {
151-
InvoiceSplitterV1Document.PageIndexes pageIndex = iterator.next();
151+
InvoiceSplitterV1InvoicePageGroup pageIndex = iterator.next();
152152
Double confidence = pageIndex.getConfidence();
153153
List<Integer> pageList = pageIndex.getPageIndexes();
154154

src/main/java/com/mindee/product/ind/indianpassport/IndianPassportV1Document.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import lombok.Getter;
1212

1313
/**
14-
* Passport - India API version 1.1 document data.
14+
* Passport - India API version 1.2 document data.
1515
*/
1616
@Getter
1717
@EqualsAndHashCode(callSuper = false)

src/main/java/com/mindee/product/invoicesplitter/InvoiceSplitterV1.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,11 @@
66
import lombok.Getter;
77

88
/**
9-
* The invoice splitter V1 inference.
9+
* Invoice Splitter API version 1 inference prediction.
1010
*/
1111
@Getter
1212
@JsonIgnoreProperties(ignoreUnknown = true)
1313
@EndpointInfo(endpointName = "invoice_splitter", version = "1")
14-
public class InvoiceSplitterV1 extends
15-
Inference<InvoiceSplitterV1Document, InvoiceSplitterV1Document> {
16-
14+
public class InvoiceSplitterV1
15+
extends Inference<InvoiceSplitterV1Document, InvoiceSplitterV1Document> {
1716
}

src/main/java/com/mindee/product/invoicesplitter/InvoiceSplitterV1Document.java

Lines changed: 26 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -2,67 +2,50 @@
22

33
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
44
import com.fasterxml.jackson.annotation.JsonProperty;
5+
import com.mindee.parsing.SummaryHelper;
56
import com.mindee.parsing.common.Prediction;
7+
import java.util.ArrayList;
68
import java.util.List;
7-
import java.util.stream.Collectors;
89
import lombok.EqualsAndHashCode;
910
import lombok.Getter;
1011

1112
/**
12-
* Document data for Invoice Splitter, API version 1.
13+
* Invoice Splitter API version 1.2 document data.
1314
*/
1415
@Getter
1516
@EqualsAndHashCode(callSuper = false)
1617
@JsonIgnoreProperties(ignoreUnknown = true)
1718
public class InvoiceSplitterV1Document extends Prediction {
1819

20+
/**
21+
* List of page groups. Each group represents a single invoice within a multi-invoice document.
22+
*/
1923
@JsonProperty("invoice_page_groups")
20-
private List<PageIndexes> invoicePageGroups;
21-
22-
@Override
23-
public String toString() {
24-
StringBuilder outStr = new StringBuilder();
25-
26-
outStr.append(String.format(":Invoice Page Groups: %n"));
27-
if (invoicePageGroups != null) {
28-
String pageGroupsString = this.getInvoicePageGroups().stream()
29-
.map(PageIndexes::toString)
30-
.collect(Collectors.joining(String.format("%n")));
31-
outStr.append(String.format("%s%n", pageGroupsString));
32-
}
33-
return outStr.toString();
34-
}
24+
protected List<InvoiceSplitterV1InvoicePageGroup> invoicePageGroups = new ArrayList<>();
3525

3626
@Override
3727
public boolean isEmpty() {
38-
return invoicePageGroups.isEmpty();
28+
return (
29+
(this.invoicePageGroups == null || this.invoicePageGroups.isEmpty())
30+
);
3931
}
4032

41-
/**
42-
* Represents a grouping of pages.
43-
*/
44-
@Getter
45-
@JsonIgnoreProperties(ignoreUnknown = true)
46-
public static class PageIndexes {
47-
48-
/**
49-
* The confidence about the zone of the value extracted. A value from 0 to 1.
50-
*/
51-
@JsonProperty("confidence")
52-
private Double confidence;
53-
54-
/**
55-
* The page indexes in the document that are grouped together
56-
*/
57-
@JsonProperty("page_indexes")
58-
private List<Integer> pageIndexes;
59-
60-
@Override
61-
public String toString() {
62-
return " :Page indexes: ".concat(
63-
pageIndexes.stream().map((index) -> index.toString())
64-
.collect(Collectors.joining(", "))
65-
);
33+
@Override
34+
public String toString() {
35+
StringBuilder outStr = new StringBuilder();
36+
String invoicePageGroupsSummary = "";
37+
if (!this.getInvoicePageGroups().isEmpty()) {
38+
int[] invoicePageGroupsColSizes = new int[]{74};
39+
invoicePageGroupsSummary =
40+
String.format("%n%s%n ", SummaryHelper.lineSeparator(invoicePageGroupsColSizes, "-"))
41+
+ "| Page Indexes "
42+
+ String.format("|%n%s%n ", SummaryHelper.lineSeparator(invoicePageGroupsColSizes, "="));
43+
invoicePageGroupsSummary += SummaryHelper.arrayToString(this.getInvoicePageGroups(), invoicePageGroupsColSizes);
44+
invoicePageGroupsSummary += String.format("%n%s", SummaryHelper.lineSeparator(invoicePageGroupsColSizes, "-"));
6645
}
46+
outStr.append(
47+
String.format(":Invoice Page Groups: %s%n", invoicePageGroupsSummary)
48+
);
49+
return SummaryHelper.cleanSummary(outStr.toString());
6750
}
6851
}

0 commit comments

Comments
 (0)