diff --git a/package-lock.json b/package-lock.json index 7fb891d61..14de8c1d0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,14 +9,15 @@ "version": "4.27.1", "license": "MIT", "dependencies": { + "@cantoo/pdf-lib": "^2.3.2", "commander": "~9.4.1", "file-type": "~16.5.4", "form-data": "~3.0.1", "node-poppler": "^7.2.2", - "pdf-lib": "^1.17.1", "pdf.js-extract": "^0.2.1", "sharp": "^0.33.5", - "tmp": "^0.2.3" + "tmp": "^0.2.3", + "tslib": "^2.8.1" }, "bin": { "mindee": "bin/mindee.js" @@ -41,6 +42,20 @@ "node": ">= 16" } }, + "node_modules/@cantoo/pdf-lib": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/@cantoo/pdf-lib/-/pdf-lib-2.3.2.tgz", + "integrity": "sha512-mTDE/pRsg+dj1+arXD07HQS4Rkjtw5rN9Rn7ZdNo5Fjz2703SC8ea5SV5PMncWd6aNM5yrDm0P177YvEsjd8TQ==", + "license": "MIT", + "dependencies": { + "@pdf-lib/standard-fonts": "^1.0.0", + "@pdf-lib/upng": "^1.0.1", + "color": "^4.2.3", + "crypto-js": "^4.2.0", + "node-html-better-parser": "^1.4.0", + "pako": "^1.0.11" + } + }, "node_modules/@cspotcode/source-map-support": { "version": "0.8.1", "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", @@ -64,13 +79,6 @@ "tslib": "^2.4.0" } }, - "node_modules/@emnapi/runtime/node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD", - "optional": true - }, "node_modules/@es-joy/jsdoccomment": { "version": "0.49.0", "resolved": "https://registry.npmjs.org/@es-joy/jsdoccomment/-/jsdoccomment-0.49.0.tgz", @@ -1778,6 +1786,12 @@ "node": ">= 8" } }, + "node_modules/crypto-js": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/crypto-js/-/crypto-js-4.2.0.tgz", + "integrity": "sha512-KALDyEYgpY+Rlob/iriUtjV6d5Eq+Y191A5g4UqLAi8CyGP9N1+FdVbkc1SxKc2r4YAYqG8JzO2KGL+AizD70Q==", + "license": "MIT" + }, "node_modules/debug": { "version": "4.4.0", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.0.tgz", @@ -2679,6 +2693,22 @@ "he": "bin/he" } }, + "node_modules/html-entities": { + "version": "2.6.0", + "resolved": "https://registry.npmjs.org/html-entities/-/html-entities-2.6.0.tgz", + "integrity": "sha512-kig+rMn/QOVRvr7c86gQ8lWXq+Hkv6CbAH1hLu+RG338StTpE8Z0b44SDVaqVu7HGKf27frdmUYEs9hTUX/cLQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/mdevils" + }, + { + "type": "patreon", + "url": "https://patreon.com/mdevils" + } + ], + "license": "MIT" + }, "node_modules/html-void-elements": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/html-void-elements/-/html-void-elements-3.0.0.tgz", @@ -3326,6 +3356,15 @@ "node": ">= 10.13" } }, + "node_modules/node-html-better-parser": { + "version": "1.4.11", + "resolved": "https://registry.npmjs.org/node-html-better-parser/-/node-html-better-parser-1.4.11.tgz", + "integrity": "sha512-rXYKBD30q6Iw/Y8o2ueJILB29Z3RrqQQN/U2PDg4Iz2TSZP/KmqdcGp+pl8o5uYZlvIKh+A4UfOK98pKIb3cRw==", + "license": "MIT", + "dependencies": { + "html-entities": "^2.3.2" + } + }, "node_modules/node-poppler": { "version": "7.2.4", "resolved": "https://registry.npmjs.org/node-poppler/-/node-poppler-7.2.4.tgz", @@ -3504,18 +3543,6 @@ "node": "*" } }, - "node_modules/pdf-lib": { - "version": "1.17.1", - "resolved": "https://registry.npmjs.org/pdf-lib/-/pdf-lib-1.17.1.tgz", - "integrity": "sha512-V/mpyJAoTsN4cnP31vc0wfNA1+p20evqqnap0KLoRUN0Yk/p3wN52DOEsL4oBFcLdb76hlpKPtzJIgo67j/XLw==", - "license": "MIT", - "dependencies": { - "@pdf-lib/standard-fonts": "^1.0.0", - "@pdf-lib/upng": "^1.0.1", - "pako": "^1.0.11", - "tslib": "^1.11.1" - } - }, "node_modules/pdf.js-extract": { "version": "0.2.1", "resolved": "https://registry.npmjs.org/pdf.js-extract/-/pdf.js-extract-0.2.1.tgz", @@ -4242,9 +4269,9 @@ } }, "node_modules/tslib": { - "version": "1.14.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz", - "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==", + "version": "2.8.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", + "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", "license": "0BSD" }, "node_modules/type-check": { diff --git a/package.json b/package.json index d44be8e98..88b890051 100644 --- a/package.json +++ b/package.json @@ -56,14 +56,15 @@ "typescript": "^5.6.3" }, "dependencies": { + "@cantoo/pdf-lib": "^2.3.2", "commander": "~9.4.1", "file-type": "~16.5.4", "form-data": "~3.0.1", "node-poppler": "^7.2.2", - "pdf-lib": "^1.17.1", "pdf.js-extract": "^0.2.1", "sharp": "^0.33.5", - "tmp": "^0.2.3" + "tmp": "^0.2.3", + "tslib": "^2.8.1" }, "keywords": [ "typescript", diff --git a/src/imageOperations/common/imageExtractor.ts b/src/imageOperations/common/imageExtractor.ts index 4583e79b6..013b68b70 100644 --- a/src/imageOperations/common/imageExtractor.ts +++ b/src/imageOperations/common/imageExtractor.ts @@ -1,4 +1,4 @@ -import { PDFDocument, PDFPage } from "pdf-lib"; +import { PDFDocument, PDFPage } from "@cantoo/pdf-lib"; import { getMinMaxX, getMinMaxY, Polygon } from "../../geometry"; /** diff --git a/src/imageOperations/invoiceSplitterExtractor/invoiceSplitterExtractor.ts b/src/imageOperations/invoiceSplitterExtractor/invoiceSplitterExtractor.ts index fd8960314..83c4303af 100644 --- a/src/imageOperations/invoiceSplitterExtractor/invoiceSplitterExtractor.ts +++ b/src/imageOperations/invoiceSplitterExtractor/invoiceSplitterExtractor.ts @@ -1,4 +1,4 @@ -import { PDFDocument } from "pdf-lib"; +import { PDFDocument } from "@cantoo/pdf-lib"; import { MindeeError, MindeeMimeTypeError } from "../../errors"; import { InvoiceSplitterV1 } from "../../product"; import { LocalInputSource } from "../../input"; @@ -35,7 +35,10 @@ async function getPdfDoc(inputFile: LocalInputSource): Promise { throw new MindeeMimeTypeError("Invoice Splitter is only compatible with pdf documents."); } - const pdfDoc = await PDFDocument.load(inputFile.fileObject); + const pdfDoc = await PDFDocument.load(inputFile.fileObject, { + ignoreEncryption: true, + password: "" + }); if (pdfDoc.getPageCount() < 2) { throw new MindeeError("Invoice Splitter is only compatible with multi-page-pdf documents."); } diff --git a/src/imageOperations/multiReceiptsExtractor/multiReceiptsExtractor.ts b/src/imageOperations/multiReceiptsExtractor/multiReceiptsExtractor.ts index 5559d28c2..e88c933af 100644 --- a/src/imageOperations/multiReceiptsExtractor/multiReceiptsExtractor.ts +++ b/src/imageOperations/multiReceiptsExtractor/multiReceiptsExtractor.ts @@ -1,4 +1,4 @@ -import { PDFDocument, PDFImage, PDFPage } from "pdf-lib"; +import { PDFDocument, PDFImage, PDFPage } from "@cantoo/pdf-lib"; import { MindeeError, MindeeMimeTypeError } from "../../errors"; import { Polygon } from "../../geometry"; import { MultiReceiptsDetectorV1 } from "../../product"; @@ -37,7 +37,10 @@ async function loadPdfDoc(inputFile: LocalInputSource) { '" Currently supported types are .png, .jpg and .pdf' ); } else if (inputFile.isPdf()) { - pdfDoc = await PDFDocument.load(inputFile.fileObject); + pdfDoc = await PDFDocument.load(inputFile.fileObject, { + ignoreEncryption: true, + password: "" + }); } else { pdfDoc = await PDFDocument.create(); let image: PDFImage; diff --git a/src/pdf/pdfCompressor.ts b/src/pdf/pdfCompressor.ts index 68fda53cf..d9dae77a8 100644 --- a/src/pdf/pdfCompressor.ts +++ b/src/pdf/pdfCompressor.ts @@ -3,7 +3,7 @@ import tmp from "tmp"; import { ExtractedPdfInfo, extractTextFromPdf, hasSourceText } from "./pdfUtils"; import * as fs from "node:fs"; import { Poppler } from "node-poppler"; -import { PDFDocument, PDFFont, PDFPage, rgb, StandardFonts } from "pdf-lib"; +import { PDFDocument, PDFFont, PDFPage, rgb, StandardFonts } from "@cantoo/pdf-lib"; import { compressImage } from "../imageOperations"; /** @@ -128,7 +128,10 @@ async function compressPagesWithQuality( disableSourceText: boolean, extractedText: ExtractedPdfInfo | null ): Promise { - const pdfDoc = await PDFDocument.load(pdfData); + const pdfDoc = await PDFDocument.load(pdfData, { + ignoreEncryption: true, + password: "" + }); const compressedPages: Buffer[] = []; for (let i = 0; i < extractedPdfInfo.pages.length; i++) { diff --git a/src/pdf/pdfOperation.ts b/src/pdf/pdfOperation.ts index 878d1c766..b1831cf15 100644 --- a/src/pdf/pdfOperation.ts +++ b/src/pdf/pdfOperation.ts @@ -1,5 +1,5 @@ import { errorHandler } from "../errors/handler"; -import { PDFDocument } from "pdf-lib"; +import { PDFDocument } from "@cantoo/pdf-lib"; import { PageOptions, PageOptionsOperation } from "../input"; import { MindeeError } from "../errors"; import { logger } from "../logger"; @@ -21,6 +21,7 @@ export async function extractPages( ): Promise { const currentPdf = await PDFDocument.load(file, { ignoreEncryption: true, + password: "" }); const newPdf = await PDFDocument.create(); @@ -86,6 +87,7 @@ export async function extractPages( export async function countPages(file: Buffer): Promise { const currentPdf = await PDFDocument.load(file, { ignoreEncryption: true, + password: "" }); return currentPdf.getPageCount(); } diff --git a/tests/pdf/pdfTypes.spec.ts b/tests/pdf/pdfTypes.spec.ts new file mode 100644 index 000000000..4b7dba183 --- /dev/null +++ b/tests/pdf/pdfTypes.spec.ts @@ -0,0 +1,43 @@ +import * as mindee from "../../src"; +import path from "path"; +import { expect } from "chai"; +import * as pdf from "../../src/pdf"; +import { PageOptions } from "../../src/input"; +import { PageOptionsOperation } from "../../src"; +import * as fs from "node:fs"; + +describe("Test pdf lib", () => { + let client: mindee.Client; + beforeEach(async () => { + client = new mindee.Client(); + }); + it("should open a simple XFA form PDF.", async () => { + const inputDoc = client.docFromPath(path.join(__dirname, "../data/file_types/pdf/XfaForm.pdf")); + + await inputDoc.init(); + expect(await pdf.countPages(inputDoc.fileObject)).to.eq(1); + }); + + it("should open an encrypted XFA form PDF.", async () => { + const inputDoc = client.docFromPath(path.join(__dirname, "../data/file_types/pdf/XfaForm_15p_encrypted.pdf")); + + await inputDoc.init(); + expect(await pdf.countPages(inputDoc.fileObject)).to.eq(15); + }); + + + it("should be able to perform page operations on an encrypted XFA form PDF.", async () => { + const inputDoc = client.docFromPath(path.join(__dirname, "../data/file_types/pdf/XfaForm_15p_encrypted.pdf")); + + await inputDoc.init(); + + const pageOptions: PageOptions = { + pageIndexes: [0, 1], + operation: PageOptionsOperation.KeepOnly, + onMinPages: 1, + }; + const splitPdf = await pdf.extractPages(inputDoc.fileObject, pageOptions); + expect(splitPdf.totalPagesRemoved).to.eq(13); + expect(await pdf.countPages(splitPdf.file)).to.eq(2); + }); +});