diff --git a/CHANGELOG.md b/CHANGELOG.md index eadd1cd2..06fcded2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ### Unreleased +- Fix garbled text copying in Chrome/Edge for PDFs with >256 unique characters (#1659) + ### [v0.17.2] - 2025-08-30 - Fix rendering lists that spans across pages diff --git a/lib/font/embedded.js b/lib/font/embedded.js index da64efc2..79720686 100644 --- a/lib/font/embedded.js +++ b/lib/font/embedded.js @@ -274,7 +274,7 @@ begincmap 1 begincodespacerange <0000><ffff> endcodespacerange -1 beginbfrange +${ranges.length} beginbfrange ${ranges.join('\n')} endbfrange endcmap diff --git a/tests/unit/font.spec.js b/tests/unit/font.spec.js index ae3c8f2c..c8652ad2 100644 --- a/tests/unit/font.spec.js +++ b/tests/unit/font.spec.js @@ -98,6 +98,108 @@ describe('EmbeddedFont', () => { expect(glyphs).toBe(398 + 1); }); + + test('beginbfrange count should match actual number of ranges', () => { + const doc = new PDFDocument({ compress: false }); + const font = PDFFontFactory.open( + doc, + 'tests/fonts/Roboto-Regular.ttf', + undefined, + 'F1099', + ); + + // Generate more than 256 unique characters to trigger multiple bfrange entries + // Each chunk is 256 characters, so we need >256 to get multiple ranges + const chars = []; + + // Add printable ASCII characters (32-126) + for (let i = 32; i < 127; i++) { + chars.push(String.fromCharCode(i)); + } + + // Add Latin-1 Supplement characters (160-255) + for (let i = 160; i < 256; i++) { + chars.push(String.fromCharCode(i)); + } + + // Add additional Unicode characters to exceed 256 + const additionalChars = + 'ÁÀÂÄÅÃÆÇÐÉÈÊËÍÌÎÏÑÓÒÔÖÕØŒÞÚÙÛÜÝŸáàâäãåæçðéèêëíìîïıñóòôöõøœßþúùûüýÿĀĂĄĆČĎĐĒĖĘĚĞĢĪĮİĶŁĹĻĽŃŅŇŌŐŔŖŘŠŚŞȘŢȚŤŪŮŰŲŽŹŻāăąćčďđēėęěğģīįķłĺļľńņňōőŕŗřšśşșţțťūůűųžźż'; + + const allChars = chars.join('') + additionalChars; + font.encode(allChars); + + const docData = logData(doc); + font.toUnicodeCmap(); + const text = docData.map((d) => d.toString('utf8')).join(''); + + // Extract the count declaration from "N 
beginbfrange" + const beginbfrangeMatch = text.match(/(\d+)\s+beginbfrange/); + expect(beginbfrangeMatch).not.toBeNull(); + const declaredCount = parseInt(beginbfrangeMatch[1], 10); + + // Count actual bfrange entries + let actualRangeCount = 0; + const bfrangeBlockMatch = text.match( + /beginbfrange\n((?:.|\n)*?)\nendbfrange/, + ); + if (bfrangeBlockMatch) { + const bfrangeContent = bfrangeBlockMatch[1]; + // Match each bfrange line: <start> <end> [entries] + const rangeMatches = bfrangeContent.matchAll( + /^<([0-9a-f]+)>\s+<([0-9a-f]+)>\s+\[/gm, + ); + actualRangeCount = [...rangeMatches].length; + } + + // The declared count must match the actual number of ranges + expect(declaredCount).toBe(actualRangeCount); + expect(actualRangeCount).toBeGreaterThan(1); // Should have multiple ranges when >256 chars + }); + + test('beginbfrange count should be 1 for fonts with <=256 characters', () => { + const doc = new PDFDocument({ compress: false }); + const font = PDFFontFactory.open( + doc, + 'tests/fonts/Roboto-Regular.ttf', + undefined, + 'F1099', + ); + + // Generate exactly 256 characters + const chars = []; + for (let i = 0; i < 256; i++) { + chars.push(String.fromCharCode(i + 32)); // Start from space (32) to skip the C0 control chars (0-31) + } + font.encode(chars.join('')); + + const docData = logData(doc); + font.toUnicodeCmap(); + const text = docData.map((d) => d.toString('utf8')).join(''); + + // Extract the count declaration + const beginbfrangeMatch = text.match(/(\d+)\s+beginbfrange/); + expect(beginbfrangeMatch).not.toBeNull(); + const declaredCount = parseInt(beginbfrangeMatch[1], 10); + + // Count actual bfrange entries + let actualRangeCount = 0; + const bfrangeBlockMatch = text.match( + /beginbfrange\n((?:.|\n)*?)\nendbfrange/, + ); + if (bfrangeBlockMatch) { + const bfrangeContent = bfrangeBlockMatch[1]; + const rangeMatches = bfrangeContent.matchAll( + /^<([0-9a-f]+)>\s+<([0-9a-f]+)>\s+\[/gm, + ); + actualRangeCount = [...rangeMatches].length; + } + + // For <=256 
characters, should have exactly 1 range + expect(declaredCount).toBe(1); + expect(actualRangeCount).toBe(1); + expect(declaredCount).toBe(actualRangeCount); + }); }); });