Skip to content

Commit 515720d

Browse files
lib: remove Latin-1 fast path from TextDecoder
The Latin-1 fast path was incorrectly enabled only for windows-1252 encoding, which differs from ISO-8859-1 (Latin-1) in the 0x80-0x9F range. Since windows-1252 cannot use the Latin-1 fast path (it requires different character mappings via ICU), and no other encoding uses it, the entire Latin-1 fast path mechanism has been removed. This simplifies the code while fixing the windows-1252 decoding issue. Windows-1252 now correctly uses the ICU decoder for all characters. Fixes: #56542
1 parent 24a2207 commit 515720d

File tree

2 files changed

+1
-60
lines changed

2 files changed

+1
-60
lines changed

lib/internal/encoding.js

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ const kEncoding = Symbol('encoding');
2828
const kDecoder = Symbol('decoder');
2929
const kFatal = Symbol('kFatal');
3030
const kUTF8FastPath = Symbol('kUTF8FastPath');
31-
const kLatin1FastPath = Symbol('kLatin1FastPath');
3231
const kIgnoreBOM = Symbol('kIgnoreBOM');
3332

3433
const {
@@ -420,15 +419,9 @@ function makeTextDecoderICU() {
420419
this[kFatal] = Boolean(options?.fatal);
421420
// Only support fast path for UTF-8.
422421
this[kUTF8FastPath] = enc === 'utf-8';
423-
// Disable Latin-1 fast path for windows-1252 as it differs from ISO-8859-1
424-
// in the 0x80-0x9F range. The fast path uses simdutf which directly maps
425-
// bytes to Unicode codepoints (e.g., 0x92 → U+0092), but windows-1252
426-
// requires different mappings (e.g., 0x92 → U+2019 '). The ICU decoder
427-
// handles these mappings correctly.
428-
this[kLatin1FastPath] = false;
429422
this[kHandle] = undefined;
430423

431-
if (!this[kUTF8FastPath] && !this[kLatin1FastPath]) {
424+
if (!this[kUTF8FastPath]) {
432425
this.#prepareConverter();
433426
}
434427
}
@@ -445,16 +438,11 @@ function makeTextDecoderICU() {
445438
validateDecoder(this);
446439

447440
this[kUTF8FastPath] &&= !(options?.stream);
448-
this[kLatin1FastPath] &&= !(options?.stream);
449441

450442
if (this[kUTF8FastPath]) {
451443
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
452444
}
453445

454-
if (this[kLatin1FastPath]) {
455-
return decodeLatin1(input, this[kIgnoreBOM], this[kFatal]);
456-
}
457-
458446
this.#prepareConverter();
459447

460448
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

test/parallel/test-whatwg-encoding-custom-windows-1252.js

Lines changed: 0 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -83,50 +83,3 @@ const assert = require('assert');
8383
);
8484
}
8585
}
86-
87-
// Test that common Windows-1252 encoding aliases work correctly
88-
// Per WHATWG Encoding Standard, many encodings map to windows-1252
89-
{
90-
const aliases = [
91-
'windows-1252',
92-
'cp1252',
93-
'x-cp1252',
94-
'iso-8859-1', // Per WHATWG spec, iso-8859-1 maps to windows-1252
95-
'latin1', // Per WHATWG spec, latin1 maps to windows-1252
96-
'ascii', // Per WHATWG spec, ascii maps to windows-1252
97-
];
98-
const testByte = 0x92; // Right single quotation mark
99-
const expected = '\u2019';
100-
101-
for (const alias of aliases) {
102-
const decoder = new TextDecoder(alias);
103-
const decoded = decoder.decode(new Uint8Array([testByte]));
104-
assert.strictEqual(
105-
decoded,
106-
expected,
107-
`Encoding alias '${alias}' should decode 0x92 to U+2019 (per WHATWG spec)`
108-
);
109-
}
110-
}
111-
112-
// Test a realistic Windows-1252 text sample
113-
{
114-
const decoder = new TextDecoder('windows-1252');
115-
116-
// "It's a "quote" — with €100"
117-
const bytes = [
118-
0x49, 0x74, 0x92, 0x73, 0x20, 0x61, 0x20, // It's a
119-
0x93, 0x71, 0x75, 0x6F, 0x74, 0x65, 0x94, 0x20, // "quote"
120-
0x97, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, // — with
121-
0x80, 0x31, 0x30, 0x30, // €100
122-
];
123-
124-
const expected = 'It\u2019s a \u201Cquote\u201D \u2014 with \u20AC100';
125-
const decoded = decoder.decode(new Uint8Array(bytes));
126-
127-
assert.strictEqual(
128-
decoded,
129-
expected,
130-
'Realistic Windows-1252 text should decode correctly'
131-
);
132-
}

0 commit comments

Comments
 (0)