diff --git a/src/libraries/Common/src/System/Number.Parsing.Common.cs b/src/libraries/Common/src/System/Number.Parsing.Common.cs index e43cbe14c29226..250c8dde5a6b2e 100644 --- a/src/libraries/Common/src/System/Number.Parsing.Common.cs +++ b/src/libraries/Common/src/System/Number.Parsing.Common.cs @@ -345,14 +345,84 @@ internal enum ParsingStatus if (TChar.CastToUInt32(*str) != '\0') { // We only hurt the failure case - // This fix is for French or Kazakh cultures. Since a user cannot type 0xA0 or 0x202F as a - // space character we use 0x20 space character instead to mean the same. + // This fix is for cultures that use NBSP (U+00A0) or narrow NBSP (U+202F) as group/decimal separators + // (e.g., French, Kazakh, Ukrainian). Since a user cannot easily type these characters, + // we accept regular space (U+0020) as equivalent. while (true) { uint cp = (p < pEnd) ? TChar.CastToUInt32(*p) : '\0'; uint val = TChar.CastToUInt32(*str); - if ((cp != val) && !(IsSpaceReplacingChar(val) && (cp == '\u0020'))) + bool match = cp == val; + + if (!match) + { + // For char (UTF-16), check if either is a space-replacing char and the other is space + if (typeof(TChar) == typeof(char)) + { + match = (IsSpaceReplacingChar(val) && (cp == '\u0020')) || (IsSpaceReplacingChar(cp) && (val == '\u0020')); + } + // For byte (UTF-8), handle multi-byte sequences for NBSP characters + else if (typeof(TChar) == typeof(byte)) + { + // Check if val is start of UTF-8 NBSP (U+00A0: 0xC2 0xA0) and cp is space + if (val == 0xC2 && (str + 1 < stringPointer + value.Length) && TChar.CastToUInt32(*(str + 1)) == 0xA0 && cp == 0x20) + { + // Advance past the 2-byte NBSP in pattern and the space in input + str += 2; + p++; + + if (TChar.CastToUInt32(*str) == '\0') + { + return p; + } + continue; + } + // Check if val is start of UTF-8 narrow NBSP (U+202F: 0xE2 0x80 0xAF) and cp is space + else if (val == 0xE2 && (str + 2 < stringPointer + value.Length) && + TChar.CastToUInt32(*(str + 1)) == 0x80 && 
TChar.CastToUInt32(*(str + 2)) == 0xAF && cp == 0x20) + { + // Advance past the 3-byte narrow NBSP in pattern and the space in input + str += 3; + p++; + + if (TChar.CastToUInt32(*str) == '\0') + { + return p; + } + continue; + } + // Check if cp is start of UTF-8 NBSP and val is space + else if (cp == 0xC2 && (p + 1 < pEnd) && TChar.CastToUInt32(*(p + 1)) == 0xA0 && val == 0x20) + { + // Advance past the 2-byte NBSP in input and the space in pattern + p += 2; + str++; + + if (TChar.CastToUInt32(*str) == '\0') + { + return p; + } + continue; + } + // Check if cp is start of UTF-8 narrow NBSP and val is space + else if (cp == 0xE2 && (p + 2 < pEnd) && + TChar.CastToUInt32(*(p + 1)) == 0x80 && TChar.CastToUInt32(*(p + 2)) == 0xAF && val == 0x20) + { + // Advance past the 3-byte narrow NBSP in input and the space in pattern + p += 3; + str++; + + if (TChar.CastToUInt32(*str) == '\0') + { + return p; + } + continue; + } + } + } + + if (!match) { break; } diff --git a/src/libraries/System.Runtime.Numerics/tests/BigInteger/parse.ukUA.cs b/src/libraries/System.Runtime.Numerics/tests/BigInteger/parse.ukUA.cs new file mode 100644 index 00000000000000..932ee834e19310 --- /dev/null +++ b/src/libraries/System.Runtime.Numerics/tests/BigInteger/parse.ukUA.cs @@ -0,0 +1,54 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+
+using System.Globalization;
+using System.Tests;
+using System.Text;
+using Xunit;
+
+namespace System.Numerics.Tests
+{
+    /// <summary>
+    /// Tests that call BigInteger.Parse on UTF-8 and string input while the thread culture is uk-UA.
+    /// </summary>
+    public class parseTestUkUA
+    {
+        // "123" followed by a regular space (U+0020) must parse as 123 when AllowTrailingWhite is set.
+        [Fact]
+        public static void ParseUkrainianCultureWithTrailingSpaces()
+        {
+            using (new ThreadCultureChange(new CultureInfo("uk-UA")))
+            {
+                // Literal expected value: it must not be produced by the parser under test.
+                BigInteger expected = new BigInteger(123);
+                string testNumber = "123 ";
+
+                // UTF-8 input.
+                byte[] utf8Bytes = Encoding.UTF8.GetBytes(testNumber);
+                Assert.Equal(expected, BigInteger.Parse(utf8Bytes, NumberStyles.AllowTrailingWhite));
+
+                // UTF-16 (string) input.
+                Assert.Equal(expected, BigInteger.Parse(testNumber, NumberStyles.AllowTrailingWhite));
+            }
+        }
+
+        // uk-UA is expected to use NBSP (U+00A0) as NumberGroupSeparator; it must parse with AllowThousands.
+        [Fact]
+        public static void ParseUkrainianCultureWithNBSP()
+        {
+            using (new ThreadCultureChange(new CultureInfo("uk-UA")))
+            {
+                // Literal expected value: it must not be produced by the parser under test.
+                BigInteger expected = new BigInteger(1234567);
+                string testWithNBSP = "1\u00a0234\u00a0567";
+
+                // UTF-8 input (each NBSP is encoded as the two bytes 0xC2 0xA0).
+                byte[] utf8WithNBSP = Encoding.UTF8.GetBytes(testWithNBSP);
+                Assert.Equal(expected, BigInteger.Parse(utf8WithNBSP, NumberStyles.AllowThousands));
+
+                // UTF-16 (string) input.
+                Assert.Equal(expected, BigInteger.Parse(testWithNBSP, NumberStyles.AllowThousands));
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Runtime.Numerics/tests/System.Runtime.Numerics.Tests.csproj b/src/libraries/System.Runtime.Numerics/tests/System.Runtime.Numerics.Tests.csproj
index 5d468f9ce66885..263983cefc231a 100644
--- a/src/libraries/System.Runtime.Numerics/tests/System.Runtime.Numerics.Tests.csproj
+++ 
b/src/libraries/System.Runtime.Numerics/tests/System.Runtime.Numerics.Tests.csproj @@ -46,6 +46,7 @@ +