Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 73 additions & 3 deletions src/libraries/Common/src/System/Number.Parsing.Common.cs
Original file line number Diff line number Diff line change
Expand Up @@ -345,14 +345,84 @@ internal enum ParsingStatus
if (TChar.CastToUInt32(*str) != '\0')
{
// We only hurt the failure case
// This fix is for French or Kazakh cultures. Since a user cannot type 0xA0 or 0x202F as a
// space character we use 0x20 space character instead to mean the same.
// This fix is for cultures that use NBSP (U+00A0) or narrow NBSP (U+202F) as group/decimal separators
// (e.g., French, Kazakh, Ukrainian). Since a user cannot easily type these characters,
// we accept regular space (U+0020) as equivalent.
while (true)
{
uint cp = (p < pEnd) ? TChar.CastToUInt32(*p) : '\0';
uint val = TChar.CastToUInt32(*str);

if ((cp != val) && !(IsSpaceReplacingChar(val) && (cp == '\u0020')))
bool match = cp == val;

if (!match)
{
// For char (UTF-16), check if either is a space-replacing char and the other is space
if (typeof(TChar) == typeof(char))
{
match = (IsSpaceReplacingChar(val) && (cp == '\u0020')) || (IsSpaceReplacingChar(cp) && (val == '\u0020'));
}
// For byte (UTF-8), handle multi-byte sequences for NBSP characters
else if (typeof(TChar) == typeof(byte))
{
// Check if val is start of UTF-8 NBSP (U+00A0: 0xC2 0xA0) and cp is space
if (val == 0xC2 && (str + 1 < stringPointer + value.Length) && TChar.CastToUInt32(*(str + 1)) == 0xA0 && cp == 0x20)
{
// Advance past the 2-byte NBSP in pattern and the space in input
str += 2;
p++;

if (TChar.CastToUInt32(*str) == '\0')
{
return p;
}
continue;
}
// Check if val is start of UTF-8 narrow NBSP (U+202F: 0xE2 0x80 0xAF) and cp is space
else if (val == 0xE2 && (str + 2 < stringPointer + value.Length) &&
TChar.CastToUInt32(*(str + 1)) == 0x80 && TChar.CastToUInt32(*(str + 2)) == 0xAF && cp == 0x20)
{
// Advance past the 3-byte narrow NBSP in pattern and the space in input
str += 3;
p++;

if (TChar.CastToUInt32(*str) == '\0')
{
return p;
}
continue;
}
// Check if cp is start of UTF-8 NBSP and val is space
else if (cp == 0xC2 && (p + 1 < pEnd) && TChar.CastToUInt32(*(p + 1)) == 0xA0 && val == 0x20)
{
// Advance past the 2-byte NBSP in input and the space in pattern
p += 2;
str++;

if (TChar.CastToUInt32(*str) == '\0')
{
return p;
}
continue;
}
// Check if cp is start of UTF-8 narrow NBSP and val is space
else if (cp == 0xE2 && (p + 2 < pEnd) &&
TChar.CastToUInt32(*(p + 1)) == 0x80 && TChar.CastToUInt32(*(p + 2)) == 0xAF && val == 0x20)
{
// Advance past the 3-byte narrow NBSP in input and the space in pattern
p += 3;
str++;

if (TChar.CastToUInt32(*str) == '\0')
{
return p;
}
continue;
}
}
}

if (!match)
{
break;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Globalization;
using System.Tests;
using System.Text;
using Xunit;

namespace System.Numerics.Tests
{
/// <summary>
/// <see cref="BigInteger"/> parsing tests that run with the thread culture switched to uk-UA.
/// Every scenario is exercised twice: once through the UTF-8 (byte) overload and once through
/// the string overload, and both are expected to produce the same value.
/// </summary>
public class parseTestUkUA
{
    /// <summary>
    /// A number followed by a trailing space must parse to the bare number when
    /// <see cref="NumberStyles.AllowTrailingWhite"/> is requested.
    /// </summary>
    [Fact]
    public static void ParseUkrainianCultureWithTrailingSpaces()
    {
        // The culture override stays active until this method returns.
        using ThreadCultureChange cultureScope = new ThreadCultureChange(new CultureInfo("uk-UA"));

        // Per the scenario under test, uk-UA uses NBSP (0xA0) as its NumberGroupSeparator;
        // trailing white space after the digits must still be accepted.
        string input = "123 ";
        BigInteger expected = BigInteger.Parse("123");

        // UTF-8 path.
        byte[] inputUtf8 = Encoding.UTF8.GetBytes(input);
        BigInteger fromUtf8 = BigInteger.Parse(inputUtf8, NumberStyles.AllowTrailingWhite);
        Assert.Equal(expected, fromUtf8);

        // UTF-16 (string) path.
        BigInteger fromString = BigInteger.Parse(input, NumberStyles.AllowTrailingWhite);
        Assert.Equal(expected, fromString);
    }

    /// <summary>
    /// Digits grouped with NBSP (U+00A0) must parse to the ungrouped value when
    /// <see cref="NumberStyles.AllowThousands"/> is requested.
    /// </summary>
    [Fact]
    public static void ParseUkrainianCultureWithNBSP()
    {
        // The culture override stays active until this method returns.
        using ThreadCultureChange cultureScope = new ThreadCultureChange(new CultureInfo("uk-UA"));

        // Input uses NBSP (0xA0) between digit groups, which the scenario treats as the
        // uk-UA NumberGroupSeparator.
        string grouped = "1\u00a0234\u00a0567";
        BigInteger expected = BigInteger.Parse("1234567");

        // UTF-8 path: NBSP is encoded as a multi-byte sequence here.
        byte[] groupedUtf8 = Encoding.UTF8.GetBytes(grouped);
        BigInteger fromUtf8 = BigInteger.Parse(groupedUtf8, NumberStyles.AllowThousands);
        Assert.Equal(expected, fromUtf8);

        // UTF-16 (string) path.
        BigInteger fromString = BigInteger.Parse(grouped, NumberStyles.AllowThousands);
        Assert.Equal(expected, fromString);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
<Compile Include="BigInteger\op_rightshift.cs" />
<Compile Include="BigInteger\op_xor.cs" />
<Compile Include="BigInteger\parse.cs" />
<Compile Include="BigInteger\parse.ukUA.cs" />
<Compile Include="BigInteger\pow.cs" />
<Compile Include="BigInteger\properties.cs" />
<Compile Include="BigInteger\remainder.cs" />
Expand Down
Loading