Skip to content

Commit 9c19b02

Browse files
authored
bpo-32002: Refactor C locale coercion tests (GH-4369)
Exactly which locale requests will end up giving you the "C" locale is actually platform-dependent. A blank locale and "POSIX" will translate to "C" on most Linux distros, but may not do so on other platforms, so this adjusts the way the tests are structured to better account for that. This is an initial step towards fixing the current test failure on Cygwin (hence the issue reference).
1 parent 0c644fc commit 9c19b02

File tree

2 files changed

+84
-54
lines changed

2 files changed

+84
-54
lines changed

Lib/test/test_c_locale_coercion.py

Lines changed: 82 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -14,30 +14,51 @@
1414
interpreter_requires_environment,
1515
)
1616

17+
# Set the list of ways we expect to be able to ask for the "C" locale
18+
EXPECTED_C_LOCALE_EQUIVALENTS = ["C", "invalid.ascii"]
19+
1720
# Set our expectation for the default encoding used in the C locale
1821
# for the filesystem encoding and the standard streams
19-
20-
# While most *nix platforms default to ASCII in the C locale, some use a
21-
# different encoding.
22-
if sys.platform.startswith("aix"):
23-
C_LOCALE_STREAM_ENCODING = "iso8859-1"
24-
elif test.support.is_android:
25-
C_LOCALE_STREAM_ENCODING = "utf-8"
26-
else:
27-
C_LOCALE_STREAM_ENCODING = "ascii"
28-
29-
# FS encoding is UTF-8 on macOS, other *nix platforms use the locale encoding
30-
if sys.platform == "darwin":
31-
C_LOCALE_FS_ENCODING = "utf-8"
32-
else:
33-
C_LOCALE_FS_ENCODING = C_LOCALE_STREAM_ENCODING
34-
35-
# Note that the above is probably still wrong in some cases, such as:
22+
EXPECTED_C_LOCALE_STREAM_ENCODING = "ascii"
23+
EXPECTED_C_LOCALE_FS_ENCODING = "ascii"
24+
25+
# Set our expectation for the default locale used when none is specified
26+
EXPECT_COERCION_IN_DEFAULT_LOCALE = True
27+
28+
# Apply some platform dependent overrides
29+
if sys.platform.startswith("linux"):
30+
if test.support.is_android:
31+
# Android defaults to using UTF-8 for all system interfaces
32+
EXPECTED_C_LOCALE_STREAM_ENCODING = "utf-8"
33+
EXPECTED_C_LOCALE_FS_ENCODING = "utf-8"
34+
else:
35+
# Linux distros typically alias the POSIX locale directly to the C
36+
# locale.
37+
# TODO: Once https://bugs.python.org/issue30672 is addressed, we'll be
38+
# able to check this case unconditionally
39+
EXPECTED_C_LOCALE_EQUIVALENTS.append("POSIX")
40+
elif sys.platform.startswith("aix"):
41+
# AIX uses iso8859-1 in the C locale, other *nix platforms use ASCII
42+
EXPECTED_C_LOCALE_STREAM_ENCODING = "iso8859-1"
43+
EXPECTED_C_LOCALE_FS_ENCODING = "iso8859-1"
44+
elif sys.platform == "darwin":
45+
# FS encoding is UTF-8 on macOS
46+
EXPECTED_C_LOCALE_FS_ENCODING = "utf-8"
47+
elif sys.platform == "cygwin":
48+
# Cygwin defaults to using C.UTF-8
49+
# TODO: Work out a robust dynamic test for this that doesn't rely on
50+
# CPython's own locale handling machinery
51+
EXPECT_COERCION_IN_DEFAULT_LOCALE = False
52+
53+
# Note that the above expectations are still wrong in some cases, such as:
3654
# * Windows when PYTHONLEGACYWINDOWSFSENCODING is set
37-
# * AIX and any other platforms that use latin-1 in the C locale
55+
# * Any platform other than AIX that uses latin-1 in the C locale
56+
# * Any Linux distro where POSIX isn't a simple alias for the C locale
57+
# * Any Linux distro where the default locale is something other than "C"
3858
#
3959
# Options for dealing with this:
40-
# * Don't set PYTHON_COERCE_C_LOCALE on such platforms (e.g. Windows doesn't)
60+
# * Don't set the PY_COERCE_C_LOCALE preprocessor definition on
61+
# such platforms (e.g. it isn't set on Windows)
4162
# * Fix the test expectations to match the actual platform behaviour
4263

4364
# In order to get the warning messages to match up as expected, the candidate
@@ -47,7 +68,7 @@
4768
# There's no reliable cross-platform way of checking locale alias
4869
# lists, so the only way of knowing which of these locales will work
4970
# is to try them with locale.setlocale(). We do that in a subprocess
50-
# to avoid altering the locale of the test runner.
71+
# in setUpModule() below to avoid altering the locale of the test runner.
5172
#
5273
# If the relevant locale module attributes exist, and we're not on a platform
5374
# where we expect it to always succeed, we also check that
@@ -216,8 +237,9 @@ def _check_child_encoding_details(self,
216237
class LocaleConfigurationTests(_LocaleHandlingTestCase):
217238
# Test explicit external configuration via the process environment
218239

219-
def setUpClass():
220-
# This relies on setupModule() having been run, so it can't be
240+
@classmethod
241+
def setUpClass(cls):
242+
# This relies on setUpModule() having been run, so it can't be
221243
# handled via the @unittest.skipUnless decorator
222244
if not AVAILABLE_TARGETS:
223245
raise unittest.SkipTest("No C-with-UTF-8 locale available")
@@ -284,8 +306,8 @@ def _check_c_locale_coercion(self,
284306

285307
if not AVAILABLE_TARGETS:
286308
# Locale coercion is disabled when there aren't any target locales
287-
fs_encoding = C_LOCALE_FS_ENCODING
288-
stream_encoding = C_LOCALE_STREAM_ENCODING
309+
fs_encoding = EXPECTED_C_LOCALE_FS_ENCODING
310+
stream_encoding = EXPECTED_C_LOCALE_STREAM_ENCODING
289311
coercion_expected = False
290312
if expected_warnings:
291313
expected_warnings = [LEGACY_LOCALE_WARNING]
@@ -297,41 +319,47 @@ def _check_c_locale_coercion(self,
297319
"PYTHONCOERCECLOCALE": "",
298320
}
299321
base_var_dict.update(extra_vars)
300-
for env_var in ("LANG", "LC_CTYPE"):
301-
for locale_to_set in ("", "C", "POSIX", "invalid.ascii"):
302-
# XXX (ncoghlan): *BSD platforms don't behave as expected in the
303-
# POSIX locale, so we skip that for now
304-
# See https://bugs.python.org/issue30672 for discussion
305-
if locale_to_set == "POSIX":
306-
continue
322+
if coerce_c_locale is not None:
323+
base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale
307324

308-
# Platforms using UTF-8 in the C locale do not print
309-
# CLI_COERCION_WARNING when all the locale envt variables are
310-
# not set or set to the empty string.
325+
# Check behaviour for the default locale
326+
with self.subTest(default_locale=True,
327+
PYTHONCOERCECLOCALE=coerce_c_locale):
328+
if EXPECT_COERCION_IN_DEFAULT_LOCALE:
311329
_expected_warnings = expected_warnings
312-
for _env_var in base_var_dict:
313-
if base_var_dict[_env_var]:
314-
break
315-
else:
316-
if (C_LOCALE_STREAM_ENCODING == "utf-8" and
317-
locale_to_set == "" and coerce_c_locale == "warn"):
318-
_expected_warnings = None
319-
330+
_coercion_expected = coercion_expected
331+
else:
332+
_expected_warnings = None
333+
_coercion_expected = False
334+
# On Android CLI_COERCION_WARNING is not printed when all the
335+
# locale environment variables are undefined or empty. When
336+
# this code path is run with environ['LC_ALL'] == 'C', then
337+
# LEGACY_LOCALE_WARNING is printed.
338+
if (test.support.is_android and
339+
_expected_warnings == [CLI_COERCION_WARNING]):
340+
_expected_warnings = None
341+
self._check_child_encoding_details(base_var_dict,
342+
fs_encoding,
343+
stream_encoding,
344+
_expected_warnings,
345+
_coercion_expected)
346+
347+
# Check behaviour for explicitly configured locales
348+
for locale_to_set in EXPECTED_C_LOCALE_EQUIVALENTS:
349+
for env_var in ("LANG", "LC_CTYPE"):
320350
with self.subTest(env_var=env_var,
321351
nominal_locale=locale_to_set,
322352
PYTHONCOERCECLOCALE=coerce_c_locale):
323353
var_dict = base_var_dict.copy()
324354
var_dict[env_var] = locale_to_set
325-
if coerce_c_locale is not None:
326-
var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale
327355
# Check behaviour on successful coercion
328356
self._check_child_encoding_details(var_dict,
329357
fs_encoding,
330358
stream_encoding,
331-
_expected_warnings,
359+
expected_warnings,
332360
coercion_expected)
333361

334-
def test_test_PYTHONCOERCECLOCALE_not_set(self):
362+
def test_PYTHONCOERCECLOCALE_not_set(self):
335363
# This should coerce to the first available target locale by default
336364
self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=None)
337365

@@ -350,27 +378,27 @@ def test_PYTHONCOERCECLOCALE_set_to_warn(self):
350378

351379
def test_PYTHONCOERCECLOCALE_set_to_zero(self):
352380
# The setting "0" should result in the locale coercion being disabled
353-
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
354-
C_LOCALE_STREAM_ENCODING,
381+
self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
382+
EXPECTED_C_LOCALE_STREAM_ENCODING,
355383
coerce_c_locale="0",
356384
coercion_expected=False)
357385
# Setting LC_ALL=C shouldn't make any difference to the behaviour
358-
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
359-
C_LOCALE_STREAM_ENCODING,
386+
self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
387+
EXPECTED_C_LOCALE_STREAM_ENCODING,
360388
coerce_c_locale="0",
361389
LC_ALL="C",
362390
coercion_expected=False)
363391

364392
def test_LC_ALL_set_to_C(self):
365393
# Setting LC_ALL should render the locale coercion ineffective
366-
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
367-
C_LOCALE_STREAM_ENCODING,
394+
self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
395+
EXPECTED_C_LOCALE_STREAM_ENCODING,
368396
coerce_c_locale=None,
369397
LC_ALL="C",
370398
coercion_expected=False)
371399
# And result in a warning about a lack of locale compatibility
372-
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
373-
C_LOCALE_STREAM_ENCODING,
400+
self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
401+
EXPECTED_C_LOCALE_STREAM_ENCODING,
374402
coerce_c_locale="warn",
375403
LC_ALL="C",
376404
expected_warnings=[LEGACY_LOCALE_WARNING],
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Adjust C locale coercion testing for the empty locale and POSIX locale
2+
cases to more readily accommodate platform-dependent behaviour.

0 commit comments

Comments
 (0)