From c8fc6585856b9be85e989665d956d6ada685983b Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stan@ulbrych.org>
Date: Mon, 13 Oct 2025 11:26:19 +0100
Subject: [PATCH 01/10] deprecate non-ascii

---
 Doc/deprecations/pending-removal-in-3.17.rst           |  6 ++++++
 Lib/encodings/__init__.py                              |  8 +++++++-
 Lib/test/test_codecs.py                                | 10 +++++++---
 .../2025-10-13-11-25-41.gh-issue-136702.uvLGK1.rst     |  3 +++
 4 files changed, 23 insertions(+), 4 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2025-10-13-11-25-41.gh-issue-136702.uvLGK1.rst

diff --git a/Doc/deprecations/pending-removal-in-3.17.rst b/Doc/deprecations/pending-removal-in-3.17.rst
index 0a1c2f08cab3bd..e769c9d371e133 100644
--- a/Doc/deprecations/pending-removal-in-3.17.rst
+++ b/Doc/deprecations/pending-removal-in-3.17.rst
@@ -23,6 +23,12 @@ Pending removal in Python 3.17
     (Contributed by Shantanu Jain in :gh:`91896`.)
 
 
+* :mod:`encodings`:
+
+  - Passing non-ASCII *encoding* names to :func:`encodings.normalize_encoding`
+    is deprecated and scheduled for removal in Python 3.17.
+    (Contributed by Stan Ulbrych in :gh:`136702`.)
+
 * :mod:`typing`:
 
   - Before Python 3.14, old-style unions were implemented using the private class
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index 298177eb8003a7..b048fdc0223b86 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -26,9 +26,10 @@
 
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 
-"""#"
+"""
 
 import codecs
+import warnings
 import sys
 from . import aliases
 
@@ -55,6 +56,11 @@ def normalize_encoding(encoding):
     if isinstance(encoding, bytes):
         encoding = str(encoding, "ascii")
 
+    if not encoding.isascii():
+        warnings.warn(
+            "Support for non-ascii encoding names will be removed in 3.17",
+            DeprecationWarning, stacklevel=2)
+
     chars = []
     punct = False
     for c in encoding:
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index c35a4508943506..f1f0ac5ad36fd2 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3886,15 +3886,14 @@ def search_function(encoding):
         self.assertEqual(codecs.lookup('TEST.AAA 8'), ('test.aaa-8', 2, 3, 4))
         self.assertEqual(codecs.lookup('TEST.AAA---8'), ('test.aaa---8', 2, 3, 4))
         self.assertEqual(codecs.lookup('TEST.AAA   8'), ('test.aaa---8', 2, 3, 4))
-        self.assertEqual(codecs.lookup('TEST.AAA\xe9\u20ac-8'), ('test.aaa\xe9\u20ac-8', 2, 3, 4))
         self.assertEqual(codecs.lookup('TEST.AAA.8'), ('test.aaa.8', 2, 3, 4))
         self.assertEqual(codecs.lookup('TEST.AAA...8'), ('test.aaa...8', 2, 3, 4))
+        with self.assertWarns(DeprecationWarning):
+            self.assertEqual(codecs.lookup('TEST.AAA\xe9\u20ac-8'), ('test.aaa\xe9\u20ac-8', 2, 3, 4))
 
     def test_encodings_normalize_encoding(self):
-        # encodings.normalize_encoding() ignores non-ASCII characters.
         normalize = encodings.normalize_encoding
         self.assertEqual(normalize('utf_8'), 'utf_8')
-        self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
         self.assertEqual(normalize('utf   8'), 'utf_8')
         # encodings.normalize_encoding() doesn't convert
         # characters to lower case.
@@ -3902,6 +3901,11 @@ def test_encodings_normalize_encoding(self):
         self.assertEqual(normalize('utf.8'), 'utf.8')
         self.assertEqual(normalize('utf...8'), 'utf...8')
 
+        # Non-ASCII *encoding* is deprecated.
+        with self.assertWarnsRegex(DeprecationWarning,
+                "Support for non-ascii encoding names will be removed in 3.17"):
+            self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2025-10-13-11-25-41.gh-issue-136702.uvLGK1.rst b/Misc/NEWS.d/next/Library/2025-10-13-11-25-41.gh-issue-136702.uvLGK1.rst
new file mode 100644
index 00000000000000..88303f017f58c4
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-10-13-11-25-41.gh-issue-136702.uvLGK1.rst
@@ -0,0 +1,3 @@
+:mod:`encodings`: Deprecate passing a non-ASCII *encoding* name to
+:func:`encodings.normalize_encoding` and schedule removal of support for
+Python 3.17.

From 5b50daaddae581499840282c8ba8384d814925f0 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stan@ulbrych.org>
Date: Mon, 13 Oct 2025 11:34:12 +0100
Subject: [PATCH 02/10] Relocate import

---
 Lib/encodings/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index b048fdc0223b86..4a30d786f55881 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -29,7 +29,6 @@
 """
 
 import codecs
-import warnings
 import sys
 from . import aliases
 
@@ -57,6 +56,7 @@ def normalize_encoding(encoding):
         encoding = str(encoding, "ascii")
 
     if not encoding.isascii():
+        import warnings
         warnings.warn(
             "Support for non-ascii encoding names will be removed in 3.17",
             DeprecationWarning, stacklevel=2)

From 95f2e65dbdee909c88cd8b6276ad9c803c4115cb Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stan@ulbrych.org>
Date: Mon, 13 Oct 2025 12:13:40 +0100
Subject: [PATCH 03/10] sanitize charset names in email

---
 Lib/email/_header_value_parser.py |  1 +
 Lib/email/utils.py                | 10 ++++++++++
 2 files changed, 11 insertions(+)

diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 91243378dc0441..aa81f3554ca74a 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -796,6 +796,7 @@ def params(self):
                         value = urllib.parse.unquote(value, encoding='latin-1')
                     else:
                         try:
+                            charset = utils._sanitize_charset_name(charset, 'us-ascii')
                             value = value.decode(charset, 'surrogateescape')
                         except (LookupError, UnicodeEncodeError):
                             # XXX: there should really be a custom defect for
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index 3de1f0d24a15b0..67cc3a550b7d9d 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -446,6 +446,15 @@ def decode_params(params):
                 new_params.append((name, '"%s"' % value))
     return new_params
 
+def _sanitize_charset_name(charset, fallback_charset):
+    if not charset:
+        return charset
+    sanitized = ''.join(
+        c for c in charset
+        if (ord(c) < 0xDC80 or ord(c) > 0xDCFF) and c.isascii()
+    )
+    return sanitized if sanitized else fallback_charset
+
 def collapse_rfc2231_value(value, errors='replace',
                            fallback_charset='us-ascii'):
     if not isinstance(value, tuple) or len(value) != 3:
@@ -458,6 +467,7 @@ def collapse_rfc2231_value(value, errors='replace',
         # Issue 17369: if charset/lang is None, decode_rfc2231 couldn't parse
         # the value, so use the fallback_charset.
         charset = fallback_charset
+    charset = _sanitize_charset_name(charset, fallback_charset)
     rawbytes = bytes(text, 'raw-unicode-escape')
     try:
         return str(rawbytes, charset, errors)

From fad52cd3cbc9d504190f3c52d84426590094f7a8 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stan@ulbrych.org>
Date: Thu, 16 Oct 2025 16:48:47 +0100
Subject: [PATCH 04/10] Use table, replace with 'ascii'

---
 Lib/email/_header_value_parser.py | 4 ++--
 Lib/email/utils.py                | 9 ++++-----
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index aa81f3554ca74a..d4d93006fb71ff 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -796,14 +796,14 @@ def params(self):
                         value = urllib.parse.unquote(value, encoding='latin-1')
                     else:
                         try:
-                            charset = utils._sanitize_charset_name(charset, 'us-ascii')
+                            charset = utils._sanitize_charset_name(charset, 'ascii')
                             value = value.decode(charset, 'surrogateescape')
                         except (LookupError, UnicodeEncodeError):
                             # XXX: there should really be a custom defect for
                             # unknown character set to make it easy to find,
                             # because otherwise unknown charset is a silent
                             # failure.
-                            value = value.decode('us-ascii', 'surrogateescape')
+                            value = value.decode('ascii', 'surrogateescape')
                         if utils._has_surrogates(value):
                             param.defects.append(errors.UndecodableBytesDefect())
                 value_parts.append(value)
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index 67cc3a550b7d9d..a93a7d0f86f849 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -446,17 +446,16 @@ def decode_params(params):
                 new_params.append((name, '"%s"' % value))
     return new_params
 
+_SANITIZE_TABLE = str.maketrans({i: None for i in range(128, 65536)})
+
 def _sanitize_charset_name(charset, fallback_charset):
     if not charset:
         return charset
-    sanitized = ''.join(
-        c for c in charset
-        if (ord(c) < 0xDC80 or ord(c) > 0xDCFF) and c.isascii()
-    )
+    sanitized = charset.translate(_SANITIZE_TABLE)
     return sanitized if sanitized else fallback_charset
 
 def collapse_rfc2231_value(value, errors='replace',
-                           fallback_charset='us-ascii'):
+                           fallback_charset='ascii'):
     if not isinstance(value, tuple) or len(value) != 3:
         return unquote(value)
     # While value comes to us as a unicode string, we need it to be a bytes

From 9d6f06e00ebe87ed5c163e37ce12287c80a8071b Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stan@ulbrych.org>
Date: Sat, 1 Nov 2025 19:39:01 +0000
Subject: [PATCH 05/10] Review

---
 Lib/email/_header_value_parser.py |  3 +--
 Lib/email/utils.py                | 11 +----------
 Lib/test/test_email/test_email.py |  5 ++++-
 3 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index d4d93006fb71ff..91243378dc0441 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -796,14 +796,13 @@ def params(self):
                         value = urllib.parse.unquote(value, encoding='latin-1')
                     else:
                         try:
-                            charset = utils._sanitize_charset_name(charset, 'ascii')
                             value = value.decode(charset, 'surrogateescape')
                         except (LookupError, UnicodeEncodeError):
                             # XXX: there should really be a custom defect for
                             # unknown character set to make it easy to find,
                             # because otherwise unknown charset is a silent
                             # failure.
-                            value = value.decode('ascii', 'surrogateescape')
+                            value = value.decode('us-ascii', 'surrogateescape')
                         if utils._has_surrogates(value):
                             param.defects.append(errors.UndecodableBytesDefect())
                 value_parts.append(value)
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index a93a7d0f86f849..3de1f0d24a15b0 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -446,16 +446,8 @@ def decode_params(params):
                 new_params.append((name, '"%s"' % value))
     return new_params
 
-_SANITIZE_TABLE = str.maketrans({i: None for i in range(128, 65536)})
-
-def _sanitize_charset_name(charset, fallback_charset):
-    if not charset:
-        return charset
-    sanitized = charset.translate(_SANITIZE_TABLE)
-    return sanitized if sanitized else fallback_charset
-
 def collapse_rfc2231_value(value, errors='replace',
-                           fallback_charset='ascii'):
+                           fallback_charset='us-ascii'):
     if not isinstance(value, tuple) or len(value) != 3:
         return unquote(value)
     # While value comes to us as a unicode string, we need it to be a bytes
@@ -466,7 +458,6 @@ def collapse_rfc2231_value(value, errors='replace',
         # Issue 17369: if charset/lang is None, decode_rfc2231 couldn't parse
         # the value, so use the fallback_charset.
         charset = fallback_charset
-    charset = _sanitize_charset_name(charset, fallback_charset)
     rawbytes = bytes(text, 'raw-unicode-escape')
     try:
         return str(rawbytes, charset, errors)
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index b8116d073a2670..3e216718fbb18d 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -5717,7 +5717,10 @@ def test_rfc2231_bad_character_in_encoding(self):
 
 """
         msg = email.message_from_string(m)
-        self.assertEqual(msg.get_filename(), 'myfile.txt')
+        import warnings
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", DeprecationWarning)
+            self.assertEqual(msg.get_filename(), 'myfile.txt')
 
     def test_rfc2231_single_tick_in_filename_extended(self):
         eq = self.assertEqual

From 16697dcc6b5f5f0a316fa2e14537d516b9e16bb9 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stan@ulbrych.org>
Date: Sat, 1 Nov 2025 20:22:51 +0000
Subject: [PATCH 06/10] Fix second warning

---
 Lib/test/test_email/test_headerregistry.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py
index ff7a6da644d572..7e9b56e800c5bd 100644
--- a/Lib/test/test_email/test_headerregistry.py
+++ b/Lib/test/test_email/test_headerregistry.py
@@ -1,6 +1,7 @@
 import datetime
 import textwrap
 import unittest
+import warnings
 from email import errors
 from email import policy
 from email.message import Message
@@ -247,7 +248,15 @@ def content_type_as_value(self,
         decoded =  args[2] if l>2 and args[2] is not DITTO else source
         header = 'Content-Type:' + ' ' if source else ''
         folded = args[3] if l>3 else header + decoded + '\n'
-        h = self.make_header('Content-Type', source)
+        # Suppress deprecation warning for rfc2231_nonascii_in_charset_of_charset_parameter_value
+        if 'utf-8%E2%80%9D' in source:
+            with warnings.catch_warnings():
+                warnings.filterwarnings('ignore',
+                    message='Support for non-ascii encoding names',
+                    category=DeprecationWarning)
+                h = self.make_header('Content-Type', source)
+        else:
+            h = self.make_header('Content-Type', source)
         self.assertEqual(h.content_type, content_type)
         self.assertEqual(h.maintype, maintype)
         self.assertEqual(h.subtype, subtype)

From e4036f858f8ad56d0d9ba38c5fb2dac06cb9d215 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stan@ulbrych.org>
Date: Sat, 1 Nov 2025 20:52:51 +0000
Subject: [PATCH 07/10] Convert to asserts

---
 Lib/test/test_email/test_email.py          | 4 +---
 Lib/test/test_email/test_headerregistry.py | 8 ++------
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 3e216718fbb18d..8fe51f67d7349e 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -5717,9 +5717,7 @@ def test_rfc2231_bad_character_in_encoding(self):
 
 """
         msg = email.message_from_string(m)
-        import warnings
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore", DeprecationWarning)
+        with self.assertWarns(DeprecationWarning):
             self.assertEqual(msg.get_filename(), 'myfile.txt')
 
     def test_rfc2231_single_tick_in_filename_extended(self):
diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py
index 7e9b56e800c5bd..043eb376d67554 100644
--- a/Lib/test/test_email/test_headerregistry.py
+++ b/Lib/test/test_email/test_headerregistry.py
@@ -1,7 +1,6 @@
 import datetime
 import textwrap
 import unittest
-import warnings
 from email import errors
 from email import policy
 from email.message import Message
@@ -249,11 +248,8 @@ def content_type_as_value(self,
         header = 'Content-Type:' + ' ' if source else ''
         folded = args[3] if l>3 else header + decoded + '\n'
         # Suppress deprecation warning for rfc2231_nonascii_in_charset_of_charset_parameter_value
-        if 'utf-8%E2%80%9D' in source:
-            with warnings.catch_warnings():
-                warnings.filterwarnings('ignore',
-                    message='Support for non-ascii encoding names',
-                    category=DeprecationWarning)
+        if 'utf-8%E2%80%9D' in source and not 'ascii' in source:
+            with self.assertWarns(DeprecationWarning):
                 h = self.make_header('Content-Type', source)
         else:
             h = self.make_header('Content-Type', source)

From b8fc5f43c72d7a9ccf12605afe85022041acb244 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stan@ulbrych.org>
Date: Sat, 1 Nov 2025 22:04:41 +0000
Subject: [PATCH 08/10] Fix for platforms with ordered tests

---
 Lib/test/test_email/test_headerregistry.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py
index 043eb376d67554..67f655221c917d 100644
--- a/Lib/test/test_email/test_headerregistry.py
+++ b/Lib/test/test_email/test_headerregistry.py
@@ -247,8 +247,11 @@ def content_type_as_value(self,
         decoded =  args[2] if l>2 and args[2] is not DITTO else source
         header = 'Content-Type:' + ' ' if source else ''
         folded = args[3] if l>3 else header + decoded + '\n'
-        # Suppress deprecation warning for rfc2231_nonascii_in_charset_of_charset_parameter_value
-        if 'utf-8%E2%80%9D' in source and not 'ascii' in source:
+        # Both rfc2231 test cases with utf-8%E2%80%9D raise warnings,
+        # clear encoding cache to ensure test isolation.
+        if 'utf-8%E2%80%9D' in source:
+            import encodings
+            encodings._cache.clear()
             with self.assertWarns(DeprecationWarning):
                 h = self.make_header('Content-Type', source)
         else:

From 7592af89de0b4905cc96d607290aea56a198535b Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stan@ulbrych.org>
Date: Sat, 1 Nov 2025 22:05:51 +0000
Subject: [PATCH 09/10] !fixup

---
 Lib/test/test_email/test_headerregistry.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py
index 67f655221c917d..1d0d0a49a82917 100644
--- a/Lib/test/test_email/test_headerregistry.py
+++ b/Lib/test/test_email/test_headerregistry.py
@@ -249,7 +249,7 @@ def content_type_as_value(self,
         folded = args[3] if l>3 else header + decoded + '\n'
         # Both rfc2231 test cases with utf-8%E2%80%9D raise warnings,
         # clear encoding cache to ensure test isolation.
-        if 'utf-8%E2%80%9D' in source:
+        if 'utf-8%E2%80%9D' in source and 'ascii' not in source:
             import encodings
             encodings._cache.clear()
             with self.assertWarns(DeprecationWarning):

From 8c598998597aff232a6fbbbdc34f1d17e73f3506 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stan@ulbrych.org>
Date: Sun, 2 Nov 2025 14:26:08 +0000
Subject: [PATCH 10/10] Fix CI on Android and iOS

---
 Lib/email/_header_value_parser.py | 4 ++++
 Lib/email/utils.py                | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 91243378dc0441..c7f665b3990512 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -796,6 +796,10 @@ def params(self):
                         value = urllib.parse.unquote(value, encoding='latin-1')
                     else:
                         try:
+                            # Explicitly look up the codec for warning generation, see gh-140030
+                            # Can be removed in 3.17
+                            import codecs
+                            codecs.lookup(charset)
                             value = value.decode(charset, 'surrogateescape')
                         except (LookupError, UnicodeEncodeError):
                             # XXX: there should really be a custom defect for
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index 3de1f0d24a15b0..d4824dc3601b2d 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -460,6 +460,10 @@ def collapse_rfc2231_value(value, errors='replace',
         charset = fallback_charset
     rawbytes = bytes(text, 'raw-unicode-escape')
     try:
+        # Explicitly look up the codec for warning generation, see gh-140030
+        # Can be removed in 3.17
+        import codecs
+        codecs.lookup(charset)
         return str(rawbytes, charset, errors)
     except LookupError:
         # charset is not a known codec.