Skip to content

Commit cbf9b8c

Browse files
hugovkhenryiiipicnixzjohnslavik
authored
gh-143658: importlib.metadata: Use str.translate to improve performance of importlib.metadata.Prepared.normalized (#143660)
Co-authored-by: Henry Schreiner <henryschreineriii@gmail.com> Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Bartosz Sławecki <bartosz@ilikepython.com>
1 parent a6bc60d commit cbf9b8c

File tree

3 files changed

+52
-1
lines changed

3 files changed

+52
-1
lines changed

Lib/importlib/metadata/__init__.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -890,6 +890,14 @@ def search(self, prepared: Prepared):
890890
return itertools.chain(infos, eggs)
891891

892892

893+
# Translation table for Prepared.normalize: maps the ASCII uppercase
# letters to lowercase and replaces "-" (hyphen) and "." (dot) with
# "_" (underscore). Characters not listed here (digits, "_", lowercase
# letters, anything non-ASCII) pass through str.translate() unchanged.
# Built once at import time so it is not recreated on every call.
_normalize_table = str.maketrans(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ-.",
    "abcdefghijklmnopqrstuvwxyz__",
)
899+
900+
893901
class Prepared:
894902
"""
895903
A prepared search query for metadata on a possibly-named package.
@@ -925,7 +933,13 @@ def normalize(name):
925933
"""
926934
PEP 503 normalization plus dashes as underscores.
927935
"""
928-
return re.sub(r"[-_.]+", "-", name).lower().replace('-', '_')
936+
# Emulates PEP 503's ``re.sub(r"[-_.]+", "-", name).lower()``, but yields "_" instead of "-" as the separator.
937+
# About 3x faster; lowercasing only ASCII letters is safe because valid
# package names are limited to ASCII letters, digits, ".", "-" and "_".
938+
value = name.translate(_normalize_table)
939+
# Condense repeats (faster than regex)
940+
while "__" in value:
941+
value = value.replace("__", "_")
942+
return value
929943

930944
@staticmethod
931945
def legacy_normalize(name):

Lib/test/test_importlib/metadata/test_api.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from importlib.metadata import (
77
Distribution,
88
PackageNotFoundError,
9+
Prepared,
910
distribution,
1011
entry_points,
1112
files,
@@ -313,3 +314,36 @@ class InvalidateCache(unittest.TestCase):
313314
def test_invalidate_cache(self):
314315
# No externally observable behavior, but ensures test coverage...
315316
importlib.invalidate_caches()
317+
318+
319+
class PreparedTests(unittest.TestCase):
320+
def test_normalize(self):
321+
tests = [
322+
# Simple
323+
("sample", "sample"),
324+
# Mixed case
325+
("Sample", "sample"),
326+
("SAMPLE", "sample"),
327+
("SaMpLe", "sample"),
328+
# Separator conversions
329+
("sample-pkg", "sample_pkg"),
330+
("sample.pkg", "sample_pkg"),
331+
("sample_pkg", "sample_pkg"),
332+
# Multiple separators
333+
("sample---pkg", "sample_pkg"),
334+
("sample___pkg", "sample_pkg"),
335+
("sample...pkg", "sample_pkg"),
336+
# Mixed separators
337+
("sample-._pkg", "sample_pkg"),
338+
("sample_.-pkg", "sample_pkg"),
339+
# Complex
340+
("Sample__Pkg-name.foo", "sample_pkg_name_foo"),
341+
("Sample__Pkg.name__foo", "sample_pkg_name_foo"),
342+
# Uppercase with separators
343+
("SAMPLE-PKG", "sample_pkg"),
344+
("Sample.Pkg", "sample_pkg"),
345+
("SAMPLE_PKG", "sample_pkg"),
346+
]
347+
for name, expected in tests:
348+
with self.subTest(name=name):
349+
self.assertEqual(Prepared.normalize(name), expected)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:mod:`importlib.metadata`: Use :meth:`str.translate` to improve performance of
2+
:meth:`!importlib.metadata.Prepared.normalize`. Patch by Hugo van Kemenade and
3+
Henry Schreiner.

0 commit comments

Comments
 (0)