diff --git a/sbom.py b/sbom.py
index c7122139..9a3cfa00 100644
--- a/sbom.py
+++ b/sbom.py
@@ -24,8 +24,9 @@
 import tarfile
 import typing
 import zipfile
+from functools import cache
 from pathlib import Path
-from typing import Any, NotRequired, TypedDict, cast
+from typing import Any, LiteralString, NotRequired, TypedDict, cast
 from urllib.request import urlopen
 
 
@@ -90,9 +91,34 @@ class CreationInfo(TypedDict):
     licenseListVersion: str
 
 
-def spdx_id(value: str) -> str:
-    """Encode a value into characters that are valid in an SPDX ID"""
-    return re.sub(r"[^a-zA-Z0-9.\-]+", "-", value)
+# Cache of values that we've seen already. We use this
+# to de-duplicate values and their corresponding SPDX ID.
+_SPDX_IDS_TO_VALUES: dict[str, str] = {}
+
+
+@cache
+def spdx_id(value: LiteralString) -> str:
+    """Encode a value into characters that are valid in an SPDX ID.
+
+    Runs of characters outside ``[a-zA-Z0-9.-]`` are replaced with ``-`` and
+    the first 8 hex digits of the value's SHA-256 digest are appended, so
+    values that sanitize to the same text are very unlikely to share an ID.
+
+    Raises AssertionError if two different values produce the same ID.
+    """
+    value_as_spdx_id = re.sub(r"[^a-zA-Z0-9.\-]+", "-", value)
+    # To avoid collisions we append a hash suffix.
+    suffix = hashlib.sha256(value.encode()).hexdigest()[:8]
+    value_as_spdx_id = f"{value_as_spdx_id}-{suffix}"
+    # Record the mapping outside of an `assert`: under `python -O` asserts
+    # are stripped, which would also skip the `setdefault()` side effect.
+    seen_value = _SPDX_IDS_TO_VALUES.setdefault(value_as_spdx_id, value)
+    if seen_value != value:
+        raise AssertionError(
+            f"SPDX ID {value_as_spdx_id!r} is shared by "
+            f"{seen_value!r} and {value!r}"
+        )
+    return value_as_spdx_id
 
 
 def calculate_package_verification_codes(sbom: SBOM) -> None:
diff --git a/tests/sbom/sbom-with-pip-removed.json b/tests/sbom/sbom-with-pip-removed.json
index 0ba2d4c0..b2646f23 100644
--- a/tests/sbom/sbom-with-pip-removed.json
+++ b/tests/sbom/sbom-with-pip-removed.json
@@ -13,9 +13,9 @@
   "packages": [],
   "relationships": [
     {
-      "relatedSpdxElement": "SPDXRef-FILE-Modules-expat-COPYING",
+      "relatedSpdxElement": "SPDXRef-FILE-Modules-expat-COPYING-497fb0c3",
       "relationshipType": "CONTAINS",
-      "spdxElementId": "SPDXRef-PACKAGE-expat"
+      "spdxElementId": "SPDXRef-PACKAGE-expat-83b93528"
     }
   ]
 }
diff --git a/tests/sbom/sbom-with-pip.json b/tests/sbom/sbom-with-pip.json
index e758c182..d2ee1c6f 100644
--- a/tests/sbom/sbom-with-pip.json
+++ b/tests/sbom/sbom-with-pip.json
@@ -12,7 +12,7 @@
   "files": [],
   "packages": [
     {
-      "SPDXID": "SPDXRef-PACKAGE-pip",
+      "SPDXID": "SPDXRef-PACKAGE-pip-ced959c1",
       "name": "pip",
       "versionInfo": "24.0",
       "licenseConcluded": "MIT",
@@ -38,19 +38,19 @@
   ],
   "relationships": [
     {
-      "relatedSpdxElement": "SPDXRef-FILE-Modules-expat-COPYING",
+      "relatedSpdxElement": "SPDXRef-FILE-Modules-expat-COPYING-497fb0c3",
       "relationshipType": "CONTAINS",
-      "spdxElementId": "SPDXRef-PACKAGE-expat"
+      "spdxElementId": "SPDXRef-PACKAGE-expat-83b93528"
     },
     {
-      "relatedSpdxElement": "SPDXRef-PACKAGE-urllib3",
+      "relatedSpdxElement": "SPDXRef-PACKAGE-urllib3-b7a198af",
       "relationshipType": "DEPENDS_ON",
-      "spdxElementId": "SPDXRef-PACKAGE-pip"
+      "spdxElementId": "SPDXRef-PACKAGE-pip-ced959c1"
     },
     {
-      "relatedSpdxElement": "SPDXRef-PACKAGE-pip",
+      "relatedSpdxElement": "SPDXRef-PACKAGE-pip-ced959c1",
       "relationshipType": "DEPENDS_ON",
-      "spdxElementId": "SPDXRef-PACKAGE-cpython"
+      "spdxElementId": "SPDXRef-PACKAGE-cpython-608f998c"
     }
   ]
 }
diff --git a/tests/test_sbom.py b/tests/test_sbom.py
index 45e66041..8e7713dc 100644
--- a/tests/test_sbom.py
+++ b/tests/test_sbom.py
@@ -11,6 +11,22 @@
 import sbom
 
 
+@pytest.mark.parametrize(
+    ["value", "expected"],
+    [
+        ("abc", "abc-ba7816bf"),
+        ("def", "def-cb8379ac"),
+        ("SPDXRef-PACKAGE-pip", "SPDXRef-PACKAGE-pip-ced959c1"),
+        ("SPDXRef-PACKAGE-cpython", "SPDXRef-PACKAGE-cpython-79ab18d2"),
+        ("SPDXRef-PACKAGE-urllib3", "SPDXRef-PACKAGE-urllib3-b8ab4751"),
+    ],
+)
+def test_spdx_id(value: str, expected: str) -> None:
+    assert sbom.spdx_id(value) == expected
+    # Check we get the same value next time
+    assert sbom.spdx_id(value) == expected
+
+
 @pytest.mark.parametrize(
     ["package_sha1s", "package_verification_code"],
     [