From 6bcd6469f7b384807e8d467b961774f84e233391 Mon Sep 17 00:00:00 2001 From: praneetnadella Date: Tue, 11 Nov 2025 19:44:14 +0000 Subject: [PATCH 1/3] Added function for deterministic ID for class definitions by hashing. --- .../internal/cloudpickle/cloudpickle.py | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/sdks/python/apache_beam/internal/cloudpickle/cloudpickle.py b/sdks/python/apache_beam/internal/cloudpickle/cloudpickle.py index 8ee770d61691..2768d731c2c1 100644 --- a/sdks/python/apache_beam/internal/cloudpickle/cloudpickle.py +++ b/sdks/python/apache_beam/internal/cloudpickle/cloudpickle.py @@ -62,6 +62,7 @@ import dis from enum import Enum import functools +import hashlib import io import itertools import logging @@ -98,7 +99,7 @@ _DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary() _DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary() _DYNAMIC_CLASS_STATE_TRACKER_BY_CLASS = weakref.WeakKeyDictionary() -_DYNAMIC_CLASS_TRACKER_LOCK = threading.Lock() +_DYNAMIC_CLASS_TRACKER_LOCK = threading.RLock() PYPY = platform.python_implementation() == "PyPy" @@ -168,6 +169,7 @@ class CloudPickleConfig: DEFAULT_CONFIG = CloudPickleConfig() +_GENERATING_SENTINEL = object() builtin_code_type = None if PYPY: # builtin-code objects only exist in pypy @@ -179,10 +181,21 @@ class CloudPickleConfig: def _get_or_create_tracker_id(class_def, id_generator): with _DYNAMIC_CLASS_TRACKER_LOCK: class_tracker_id = _DYNAMIC_CLASS_TRACKER_BY_CLASS.get(class_def) + if class_tracker_id is _GENERATING_SENTINEL and id_generator: + raise RuntimeError( + f"Recursive ID generation detected for {class_def}. " + f"The id_generator cannot recursively request an ID for the same class." 
+ ) + if class_tracker_id is None and id_generator is not None: - class_tracker_id = id_generator(class_def) - _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id - _DYNAMIC_CLASS_TRACKER_BY_ID[class_tracker_id] = class_def + _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = _GENERATING_SENTINEL + try: + class_tracker_id = id_generator(class_def) + _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id + _DYNAMIC_CLASS_TRACKER_BY_ID[class_tracker_id] = class_def + except: + _DYNAMIC_CLASS_TRACKER_BY_CLASS.pop(class_def, None) + raise return class_tracker_id @@ -1720,3 +1733,10 @@ def dumps( # Backward compat alias. CloudPickler = Pickler + + +def hash_dynamic_classdef(classdef): + """Generates a deterministic ID by hashing the pickled class definition.""" + hexidgest = hashlib.sha256( + dumps(classdef, config=CloudPickleConfig(id_generator=None))).hexdigest() + return hexidgest \ No newline at end of file From 9f7e5215c882f42d30d8c31ada7740fde8d724d5 Mon Sep 17 00:00:00 2001 From: praneetnadella Date: Tue, 11 Nov 2025 23:58:45 +0000 Subject: [PATCH 2/3] Trigger CI: Rerun checks From a543b6d392b1f627e6fd884b804895c9cea37b05 Mon Sep 17 00:00:00 2001 From: praneetnadella Date: Wed, 12 Nov 2025 18:27:32 +0000 Subject: [PATCH 3/3] addressing reviewer comments --- sdks/python/apache_beam/internal/cloudpickle/cloudpickle.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdks/python/apache_beam/internal/cloudpickle/cloudpickle.py b/sdks/python/apache_beam/internal/cloudpickle/cloudpickle.py index 2768d731c2c1..495e888a5167 100644 --- a/sdks/python/apache_beam/internal/cloudpickle/cloudpickle.py +++ b/sdks/python/apache_beam/internal/cloudpickle/cloudpickle.py @@ -193,7 +193,7 @@ def _get_or_create_tracker_id(class_def, id_generator): class_tracker_id = id_generator(class_def) _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id _DYNAMIC_CLASS_TRACKER_BY_ID[class_tracker_id] = class_def - except: + except Exception: 
_DYNAMIC_CLASS_TRACKER_BY_CLASS.pop(class_def, None) raise return class_tracker_id @@ -1737,6 +1737,6 @@ def dumps( def hash_dynamic_classdef(classdef): """Generates a deterministic ID by hashing the pickled class definition.""" - hexidgest = hashlib.sha256( + hexdigest = hashlib.sha256( dumps(classdef, config=CloudPickleConfig(id_generator=None))).hexdigest() - return hexidgest \ No newline at end of file + return hexdigest