Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions sdks/python/apache_beam/runners/worker/bundle_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1091,6 +1091,7 @@ def __init__(
state_handler: sdk_worker.CachingStateHandler,
data_channel_factory: data_plane.DataChannelFactory,
data_sampler: Optional[data_sampler.DataSampler] = None,
instruction_id: Optional[str] = None,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be optional, or should we rather make it required?

) -> None:
"""Initialize a bundle processor.

Expand Down Expand Up @@ -1159,8 +1160,13 @@ def __init__(
from apache_beam.runners.worker.sdk_worker_main import terminate_sdk_harness
terminate_sdk_harness()

for op in reversed(self.ops.values()):
op.setup(self.data_sampler)
if instruction_id:
with statesampler.instruction_id(instruction_id):
for op in reversed(self.ops.values()):
op.setup(self.data_sampler)
else:
for op in reversed(self.ops.values()):
op.setup(self.data_sampler)
Comment on lines +1163 to +1169
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The loop for setting up operations is duplicated in both the if and else blocks. This can be refactored to avoid code repetition and improve maintainability. A common pattern for this is to use a conditional context manager.

self.splitting_lock = threading.Lock()

def create_execution_tree(
Expand Down
9 changes: 6 additions & 3 deletions sdks/python/apache_beam/runners/worker/log_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,12 @@ def emit(self, record: logging.LogRecord) -> None:
log_entry.timestamp.nanos = int(nanoseconds)
if record.exc_info:
log_entry.trace = ''.join(traceback.format_exception(*record.exc_info))
instruction_id = statesampler.get_current_instruction_id()
if instruction_id:
log_entry.instruction_id = instruction_id
if hasattr(record, 'instruction_id'):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can simplify/optimize by avoid hasattr then acces?

instruction_id = getattr(record, 'instruction_id', None) or statesampler.get_current_instruction_id()
if instruction_id:
    log_entry.instruction_id = instruction_id

log_entry.instruction_id = record.instruction_id
if not log_entry.instruction_id:
instruction_id = statesampler.get_current_instruction_id()
if instruction_id:
log_entry.instruction_id = instruction_id
Comment on lines +137 to +142
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This logic for setting instruction_id can be simplified to be more concise and readable by using getattr and consolidating the checks.

    instruction_id = getattr(record, 'instruction_id', None)
    if not instruction_id:
      instruction_id = statesampler.get_current_instruction_id()
    if instruction_id:
      log_entry.instruction_id = instruction_id

tracker = statesampler.get_current_tracker()
if tracker:
current_state = tracker.current_state()
Expand Down
3 changes: 2 additions & 1 deletion sdks/python/apache_beam/runners/worker/sdk_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,8 @@ def get(self, instruction_id, bundle_descriptor_id):
self.state_handler_factory.create_state_handler(
pbd.state_api_service_descriptor),
self.data_channel_factory,
self.data_sampler)
self.data_sampler,
instruction_id=instruction_id)
with self._lock:
self.active_bundle_processors[
instruction_id] = bundle_descriptor_id, processor
Expand Down
Loading