Skip to content

Commit e7290a7

Browse files
committed
Refactor batch handling in collect_gil_bench.py to use partitions for better organization
1 parent 7643931 commit e7290a7

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

benchmarks/collect_gil_bench.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,14 @@
23 23

24 24
def run(n_batches: int = 8, batch_size: int = 1_000_000) -> None:
25 25
ctx = SessionContext()
26-
batches = []
26+
partitions = []
27 27
for i in range(n_batches):
28 28
start = i * batch_size
29 29
arr = pa.array(range(start, start + batch_size))
30-
batches.append(pa.record_batch([arr], names=["a"]))
30+
batch = pa.record_batch([arr], names=["a"])
31+
partitions.append([batch]) # Each batch in its own partition
31 32

32-
df = ctx.create_dataframe([batches])
33+
df = ctx.create_dataframe(partitions)
33 34

34 35
start = time.perf_counter()
35 36
df.collect()

0 commit comments

Comments
 (0)