From 76ebabe75e363774b6e64ed84fde68f013d69d38 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Fri, 25 Jul 2025 19:15:05 +0000 Subject: [PATCH] chore: use bigframes.pandas._read_gbq_colab() in benchmarks I have refactored the `read_gbq_colab` benchmarks to use the module-level `bigframes.pandas` API rather than an explicitly passed `Session` object. I updated the benchmarks in `tests/benchmark/read_gbq_colab` to call `bigframes.pandas._read_gbq_colab(...)` instead of `session._read_gbq_colab(...)`. Since the `session` object is no longer needed in these benchmarks, I also removed that dependency. --- tests/benchmark/read_gbq_colab/aggregate_output.py | 12 ++++-------- tests/benchmark/read_gbq_colab/dry_run.py | 10 ++++------ tests/benchmark/read_gbq_colab/filter_output.py | 12 ++++-------- tests/benchmark/read_gbq_colab/first_page.py | 10 ++++------ tests/benchmark/read_gbq_colab/last_page.py | 10 ++++------ tests/benchmark/read_gbq_colab/sort_output.py | 12 ++++-------- 6 files changed, 24 insertions(+), 42 deletions(-) diff --git a/tests/benchmark/read_gbq_colab/aggregate_output.py b/tests/benchmark/read_gbq_colab/aggregate_output.py index dda4bf95a4..0086e97ee2 100644 --- a/tests/benchmark/read_gbq_colab/aggregate_output.py +++ b/tests/benchmark/read_gbq_colab/aggregate_output.py @@ -15,17 +15,15 @@ import benchmark.utils as utils -import bigframes.session +import bigframes.pandas PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE -def aggregate_output( - *, project_id, dataset_id, table_id, session: bigframes.session.Session -): +def aggregate_output(*, project_id, dataset_id, table_id): # TODO(tswast): Support alternative query if table_id is a local DataFrame, # e.g. 
"{local_inline}" or "{local_large}" - df = session._read_gbq_colab( + df = bigframes.pandas._read_gbq_colab( f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}" ) @@ -56,9 +54,8 @@ def aggregate_output( project_id, dataset_id, table_id, - session, suffix, - ) = utils.get_configuration(include_table_id=True) + ) = utils.get_configuration(include_table_id=True, include_session=False) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( @@ -68,5 +65,4 @@ def aggregate_output( project_id=project_id, dataset_id=dataset_id, table_id=table_id, - session=session, ) diff --git a/tests/benchmark/read_gbq_colab/dry_run.py b/tests/benchmark/read_gbq_colab/dry_run.py index c2de1b7cc4..dedbf2c9e2 100644 --- a/tests/benchmark/read_gbq_colab/dry_run.py +++ b/tests/benchmark/read_gbq_colab/dry_run.py @@ -15,13 +15,13 @@ import benchmark.utils as utils -import bigframes.session +import bigframes.pandas -def dry_run(*, project_id, dataset_id, table_id, session: bigframes.session.Session): +def dry_run(*, project_id, dataset_id, table_id): # TODO(tswast): Support alternative query if table_id is a local DataFrame, # e.g. 
"{local_inline}" or "{local_large}" - session._read_gbq_colab( + bigframes.pandas._read_gbq_colab( f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}", dry_run=True, ) @@ -32,9 +32,8 @@ def dry_run(*, project_id, dataset_id, table_id, session: bigframes.session.Sess project_id, dataset_id, table_id, - session, suffix, - ) = utils.get_configuration(include_table_id=True) + ) = utils.get_configuration(include_table_id=True, include_session=False) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( @@ -44,5 +43,4 @@ def dry_run(*, project_id, dataset_id, table_id, session: bigframes.session.Sess project_id=project_id, dataset_id=dataset_id, table_id=table_id, - session=session, ) diff --git a/tests/benchmark/read_gbq_colab/filter_output.py b/tests/benchmark/read_gbq_colab/filter_output.py index 0db7ac5fd6..f39dc28f7a 100644 --- a/tests/benchmark/read_gbq_colab/filter_output.py +++ b/tests/benchmark/read_gbq_colab/filter_output.py @@ -15,17 +15,15 @@ import benchmark.utils as utils -import bigframes.session +import bigframes.pandas PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE -def filter_output( - *, project_id, dataset_id, table_id, session: bigframes.session.Session -): +def filter_output(*, project_id, dataset_id, table_id): # TODO(tswast): Support alternative query if table_id is a local DataFrame, # e.g. 
"{local_inline}" or "{local_large}" - df = session._read_gbq_colab( + df = bigframes.pandas._read_gbq_colab( f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}" ) @@ -48,9 +46,8 @@ def filter_output( project_id, dataset_id, table_id, - session, suffix, - ) = utils.get_configuration(include_table_id=True) + ) = utils.get_configuration(include_table_id=True, include_session=False) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( @@ -60,5 +57,4 @@ def filter_output( project_id=project_id, dataset_id=dataset_id, table_id=table_id, - session=session, ) diff --git a/tests/benchmark/read_gbq_colab/first_page.py b/tests/benchmark/read_gbq_colab/first_page.py index 2df9990d22..8da8a91652 100644 --- a/tests/benchmark/read_gbq_colab/first_page.py +++ b/tests/benchmark/read_gbq_colab/first_page.py @@ -15,15 +15,15 @@ import benchmark.utils as utils -import bigframes.session +import bigframes.pandas PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE -def first_page(*, project_id, dataset_id, table_id, session: bigframes.session.Session): +def first_page(*, project_id, dataset_id, table_id): # TODO(tswast): Support alternative query if table_id is a local DataFrame, # e.g. 
"{local_inline}" or "{local_large}" - df = session._read_gbq_colab( + df = bigframes.pandas._read_gbq_colab( f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}" ) @@ -37,9 +37,8 @@ def first_page(*, project_id, dataset_id, table_id, session: bigframes.session.S project_id, dataset_id, table_id, - session, suffix, - ) = utils.get_configuration(include_table_id=True) + ) = utils.get_configuration(include_table_id=True, include_session=False) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( @@ -49,5 +48,4 @@ def first_page(*, project_id, dataset_id, table_id, session: bigframes.session.S project_id=project_id, dataset_id=dataset_id, table_id=table_id, - session=session, ) diff --git a/tests/benchmark/read_gbq_colab/last_page.py b/tests/benchmark/read_gbq_colab/last_page.py index ad785a29e8..6f58908c67 100644 --- a/tests/benchmark/read_gbq_colab/last_page.py +++ b/tests/benchmark/read_gbq_colab/last_page.py @@ -15,15 +15,15 @@ import benchmark.utils as utils -import bigframes.session +import bigframes.pandas PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE -def last_page(*, project_id, dataset_id, table_id, session: bigframes.session.Session): +def last_page(*, project_id, dataset_id, table_id): # TODO(tswast): Support alternative query if table_id is a local DataFrame, # e.g. 
"{local_inline}" or "{local_large}" - df = session._read_gbq_colab( + df = bigframes.pandas._read_gbq_colab( f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}" ) @@ -38,9 +38,8 @@ def last_page(*, project_id, dataset_id, table_id, session: bigframes.session.Se project_id, dataset_id, table_id, - session, suffix, - ) = utils.get_configuration(include_table_id=True) + ) = utils.get_configuration(include_table_id=True, include_session=False) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( @@ -50,5 +49,4 @@ def last_page(*, project_id, dataset_id, table_id, session: bigframes.session.Se project_id=project_id, dataset_id=dataset_id, table_id=table_id, - session=session, ) diff --git a/tests/benchmark/read_gbq_colab/sort_output.py b/tests/benchmark/read_gbq_colab/sort_output.py index 997de5683d..bf826eb645 100644 --- a/tests/benchmark/read_gbq_colab/sort_output.py +++ b/tests/benchmark/read_gbq_colab/sort_output.py @@ -15,17 +15,15 @@ import benchmark.utils as utils -import bigframes.session +import bigframes.pandas PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE -def sort_output( - *, project_id, dataset_id, table_id, session: bigframes.session.Session -): +def sort_output(*, project_id, dataset_id, table_id): # TODO(tswast): Support alternative query if table_id is a local DataFrame, # e.g. "{local_inline}" or "{local_large}" - df = session._read_gbq_colab( + df = bigframes.pandas._read_gbq_colab( f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}" ) @@ -48,9 +46,8 @@ def sort_output( project_id, dataset_id, table_id, - session, suffix, - ) = utils.get_configuration(include_table_id=True) + ) = utils.get_configuration(include_table_id=True, include_session=False) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( @@ -60,5 +57,4 @@ def sort_output( project_id=project_id, dataset_id=dataset_id, table_id=table_id, - session=session, )