From 2fe896603ead237be54063ef6a2ec5d74572f9d1 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Fri, 25 Jul 2025 22:44:58 +0000 Subject: [PATCH] chore: use `bigframes.pandas._read_gbq_colab()` in benchmarks --- tests/benchmark/read_gbq_colab/aggregate_output.py | 10 +++------- tests/benchmark/read_gbq_colab/dry_run.py | 9 ++++----- tests/benchmark/read_gbq_colab/filter_output.py | 12 ++++++------ tests/benchmark/read_gbq_colab/first_page.py | 9 ++++----- tests/benchmark/read_gbq_colab/last_page.py | 9 ++++----- tests/benchmark/read_gbq_colab/sort_output.py | 11 ++++------- tests/benchmark/utils.py | 6 +++--- 7 files changed, 28 insertions(+), 38 deletions(-) diff --git a/tests/benchmark/read_gbq_colab/aggregate_output.py b/tests/benchmark/read_gbq_colab/aggregate_output.py index 6acf84d5bc..3df6054d64 100644 --- a/tests/benchmark/read_gbq_colab/aggregate_output.py +++ b/tests/benchmark/read_gbq_colab/aggregate_output.py @@ -15,19 +15,15 @@ import benchmark.utils as utils -import bigframes.session +import bigframes.pandas as bpd PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE -def aggregate_output( - *, project_id, dataset_id, table_id, session: bigframes.session.Session -): +def aggregate_output(*, project_id, dataset_id, table_id): # TODO(tswast): Support alternative query if table_id is a local DataFrame, # e.g. "{local_inline}" or "{local_large}" - df = session._read_gbq_colab( - f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}" - ) + df = bpd._read_gbq_colab(f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}") # Simulate getting the first page, since we'll always do that first in the UI. df.shape diff --git a/tests/benchmark/read_gbq_colab/dry_run.py b/tests/benchmark/read_gbq_colab/dry_run.py index 0f05a2c0b4..6caf08be72 100644 --- a/tests/benchmark/read_gbq_colab/dry_run.py +++ b/tests/benchmark/read_gbq_colab/dry_run.py @@ -15,20 +15,20 @@ import benchmark.utils as utils -import bigframes.session +import bigframes.pandas -def dry_run(*, project_id, dataset_id, table_id, session: bigframes.session.Session): +def dry_run(*, project_id, dataset_id, table_id): # TODO(tswast): Support alternative query if table_id is a local DataFrame, # e.g. "{local_inline}" or "{local_large}" - session._read_gbq_colab( + bigframes.pandas._read_gbq_colab( f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}", dry_run=True, ) if __name__ == "__main__": - config = utils.get_configuration(include_table_id=True) + config = utils.get_configuration(include_table_id=True, start_session=False) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( @@ -38,5 +38,4 @@ def dry_run(*, project_id, dataset_id, table_id, session: bigframes.session.Sess project_id=config.project_id, dataset_id=config.dataset_id, table_id=config.table_id, - session=config.session, ) diff --git a/tests/benchmark/read_gbq_colab/filter_output.py b/tests/benchmark/read_gbq_colab/filter_output.py index d35cc6d5f7..b3c9181770 100644 --- a/tests/benchmark/read_gbq_colab/filter_output.py +++ b/tests/benchmark/read_gbq_colab/filter_output.py @@ -15,19 +15,20 @@ import benchmark.utils as utils -import bigframes.session +import bigframes.pandas as bpd PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE def filter_output( - *, project_id, dataset_id, table_id, session: bigframes.session.Session + *, + project_id, + dataset_id, + table_id, ): # TODO(tswast): Support alternative query if table_id is a local DataFrame, # e.g. "{local_inline}" or "{local_large}" - df = session._read_gbq_colab( - f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}" - ) + df = bpd._read_gbq_colab(f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}") # Simulate getting the first page, since we'll always do that first in the UI. df.shape @@ -54,5 +55,4 @@ def filter_output( project_id=config.project_id, dataset_id=config.dataset_id, table_id=config.table_id, - session=config.session, ) diff --git a/tests/benchmark/read_gbq_colab/first_page.py b/tests/benchmark/read_gbq_colab/first_page.py index eba60297e4..7f8cdb0d51 100644 --- a/tests/benchmark/read_gbq_colab/first_page.py +++ b/tests/benchmark/read_gbq_colab/first_page.py @@ -15,15 +15,15 @@ import benchmark.utils as utils -import bigframes.session +import bigframes.pandas PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE -def first_page(*, project_id, dataset_id, table_id, session: bigframes.session.Session): +def first_page(*, project_id, dataset_id, table_id): # TODO(tswast): Support alternative query if table_id is a local DataFrame, # e.g. "{local_inline}" or "{local_large}" - df = session._read_gbq_colab( + df = bigframes.pandas._read_gbq_colab( f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}" ) @@ -33,7 +33,7 @@ def first_page(*, project_id, dataset_id, table_id, session: bigframes.session.S if __name__ == "__main__": - config = utils.get_configuration(include_table_id=True) + config = utils.get_configuration(include_table_id=True, start_session=False) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( @@ -43,5 +43,4 @@ def first_page(*, project_id, dataset_id, table_id, session: bigframes.session.S project_id=config.project_id, dataset_id=config.dataset_id, table_id=config.table_id, - session=config.session, ) diff --git a/tests/benchmark/read_gbq_colab/last_page.py b/tests/benchmark/read_gbq_colab/last_page.py index d973c84bce..7786e2f8bd 100644 --- a/tests/benchmark/read_gbq_colab/last_page.py +++ b/tests/benchmark/read_gbq_colab/last_page.py @@ -15,15 +15,15 @@ import benchmark.utils as utils -import bigframes.session +import bigframes.pandas PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE -def last_page(*, project_id, dataset_id, table_id, session: bigframes.session.Session): +def last_page(*, project_id, dataset_id, table_id): # TODO(tswast): Support alternative query if table_id is a local DataFrame, # e.g. "{local_inline}" or "{local_large}" - df = session._read_gbq_colab( + df = bigframes.pandas._read_gbq_colab( f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}" ) @@ -34,7 +34,7 @@ def last_page(*, project_id, dataset_id, table_id, session: bigframes.session.Se if __name__ == "__main__": - config = utils.get_configuration(include_table_id=True) + config = utils.get_configuration(include_table_id=True, start_session=False) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( @@ -44,5 +44,4 @@ def last_page(*, project_id, dataset_id, table_id, session: bigframes.session.Se project_id=config.project_id, dataset_id=config.dataset_id, table_id=config.table_id, - session=config.session, ) diff --git a/tests/benchmark/read_gbq_colab/sort_output.py b/tests/benchmark/read_gbq_colab/sort_output.py index 7e1db368c5..7933c4472e 100644 --- a/tests/benchmark/read_gbq_colab/sort_output.py +++ b/tests/benchmark/read_gbq_colab/sort_output.py @@ -15,17 +15,15 @@ import benchmark.utils as utils -import bigframes.session +import bigframes.pandas PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE -def sort_output( - *, project_id, dataset_id, table_id, session: bigframes.session.Session -): +def sort_output(*, project_id, dataset_id, table_id): # TODO(tswast): Support alternative query if table_id is a local DataFrame, # e.g. "{local_inline}" or "{local_large}" - df = session._read_gbq_colab( + df = bigframes.pandas._read_gbq_colab( f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}" ) @@ -44,7 +42,7 @@ def sort_output( if __name__ == "__main__": - config = utils.get_configuration(include_table_id=True) + config = utils.get_configuration(include_table_id=True, start_session=False) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( @@ -54,5 +52,4 @@ def sort_output( project_id=config.project_id, dataset_id=config.dataset_id, table_id=config.table_id, - session=config.session, ) diff --git a/tests/benchmark/utils.py b/tests/benchmark/utils.py index 5dfd8d74bd..9690e0a3bd 100644 --- a/tests/benchmark/utils.py +++ b/tests/benchmark/utils.py @@ -25,12 +25,12 @@ class BenchmarkConfig: project_id: str dataset_id: str - session: bigframes.Session + session: bigframes.Session | None benchmark_suffix: str | None table_id: str | None = None -def get_configuration(include_table_id=False) -> BenchmarkConfig: +def get_configuration(include_table_id=False, start_session=True) -> BenchmarkConfig: parser = argparse.ArgumentParser() parser.add_argument( "--project_id", @@ -65,7 +65,7 @@ def get_configuration(include_table_id=False) -> BenchmarkConfig: ) args = parser.parse_args() - session = _initialize_session(_str_to_bool(args.ordered)) + session = _initialize_session(_str_to_bool(args.ordered)) if start_session else None return BenchmarkConfig( project_id=args.project_id,