5050from bigframes .core import guid , identifiers , local_data , nodes , ordering , utils
5151import bigframes .core as core
5252import bigframes .core .blocks as blocks
53+ import bigframes .core .events
5354import bigframes .core .schema as schemata
5455import bigframes .dtypes
5556import bigframes .formatting_helpers as formatting_helpers
@@ -499,6 +500,7 @@ def read_gbq_table( # type: ignore[overload-overlap]
499500 force_total_order : Optional [bool ] = ...,
500501 n_rows : Optional [int ] = None ,
501502 index_col_in_columns : bool = False ,
503+ publish_execution : bool = True ,
502504 ) -> dataframe .DataFrame :
503505 ...
504506
@@ -522,6 +524,7 @@ def read_gbq_table(
522524 force_total_order : Optional [bool ] = ...,
523525 n_rows : Optional [int ] = None ,
524526 index_col_in_columns : bool = False ,
527+ publish_execution : bool = True ,
525528 ) -> pandas .Series :
526529 ...
527530
@@ -544,6 +547,7 @@ def read_gbq_table(
544547 force_total_order : Optional [bool ] = None ,
545548 n_rows : Optional [int ] = None ,
546549 index_col_in_columns : bool = False ,
550+ publish_execution : bool = True ,
547551 ) -> dataframe .DataFrame | pandas .Series :
548552 """Read a BigQuery table into a BigQuery DataFrames DataFrame.
549553
@@ -603,8 +607,12 @@ def read_gbq_table(
603607 when the index is selected from the data columns (e.g., in a
604608 ``read_csv`` scenario). The column will be used as the
605609 DataFrame's index and removed from the list of value columns.
610+ publish_execution (bool, optional):
611+ If True, sends an execution started and stopped event if this
612+ causes a query. Set to False if using read_gbq_table from
613+ another function that is reporting execution.
606614 """
607- import bigframes ._tools . strings
615+ import bigframes .core . events
608616 import bigframes .dataframe as dataframe
609617
610618 # ---------------------------------
@@ -768,12 +776,26 @@ def read_gbq_table(
768776 # TODO(b/338065601): Provide a way to assume uniqueness and avoid this
769777 # check.
770778 primary_key = bf_read_gbq_table .infer_unique_columns (
771- bqclient = self ._bqclient ,
772779 table = table ,
773780 index_cols = index_cols ,
774- # If non in strict ordering mode, don't go through overhead of scanning index column(s) to determine if unique
775- metadata_only = not self ._scan_index_uniqueness ,
776781 )
782+
783+ # If not in strict ordering mode, don't go through overhead of scanning index column(s) to determine if unique
784+ if not primary_key and self ._scan_index_uniqueness and index_cols :
785+ if publish_execution :
786+ bigframes .core .events .publisher .send (
787+ bigframes .core .events .ExecutionStarted (),
788+ )
789+ primary_key = bf_read_gbq_table .check_if_index_columns_are_unique (
790+ self ._bqclient ,
791+ table = table ,
792+ index_cols = index_cols ,
793+ )
794+ if publish_execution :
795+ bigframes .core .events .publisher .send (
796+ bigframes .core .events .ExecutionFinished (),
797+ )
798+
777799 schema = schemata .ArraySchema .from_bq_table (table )
778800 if not include_all_columns :
779801 schema = schema .select (index_cols + columns )
@@ -991,6 +1013,12 @@ def read_gbq_query(
9911013 query_job , list (columns ), index_cols
9921014 )
9931015
1016+ # We want to make sure we show progress when we actually do execute a
1017+ # query. Since we have got this far, we know it's not a dry run.
1018+ bigframes .core .events .publisher .send (
1019+ bigframes .core .events .ExecutionStarted (),
1020+ )
1021+
9941022 query_job_for_metrics : Optional [bigquery .QueryJob ] = None
9951023 destination : Optional [bigquery .TableReference ] = None
9961024
@@ -1046,20 +1074,28 @@ def read_gbq_query(
10461074 # makes sense to download the results beyond the first page, even if
10471075 # there is a job and destination table available.
10481076 if query_job_for_metrics is None and rows is not None :
1049- return bf_read_gbq_query .create_dataframe_from_row_iterator (
1077+ df = bf_read_gbq_query .create_dataframe_from_row_iterator (
10501078 rows ,
10511079 session = self ._session ,
10521080 index_col = index_col ,
10531081 columns = columns ,
10541082 )
1083+ bigframes .core .events .publisher .send (
1084+ bigframes .core .events .ExecutionFinished (),
1085+ )
1086+ return df
10551087
10561088 # We already checked rows, so if there's no destination table, then
10571089 # there are no results to return.
10581090 if destination is None :
1059- return bf_read_gbq_query .create_dataframe_from_query_job_stats (
1091+ df = bf_read_gbq_query .create_dataframe_from_query_job_stats (
10601092 query_job_for_metrics ,
10611093 session = self ._session ,
10621094 )
1095+ bigframes .core .events .publisher .send (
1096+ bigframes .core .events .ExecutionFinished (),
1097+ )
1098+ return df
10631099
10641100 # If the query was DDL or DML, return some job metadata. See
10651101 # https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.statement_type
@@ -1070,10 +1106,14 @@ def read_gbq_query(
10701106 query_job_for_metrics is not None
10711107 and not bf_read_gbq_query .should_return_query_results (query_job_for_metrics )
10721108 ):
1073- return bf_read_gbq_query .create_dataframe_from_query_job_stats (
1109+ df = bf_read_gbq_query .create_dataframe_from_query_job_stats (
10741110 query_job_for_metrics ,
10751111 session = self ._session ,
10761112 )
1113+ bigframes .core .events .publisher .send (
1114+ bigframes .core .events .ExecutionFinished (),
1115+ )
1116+ return df
10771117
10781118 # Speed up counts by getting counts from result metadata.
10791119 if rows is not None :
@@ -1083,16 +1123,21 @@ def read_gbq_query(
10831123 else :
10841124 n_rows = None
10851125
1086- return self .read_gbq_table (
1126+ df = self .read_gbq_table (
10871127 f"{ destination .project } .{ destination .dataset_id } .{ destination .table_id } " ,
10881128 index_col = index_col ,
10891129 columns = columns ,
10901130 use_cache = configuration ["query" ]["useQueryCache" ],
10911131 force_total_order = force_total_order ,
10921132 n_rows = n_rows ,
1133+ publish_execution = False ,
10931134 # max_results and filters are omitted because they are already
10941135 # handled by to_query(), above.
10951136 )
1137+ bigframes .core .events .publisher .send (
1138+ bigframes .core .events .ExecutionFinished (),
1139+ )
1140+ return df
10961141
10971142 def _query_to_destination (
10981143 self ,
0 commit comments