6565
6666import bigframes ._config .bigquery_options as bigquery_options
6767import bigframes .constants as constants
68- from bigframes .core import log_adapter
6968import bigframes .core as core
7069import bigframes .core .blocks as blocks
7170import bigframes .core .compile
8483
8584# Even though the ibis.backends.bigquery import is unused, it's needed
8685# to register new and replacement ops with the Ibis BigQuery backend.
87- import third_party .bigframes_vendored .ibis .backends .bigquery # noqa
8886import third_party .bigframes_vendored .ibis .expr .operations as vendored_ibis_ops
8987import third_party .bigframes_vendored .pandas .io .gbq as third_party_pandas_gbq
9088import third_party .bigframes_vendored .pandas .io .parquet as third_party_pandas_parquet
@@ -161,6 +159,8 @@ def __init__(
161159 else :
162160 self ._location = context .location
163161
162+ self ._bq_kms_key_name = context .kms_key_name
163+
164164 # Instantiate a clients provider to help with cloud clients that will be
165165 # used in the future operations in the session
166166 if clients_provider :
@@ -172,9 +172,17 @@ def __init__(
172172 use_regional_endpoints = context .use_regional_endpoints ,
173173 credentials = context .credentials ,
174174 application_name = context .application_name ,
175+ bq_kms_key_name = self ._bq_kms_key_name ,
175176 )
176177
177178 self ._create_bq_datasets ()
179+
180+ # TODO(shobs): Remove this logic after https://github.com/ibis-project/ibis/issues/8494
181+ # has been fixed. The ibis client changes the default query job config
182+ # so we are going to remember the current config and restore it after
183+ # the ibis client has been created
184+ original_default_query_job_config = self .bqclient .default_query_job_config
185+
178186 self .ibis_client = typing .cast (
179187 ibis_bigquery .Backend ,
180188 ibis .bigquery .connect (
@@ -184,6 +192,9 @@ def __init__(
184192 ),
185193 )
186194
195+ self .bqclient .default_query_job_config = original_default_query_job_config
196+
197+ # Resolve the BQ connection for remote function and Vertex AI integration
187198 self ._bq_connection = context .bq_connection or _BIGFRAMES_DEFAULT_CONNECTION_ID
188199
189200 # Now that we're starting the session, don't allow the options to be
@@ -929,19 +940,21 @@ def _read_pandas_load_job(
929940 pandas_dataframe_copy .columns = pandas .Index (new_col_ids )
930941 pandas_dataframe_copy [ordering_col ] = np .arange (pandas_dataframe_copy .shape [0 ])
931942
943+ job_config = self ._prepare_load_job_config ()
944+
932945 # Specify the datetime dtypes, which is auto-detected as timestamp types.
933946 schema : list [bigquery .SchemaField ] = []
934947 for column , dtype in zip (pandas_dataframe .columns , pandas_dataframe .dtypes ):
935948 if dtype == "timestamp[us][pyarrow]" :
936949 schema .append (
937950 bigquery .SchemaField (column , bigquery .enums .SqlTypeNames .DATETIME )
938951 )
952+ job_config .schema = schema
939953
940954 # Clustering probably not needed anyways as pandas tables are small
941955 cluster_cols = [ordering_col ]
942-
943- job_config = bigquery .LoadJobConfig (schema = schema )
944956 job_config .clustering_fields = cluster_cols
957+
945958 job_config .labels = {"bigframes-api" : api_name }
946959
947960 load_table_destination = bigframes_io .random_table (self ._anonymous_dataset )
@@ -1061,7 +1074,7 @@ def read_csv(
10611074 f"{ constants .FEEDBACK_LINK } "
10621075 )
10631076
1064- job_config = bigquery . LoadJobConfig ()
1077+ job_config = self . _prepare_load_job_config ()
10651078 job_config .create_disposition = bigquery .CreateDisposition .CREATE_IF_NEEDED
10661079 job_config .source_format = bigquery .SourceFormat .CSV
10671080 job_config .write_disposition = bigquery .WriteDisposition .WRITE_EMPTY
@@ -1136,7 +1149,7 @@ def read_parquet(
11361149 table = bigframes_io .random_table (self ._anonymous_dataset )
11371150
11381151 if engine == "bigquery" :
1139- job_config = bigquery . LoadJobConfig ()
1152+ job_config = self . _prepare_load_job_config ()
11401153 job_config .create_disposition = bigquery .CreateDisposition .CREATE_IF_NEEDED
11411154 job_config .source_format = bigquery .SourceFormat .PARQUET
11421155 job_config .write_disposition = bigquery .WriteDisposition .WRITE_EMPTY
@@ -1194,7 +1207,7 @@ def read_json(
11941207 "'lines' keyword is only valid when 'orient' is 'records'."
11951208 )
11961209
1197- job_config = bigquery . LoadJobConfig ()
1210+ job_config = self . _prepare_load_job_config ()
11981211 job_config .create_disposition = bigquery .CreateDisposition .CREATE_IF_NEEDED
11991212 job_config .source_format = bigquery .SourceFormat .NEWLINE_DELIMITED_JSON
12001213 job_config .write_disposition = bigquery .WriteDisposition .WRITE_EMPTY
@@ -1518,36 +1531,84 @@ def read_gbq_function(
15181531 session = self ,
15191532 )
15201533
1534+ def _prepare_query_job_config (
1535+ self ,
1536+ job_config : Optional [bigquery .QueryJobConfig ] = None ,
1537+ ) -> bigquery .QueryJobConfig :
1538+ if job_config is None :
1539+ job_config = bigquery .QueryJobConfig ()
1540+ else :
1541+ # Create a copy so that we don't mutate the original config passed
1542+ job_config = typing .cast (
1543+ bigquery .QueryJobConfig ,
1544+ bigquery .QueryJobConfig .from_api_repr (job_config .to_api_repr ()),
1545+ )
1546+
1547+ if bigframes .options .compute .maximum_bytes_billed is not None :
1548+ job_config .maximum_bytes_billed = (
1549+ bigframes .options .compute .maximum_bytes_billed
1550+ )
1551+
1552+ if self ._bq_kms_key_name :
1553+ job_config .destination_encryption_configuration = (
1554+ bigquery .EncryptionConfiguration (kms_key_name = self ._bq_kms_key_name )
1555+ )
1556+
1557+ return job_config
1558+
1559+ def _prepare_load_job_config (self ) -> bigquery .LoadJobConfig :
1560+ # Create a copy so that we don't mutate the original config passed
1561+ job_config = bigquery .LoadJobConfig ()
1562+
1563+ if self ._bq_kms_key_name :
1564+ job_config .destination_encryption_configuration = (
1565+ bigquery .EncryptionConfiguration (kms_key_name = self ._bq_kms_key_name )
1566+ )
1567+
1568+ return job_config
1569+
1570+ def _prepare_copy_job_config (self ) -> bigquery .CopyJobConfig :
1571+ # Create a copy so that we don't mutate the original config passed
1572+ job_config = bigquery .CopyJobConfig ()
1573+
1574+ if self ._bq_kms_key_name :
1575+ job_config .destination_encryption_configuration = (
1576+ bigquery .EncryptionConfiguration (kms_key_name = self ._bq_kms_key_name )
1577+ )
1578+
1579+ return job_config
1580+
15211581 def _start_query (
15221582 self ,
15231583 sql : str ,
15241584 job_config : Optional [bigquery .job .QueryJobConfig ] = None ,
15251585 max_results : Optional [int ] = None ,
15261586 ) -> Tuple [bigquery .table .RowIterator , bigquery .QueryJob ]:
15271587 """
1528- Starts query job and waits for results.
1588+ Starts BigQuery query job and waits for results.
15291589 """
1530- job_config = self ._prepare_job_config (job_config )
1531- api_methods = log_adapter .get_and_reset_api_methods ()
1532- job_config .labels = bigframes_io .create_job_configs_labels (
1533- job_configs_labels = job_config .labels , api_methods = api_methods
1590+ job_config = self ._prepare_query_job_config (job_config )
1591+ return bigframes .session ._io .bigquery .start_query_with_client (
1592+ self .bqclient , sql , job_config , max_results
15341593 )
15351594
1536- try :
1537- query_job = self .bqclient .query (sql , job_config = job_config )
1538- except google .api_core .exceptions .Forbidden as ex :
1539- if "Drive credentials" in ex .message :
1540- ex .message += "\n Check https://cloud.google.com/bigquery/docs/query-drive-data#Google_Drive_permissions."
1541- raise
1595+ def _start_query_create_model (
1596+ self ,
1597+ sql : str ,
1598+ ) -> Tuple [bigquery .table .RowIterator , bigquery .QueryJob ]:
1599+ """
1600+ Starts BigQuery ML CREATE MODEL query job and waits for results.
1601+ """
1602+ job_config = self ._prepare_query_job_config ()
15421603
1543- opts = bigframes . options . display
1544- if opts . progress_bar is not None and not query_job . configuration . dry_run :
1545- results_iterator = formatting_helpers . wait_for_query_job (
1546- query_job , max_results , opts . progress_bar
1547- )
1548- else :
1549- results_iterator = query_job . result ( max_results = max_results )
1550- return results_iterator , query_job
1604+ # BQML expects kms_key_name through OPTIONS and not through job config,
1605+ # so we must reset any encryption set in the job config
1606+ # https://cloud.google.com/bigquery/docs/customer-managed-encryption#encrypt-model
1607+ job_config . destination_encryption_configuration = None
1608+
1609+ return bigframes . session . _io . bigquery . start_query_with_client (
1610+ self . bqclient , sql , job_config
1611+ )
15511612
15521613 def _cache_with_cluster_cols (
15531614 self , array_value : core .ArrayValue , cluster_cols : typing .Sequence [str ]
@@ -1696,19 +1757,6 @@ def _start_generic_job(self, job: formatting_helpers.GenericJob):
16961757 else :
16971758 job .result ()
16981759
1699- def _prepare_job_config (
1700- self , job_config : Optional [bigquery .QueryJobConfig ] = None
1701- ) -> bigquery .QueryJobConfig :
1702- if job_config is None :
1703- job_config = self .bqclient .default_query_job_config
1704- if job_config is None :
1705- job_config = bigquery .QueryJobConfig ()
1706- if bigframes .options .compute .maximum_bytes_billed is not None :
1707- job_config .maximum_bytes_billed = (
1708- bigframes .options .compute .maximum_bytes_billed
1709- )
1710- return job_config
1711-
17121760
def connect(context: Optional[bigquery_options.BigQueryOptions] = None) -> Session:
    """Construct and return a :class:`Session` configured by ``context``.

    Passing ``None`` lets the session fall back to its default options.
    """
    session = Session(context)
    return session
0 commit comments