diff --git a/samples/dbt/.dbt.yml b/samples/dbt/.dbt.yml index 98053bfc37..a2fd2ffd4c 100644 --- a/samples/dbt/.dbt.yml +++ b/samples/dbt/.dbt.yml @@ -1,3 +1,17 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + dbt_sample_project: outputs: dev: # The target environment name (e.g., dev, prod) diff --git a/samples/dbt/dbt_sample_project/dbt_project.yml b/samples/dbt/dbt_sample_project/dbt_project.yml index d12098a18a..aef376e1fc 100644 --- a/samples/dbt/dbt_sample_project/dbt_project.yml +++ b/samples/dbt/dbt_sample_project/dbt_project.yml @@ -1,3 +1,16 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # Name your project! Project names should contain only lowercase characters # and underscores. A good package name should reflect your organization's diff --git a/samples/dbt/dbt_sample_project/models/example/dbt_bigframes_code_sample_1.py b/samples/dbt/dbt_sample_project/models/example/dbt_bigframes_code_sample_1.py index 4c8ddf8f6c..e397549afe 100644 --- a/samples/dbt/dbt_sample_project/models/example/dbt_bigframes_code_sample_1.py +++ b/samples/dbt/dbt_sample_project/models/example/dbt_bigframes_code_sample_1.py @@ -1,3 +1,17 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # This example demonstrates one of the most general usages of transforming raw # BigQuery data into a processed table using a dbt Python model with BigFrames. # See more from: https://cloud.google.com/bigquery/docs/dataframes-dbt. @@ -32,7 +46,13 @@ def model(dbt, session): table = "bigquery-public-data.epa_historical_air_quality.temperature_hourly_summary" # Define the specific columns to select from the BigQuery table. - columns = ["state_name", "county_name", "date_local", "time_local", "sample_measurement"] + columns = [ + "state_name", + "county_name", + "date_local", + "time_local", + "sample_measurement", + ] # Read data from the specified BigQuery table into a BigFrames DataFrame. df = session.read_gbq(table, columns=columns) @@ -44,14 +64,16 @@ def model(dbt, session): # Group the DataFrame by 'state_name', 'county_name', and 'date_local'. For # each group, calculate the minimum and maximum of the 'sample_measurement' # column. The result will be a BigFrames DataFrame with a MultiIndex. - result = df.groupby(["state_name", "county_name", "date_local"])["sample_measurement"]\ - .agg(["min", "max"]) + result = df.groupby(["state_name", "county_name", "date_local"])[ + "sample_measurement" + ].agg(["min", "max"]) # Rename some columns and convert the MultiIndex of the 'result' DataFrame # into regular columns. This flattens the DataFrame so 'state_name', # 'county_name', and 'date_local' become regular columns again. - result = result.rename(columns={'min': 'min_temperature', 'max': 'max_temperature'})\ - .reset_index() + result = result.rename( + columns={"min": "min_temperature", "max": "max_temperature"} + ).reset_index() # Return the processed BigFrames DataFrame. # In a dbt Python model, this DataFrame will be materialized as a table diff --git a/samples/dbt/dbt_sample_project/models/example/dbt_bigframes_code_sample_2.py b/samples/dbt/dbt_sample_project/models/example/dbt_bigframes_code_sample_2.py index 019e503393..3795d0eee9 100644 --- a/samples/dbt/dbt_sample_project/models/example/dbt_bigframes_code_sample_2.py +++ b/samples/dbt/dbt_sample_project/models/example/dbt_bigframes_code_sample_2.py @@ -1,6 +1,20 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # This example demonstrates how to build an **incremental dbt Python model** # using BigFrames. -# +# # Incremental models are essential for efficiently processing large datasets by # only transforming new or changed data, rather than reprocessing the entire # dataset every time. If the target table already exists, dbt will perform a @@ -13,8 +27,6 @@ # directly within BigQuery, leveraging BigQuery's scalability. -import bigframes.pandas as bpd - def model(dbt, session): # Optional: override settings from dbt_project.yml. # When both are set, dbt.config takes precedence over dbt_project.yml. @@ -24,9 +36,9 @@ def model(dbt, session): submission_method="bigframes", # Materialize this model as an 'incremental' table. This tells dbt to # only process new or updated data on subsequent runs. - materialized='incremental', + materialized="incremental", # Use MERGE strategy to update rows during incremental runs. - incremental_strategy='merge', + incremental_strategy="merge", # Define the composite key that uniquely identifies a row in the # target table. This key is used by the 'merge' strategy to match # existing rows for updates during incremental runs. @@ -41,7 +53,7 @@ def model(dbt, session): # Define a BigFrames UDF to generate a temperature description. # BigFrames UDFs allow you to define custom Python logic that executes # directly within BigQuery. This is powerful for complex transformations. - @bpd.udf(dataset='dbt_sample_dataset', name='describe_udf') + @session.udf(dataset="dbt_sample_dataset", name="describe_udf") def describe( max_temperature: float, min_temperature: float,