-# This example demonstrates how to build an incremental model.
-#
-# It applies lightweight, row-level logic to update or insert records into a
-# target BigQuery table. If the target table already exists, dbt will perform a
+# This example demonstrates how to build an **incremental dbt Python model**
+# using BigFrames.
+#
+# Incremental models are essential for efficiently processing large datasets by
+# only transforming new or changed data, rather than reprocessing the entire
+# dataset every time. If the target table already exists, dbt will perform a
 # merge based on the specified unique keys; otherwise, it will create a new
 # table automatically.
 #
-# It also defines and applies a BigFrames UDF to add a descriptive summary
-# column based on temperature data.
+# This model also showcases the definition and application of a **BigFrames
+# User-Defined Function (UDF)** to add a descriptive summary column based on
+# temperature data. BigFrames UDFs allow you to execute custom Python logic
+# directly within BigQuery, leveraging BigQuery's scalability.
 
 
 import bigframes.pandas as bpd
@@ -15,20 +19,28 @@ def model(dbt, session):
     # Optional: override settings from dbt_project.yml.
     # When both are set, dbt.config takes precedence over dbt_project.yml.
     dbt.config(
-        # Use BigFrames mode to execute the Python model.
+        # Use BigFrames mode to execute this Python model. This enables
+        # pandas-like operations directly on BigQuery data.
         submission_method="bigframes",
-        # Materialize as an incremental model.
+        # Materialize this model as an 'incremental' table. This tells dbt to
+        # only process new or updated data on subsequent runs.
         materialized='incremental',
         # Use MERGE strategy to update rows during incremental runs.
         incremental_strategy='merge',
-        # Composite key to match existing rows for updates.
+        # Define the composite key that uniquely identifies a row in the
+        # target table. This key is used by the 'merge' strategy to match
+        # existing rows for updates during incremental runs.
         unique_key=["state_name", "county_name", "date_local"],
     )
 
-    # Reference an upstream dbt model or table as a DataFrame input.
+    # Reference an upstream dbt model or an existing BigQuery table as a
+    # BigFrames DataFrame. This lets you seamlessly use the output of another
+    # dbt model as the input to this one.
     df = dbt.ref("dbt_bigframes_code_sample_1")
 
     # Define a BigFrames UDF to generate a temperature description.
+    # BigFrames UDFs allow you to define custom Python logic that executes
+    # directly within BigQuery. This is powerful for complex transformations.
     @bpd.udf(dataset='dbt_sample_dataset', name='describe_udf')
     def describe(
         max_temperature: float,
@@ -48,5 +60,8 @@ def describe(
     # Apply the UDF using combine and store the result in a column "describe".
     df["describe"] = df["max_temperature"].combine(df["min_temperature"], describe)
 
-    # Return the transformed DataFrame as the final dbt model output.
+    # Return the transformed BigFrames DataFrame.
+    # This DataFrame will be the final output of your incremental dbt model.
+    # On subsequent runs, only new or changed rows will be processed and merged
+    # into the target BigQuery table based on the `unique_key`.
     return df
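
The body of the `describe` UDF is cut off by the hunk boundaries above; only the decorator and its first parameter are visible. As a minimal sketch of what such a UDF could look like (the second `min_temperature` parameter is implied by the `combine` call, but the return type and summary text here are assumptions):

```python
import bigframes.pandas as bpd

# Hypothetical reconstruction of the elided UDF body; only the decorator
# and the first parameter appear in the diff above.
@bpd.udf(dataset='dbt_sample_dataset', name='describe_udf')
def describe(
    max_temperature: float,
    min_temperature: float,
) -> str:
    # Summarize the daily temperature range as human-readable text.
    return (
        f"Temperatures ranged from {min_temperature} "
        f"to {max_temperature} degrees."
    )
```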
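For intuition on the `merge` incremental strategy, here is a hypothetical, plain-pandas illustration of the upsert semantics that dbt asks BigQuery to perform server-side: incoming rows that match an existing row on the composite `unique_key` replace it, and unmatched rows are appended. The table contents below are made up.

```python
import pandas as pd

key = ["state_name", "county_name", "date_local"]

# Rows already in the target table.
existing = pd.DataFrame({
    "state_name": ["CA"],
    "county_name": ["Alameda"],
    "date_local": ["2024-01-01"],
    "describe": ["old summary"],
})

# Rows produced by the current model run.
incoming = pd.DataFrame({
    "state_name": ["CA", "CA"],
    "county_name": ["Alameda", "Fresno"],
    "date_local": ["2024-01-01", "2024-01-01"],
    "describe": ["updated summary", "newly inserted row"],
})

# Incoming rows win on key collisions; everything else is appended.
merged = (
    pd.concat([existing, incoming])
    .drop_duplicates(subset=key, keep="last")
    .reset_index(drop=True)
)
print(merged)
```

On the first run the target table does not exist, so dbt simply creates it from the full model output; only subsequent runs (e.g. `dbt run --select <model_name>`) go through the merge path.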