
Commit 86183c9

improve comments
1 parent 55a6fa4 commit 86183c9

2 files changed (+48, -23 lines)


dbt_bigframes_integration/dbt_sample_project/models/example/dbt_bigframes_code_sample_1.py

Lines changed: 22 additions & 12 deletions
@@ -1,20 +1,31 @@
-# This example demonstrates one of the most general usage of transforming raw
-# BigQuery data into a processed table using dbt in BigFrames mode.
+# This example demonstrates one of the most general usages of transforming raw
+# BigQuery data into a processed table using a dbt Python model with BigFrames.
+# See more from: https://cloud.google.com/bigquery/docs/dataframes-dbt.
 #
-# Key defaults when using BigFrames in dbt:
-# - The default materialization is 'table' unless specified otherwise.
-# - The default timeout for the job is 3600 seconds (60 minutes).
+# Key defaults when using BigFrames in a dbt Python model for BigQuery:
+# - The default materialization is 'table' unless specified otherwise. This
+#   means dbt will create a new BigQuery table from the result of this model.
+# - The default timeout for the job is 3600 seconds (60 minutes). This can be
+#   adjusted if your processing requires more time.
 # - If no runtime template is provided, dbt will automatically create and reuse
-#   a default one.
+#   a default one for executing the Python code in BigQuery.
 #
-# This code sample shows a basic pattern for reading a BigQuery public dataset,
-# processing it using pandas-like operations, and outputting a cleaned table.
+# BigFrames provides a pandas-like API for BigQuery data, enabling familiar
+# data manipulation directly within your dbt project. This code sample
+# illustrates a basic pattern for:
+# 1. Reading data from an existing BigQuery dataset.
+# 2. Processing it using pandas-like DataFrame operations powered by BigFrames.
+# 3. Outputting a cleaned and transformed table, managed by dbt.


 def model(dbt, session):
-    # Optional: override settings from dbt_project.yml. When both are set,
-    # dbt.config takes precedence over dbt_project.yml.
-    # Use BigFrames mode to execute the Python model.
+    # Optional: Override settings from your dbt_project.yml file.
+    # When both are set, dbt.config takes precedence over dbt_project.yml.
+    #
+    # Use `dbt.config(submission_method="bigframes")` to tell dbt to execute
+    # this Python model using BigQuery DataFrames (BigFrames). This allows you
+    # to write pandas-like code that operates directly on BigQuery data
+    # without needing to pull all data into memory.
     dbt.config(submission_method="bigframes")

     # Define the BigQuery table path from which to read data.
@@ -24,7 +35,6 @@ def model(dbt, session):
     columns = ["state_name", "county_name", "date_local", "time_local", "sample_measurement"]

     # Read data from the specified BigQuery table into a BigFrames DataFrame.
-    # BigFrames allows you to interact with BigQuery tables using a pandas-like API.
     df = session.read_gbq(table, columns=columns)

     # Sort the DataFrame by the specified columns. This prepares the data for
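A compact sketch of the end-to-end pattern these comments describe, for context: the table path, the sort step, and the final return below are illustrative assumptions rather than lines from this commit; only the dbt.config call, the column list, and session.read_gbq(...) appear in the hunks above.

def model(dbt, session):
    # Run this Python model with BigQuery DataFrames (BigFrames).
    dbt.config(submission_method="bigframes")

    # Hypothetical source table; the real path sits outside the hunks above.
    table = "bigquery-public-data.epa_historical_air_quality.temperature_hourly_summary"
    columns = ["state_name", "county_name", "date_local", "time_local", "sample_measurement"]

    # Lazily read only the needed columns into a BigFrames DataFrame;
    # the data stays in BigQuery rather than being pulled into local memory.
    df = session.read_gbq(table, columns=columns)

    # Assumed processing step: sort so downstream logic sees a deterministic order.
    df = df.sort_values(columns)

    # dbt materializes the returned DataFrame as a BigQuery table (the default).
    return df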

dbt_bigframes_integration/dbt_sample_project/models/example/dbt_bigframes_code_sample_2.py

Lines changed: 26 additions & 11 deletions
@@ -1,12 +1,16 @@
-# This example demonstrates how to build an incremental model.
-#
-# It applies lightweight, row-level logic to update or insert records into a
-# target BigQuery table. If the target table already exists, dbt will perform a
+# This example demonstrates how to build an **incremental dbt Python model**
+# using BigFrames.
+#
+# Incremental models are essential for efficiently processing large datasets by
+# only transforming new or changed data, rather than reprocessing the entire
+# dataset every time. If the target table already exists, dbt will perform a
 # merge based on the specified unique keys; otherwise, it will create a new
 # table automatically.
 #
-# It also defines and applies a BigFrames UDF to add a descriptive summary
-# column based on temperature data.
+# This model also showcases the definition and application of a **BigFrames
+# User-Defined Function (UDF)** to add a descriptive summary column based on
+# temperature data. BigFrames UDFs allow you to execute custom Python logic
+# directly within BigQuery, leveraging BigQuery's scalability.


 import bigframes.pandas as bpd
@@ -15,20 +19,28 @@ def model(dbt, session):
     # Optional: override settings from dbt_project.yml.
     # When both are set, dbt.config takes precedence over dbt_project.yml.
     dbt.config(
-        # Use BigFrames mode to execute the Python model.
+        # Use BigFrames mode to execute this Python model. This enables
+        # pandas-like operations directly on BigQuery data.
         submission_method="bigframes",
-        # Materialize as an incremental model.
+        # Materialize this model as an 'incremental' table. This tells dbt to
+        # only process new or updated data on subsequent runs.
         materialized='incremental',
         # Use MERGE strategy to update rows during incremental runs.
         incremental_strategy='merge',
-        # Composite key to match existing rows for updates.
+        # Define the composite key that uniquely identifies a row in the
+        # target table. This key is used by the 'merge' strategy to match
+        # existing rows for updates during incremental runs.
         unique_key=["state_name", "county_name", "date_local"],
     )

-    # Reference an upstream dbt model or table as a DataFrame input.
+    # Reference an upstream dbt model or an existing BigQuery table as a
+    # BigFrames DataFrame. It allows you to seamlessly use the output of another
+    # dbt model as input to this one.
     df = dbt.ref("dbt_bigframes_code_sample_1")

     # Define a BigFrames UDF to generate a temperature description.
+    # BigFrames UDFs allow you to define custom Python logic that executes
+    # directly within BigQuery. This is powerful for complex transformations.
     @bpd.udf(dataset='dbt_sample_dataset', name='describe_udf')
     def describe(
         max_temperature: float,
@@ -48,5 +60,8 @@ def describe(
     # Apply the UDF using combine and store the result in a column "describe".
     df["describe"] = df["max_temperature"].combine(df["min_temperature"], describe)

-    # Return the transformed DataFrame as the final dbt model output.
+    # Return the transformed BigFrames DataFrame.
+    # This DataFrame will be the final output of your incremental dbt model.
+    # On subsequent runs, only new or changed rows will be processed and merged
+    # into the target BigQuery table based on the `unique_key`.
     return df
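The body of describe and the aggregation that produces the max_temperature and min_temperature columns fall outside these hunks. The snippet below is a rough standalone illustration of the UDF-plus-combine pattern the comments refer to; the thresholds, wording, and sample data are invented for this sketch, and only the decorator arguments and the combine call come from the diff.

import bigframes.pandas as bpd

# Deploy a managed Python UDF into the dbt_sample_dataset BigQuery dataset.
@bpd.udf(dataset='dbt_sample_dataset', name='describe_udf')
def describe(max_temperature: float, min_temperature: float) -> str:
    # Hypothetical cutoffs, purely for illustration.
    if min_temperature <= 32.0:
        return f"Freezing day: low {min_temperature}F, high {max_temperature}F"
    if max_temperature >= 90.0:
        return f"Hot day: low {min_temperature}F, high {max_temperature}F"
    return f"Mild day: low {min_temperature}F, high {max_temperature}F"

# Tiny in-memory sample standing in for the upstream dbt model's output.
df = bpd.DataFrame({
    "max_temperature": [95.0, 28.0, 70.0],
    "min_temperature": [72.0, 15.0, 50.0],
})

# combine() pairs the two Series row by row and evaluates the UDF in BigQuery.
df["describe"] = df["max_temperature"].combine(df["min_temperature"], describe)

Within the dbt project itself, a command along the lines of dbt run --select dbt_bigframes_code_sample_2 would build the model; on later runs the merge strategy updates rows that match the unique_key instead of appending duplicates.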
