
Commit 86183c9

improve comments
1 parent 55a6fa4 commit 86183c9

2 files changed (+48, -23 lines)


dbt_bigframes_integration/dbt_sample_project/models/example/dbt_bigframes_code_sample_1.py

Lines changed: 22 additions & 12 deletions
@@ -1,20 +1,31 @@
-# This example demonstrates one of the most general usage of transforming raw
-# BigQuery data into a processed table using dbt in BigFrames mode.
+# This example demonstrates one of the most general usages of transforming raw
+# BigQuery data into a processed table using a dbt Python model with BigFrames.
+# See more from: https://cloud.google.com/bigquery/docs/dataframes-dbt.
 #
-# Key defaults when using BigFrames in dbt:
-# - The default materialization is 'table' unless specified otherwise.
-# - The default timeout for the job is 3600 seconds (60 minutes).
+# Key defaults when using BigFrames in a dbt Python model for BigQuery:
+# - The default materialization is 'table' unless specified otherwise. This
+#   means dbt will create a new BigQuery table from the result of this model.
+# - The default timeout for the job is 3600 seconds (60 minutes). This can be
+#   adjusted if your processing requires more time.
 # - If no runtime template is provided, dbt will automatically create and reuse
-#   a default one.
+#   a default one for executing the Python code in BigQuery.
 #
-# This code sample shows a basic pattern for reading a BigQuery public dataset,
-# processing it using pandas-like operations, and outputting a cleaned table.
+# BigFrames provides a pandas-like API for BigQuery data, enabling familiar
+# data manipulation directly within your dbt project. This code sample
+# illustrates a basic pattern for:
+# 1. Reading data from an existing BigQuery dataset.
+# 2. Processing it using pandas-like DataFrame operations powered by BigFrames.
+# 3. Outputting a cleaned and transformed table, managed by dbt.


 def model(dbt, session):
-    # Optional: override settings from dbt_project.yml. When both are set,
-    # dbt.config takes precedence over dbt_project.yml.
-    # Use BigFrames mode to execute the Python model.
+    # Optional: Override settings from your dbt_project.yml file.
+    # When both are set, dbt.config takes precedence over dbt_project.yml.
+    #
+    # Use `dbt.config(submission_method="bigframes")` to tell dbt to execute
+    # this Python model using BigQuery DataFrames (BigFrames). This allows you
+    # to write pandas-like code that operates directly on BigQuery data
+    # without needing to pull all data into memory.
     dbt.config(submission_method="bigframes")

     # Define the BigQuery table path from which to read data.
@@ -24,7 +35,6 @@ def model(dbt, session):
     columns = ["state_name", "county_name", "date_local", "time_local", "sample_measurement"]

     # Read data from the specified BigQuery table into a BigFrames DataFrame.
-    # BigFrames allows you to interact with BigQuery tables using a pandas-like API.
     df = session.read_gbq(table, columns=columns)

     # Sort the DataFrame by the specified columns. This prepares the data for
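A compact sketch of the end-to-end pattern these comments describe, for context: the table path, the sort step, and the final return below are illustrative assumptions rather than lines from this commit; only the dbt.config call, the column list, and session.read_gbq(...) appear in the hunks above.

def model(dbt, session):
    # Run this Python model with BigQuery DataFrames (BigFrames).
    dbt.config(submission_method="bigframes")

    # Hypothetical source table; the real path sits outside the hunks above.
    table = "bigquery-public-data.epa_historical_air_quality.temperature_hourly_summary"
    columns = ["state_name", "county_name", "date_local", "time_local", "sample_measurement"]

    # Lazily read only the needed columns into a BigFrames DataFrame;
    # the data stays in BigQuery rather than being pulled into local memory.
    df = session.read_gbq(table, columns=columns)

    # Assumed processing step: sort so downstream logic sees a deterministic order.
    df = df.sort_values(columns)

    # dbt materializes the returned DataFrame as a BigQuery table (the default).
    return df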

dbt_bigframes_integration/dbt_sample_project/models/example/dbt_bigframes_code_sample_2.py

Lines changed: 26 additions & 11 deletions
@@ -1,12 +1,16 @@
-# This example demonstrates how to build an incremental model.
-#
-# It applies lightweight, row-level logic to update or insert records into a
-# target BigQuery table. If the target table already exists, dbt will perform a
+# This example demonstrates how to build an **incremental dbt Python model**
+# using BigFrames.
+#
+# Incremental models are essential for efficiently processing large datasets by
+# only transforming new or changed data, rather than reprocessing the entire
+# dataset every time. If the target table already exists, dbt will perform a
 # merge based on the specified unique keys; otherwise, it will create a new
 # table automatically.
 #
-# It also defines and applies a BigFrames UDF to add a descriptive summary
-# column based on temperature data.
+# This model also showcases the definition and application of a **BigFrames
+# User-Defined Function (UDF)** to add a descriptive summary column based on
+# temperature data. BigFrames UDFs allow you to execute custom Python logic
+# directly within BigQuery, leveraging BigQuery's scalability.


 import bigframes.pandas as bpd
@@ -15,20 +19,28 @@ def model(dbt, session):
     # Optional: override settings from dbt_project.yml.
     # When both are set, dbt.config takes precedence over dbt_project.yml.
     dbt.config(
-        # Use BigFrames mode to execute the Python model.
+        # Use BigFrames mode to execute this Python model. This enables
+        # pandas-like operations directly on BigQuery data.
         submission_method="bigframes",
-        # Materialize as an incremental model.
+        # Materialize this model as an 'incremental' table. This tells dbt to
+        # only process new or updated data on subsequent runs.
         materialized='incremental',
         # Use MERGE strategy to update rows during incremental runs.
         incremental_strategy='merge',
-        # Composite key to match existing rows for updates.
+        # Define the composite key that uniquely identifies a row in the
+        # target table. This key is used by the 'merge' strategy to match
+        # existing rows for updates during incremental runs.
         unique_key=["state_name", "county_name", "date_local"],
     )

-    # Reference an upstream dbt model or table as a DataFrame input.
+    # Reference an upstream dbt model or an existing BigQuery table as a
+    # BigFrames DataFrame. It allows you to seamlessly use the output of another
+    # dbt model as input to this one.
     df = dbt.ref("dbt_bigframes_code_sample_1")

     # Define a BigFrames UDF to generate a temperature description.
+    # BigFrames UDFs allow you to define custom Python logic that executes
+    # directly within BigQuery. This is powerful for complex transformations.
     @bpd.udf(dataset='dbt_sample_dataset', name='describe_udf')
     def describe(
         max_temperature: float,
@@ -48,5 +60,8 @@ def describe(
     # Apply the UDF using combine and store the result in a column "describe".
     df["describe"] = df["max_temperature"].combine(df["min_temperature"], describe)

-    # Return the transformed DataFrame as the final dbt model output.
+    # Return the transformed BigFrames DataFrame.
+    # This DataFrame will be the final output of your incremental dbt model.
+    # On subsequent runs, only new or changed rows will be processed and merged
+    # into the target BigQuery table based on the `unique_key`.
     return df
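The body of describe and the aggregation that produces the max_temperature and min_temperature columns fall outside these hunks. The snippet below is a rough standalone illustration of the UDF-plus-combine pattern the comments refer to; the thresholds, wording, and sample data are invented for this sketch, and only the decorator arguments and the combine call come from the diff.

import bigframes.pandas as bpd

# Deploy a managed Python UDF into the dbt_sample_dataset BigQuery dataset.
@bpd.udf(dataset='dbt_sample_dataset', name='describe_udf')
def describe(max_temperature: float, min_temperature: float) -> str:
    # Hypothetical cutoffs, purely for illustration.
    if min_temperature <= 32.0:
        return f"Freezing day: low {min_temperature}F, high {max_temperature}F"
    if max_temperature >= 90.0:
        return f"Hot day: low {min_temperature}F, high {max_temperature}F"
    return f"Mild day: low {min_temperature}F, high {max_temperature}F"

# Tiny in-memory sample standing in for the upstream dbt model's output.
df = bpd.DataFrame({
    "max_temperature": [95.0, 28.0, 70.0],
    "min_temperature": [72.0, 15.0, 50.0],
})

# combine() pairs the two Series row by row and evaluates the UDF in BigQuery.
df["describe"] = df["max_temperature"].combine(df["min_temperature"], describe)

Within the dbt project itself, a command along the lines of dbt run --select dbt_bigframes_code_sample_2 would build the model; on later runs the merge strategy updates rows that match the unique_key instead of appending duplicates.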
