databricks · denik · Apr 1, 2025 · Mar 31, 2025 · Mar 31, 2025 · Mar 31, 2025
@@ -106,6 +106,8 @@ jobs:
         with:
           version: "0.9.1"
           args: "format --check"
+      - name: Run whitespace check
+        run: python tools/validate_whitespace.py
 
   validate-bundle-schema:
     needs: cleanups

@@ -2,4 +2,4 @@
     "recommendations": [
         "bierner.markdown-mermaid"
     ]
-}
+}
@@ -0,0 +1,45 @@
+# Files to exempt from whitespace check. Can contain patterns in Python's glob module format.
+# Generated files:
+.codegen/_openapi_sha
+.release_metadata.json
+bundle/schema/jsonschema.json
+experimental/python/docs/images/databricks-logo.svg
+**/*.dist-info/METADATA
+**/*.dist-info/WHEEL
+
+# Binary files:
+**/*.zip
+**/*.whl
+
+# "bundle run" has trailing whitespace:
+acceptance/bundle/integration_whl/*/output.txt
+
+# "bundle init" has trailing whitespace:
+acceptance/bundle/templates-machinery/helpers-error/output.txt
+
+# "bundle deploy" with apps has trailing whitespace:
+acceptance/bundle/apps/config_section/output.txt
+integration/bundle/testdata/apps/bundle_deploy.txt
+
+# Extra whitespace within the default template:
+acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff
+acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.job.yml
+acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.job.yml
+
+# Extra whitespace in command help:
+acceptance/cmd/workspace/apps/output.txt
+
+# Extra whitespace in generated commands:
+cmd/workspace/**/*.go
+cmd/account/**/*.go
+
+# Extra whitespace in generated Python models:
+experimental/python/databricks/bundles/*/_models/*.py
+
+# Developed elsewhere:
+internal/genkit/tagging.py
+
+# Docsgen:
+bundle/docsgen/output/resources.md
+bundle/docsgen/output/reference.md
+bundle/docsgen/testdata/anchors.md
@@ -6,4 +6,3 @@ resources:
     cluster1:
       cluster_name: "DEFAULT Test Cluster"
       spark_version: '16.2.x-scala2.12'
-
@@ -9,7 +9,6 @@ resources:
       cluster_name: "DEFAULT Test Cluster"
       spark_version: '16.2.x-scala2.12'
 
-
 >>> [CLI] clusters get [CLUSTER-ID]
 {
   "cluster_name": "DEFAULT Test Cluster"

@@ -9,4 +9,3 @@ resources:
       embed_credentials: true
       file_path: "sample-dashboard.lvdash.json"
       parent_path: /Users/$CURRENT_USER_NAME
-
@@ -8,4 +8,3 @@ resources:
       warehouse_id: $TEST_DEFAULT_WAREHOUSE_ID
       embed_credentials: true
       file_path: "sample-dashboard.lvdash.json"
-
@@ -24,6 +24,3 @@ trace $CLI bundle deployment unbind dashboard1
 trace $CLI bundle deploy --auto-approve
 
 trace $CLI lakeview get "${DASHBOARD_ID}" | jq '{display_name, lifecycle_state}'
-
-
-
@@ -5,4 +5,3 @@ resources:
   experiments:
     experiment1:
       name: $EXPERIMENT_NAME
-
@@ -1,5 +1,5 @@
 
-=== Bind experiment test: 
+=== Bind experiment test:
 === Substitute variables in the template
 === Create a pre-defined experiment
 === Bind experiment: Updating deployment state...
@@ -27,5 +27,5 @@ Destroy complete!
   "lifecycle_stage": "active"
 }
 
-=== Test cleanup: 
+=== Test cleanup:
 === Delete the pre-defined experiment: 0
@@ -1,4 +1,4 @@
-title "Bind experiment test: "
+title "Bind experiment test:"
 
 title "Substitute variables in the template"
 BUNDLE_NAME_SUFFIX=$(uuid)
@@ -16,7 +16,7 @@ title "Create a pre-defined experiment"
 EXPERIMENT_ID=$($CLI experiments create-experiment "${EXPERIMENT_NAME}" | jq -r '.experiment_id')
 
 cleanupRemoveExperiment() {
-    title "Test cleanup: "
+    title "Test cleanup:"
     title "Delete the pre-defined experiment: "
     $CLI experiments delete-experiment ${EXPERIMENT_ID}
     echo $?

@@ -29,4 +29,4 @@ Response.Body = '''
 Pattern = "POST /api/2.0/mlflow/experiments/update"
 
 [[Server]]
-Pattern = "POST /api/2.0/mlflow/experiments/delete"
+Pattern = "POST /api/2.0/mlflow/experiments/delete"
@@ -7,4 +7,3 @@ resources:
       name: $MODEL_NAME
       catalog_name: $CATALOG_NAME
       schema_name: $SCHEMA_NAME
-
@@ -8,7 +8,6 @@ resources:
       catalog_name: main
       schema_name: test-schema-rmodel-[UUID]
 
-
 >>> [CLI] schemas create test-schema-rmodel-[UUID] main
 {
   "full_name": "main.test-schema-rmodel-[UUID]",

@@ -33,4 +33,3 @@ trace $CLI bundle destroy --auto-approve
 
 # Read the pre-defined model again (expecting it still exists and is not deleted):
 trace $CLI registered-models get "${MODEL_FULL_NAME}" | jq '{full_name, schema_name, name}'
-
@@ -7,4 +7,3 @@ resources:
       name: $SCHEMA_NAME
       catalog_name: main
       comment: This schema was created from DABs
-
@@ -1,6 +1,6 @@
 
-=== Bind schema test: 
-=== Substitute variables in the template: 
+=== Bind schema test:
+=== Substitute variables in the template:
 === Create a pre-defined schema: {
   "full_name": "main.test-schema-[UUID]",
   "catalog_name": "main"
@@ -33,5 +33,5 @@ Destroy complete!
   "comment": "This schema was created from DABs"
 }
 
-=== Test cleanup: 
+=== Test cleanup:
 === Delete the pre-defined schema test-schema-[UUID]: 0
@@ -1,6 +1,6 @@
-title "Bind schema test: "
+title "Bind schema test:"
 
-title "Substitute variables in the template: "
+title "Substitute variables in the template:"
 export BUNDLE_NAME_SUFFIX=$(uuid)
 
 export SCHEMA_NAME="test-schema-$(uuid)"
@@ -14,7 +14,7 @@ CATALOG_NAME=main
 $CLI schemas create ${SCHEMA_NAME} ${CATALOG_NAME} | jq '{full_name, catalog_name}'
 
 cleanupRemoveSchema() {
-    title "Test cleanup: "
+    title "Test cleanup:"
     title "Delete the pre-defined schema ${SCHEMA_NAME}: "
     $CLI schemas delete ${CATALOG_NAME}.${SCHEMA_NAME}
     echo $?

@@ -27,4 +27,4 @@ Response.Body = '''
 Pattern = "PATCH /api/2.1/unity-catalog/schemas/{schema_fullname}"
 
 [[Server]]
-Pattern = "DELETE /api/2.1/unity-catalog/schemas/{schema_fullname}"
+Pattern = "DELETE /api/2.1/unity-catalog/schemas/{schema_fullname}"
@@ -5,7 +5,7 @@
   "catalog_name": "main"
 }
 
-=== Create a pre-defined volume: 
+=== Create a pre-defined volume:
 >>> [CLI] bundle deployment bind volume1 main.test-schema-[UUID].volume-[UUID] --auto-approve
 Updating deployment state...
 Successfully bound volume with an id 'main.test-schema-[UUID].volume-[UUID]'. Run 'bundle deploy' to deploy changes to your workspace

@@ -13,7 +13,7 @@ VOLUME_TYPE="MANAGED"
 
 trace $CLI schemas create "${SCHEMA_NAME}" ${CATALOG_NAME} | jq '{full_name, catalog_name}'
 
-title "Create a pre-defined volume: "
+title "Create a pre-defined volume:"
 VOLUME_FULL_NAME=$($CLI volumes create "${CATALOG_NAME}" "${SCHEMA_NAME}" "${VOLUME_NAME}" "${VOLUME_TYPE}" | jq -r '.full_name')
 
 cleanupRemoveVolume() {

@@ -1,4 +1,3 @@
 include:
   - b.yml
   - c.yml
-
@@ -4,4 +4,4 @@ Hello!
     "userName": "[USERNAME]"
 }
 
-Footer
+Footer
@@ -4,4 +4,4 @@ Hello!
     "userName": "[USERNAME]"
 }
 
-Footer
+Footer
@@ -8,8 +8,8 @@ echo Hello! >> out_dir_a/output.txt
 echo Hello! >> out_dir_b/output.txt
 
 curl -s $DATABRICKS_HOST/api/2.0/preview/scim/v2/Me >> out_dir_a/output.txt
-printf "\n\nFooter" >> out_dir_a/output.txt
-printf '{\n    "id": "7d639bad-ac6d-4e6f-abd7-9522a86b0239",\n    "userName": "[USERNAME]"\n}\n\nFooter' >> out_dir_b/output.txt
+printf "\n\nFooter\n" >> out_dir_a/output.txt
+printf '{\n    "id": "7d639bad-ac6d-4e6f-abd7-9522a86b0239",\n    "userName": "[USERNAME]"\n}\n\nFooter\n' >> out_dir_b/output.txt
 
 # Unlike regular diff, diff.py will apply replacements first before doing the comparison
 trace diff.py out_dir_a out_dir_b

@@ -10,7 +10,7 @@ docs:
 
 lint:
 	# check if lock matches the project metadata
-	uv lock --check 
+	uv lock --check
 
 	uv run ruff check $(sources)
 	uv run pyright

@@ -28,11 +28,11 @@ class Bundle:
     """
     Values of bundle variables resolved for selected target. Bundle variables are defined in databricks.yml.
     For accessing variables as structured data, use :meth:`resolve_variable`.
-    
+
     Example:
-    
+
     .. code-block:: yaml
-    
+
         variables:
           default_dbr_version:
             description: Default version of Databricks Runtime

@@ -1 +1 @@
-Main-Class: PrintArgs
+Main-Class: PrintArgs
@@ -10,4 +10,3 @@ resources:
       name: test-schema-{{.unique_id}}
       catalog_name: main
       comment: This schema was created from DABs
-
@@ -6,4 +6,4 @@ This file only template directives; it is skipped for the actual output.
 
 {{if eq .project_name "dbt"}}
 {{fail "Project name 'dbt' is not supported"}}
-{{end}}
+{{end}}
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+import os
+import re
+import sys
+import glob
+import subprocess
+
+
+def load_ignores():
+    """Return the set of paths exempted from the whitespace check.
+
+    Reads ".wsignore" from the current working directory. Blank lines and
+    lines starting with "#" are skipped; every other line is expanded with
+    glob.glob(..., recursive=True) and all matches are added to the result.
+
+    A pattern that matches nothing is reported on stdout together with its
+    1-based line number. After the whole file has been scanned, the process
+    exits with status 1 if any such pattern was seen.
+    """
+    ignores = set()
+    fail = False
+    # Context manager so the file handle is closed deterministically.
+    with open(".wsignore") as ignore_file:
+        for lineno, line in enumerate(ignore_file, start=1):
+            line = line.strip()
+            if not line or line.startswith("#"):
+                continue
+            expanded = glob.glob(line, recursive=True)
+            if not expanded:
+                print(f".wsignore:{lineno}: No matches for line: {line}")
+                fail = True
+            ignores.update(expanded)
+    if fail:
+        sys.exit(1)
+    return ignores
+
+
+def count_trailing_newlines(s):
+    """Return the number of consecutive newline characters that end ``s``."""
+    without_tail = s.rstrip("\n")
+    return len(s) - len(without_tail)
+
+
+def validate_contents(data):
+    """Yield one message per whitespace problem found in ``data``.
+
+    ``data`` is the raw bytes of a file. Empty input yields nothing. If the
+    bytes cannot be decoded, a single decode-failure message is yielded and
+    iteration stops. Otherwise each line (split on "\\n") is inspected:
+    whitespace-only lines and lines with trailing whitespace are reported
+    with their 1-based line number. Finally the number of newlines ending
+    the text is checked: zero or two-or-more are both reported.
+    """
+    if not data:
+        return
+    try:
+        text = data.decode("utf")
+    except Exception as ex:
+        yield f" Failed to decode utf-8: {ex}"
+        return
+
+    for lineno, content in enumerate(text.split("\n"), start=1):
+        if content == "":
+            continue
+        trimmed = content.rstrip()
+        if trimmed == "":
+            # Nothing but whitespace on this line.
+            yield f"{lineno}: Whitespace-only line"
+        elif trimmed != content:
+            # Only the last 200 characters are shown to keep output short.
+            yield f"{lineno}: Trailing whitespace {content[-200:]!r}"
+
+    trailing = count_trailing_newlines(text)
+
+    if trailing == 0:
+        yield " File does not end with a newline"
+    elif trailing >= 2:
+        yield f" {trailing} newlines at the end"
+
+
+def main():
+    """Check every git-tracked file for whitespace problems.
+
+    Lists tracked files with "git ls-files", skips entries that are not
+    regular files or that load_ignores() exempts, and prints each problem
+    reported by validate_contents() as "<path>:<message>". Unless "-q" is
+    among the command-line arguments, a summary line is written to stderr.
+    Exits with status 1 if any checked file had a problem, otherwise 0.
+    """
+    quiet = "-q" in sys.argv
+    # "-z" makes git emit NUL-terminated, unquoted paths, so names that
+    # contain spaces or non-ASCII characters are neither split apart nor
+    # C-quoted (plain whitespace splitting would silently skip them).
+    listing = subprocess.check_output(["git", "ls-files", "-z"], encoding="utf-8")
+    files = [path for path in listing.split("\0") if path]
+    ignores = load_ignores()
+    n_checked = 0
+    n_skipped = 0
+    n_errored = 0
+    for f in files:
+        if not os.path.isfile(f) or f in ignores:
+            n_skipped += 1
+            continue
+        # Close the handle promptly instead of relying on garbage collection.
+        with open(f, "rb") as fh:
+            data = fh.read()
+        error = False
+        for msg in validate_contents(data):
+            print(f"{f}:{msg}")
+            error = True
+        n_checked += 1
+        n_errored += 1 if error else 0
+
+    if not quiet:
+        sys.stderr.write(f"{n_checked} checked, {n_skipped} skipped, {n_errored} failed.\n")
+    sys.exit(1 if n_errored else 0)
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except BrokenPipeError:
+        # Exit with a failure status rather than a traceback when writing
+        # output fails because the pipe was closed (for example when the
+        # output is piped into a command that stops reading early).
+        sys.exit(1)
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,4 +2,4 @@ @@
         "recommendations": [
             "bierner.markdown-mermaid"
         ]
-    }
+    }
Original file line number	Diff line number	Diff line change
Expand Up		@@ -6,4 +6,3 @@ resources:
		cluster1:
		cluster_name: "DEFAULT Test Cluster"
		spark_version: '16.2.x-scala2.12'
Original file line number	Diff line number	Diff line change
Expand Up		@@ -9,4 +9,3 @@ resources:
		embed_credentials: true
		file_path: "sample-dashboard.lvdash.json"
		parent_path: /Users/$CURRENT_USER_NAME
Original file line number	Diff line number	Diff line change
Expand Up		@@ -8,4 +8,3 @@ resources:
		warehouse_id: $TEST_DEFAULT_WAREHOUSE_ID
		embed_credentials: true
		file_path: "sample-dashboard.lvdash.json"
Original file line number	Diff line number	Diff line change
Expand Up		@@ -24,6 +24,3 @@ trace $CLI bundle deployment unbind dashboard1
		trace $CLI bundle deploy --auto-approve

		trace $CLI lakeview get "${DASHBOARD_ID}" \| jq '{display_name, lifecycle_state}'
Original file line number	Diff line number	Diff line change
Expand Up		@@ -5,4 +5,3 @@ resources:
		experiments:
		experiment1:
		name: $EXPERIMENT_NAME
Original file line number	Diff line number	Diff line change
Expand Up		@@ -7,4 +7,3 @@ resources:
		name: $MODEL_NAME
		catalog_name: $CATALOG_NAME
		schema_name: $SCHEMA_NAME
Original file line number	Diff line number	Diff line change
Expand Up		@@ -33,4 +33,3 @@ trace $CLI bundle destroy --auto-approve

		# Read the pre-defined model again (expecting it still exists and is not deleted):
		trace $CLI registered-models get "${MODEL_FULL_NAME}" \| jq '{full_name, schema_name, name}'
Original file line number	Diff line number	Diff line change
Expand Up		@@ -7,4 +7,3 @@ resources:
		name: $SCHEMA_NAME
		catalog_name: main
		comment: This schema was created from DABs
Original file line number	Diff line number	Diff line change
Expand Up		@@ -10,4 +10,3 @@ resources:
		name: test-schema-{{.unique_id}}
		catalog_name: main
		comment: This schema was created from DABs