diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index ab904a2215..b5db7f79bb 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -106,6 +106,8 @@ jobs: with: version: "0.9.1" args: "format --check" + - name: Run whitespace check + run: python tools/validate_whitespace.py validate-bundle-schema: needs: cleanups diff --git a/.vscode/extensions.json b/.vscode/extensions.json index 9bf6938707..92cb5a1e90 100644 --- a/.vscode/extensions.json +++ b/.vscode/extensions.json @@ -2,4 +2,4 @@ "recommendations": [ "bierner.markdown-mermaid" ] -} \ No newline at end of file +} diff --git a/.wsignore b/.wsignore new file mode 100644 index 0000000000..0de8777938 --- /dev/null +++ b/.wsignore @@ -0,0 +1,45 @@ +# Files to exempt from whitespace check. Can contain patterns in Python's glob module format. +# Generated files: +.codegen/_openapi_sha +.release_metadata.json +bundle/schema/jsonschema.json +experimental/python/docs/images/databricks-logo.svg +**/*.dist-info/METADATA +**/*.dist-info/WHEEL + +# Binary files +**/*.zip +**/*.whl + +# "bundle run" has trailing whitespace: +acceptance/bundle/integration_whl/*/output.txt + +# "bundle init" has trailing whitespace: +acceptance/bundle/templates-machinery/helpers-error/output.txt + +# "bundle deploy" with apps has trailing whitespace: +acceptance/bundle/apps/config_section/output.txt +integration/bundle/testdata/apps/bundle_deploy.txt + +# Extra whitespace within the default template: +acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff +acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.job.yml +acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.job.yml + +# Extra whitespace in command help: +acceptance/cmd/workspace/apps/output.txt + +# Extra whitespace in generated commands: +cmd/workspace/**/*.go +cmd/account/**/*.go + +# Extra whitespace in 
generated Python models +experimental/python/databricks/bundles/*/_models/*.py + +# Developed elsewhere: +internal/genkit/tagging.py + +# Docsgen: +bundle/docsgen/output/resources.md +bundle/docsgen/output/reference.md +bundle/docsgen/testdata/anchors.md diff --git a/acceptance/bundle/deployment/bind/cluster/databricks.yml.tmpl b/acceptance/bundle/deployment/bind/cluster/databricks.yml.tmpl index 6206f0ad31..c470f3be41 100644 --- a/acceptance/bundle/deployment/bind/cluster/databricks.yml.tmpl +++ b/acceptance/bundle/deployment/bind/cluster/databricks.yml.tmpl @@ -6,4 +6,3 @@ resources: cluster1: cluster_name: "DEFAULT Test Cluster" spark_version: '16.2.x-scala2.12' - diff --git a/acceptance/bundle/deployment/bind/cluster/output.txt b/acceptance/bundle/deployment/bind/cluster/output.txt index 6510019470..64e6aec123 100644 --- a/acceptance/bundle/deployment/bind/cluster/output.txt +++ b/acceptance/bundle/deployment/bind/cluster/output.txt @@ -9,7 +9,6 @@ resources: cluster_name: "DEFAULT Test Cluster" spark_version: '16.2.x-scala2.12' - >>> [CLI] clusters get [CLUSTER-ID] { "cluster_name": "DEFAULT Test Cluster" diff --git a/acceptance/bundle/deployment/bind/dashboard/databricks.yml.tmpl b/acceptance/bundle/deployment/bind/dashboard/databricks.yml.tmpl index d0541fda3a..86db67bcd3 100644 --- a/acceptance/bundle/deployment/bind/dashboard/databricks.yml.tmpl +++ b/acceptance/bundle/deployment/bind/dashboard/databricks.yml.tmpl @@ -9,4 +9,3 @@ resources: embed_credentials: true file_path: "sample-dashboard.lvdash.json" parent_path: /Users/$CURRENT_USER_NAME - diff --git a/acceptance/bundle/deployment/bind/dashboard/recreation/databricks.yml.tmpl b/acceptance/bundle/deployment/bind/dashboard/recreation/databricks.yml.tmpl index ab2971880b..f30be2b622 100644 --- a/acceptance/bundle/deployment/bind/dashboard/recreation/databricks.yml.tmpl +++ b/acceptance/bundle/deployment/bind/dashboard/recreation/databricks.yml.tmpl @@ -8,4 +8,3 @@ resources: warehouse_id: 
$TEST_DEFAULT_WAREHOUSE_ID embed_credentials: true file_path: "sample-dashboard.lvdash.json" - diff --git a/acceptance/bundle/deployment/bind/dashboard/recreation/script b/acceptance/bundle/deployment/bind/dashboard/recreation/script index e2467a6504..8fc4825323 100644 --- a/acceptance/bundle/deployment/bind/dashboard/recreation/script +++ b/acceptance/bundle/deployment/bind/dashboard/recreation/script @@ -24,6 +24,3 @@ trace $CLI bundle deployment unbind dashboard1 trace $CLI bundle deploy --auto-approve trace $CLI lakeview get "${DASHBOARD_ID}" | jq '{display_name, lifecycle_state}' - - - diff --git a/acceptance/bundle/deployment/bind/experiment/databricks.yml b/acceptance/bundle/deployment/bind/experiment/databricks.yml index c77da2d887..a718b4f965 100644 --- a/acceptance/bundle/deployment/bind/experiment/databricks.yml +++ b/acceptance/bundle/deployment/bind/experiment/databricks.yml @@ -5,4 +5,3 @@ resources: experiments: experiment1: name: $EXPERIMENT_NAME - diff --git a/acceptance/bundle/deployment/bind/experiment/output.txt b/acceptance/bundle/deployment/bind/experiment/output.txt index 65dff7911f..77c08e9de0 100644 --- a/acceptance/bundle/deployment/bind/experiment/output.txt +++ b/acceptance/bundle/deployment/bind/experiment/output.txt @@ -1,5 +1,5 @@ -=== Bind experiment test: +=== Bind experiment test: === Substitute variables in the template === Create a pre-defined experiment === Bind experiment: Updating deployment state... @@ -27,5 +27,5 @@ Destroy complete! 
"lifecycle_stage": "active" } -=== Test cleanup: +=== Test cleanup: === Delete the pre-defined experiment: 0 diff --git a/acceptance/bundle/deployment/bind/experiment/script b/acceptance/bundle/deployment/bind/experiment/script index 46a631aea0..1fe18aaf99 100644 --- a/acceptance/bundle/deployment/bind/experiment/script +++ b/acceptance/bundle/deployment/bind/experiment/script @@ -1,4 +1,4 @@ -title "Bind experiment test: " +title "Bind experiment test:" title "Substitute variables in the template" BUNDLE_NAME_SUFFIX=$(uuid) @@ -16,7 +16,7 @@ title "Create a pre-defined experiment" EXPERIMENT_ID=$($CLI experiments create-experiment "${EXPERIMENT_NAME}" | jq -r '.experiment_id') cleanupRemoveExperiment() { - title "Test cleanup: " + title "Test cleanup:" title "Delete the pre-defined experiment: " $CLI experiments delete-experiment ${EXPERIMENT_ID} echo $? diff --git a/acceptance/bundle/deployment/bind/experiment/test.toml b/acceptance/bundle/deployment/bind/experiment/test.toml index 4bc8834719..5f52a75f15 100644 --- a/acceptance/bundle/deployment/bind/experiment/test.toml +++ b/acceptance/bundle/deployment/bind/experiment/test.toml @@ -29,4 +29,4 @@ Response.Body = ''' Pattern = "POST /api/2.0/mlflow/experiments/update" [[Server]] -Pattern = "POST /api/2.0/mlflow/experiments/delete" \ No newline at end of file +Pattern = "POST /api/2.0/mlflow/experiments/delete" diff --git a/acceptance/bundle/deployment/bind/registered-model/databricks.yml.tmpl b/acceptance/bundle/deployment/bind/registered-model/databricks.yml.tmpl index 31116962ad..911b894e55 100644 --- a/acceptance/bundle/deployment/bind/registered-model/databricks.yml.tmpl +++ b/acceptance/bundle/deployment/bind/registered-model/databricks.yml.tmpl @@ -7,4 +7,3 @@ resources: name: $MODEL_NAME catalog_name: $CATALOG_NAME schema_name: $SCHEMA_NAME - diff --git a/acceptance/bundle/deployment/bind/registered-model/output.txt b/acceptance/bundle/deployment/bind/registered-model/output.txt index 
03ee2dd73d..c27cd4d1d9 100644 --- a/acceptance/bundle/deployment/bind/registered-model/output.txt +++ b/acceptance/bundle/deployment/bind/registered-model/output.txt @@ -8,7 +8,6 @@ resources: catalog_name: main schema_name: test-schema-rmodel-[UUID] - >>> [CLI] schemas create test-schema-rmodel-[UUID] main { "full_name": "main.test-schema-rmodel-[UUID]", diff --git a/acceptance/bundle/deployment/bind/registered-model/script b/acceptance/bundle/deployment/bind/registered-model/script index 01e6243916..eb90a73898 100644 --- a/acceptance/bundle/deployment/bind/registered-model/script +++ b/acceptance/bundle/deployment/bind/registered-model/script @@ -33,4 +33,3 @@ trace $CLI bundle destroy --auto-approve # Read the pre-defined model again (expecting it still exists and is not deleted): trace $CLI registered-models get "${MODEL_FULL_NAME}" | jq '{full_name, schema_name, name}' - diff --git a/acceptance/bundle/deployment/bind/schema/databricks.yml b/acceptance/bundle/deployment/bind/schema/databricks.yml index e8e0f150cd..ed5cc2811f 100644 --- a/acceptance/bundle/deployment/bind/schema/databricks.yml +++ b/acceptance/bundle/deployment/bind/schema/databricks.yml @@ -7,4 +7,3 @@ resources: name: $SCHEMA_NAME catalog_name: main comment: This schema was created from DABs - diff --git a/acceptance/bundle/deployment/bind/schema/output.txt b/acceptance/bundle/deployment/bind/schema/output.txt index 1a6b9701f9..6475626bbd 100644 --- a/acceptance/bundle/deployment/bind/schema/output.txt +++ b/acceptance/bundle/deployment/bind/schema/output.txt @@ -1,6 +1,6 @@ -=== Bind schema test: -=== Substitute variables in the template: +=== Bind schema test: +=== Substitute variables in the template: === Create a pre-defined schema: { "full_name": "main.test-schema-[UUID]", "catalog_name": "main" @@ -33,5 +33,5 @@ Destroy complete! 
"comment": "This schema was created from DABs" } -=== Test cleanup: +=== Test cleanup: === Delete the pre-defined schema test-schema-[UUID]: 0 diff --git a/acceptance/bundle/deployment/bind/schema/script b/acceptance/bundle/deployment/bind/schema/script index 10763fa811..d13ec0037b 100644 --- a/acceptance/bundle/deployment/bind/schema/script +++ b/acceptance/bundle/deployment/bind/schema/script @@ -1,6 +1,6 @@ -title "Bind schema test: " +title "Bind schema test:" -title "Substitute variables in the template: " +title "Substitute variables in the template:" export BUNDLE_NAME_SUFFIX=$(uuid) export SCHEMA_NAME="test-schema-$(uuid)" @@ -14,7 +14,7 @@ CATALOG_NAME=main $CLI schemas create ${SCHEMA_NAME} ${CATALOG_NAME} | jq '{full_name, catalog_name}' cleanupRemoveSchema() { - title "Test cleanup: " + title "Test cleanup:" title "Delete the pre-defined schema ${SCHEMA_NAME}: " $CLI schemas delete ${CATALOG_NAME}.${SCHEMA_NAME} echo $? diff --git a/acceptance/bundle/deployment/bind/schema/test.toml b/acceptance/bundle/deployment/bind/schema/test.toml index ff8321da2e..f1320225dc 100644 --- a/acceptance/bundle/deployment/bind/schema/test.toml +++ b/acceptance/bundle/deployment/bind/schema/test.toml @@ -27,4 +27,4 @@ Response.Body = ''' Pattern = "PATCH /api/2.1/unity-catalog/schemas/{schema_fullname}" [[Server]] -Pattern = "DELETE /api/2.1/unity-catalog/schemas/{schema_fullname}" \ No newline at end of file +Pattern = "DELETE /api/2.1/unity-catalog/schemas/{schema_fullname}" diff --git a/acceptance/bundle/deployment/bind/volume/output.txt b/acceptance/bundle/deployment/bind/volume/output.txt index 716d010f11..3bce635fbc 100644 --- a/acceptance/bundle/deployment/bind/volume/output.txt +++ b/acceptance/bundle/deployment/bind/volume/output.txt @@ -5,7 +5,7 @@ "catalog_name": "main" } -=== Create a pre-defined volume: +=== Create a pre-defined volume: >>> [CLI] bundle deployment bind volume1 main.test-schema-[UUID].volume-[UUID] --auto-approve Updating deployment state... 
Successfully bound volume with an id 'main.test-schema-[UUID].volume-[UUID]'. Run 'bundle deploy' to deploy changes to your workspace diff --git a/acceptance/bundle/deployment/bind/volume/script b/acceptance/bundle/deployment/bind/volume/script index 2dbcbdc7e2..83d58fcf21 100644 --- a/acceptance/bundle/deployment/bind/volume/script +++ b/acceptance/bundle/deployment/bind/volume/script @@ -13,7 +13,7 @@ VOLUME_TYPE="MANAGED" trace $CLI schemas create "${SCHEMA_NAME}" ${CATALOG_NAME} | jq '{full_name, catalog_name}' -title "Create a pre-defined volume: " +title "Create a pre-defined volume:" VOLUME_FULL_NAME=$($CLI volumes create "${CATALOG_NAME}" "${SCHEMA_NAME}" "${VOLUME_NAME}" "${VOLUME_TYPE}" | jq -r '.full_name') cleanupRemoveVolume() { diff --git a/acceptance/bundle/includes/include_outside_root/a.yml b/acceptance/bundle/includes/include_outside_root/a.yml index b4addb99b2..0e51e0b026 100644 --- a/acceptance/bundle/includes/include_outside_root/a.yml +++ b/acceptance/bundle/includes/include_outside_root/a.yml @@ -1,4 +1,3 @@ include: - b.yml - c.yml - diff --git a/acceptance/selftest/diff/out_dir_a/output.txt b/acceptance/selftest/diff/out_dir_a/output.txt index 303c1867b4..be98b796b6 100644 --- a/acceptance/selftest/diff/out_dir_a/output.txt +++ b/acceptance/selftest/diff/out_dir_a/output.txt @@ -4,4 +4,4 @@ Hello! "userName": "[USERNAME]" } -Footer \ No newline at end of file +Footer diff --git a/acceptance/selftest/diff/out_dir_b/output.txt b/acceptance/selftest/diff/out_dir_b/output.txt index f4f01af139..28734470b9 100644 --- a/acceptance/selftest/diff/out_dir_b/output.txt +++ b/acceptance/selftest/diff/out_dir_b/output.txt @@ -4,4 +4,4 @@ Hello! "userName": "[USERNAME]" } -Footer \ No newline at end of file +Footer diff --git a/acceptance/selftest/diff/script b/acceptance/selftest/diff/script index bf499f3d75..23f0791309 100644 --- a/acceptance/selftest/diff/script +++ b/acceptance/selftest/diff/script @@ -8,8 +8,8 @@ echo Hello! 
>> out_dir_a/output.txt echo Hello! >> out_dir_b/output.txt curl -s $DATABRICKS_HOST/api/2.0/preview/scim/v2/Me >> out_dir_a/output.txt -printf "\n\nFooter" >> out_dir_a/output.txt -printf '{\n "id": "7d639bad-ac6d-4e6f-abd7-9522a86b0239",\n "userName": "[USERNAME]"\n}\n\nFooter' >> out_dir_b/output.txt +printf "\n\nFooter\n" >> out_dir_a/output.txt +printf '{\n "id": "7d639bad-ac6d-4e6f-abd7-9522a86b0239",\n "userName": "[USERNAME]"\n}\n\nFooter\n' >> out_dir_b/output.txt # Unlike regular diff, diff.py will apply replacements first before doing the comparison trace diff.py out_dir_a out_dir_b diff --git a/experimental/python/Makefile b/experimental/python/Makefile index fb7341166e..0d69f7ddc4 100644 --- a/experimental/python/Makefile +++ b/experimental/python/Makefile @@ -10,7 +10,7 @@ docs: lint: # check if lock matches the project metadata - uv lock --check + uv lock --check uv run ruff check $(sources) uv run pyright diff --git a/experimental/python/databricks/bundles/core/_bundle.py b/experimental/python/databricks/bundles/core/_bundle.py index b5a25c073c..fc33f215ef 100644 --- a/experimental/python/databricks/bundles/core/_bundle.py +++ b/experimental/python/databricks/bundles/core/_bundle.py @@ -28,11 +28,11 @@ class Bundle: """ Values of bundle variables resolved for selected target. Bundle variables are defined in databricks.yml. For accessing variables as structured data, use :meth:`resolve_variable`. - + Example: - + .. 
code-block:: yaml - + variables: default_dbr_version: description: Default version of Databricks Runtime diff --git a/integration/bundle/bundles/spark_jar_task/template/{{.project_name}}/META-INF/MANIFEST.MF b/integration/bundle/bundles/spark_jar_task/template/{{.project_name}}/META-INF/MANIFEST.MF index 40b023dbd6..c292de340a 100644 --- a/integration/bundle/bundles/spark_jar_task/template/{{.project_name}}/META-INF/MANIFEST.MF +++ b/integration/bundle/bundles/spark_jar_task/template/{{.project_name}}/META-INF/MANIFEST.MF @@ -1 +1 @@ -Main-Class: PrintArgs \ No newline at end of file +Main-Class: PrintArgs diff --git a/integration/bundle/bundles/uc_schema_only/template/databricks.yml.tmpl b/integration/bundle/bundles/uc_schema_only/template/databricks.yml.tmpl index 39c64f12bc..fe629cfc68 100644 --- a/integration/bundle/bundles/uc_schema_only/template/databricks.yml.tmpl +++ b/integration/bundle/bundles/uc_schema_only/template/databricks.yml.tmpl @@ -10,4 +10,3 @@ resources: name: test-schema-{{.unique_id}} catalog_name: main comment: This schema was created from DABs - diff --git a/libs/template/templates/dbt-sql/template/__preamble.tmpl b/libs/template/templates/dbt-sql/template/__preamble.tmpl index b770b5ef94..9d6d4e5027 100644 --- a/libs/template/templates/dbt-sql/template/__preamble.tmpl +++ b/libs/template/templates/dbt-sql/template/__preamble.tmpl @@ -6,4 +6,4 @@ This file only template directives; it is skipped for the actual output. 
#!/usr/bin/env python3
"""Check every git-tracked file for whitespace problems.

The following problems are reported, one per output line, as ``<path>:<message>``:

* lines that consist of whitespace only,
* lines with trailing whitespace,
* files that do not end with a newline,
* files that end with two or more newlines,
* files that cannot be decoded as UTF-8.

Paths matched by the glob patterns listed in ``.wsignore`` are exempt.
Pass ``-q`` to suppress the summary line written to stderr.

The exit status is 1 when at least one file has a problem or when a
``.wsignore`` pattern matches nothing, and 0 otherwise.
"""
import os
import re
import sys
import glob
import subprocess


# Run of newlines anchored at the very end of the text. ``\Z`` (unlike ``$``)
# never matches *before* a final newline, so the intent is unambiguous.
_TRAILING_NEWLINES_RE = re.compile(r"\n+\Z")


def load_ignores():
    """Return the set of paths exempted from the check by ``.wsignore``.

    Blank lines and lines starting with ``#`` are skipped. Every other line is
    expanded with :func:`glob.glob` (``recursive=True``, so ``**`` is
    supported). A pattern that matches nothing is reported on stdout with its
    line number; after the whole file has been read the process exits with
    status 1 if any such pattern was seen, so stale entries get cleaned up.

    Returns:
        set[str]: matched paths, using ``/`` as the separator.
    """
    ignores = set()
    fail = False
    with open(".wsignore", encoding="utf-8") as ignore_file:
        for lineno, raw_line in enumerate(ignore_file, start=1):
            pattern = raw_line.strip()
            if not pattern or pattern.startswith("#"):
                continue
            expanded = glob.glob(pattern, recursive=True)
            if not expanded:
                print(f".wsignore:{lineno}: No matches for line: {pattern}")
                fail = True
            # glob joins components with the OS separator, while git always
            # prints "/"; normalize so membership tests in main() also work
            # on Windows. This is a no-op where os.sep is already "/".
            ignores.update(path.replace(os.sep, "/") for path in expanded)
    if fail:
        sys.exit(1)
    return ignores


def count_trailing_newlines(s):
    """Return how many consecutive ``"\\n"`` characters terminate ``s``."""
    match = _TRAILING_NEWLINES_RE.search(s)
    return len(match.group()) if match else 0


def validate_contents(data):
    """Yield a message for every whitespace problem found in ``data``.

    Args:
        data (bytes): raw file contents. Empty input yields nothing.

    Yields:
        str: per-line messages are prefixed with the 1-based line number
        (``"3: Whitespace-only line"``); file-level messages start with a
        space so that they read naturally after ``"<path>:"``.
    """
    if not data:
        return
    try:
        text = data.decode("utf-8")
    except UnicodeDecodeError as ex:
        yield f" Failed to decode utf-8: {ex}"
        return

    for lineno, line in enumerate(text.split("\n"), start=1):
        if not line:
            # Truly empty lines are fine; only the end of file is restricted,
            # and that is handled below.
            continue
        if not line.strip():
            yield f"{lineno}: Whitespace-only line"
        elif line.rstrip() != line:
            # Show at most the last 200 characters to keep the output readable.
            yield f"{lineno}: Trailing whitespace {line[-200:]!r}"

    newlines = count_trailing_newlines(text)
    if newlines == 0:
        yield " File does not end with a newline"
    elif newlines >= 2:
        yield f" {newlines} newlines at the end"


def main():
    """Validate all files listed by ``git ls-files`` and exit with a status.

    Entries that are not regular files on disk and entries listed in
    ``.wsignore`` are counted as skipped. Problems are printed to stdout;
    unless ``-q`` is given, a summary goes to stderr.
    """
    quiet = "-q" in sys.argv
    # -z: NUL-separated, unquoted output. Splitting on whitespace would break
    # paths containing spaces, and without -z git C-quotes unusual names, so
    # such files would fail the isfile() test and be skipped silently.
    listing = subprocess.check_output(["git", "ls-files", "-z"], encoding="utf-8")
    files = [name for name in listing.split("\0") if name]
    ignores = load_ignores()
    n_checked = 0
    n_skipped = 0
    n_errored = 0
    for f in files:
        if not os.path.isfile(f) or f in ignores:
            n_skipped += 1
            continue
        with open(f, "rb") as fobj:
            data = fobj.read()
        error = False
        for msg in validate_contents(data):
            print(f"{f}:{msg}")
            error = True
        n_checked += 1
        if error:
            n_errored += 1

    if not quiet:
        sys.stderr.write(f"{n_checked} checked, {n_skipped} skipped, {n_errored} failed.\n")
    sys.exit(1 if n_errored else 0)


if __name__ == "__main__":
    try:
        main()
    except BrokenPipeError:
        # Output was piped into something that closed early (e.g. `head`).
        sys.exit(1)