diff --git a/acceptance/bundle/artifacts/issue_3109/output.txt b/acceptance/bundle/artifacts/issue_3109/output.txt index 4daf472fd4..ff6b9968ce 100644 --- a/acceptance/bundle/artifacts/issue_3109/output.txt +++ b/acceptance/bundle/artifacts/issue_3109/output.txt @@ -1,3 +1,7 @@ +Warning: required field "entry_point" is not set + at resources.jobs.job1.tasks[0].python_wheel_task + in src/job1/databricks_job/resources/job1.yaml:16:17 + Building wheels... Uploading dist/my_default_python-0.0.1-py3-none-any.whl... Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle/dev/files... diff --git a/acceptance/bundle/debug/direct/out.stderr.txt b/acceptance/bundle/debug/direct/out.stderr.txt index 2f51e4ddd5..d3e3d47a6a 100644 --- a/acceptance/bundle/debug/direct/out.stderr.txt +++ b/acceptance/bundle/debug/direct/out.stderr.txt @@ -50,6 +50,7 @@ 10:07:59 Debug: Apply pid=12345 mutator=ProcessStaticResources mutator=TranslatePathsDashboards 10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(load_resources) 10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(apply_mutators) +10:07:59 Debug: Apply pid=12345 mutator=validate:required 10:07:59 Debug: Apply pid=12345 mutator=CheckPermissions 10:07:59 Debug: Apply pid=12345 mutator=TranslatePaths 10:07:59 Debug: Apply pid=12345 mutator=PythonWrapperWarning diff --git a/acceptance/bundle/debug/direct/output.txt b/acceptance/bundle/debug/direct/output.txt index 269cfc8203..85294af45b 100644 --- a/acceptance/bundle/debug/direct/output.txt +++ b/acceptance/bundle/debug/direct/output.txt @@ -14,7 +14,7 @@ Validation OK! +>>> [CLI] bundle validate --debug 10:07:59 Info: start pid=12345 version=[DEV_VERSION] args="[CLI], bundle, validate, --debug" 10:07:59 Debug: Found bundle root at [TEST_TMP_DIR] (file [TEST_TMP_DIR]/databricks.yml) pid=12345 -@@ -60,8 +62,4 @@ +@@ -61,8 +63,4 @@ 10:07:59 Debug: Apply pid=12345 mutator=metadata.AnnotateJobs 10:07:59 Debug: Apply pid=12345 mutator=metadata.AnnotatePipelines -10:07:59 Debug: Apply pid=12345 mutator=terraform.Initialize diff --git a/acceptance/bundle/debug/tf/out.stderr.txt b/acceptance/bundle/debug/tf/out.stderr.txt index 0fe8151633..18c6a2cfb5 100644 --- a/acceptance/bundle/debug/tf/out.stderr.txt +++ b/acceptance/bundle/debug/tf/out.stderr.txt @@ -48,6 +48,7 @@ 10:07:59 Debug: Apply pid=12345 mutator=ProcessStaticResources mutator=TranslatePathsDashboards 10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(load_resources) 10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(apply_mutators) +10:07:59 Debug: Apply pid=12345 mutator=validate:required 10:07:59 Debug: Apply pid=12345 mutator=CheckPermissions 10:07:59 Debug: Apply pid=12345 mutator=TranslatePaths 10:07:59 Debug: Apply pid=12345 mutator=PythonWrapperWarning diff --git a/acceptance/bundle/paths/fallback/output.txt b/acceptance/bundle/paths/fallback/output.txt index ee6d2c4acc..6b819fe9c0 100644 --- a/acceptance/bundle/paths/fallback/output.txt +++ b/acceptance/bundle/paths/fallback/output.txt @@ -1,5 +1,10 @@ >>> [CLI] bundle validate -t development -o json +Warning: required field "entry_point" is not set + at resources.jobs.my_job.tasks[2].python_wheel_task + in resources/my_job.yml:33:13 + override_job.yml:29:17 + Warn: path ../src/notebook.py is defined relative to the [TEST_TMP_DIR]/resources directory ([TEST_TMP_DIR]/override_job.yml:9:32). Please update the path to be relative to the file where it is defined. The current value will no longer be valid in the next release. Warn: path ../src/file.py is defined relative to the [TEST_TMP_DIR]/resources directory ([TEST_TMP_DIR]/override_job.yml:13:30). Please update the path to be relative to the file where it is defined. The current value will no longer be valid in the next release. Warn: path ../src/dbt_project is defined relative to the [TEST_TMP_DIR]/resources directory ([TEST_TMP_DIR]/override_job.yml:17:36). Please update the path to be relative to the file where it is defined. The current value will no longer be valid in the next release. @@ -10,6 +15,10 @@ Warn: path ../src/notebook2.py is defined relative to the [TEST_TMP_DIR]/resourc Warn: path ../src/file2.py is defined relative to the [TEST_TMP_DIR]/resources directory ([TEST_TMP_DIR]/override_pipeline.yml:10:23). Please update the path to be relative to the file where it is defined. The current value will no longer be valid in the next release. >>> [CLI] bundle validate -t error +Warning: required field "entry_point" is not set + at resources.jobs.my_job.tasks[2].python_wheel_task + in resources/my_job.yml:33:13 + Error: notebook "resources/this value is overridden" not found. Local notebook references are expected to contain one of the following file extensions: [.py, .r, .scala, .sql, .ipynb] @@ -19,6 +28,6 @@ Workspace: User: [USERNAME] Path: /Workspace/Users/[USERNAME]/.bundle/fallback/error -Found 1 error +Found 1 error and 1 warning Exit code: 1 diff --git a/acceptance/bundle/paths/nominal/output.txt b/acceptance/bundle/paths/nominal/output.txt index 432169fddb..39747336a7 100644 --- a/acceptance/bundle/paths/nominal/output.txt +++ b/acceptance/bundle/paths/nominal/output.txt @@ -1,7 +1,52 @@ >>> [CLI] bundle validate -t development -o json +Warning: required field "entry_point" is not set + at resources.jobs.my_job.tasks[4].python_wheel_task + in resources/my_job.yml:33:13 + override_job.yml:29:17 + +Warning: required field "inputs" is not set + at resources.jobs.my_job.tasks[1].for_each_task + in resources/my_job.yml:47:13 + override_job.yml:45:17 + +Warning: required field "inputs" is not set + at resources.jobs.my_job.tasks[2].for_each_task + in resources/my_job.yml:53:13 + override_job.yml:51:17 + +Warning: required field "task_key" is not set + at resources.jobs.my_job.tasks[1].for_each_task.task + in resources/my_job.yml:48:15 + override_job.yml:46:19 + +Warning: required field "task_key" is not set + at resources.jobs.my_job.tasks[2].for_each_task.task + in resources/my_job.yml:54:15 + override_job.yml:52:19 + >>> [CLI] bundle validate -t error +Warning: required field "entry_point" is not set + at resources.jobs.my_job.tasks[4].python_wheel_task + in resources/my_job.yml:33:13 + +Warning: required field "inputs" is not set + at resources.jobs.my_job.tasks[1].for_each_task + in resources/my_job.yml:47:13 + +Warning: required field "inputs" is not set + at resources.jobs.my_job.tasks[2].for_each_task + in resources/my_job.yml:53:13 + +Warning: required field "task_key" is not set + at resources.jobs.my_job.tasks[1].for_each_task.task + in resources/my_job.yml:48:15 + +Warning: required field "task_key" is not set + at resources.jobs.my_job.tasks[2].for_each_task.task + in resources/my_job.yml:54:15 + Error: notebook "resources/this value is overridden" not found. Local notebook references are expected to contain one of the following file extensions: [.py, .r, .scala, .sql, .ipynb] @@ -11,6 +56,6 @@ Workspace: User: [USERNAME] Path: /Workspace/Users/[USERNAME]/.bundle/nominal/error -Found 1 error +Found 1 error and 5 warnings Exit code: 1 diff --git a/acceptance/bundle/quality_monitor/output.txt b/acceptance/bundle/quality_monitor/output.txt index 8a7f64ef23..278e116606 100644 --- a/acceptance/bundle/quality_monitor/output.txt +++ b/acceptance/bundle/quality_monitor/output.txt @@ -1,5 +1,13 @@ >>> [CLI] bundle validate -o json -t development +Warning: required field "quartz_cron_expression" is not set + at resources.quality_monitors.my_monitor.schedule + in databricks.yml:17:9 + +Warning: required field "timezone_id" is not set + at resources.quality_monitors.my_monitor.schedule + in databricks.yml:17:9 + { "mode": "development", "quality_monitors": { diff --git a/acceptance/bundle/validate/empty_resources/empty_dict/output.txt b/acceptance/bundle/validate/empty_resources/empty_dict/output.txt index 0a70ed17a0..e134fa92e3 100644 --- a/acceptance/bundle/validate/empty_resources/empty_dict/output.txt +++ b/acceptance/bundle/validate/empty_resources/empty_dict/output.txt @@ -35,6 +35,10 @@ } === resources.models.rname === +Warning: required field "name" is not set + at resources.models.rname + in databricks.yml:6:12 + { "models": { "rname": { @@ -54,6 +58,18 @@ } === resources.registered_models.rname === +Warning: required field "catalog_name" is not set + at resources.registered_models.rname + in databricks.yml:6:12 + +Warning: required field "name" is not set + at resources.registered_models.rname + in databricks.yml:6:12 + +Warning: required field "schema_name" is not set + at resources.registered_models.rname + in databricks.yml:6:12 + { "registered_models": { "rname": {} @@ -61,6 +77,18 @@ } === resources.quality_monitors.rname === +Warning: required field "assets_dir" is not set + at resources.quality_monitors.rname + in databricks.yml:6:12 + +Warning: required field "output_schema_name" is not set + at resources.quality_monitors.rname + in databricks.yml:6:12 + +Warning: required field "table_name" is not set + at resources.quality_monitors.rname + in databricks.yml:6:12 + { "quality_monitors": { "rname": {} @@ -68,6 +96,14 @@ } === resources.schemas.rname === +Warning: required field "catalog_name" is not set + at resources.schemas.rname + in databricks.yml:6:12 + +Warning: required field "name" is not set + at resources.schemas.rname + in databricks.yml:6:12 + { "schemas": { "rname": {} @@ -75,6 +111,18 @@ } === resources.volumes.rname === +Warning: required field "catalog_name" is not set + at resources.volumes.rname + in databricks.yml:6:12 + +Warning: required field "name" is not set + at resources.volumes.rname + in databricks.yml:6:12 + +Warning: required field "schema_name" is not set + at resources.volumes.rname + in databricks.yml:6:12 + { "volumes": { "rname": { @@ -102,6 +150,14 @@ } === resources.apps.rname === +Warning: required field "name" is not set + at resources.apps.rname + in databricks.yml:6:12 + +Warning: required field "source_code_path" is not set + at resources.apps.rname + in databricks.yml:6:12 + Error: Missing app source code path in databricks.yml:6:12 @@ -129,6 +185,10 @@ app resource 'rname' is missing required source_code_path field } === resources.secret_scopes.rname === +Warning: required field "name" is not set + at resources.secret_scopes.rname + in databricks.yml:6:12 + { "secret_scopes": { "rname": { diff --git a/acceptance/bundle/validate/empty_resources/with_grants/output.txt b/acceptance/bundle/validate/empty_resources/with_grants/output.txt index df86892dbb..70c438c113 100644 --- a/acceptance/bundle/validate/empty_resources/with_grants/output.txt +++ b/acceptance/bundle/validate/empty_resources/with_grants/output.txt @@ -47,6 +47,10 @@ Warning: unknown field: grants at resources.models.rname in databricks.yml:7:7 +Warning: required field "name" is not set + at resources.models.rname + in databricks.yml:7:7 + { "models": { "rname": { @@ -70,6 +74,18 @@ Warning: unknown field: grants } === resources.registered_models.rname === +Warning: required field "catalog_name" is not set + at resources.registered_models.rname + in databricks.yml:7:7 + +Warning: required field "name" is not set + at resources.registered_models.rname + in databricks.yml:7:7 + +Warning: required field "schema_name" is not set + at resources.registered_models.rname + in databricks.yml:7:7 + { "registered_models": { "rname": { @@ -83,6 +99,18 @@ Warning: unknown field: grants at resources.quality_monitors.rname in databricks.yml:7:7 +Warning: required field "assets_dir" is not set + at resources.quality_monitors.rname + in databricks.yml:7:7 + +Warning: required field "output_schema_name" is not set + at resources.quality_monitors.rname + in databricks.yml:7:7 + +Warning: required field "table_name" is not set + at resources.quality_monitors.rname + in databricks.yml:7:7 + { "quality_monitors": { "rname": {} @@ -90,6 +118,14 @@ Warning: unknown field: grants } === resources.schemas.rname === +Warning: required field "catalog_name" is not set + at resources.schemas.rname + in databricks.yml:7:7 + +Warning: required field "name" is not set + at resources.schemas.rname + in databricks.yml:7:7 + { "schemas": { "rname": { @@ -99,6 +135,18 @@ Warning: unknown field: grants } === resources.volumes.rname === +Warning: required field "catalog_name" is not set + at resources.volumes.rname + in databricks.yml:7:7 + +Warning: required field "name" is not set + at resources.volumes.rname + in databricks.yml:7:7 + +Warning: required field "schema_name" is not set + at resources.volumes.rname + in databricks.yml:7:7 + { "volumes": { "rname": { @@ -139,6 +187,14 @@ Warning: unknown field: grants at resources.apps.rname in databricks.yml:7:7 +Warning: required field "name" is not set + at resources.apps.rname + in databricks.yml:7:7 + +Warning: required field "source_code_path" is not set + at resources.apps.rname + in databricks.yml:7:7 + Error: Missing app source code path in databricks.yml:7:7 @@ -174,6 +230,10 @@ Warning: unknown field: grants at resources.secret_scopes.rname in databricks.yml:7:7 +Warning: required field "name" is not set + at resources.secret_scopes.rname + in databricks.yml:7:7 + { "secret_scopes": { "rname": { diff --git a/acceptance/bundle/validate/empty_resources/with_permissions/output.txt b/acceptance/bundle/validate/empty_resources/with_permissions/output.txt index e5d97196f7..c7af0f9133 100644 --- a/acceptance/bundle/validate/empty_resources/with_permissions/output.txt +++ b/acceptance/bundle/validate/empty_resources/with_permissions/output.txt @@ -35,6 +35,10 @@ } === resources.models.rname === +Warning: required field "name" is not set + at resources.models.rname + in databricks.yml:7:7 + { "models": { "rname": { @@ -58,6 +62,18 @@ Warning: unknown field: permissions at resources.registered_models.rname in databricks.yml:7:7 +Warning: required field "catalog_name" is not set + at resources.registered_models.rname + in databricks.yml:7:7 + +Warning: required field "name" is not set + at resources.registered_models.rname + in databricks.yml:7:7 + +Warning: required field "schema_name" is not set + at resources.registered_models.rname + in databricks.yml:7:7 + { "registered_models": { "rname": {} @@ -69,6 +85,18 @@ Warning: unknown field: permissions at resources.quality_monitors.rname in databricks.yml:7:7 +Warning: required field "assets_dir" is not set + at resources.quality_monitors.rname + in databricks.yml:7:7 + +Warning: required field "output_schema_name" is not set + at resources.quality_monitors.rname + in databricks.yml:7:7 + +Warning: required field "table_name" is not set + at resources.quality_monitors.rname + in databricks.yml:7:7 + { "quality_monitors": { "rname": {} @@ -80,6 +108,14 @@ Warning: unknown field: permissions at resources.schemas.rname in databricks.yml:7:7 +Warning: required field "catalog_name" is not set + at resources.schemas.rname + in databricks.yml:7:7 + +Warning: required field "name" is not set + at resources.schemas.rname + in databricks.yml:7:7 + { "schemas": { "rname": {} @@ -91,6 +127,18 @@ Warning: unknown field: permissions at resources.volumes.rname in databricks.yml:7:7 +Warning: required field "catalog_name" is not set + at resources.volumes.rname + in databricks.yml:7:7 + +Warning: required field "name" is not set + at resources.volumes.rname + in databricks.yml:7:7 + +Warning: required field "schema_name" is not set + at resources.volumes.rname + in databricks.yml:7:7 + { "volumes": { "rname": { @@ -120,6 +168,14 @@ Warning: unknown field: permissions } === resources.apps.rname === +Warning: required field "name" is not set + at resources.apps.rname + in databricks.yml:7:7 + +Warning: required field "source_code_path" is not set + at resources.apps.rname + in databricks.yml:7:7 + Error: Missing app source code path in databricks.yml:7:7 @@ -147,6 +203,10 @@ app resource 'rname' is missing required source_code_path field } === resources.secret_scopes.rname === +Warning: required field "name" is not set + at resources.secret_scopes.rname + in databricks.yml:7:7 + { "secret_scopes": { "rname": { diff --git a/acceptance/bundle/validate/models/missing_name/output.txt b/acceptance/bundle/validate/models/missing_name/output.txt index 4f189effdf..77dfd61284 100644 --- a/acceptance/bundle/validate/models/missing_name/output.txt +++ b/acceptance/bundle/validate/models/missing_name/output.txt @@ -1,7 +1,11 @@ +Warning: required field "name" is not set + at resources.models.mymodel + in databricks.yml:5:14 + Name: test-bundle Target: default Workspace: User: [USERNAME] Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle/default -Validation OK! +Found 1 warning diff --git a/acceptance/bundle/validate/models/user_id/output.txt b/acceptance/bundle/validate/models/user_id/output.txt index 8c0d8db730..95c7ddde0e 100644 --- a/acceptance/bundle/validate/models/user_id/output.txt +++ b/acceptance/bundle/validate/models/user_id/output.txt @@ -2,10 +2,14 @@ Warning: unknown field: user_id at resources.models.mymodel in databricks.yml:7:7 +Warning: required field "name" is not set + at resources.models.mymodel + in databricks.yml:7:7 + Name: test-bundle Target: default Workspace: User: [USERNAME] Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle/default -Found 1 warning +Found 2 warnings diff --git a/acceptance/bundle/validate/presets_name_prefix/output.txt b/acceptance/bundle/validate/presets_name_prefix/output.txt index e81ea99190..986b72a823 100644 --- a/acceptance/bundle/validate/presets_name_prefix/output.txt +++ b/acceptance/bundle/validate/presets_name_prefix/output.txt @@ -3,6 +3,10 @@ name_prefix: "prefix-" >>> [CLI] bundle validate -o json +Warning: required field "catalog_name" is not set + at resources.schemas.schema1 + in databricks.yml:14:7 + { "jobs": { "job1": { @@ -39,6 +43,10 @@ name_prefix: "[prefix]" >>> [CLI] bundle validate -o json +Warning: required field "catalog_name" is not set + at resources.schemas.schema1 + in databricks.yml:14:7 + { "jobs": { "job1": { @@ -75,6 +83,10 @@ name_prefix: "" >>> [CLI] bundle validate -o json +Warning: required field "catalog_name" is not set + at resources.schemas.schema1 + in databricks.yml:14:7 + { "jobs": { "job1": { diff --git a/acceptance/bundle/validate/required/databricks.yml b/acceptance/bundle/validate/required/databricks.yml new file mode 100644 index 0000000000..fbda26fa81 --- /dev/null +++ b/acceptance/bundle/validate/required/databricks.yml @@ -0,0 +1,32 @@ +artifacts: + my_artifact: + files: + - {} + +resources: + # Required field name is missing. + models: + my_model_1: + description: "hello" + + # Empty string should not trigger a warning. + my_model_2: + name: "" + + jobs: + my_job_1: + tasks: + # job_id being 0 should not trigger a warning. + - task_key: "task_key1" + run_job_task: + job_id: 0 + + # job_id not being set should trigger a warning. + - task_key: "task_key2" + run_job_task: + + # Catalog name and schema name are required. + # but are not set. + volumes: + my_volume: + name: "baz" diff --git a/acceptance/bundle/validate/required/out.test.toml b/acceptance/bundle/validate/required/out.test.toml new file mode 100644 index 0000000000..8f3575be7b --- /dev/null +++ b/acceptance/bundle/validate/required/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_CLI_DEPLOYMENT = ["terraform", "direct-exp"] diff --git a/acceptance/bundle/validate/required/output.txt b/acceptance/bundle/validate/required/output.txt new file mode 100644 index 0000000000..6b6d3a144b --- /dev/null +++ b/acceptance/bundle/validate/required/output.txt @@ -0,0 +1,29 @@ + +>>> [CLI] bundle validate +Warning: required field "catalog_name" is not set + at resources.volumes.my_volume + in databricks.yml:23:7 + +Warning: required field "job_id" is not set + at resources.jobs.my_job_1.tasks[1].run_job_task + in databricks.yml:14:25 + +Warning: required field "name" is not set + at resources.models.my_model_1 + in databricks.yml:18:7 + +Warning: required field "schema_name" is not set + at resources.volumes.my_volume + in databricks.yml:23:7 + +Warning: required field "source" is not set + at artifacts.my_artifact.files[0] + in databricks.yml:4:9 + +Name: test-bundle +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle/default + +Found 5 warnings diff --git a/acceptance/bundle/validate/required/script b/acceptance/bundle/validate/required/script new file mode 100644 index 0000000000..5350876150 --- /dev/null +++ b/acceptance/bundle/validate/required/script @@ -0,0 +1 @@ +trace $CLI bundle validate diff --git a/acceptance/bundle/validate/volume_defaults/output.txt b/acceptance/bundle/validate/volume_defaults/output.txt index f128a32958..ca01eb5457 100644 --- a/acceptance/bundle/validate/volume_defaults/output.txt +++ b/acceptance/bundle/validate/volume_defaults/output.txt @@ -1,3 +1,39 @@ +Warning: required field "catalog_name" is not set + at resources.volumes.v3 + in databricks.yml:10:7 + +Warning: required field "catalog_name" is not set + at resources.volumes.v1 + in databricks.yml:6:7 + +Warning: required field "catalog_name" is not set + at resources.volumes.v2 + in databricks.yml:8:7 + +Warning: required field "name" is not set + at resources.volumes.v3 + in databricks.yml:10:7 + +Warning: required field "name" is not set + at resources.volumes.v1 + in databricks.yml:6:7 + +Warning: required field "name" is not set + at resources.volumes.v2 + in databricks.yml:8:7 + +Warning: required field "schema_name" is not set + at resources.volumes.v3 + in databricks.yml:10:7 + +Warning: required field "schema_name" is not set + at resources.volumes.v1 + in databricks.yml:6:7 + +Warning: required field "schema_name" is not set + at resources.volumes.v2 + in databricks.yml:8:7 + { "v1": { "volume_type": "" diff --git a/acceptance/bundle/variables/prepend-workspace-var/output.txt b/acceptance/bundle/variables/prepend-workspace-var/output.txt index bed722a3b3..201dc769af 100644 --- a/acceptance/bundle/variables/prepend-workspace-var/output.txt +++ b/acceptance/bundle/variables/prepend-workspace-var/output.txt @@ -1,4 +1,15 @@ /Workspace should be prepended on all paths, but it is not the case: +Warning: required field "entry_point" is not set + at resources.jobs.my_job.tasks[0].python_wheel_task + in databricks.yml:23:13 + +Warning: required field "name" is not set + at bundle + +Warning: required field "package_name" is not set + at resources.jobs.my_job.tasks[0].python_wheel_task + in databricks.yml:23:13 + { "bundle": { "environment": "dev", diff --git a/bundle/config/validate/required.go b/bundle/config/validate/required.go new file mode 100644 index 0000000000..87cea83532 --- /dev/null +++ b/bundle/config/validate/required.go @@ -0,0 +1,84 @@ +package validate + +import ( + "context" + "fmt" + "slices" + "sort" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/internal/validation/generated" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" +) + +type required struct{} + +func Required() bundle.Mutator { + return &required{} +} + +func (f *required) Name() string { + return "validate:required" +} + +func (f *required) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + diags := diag.Diagnostics{} + + // Generate prefix tree for all required fields. + trie := &dyn.TrieNode{} + for k := range generated.RequiredFields { + pattern, err := dyn.NewPatternFromString(k) + if err != nil { + return diag.FromErr(fmt.Errorf("invalid pattern %q for required field validation: %w", k, err)) + } + + err = trie.Insert(pattern) + if err != nil { + return diag.FromErr(fmt.Errorf("failed to insert pattern %q into trie: %w", k, err)) + } + } + + err := dyn.WalkReadOnly(b.Config.Value(), func(p dyn.Path, v dyn.Value) error { + // If the path is not found in the prefix tree, we do not need to validate any required + // fields in it. + pattern, ok := trie.SearchPath(p) + if !ok { + return nil + } + + cloneP := slices.Clone(p) + + fields := generated.RequiredFields[pattern.String()] + for _, field := range fields { + vv := v.Get(field) + if vv.Kind() == dyn.KindInvalid || vv.Kind() == dyn.KindNil { + diags = diags.Append(diag.Diagnostic{ + Severity: diag.Warning, + Summary: fmt.Sprintf("required field %q is not set", field), + Locations: v.Locations(), + Paths: []dyn.Path{cloneP}, + }) + } + } + return nil + }) + if err != nil { + return diag.FromErr(err) + } + + // Sort diagnostics to make them deterministic + sort.Slice(diags, func(i, j int) bool { + // First sort by summary + if diags[i].Summary != diags[j].Summary { + return diags[i].Summary < diags[j].Summary + } + + // Finally sort by locations as a tie breaker if summaries are the same. + iLocs := fmt.Sprintf("%v", diags[i].Locations) + jLocs := fmt.Sprintf("%v", diags[j].Locations) + return iLocs < jLocs + }) + + return diags +} diff --git a/bundle/internal/bundletest/benchmark.go b/bundle/internal/bundletest/benchmark.go index 8b109f5dc4..6a6b40e4c7 100644 --- a/bundle/internal/bundletest/benchmark.go +++ b/bundle/internal/bundletest/benchmark.go @@ -235,3 +235,29 @@ func BundleV(b *testing.B, numJobs int) dyn.Value { return myBundle.Config.Value() } + +func Bundle(b *testing.B, numJobs int) *bundle.Bundle { + allJobs := map[string]*resources.Job{} + for i := range numJobs { + job := jobs.JobSettings{} + err := json.Unmarshal([]byte(jobExample), &job) + require.NoError(b, err) + + allJobs[strconv.Itoa(i)] = &resources.Job{ + JobSettings: job, + } + } + + myBundle := bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: allJobs, + }, + }, + } + + // Apply noop mutator to initialize the bundle value. + bundle.ApplyFuncContext(context.Background(), &myBundle, func(ctx context.Context, b *bundle.Bundle) {}) + + return &myBundle +} diff --git a/bundle/internal/bundletest/mutator_benchmark_test.go b/bundle/internal/bundletest/mutator_benchmark_test.go new file mode 100644 index 0000000000..035231c7d6 --- /dev/null +++ b/bundle/internal/bundletest/mutator_benchmark_test.go @@ -0,0 +1,74 @@ +package bundletest + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/validate" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" + "github.com/stretchr/testify/assert" +) + +func benchmarkRequiredMutator(b *testing.B, numJobs int) { + myBundle := Bundle(b, numJobs) + + var diags diag.Diagnostics + for b.Loop() { + diags = bundle.Apply(context.Background(), myBundle, validate.Required()) + } + assert.NotEmpty(b, diags) +} + +func benchmarkWalkReadOnlyBaseline(b *testing.B, numJobs int) { + myBundle := Bundle(b, numJobs) + + for b.Loop() { + var paths []dyn.Path + bundle.ApplyFuncContext(context.Background(), myBundle, func(ctx context.Context, b *bundle.Bundle) { + _ = dyn.WalkReadOnly(b.Config.Value(), func(p dyn.Path, v dyn.Value) error { + paths = append(paths, p) + return nil + }) + }) + } +} + +func benchmarkNoopBaseline(b *testing.B, numJobs int) { + myBundle := Bundle(b, numJobs) + + for b.Loop() { + bundle.ApplyFuncContext(context.Background(), myBundle, func(ctx context.Context, b *bundle.Bundle) {}) + } +} + +// This benchmark took 823ms to run on 10th July 2025. +func BenchmarkWalkReadOnlyBaseline(b *testing.B) { + benchmarkWalkReadOnlyBaseline(b, 10000) +} + +// This benchmark took 774ms to run on 10th July 2025. +func BenchmarkNoopBaseline(b *testing.B) { + benchmarkNoopBaseline(b, 10000) +} + +// This benchmark took 996ms to run on 10th July 2025. +func BenchmarkValidateRequired10000(b *testing.B) { + benchmarkRequiredMutator(b, 10000) +} + +// This benchmark took 98ms to run on 10th July 2025. +func BenchmarkValidateRequired1000(b *testing.B) { + benchmarkRequiredMutator(b, 1000) +} + +// This benchmark took 10ms to run on 10th July 2025. +func BenchmarkValidateRequired100(b *testing.B) { + benchmarkRequiredMutator(b, 100) +} + +// This benchmark took 1.1ms to run on 10th July 2025. +func BenchmarkValidateRequired10(b *testing.B) { + benchmarkRequiredMutator(b, 10) +} diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index d499f19de9..597302c38d 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -148,6 +148,10 @@ func Initialize(ctx context.Context, b *bundle.Bundle) { // After PythonMutator, mutators must not change bundle resources, or such changes are not // going to be visible in Python code. + // Validate all required fields are set. This is run after variable interpolation and PyDABs mutators + // since they can also set and modify resources. + validate.Required(), + // Reads (typed): b.Config.Permissions (checks if current user or their groups have CAN_MANAGE permissions) // Reads (typed): b.Config.Workspace.CurrentUser (gets current user information) // Provides diagnostic recommendations if the current deployment identity isn't explicitly granted CAN_MANAGE permissions