diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 7ae78034fd..03ba1c2551 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -13,5 +13,6 @@ ### Bundles * Changed logic for resolving `${resources...}` references. Previously this would be done by terraform at deploy time. Now if it references a field that is present in the config, it will be done by DABs during bundle loading ([#3370](https://github.com/databricks/cli/pull/3370)) * Add support for tagging pipelines ([#3086](https://github.com/databricks/cli/pull/3086)) +* Add warning for when an invalid value is specified for an enum field ([#3050](https://github.com/databricks/cli/pull/3050)) ### API Changes diff --git a/acceptance/bundle/artifacts/shell/invalid/output.txt b/acceptance/bundle/artifacts/shell/invalid/output.txt index 60a69cda00..e9fa6ad465 100644 --- a/acceptance/bundle/artifacts/shell/invalid/output.txt +++ b/acceptance/bundle/artifacts/shell/invalid/output.txt @@ -1,5 +1,9 @@ >>> [CLI] bundle deploy +Warning: invalid value "invalid" for enum field. Valid values are [bash sh cmd] + at artifacts.my_artifact.executable + in databricks.yml:6:17 + Building my_artifact... Error: invalid is not supported as an artifact executable, options are: bash, sh or cmd diff --git a/acceptance/bundle/debug/direct/out.stderr.txt b/acceptance/bundle/debug/direct/out.stderr.txt index 06cc0754e7..8aacf837a4 100644 --- a/acceptance/bundle/debug/direct/out.stderr.txt +++ b/acceptance/bundle/debug/direct/out.stderr.txt @@ -52,6 +52,7 @@ 10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(load_resources) 10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(apply_mutators) 10:07:59 Debug: Apply pid=12345 mutator=validate:required +10:07:59 Debug: Apply pid=12345 mutator=validate:enum 10:07:59 Debug: Apply pid=12345 mutator=CheckPermissions 10:07:59 Debug: Apply pid=12345 mutator=TranslatePaths 10:07:59 Debug: Apply pid=12345 mutator=PythonWrapperWarning diff --git a/acceptance/bundle/debug/direct/output.txt b/acceptance/bundle/debug/direct/output.txt index 26a86de7b0..ddb7505f89 100644 --- a/acceptance/bundle/debug/direct/output.txt +++ b/acceptance/bundle/debug/direct/output.txt @@ -14,7 +14,7 @@ Validation OK! +>>> [CLI] bundle validate --debug 10:07:59 Info: start pid=12345 version=[DEV_VERSION] args="[CLI], bundle, validate, --debug" 10:07:59 Debug: Found bundle root at [TEST_TMP_DIR] (file [TEST_TMP_DIR]/databricks.yml) pid=12345 -@@ -62,8 +64,4 @@ +@@ -63,8 +65,4 @@ 10:07:59 Debug: Apply pid=12345 mutator=metadata.AnnotateJobs 10:07:59 Debug: Apply pid=12345 mutator=metadata.AnnotatePipelines -10:07:59 Debug: Apply pid=12345 mutator=terraform.Initialize diff --git a/acceptance/bundle/debug/tf/out.stderr.txt b/acceptance/bundle/debug/tf/out.stderr.txt index 12b5439696..99c0d4688f 100644 --- a/acceptance/bundle/debug/tf/out.stderr.txt +++ b/acceptance/bundle/debug/tf/out.stderr.txt @@ -50,6 +50,7 @@ 10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(load_resources) 10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(apply_mutators) 10:07:59 Debug: Apply pid=12345 mutator=validate:required +10:07:59 Debug: Apply pid=12345 mutator=validate:enum 10:07:59 Debug: Apply pid=12345 mutator=CheckPermissions 10:07:59 Debug: Apply pid=12345 mutator=TranslatePaths 10:07:59 Debug: Apply pid=12345 mutator=PythonWrapperWarning diff --git a/acceptance/bundle/deploy/jobs/task-source/output.txt b/acceptance/bundle/deploy/jobs/task-source/output.txt index 56bce29363..09995ca089 100644 --- a/acceptance/bundle/deploy/jobs/task-source/output.txt +++ b/acceptance/bundle/deploy/jobs/task-source/output.txt @@ -1,5 +1,9 @@ >>> [CLI] bundle deploy +Warning: invalid value "github" for enum field. Valid values are [awsCodeCommit azureDevOpsServices bitbucketCloud bitbucketServer gitHub gitHubEnterprise gitLab gitLabEnterpriseEdition] + at resources.jobs.git_job.git_source.git_provider + in databricks.yml:11:23 + Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/task-source/default/files... Deploying resources... Updating deployment state... diff --git a/acceptance/bundle/validate/enum/databricks.yml b/acceptance/bundle/validate/enum/databricks.yml new file mode 100644 index 0000000000..19a8df8b34 --- /dev/null +++ b/acceptance/bundle/validate/enum/databricks.yml @@ -0,0 +1,56 @@ +variables: + my_variable: + type: "complex" + default: + key: "value" + + my_variable_invalid: + type: "INVALID_TYPE" + default: "value" + + my_variable_type_missing: + default: "value" + + my_variable_type_empty: + type: "" + default: "value" + +resources: + jobs: + my_job_valid: + tasks: + - task_key: "task1" + # Valid enum value + run_if: "ALL_SUCCESS" + notebook_task: + # Valid enum value + source: "GIT" + notebook_path: "/path/to/notebook" + new_cluster: + # Valid enum values + runtime_engine: "PHOTON" + data_security_mode: "SINGLE_USER" + aws_attributes: + availability: "ON_DEMAND" + ebs_volume_type: "GENERAL_PURPOSE_SSD" + node_type_id: "i3.xlarge" + num_workers: 1 + + my_job_invalid: + tasks: + - task_key: "task2" + # Invalid enum value - should trigger warning + run_if: "INVALID_CONDITION" + notebook_task: + # Invalid enum value - should trigger warning + source: "INVALID_SOURCE" + notebook_path: "/path/to/notebook" + new_cluster: + # Invalid enum values - should trigger warnings + runtime_engine: "INVALID_ENGINE" + data_security_mode: "INVALID_MODE" + aws_attributes: + availability: "INVALID_AVAILABILITY" + ebs_volume_type: "INVALID_VOLUME_TYPE" + node_type_id: "i3.xlarge" + num_workers: 1 diff --git a/acceptance/bundle/validate/enum/out.test.toml b/acceptance/bundle/validate/enum/out.test.toml new file mode 100644 index 0000000000..8f3575be7b --- /dev/null +++ b/acceptance/bundle/validate/enum/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_CLI_DEPLOYMENT = ["terraform", "direct-exp"] diff --git a/acceptance/bundle/validate/enum/output.txt b/acceptance/bundle/validate/enum/output.txt new file mode 100644 index 0000000000..2943e29b3f --- /dev/null +++ b/acceptance/bundle/validate/enum/output.txt @@ -0,0 +1,37 @@ + +>>> [CLI] bundle validate +Warning: invalid value "INVALID_AVAILABILITY" for enum field. Valid values are [ON_DEMAND SPOT SPOT_WITH_FALLBACK] + at resources.jobs.my_job_invalid.tasks[0].new_cluster.aws_attributes.availability + in databricks.yml:9:29 + +Warning: invalid value "INVALID_CONDITION" for enum field. Valid values are [ALL_DONE ALL_FAILED ALL_SUCCESS AT_LEAST_ONE_FAILED AT_LEAST_ONE_SUCCESS NONE_FAILED] + at resources.jobs.my_job_invalid.tasks[0].run_if + in databricks.yml:18:19 + +Warning: invalid value "INVALID_ENGINE" for enum field. Valid values are [NULL PHOTON STANDARD] + at resources.jobs.my_job_invalid.tasks[0].new_cluster.runtime_engine + in databricks.yml:14:29 + +Warning: invalid value "INVALID_MODE" for enum field. Valid values are [DATA_SECURITY_MODE_AUTO DATA_SECURITY_MODE_DEDICATED DATA_SECURITY_MODE_STANDARD LEGACY_PASSTHROUGH LEGACY_SINGLE_USER LEGACY_SINGLE_USER_STANDARD LEGACY_TABLE_ACL NONE SINGLE_USER USER_ISOLATION] + at resources.jobs.my_job_invalid.tasks[0].new_cluster.data_security_mode + in databricks.yml:11:33 + +Warning: invalid value "INVALID_SOURCE" for enum field. Valid values are [GIT WORKSPACE] + at resources.jobs.my_job_invalid.tasks[0].notebook_task.source + in databricks.yml:17:21 + +Warning: invalid value "INVALID_TYPE" for enum field. Valid values are [complex] + at variables.my_variable_invalid.type + in databricks.yml:42:11 + +Warning: invalid value "INVALID_VOLUME_TYPE" for enum field. Valid values are [GENERAL_PURPOSE_SSD THROUGHPUT_OPTIMIZED_HDD] + at resources.jobs.my_job_invalid.tasks[0].new_cluster.aws_attributes.ebs_volume_type + in databricks.yml:10:32 + +Name: test-bundle +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle/default + +Found 7 warnings diff --git a/acceptance/bundle/validate/enum/script b/acceptance/bundle/validate/enum/script new file mode 100644 index 0000000000..5350876150 --- /dev/null +++ b/acceptance/bundle/validate/enum/script @@ -0,0 +1 @@ +trace $CLI bundle validate diff --git a/acceptance/bundle/validate/volume_defaults/output.txt b/acceptance/bundle/validate/volume_defaults/output.txt index ca01eb5457..60cd668c7d 100644 --- a/acceptance/bundle/validate/volume_defaults/output.txt +++ b/acceptance/bundle/validate/volume_defaults/output.txt @@ -34,6 +34,14 @@ Warning: required field "schema_name" is not set at resources.volumes.v2 in databricks.yml:8:7 +Warning: invalid value "" for enum field. Valid values are [EXTERNAL MANAGED] + at resources.volumes.v1.volume_type + in databricks.yml:6:20 + +Warning: invalid value "already-set" for enum field. Valid values are [EXTERNAL MANAGED] + at resources.volumes.v2.volume_type + in databricks.yml:8:20 + { "v1": { "volume_type": "" diff --git a/bundle/config/validate/enum.go b/bundle/config/validate/enum.go new file mode 100644 index 0000000000..0dacaf116c --- /dev/null +++ b/bundle/config/validate/enum.go @@ -0,0 +1,95 @@ +package validate + +import ( + "context" + "fmt" + "slices" + "sort" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/internal/validation/generated" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" +) + +type enum struct{} + +func Enum() bundle.Mutator { + return &enum{} +} + +func (f *enum) Name() string { + return "validate:enum" +} + +func (f *enum) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + diags := diag.Diagnostics{} + + // Generate prefix tree for all enum fields. + trie := &dyn.TrieNode{} + for k := range generated.EnumFields { + pattern, err := dyn.NewPatternFromString(k) + if err != nil { + return diag.FromErr(fmt.Errorf("invalid pattern %q for enum field validation: %w", k, err)) + } + + err = trie.Insert(pattern) + if err != nil { + return diag.FromErr(fmt.Errorf("failed to insert pattern %q into trie: %w", k, err)) + } + } + + err := dyn.WalkReadOnly(b.Config.Value(), func(p dyn.Path, v dyn.Value) error { + // If the path is not found in the prefix tree, we do not need to validate any enum + // fields in it. + pattern, ok := trie.SearchPath(p) + if !ok { + return nil + } + + // Get the string value for comparison + strValue, ok := v.AsString() + if !ok { + return nil + } + + // Get valid values for this pattern + validValues := generated.EnumFields[pattern.String()] + + // Check if the value is in the list of valid enum values + validValue := slices.Contains(validValues, strValue) + + if !validValue { + // p is a slice of path components. We need to clone it before using it in diagnostics + // since the WalkReadOnly function will mutate it while walking the config tree. + cloneP := slices.Clone(p) + + diags = diags.Append(diag.Diagnostic{ + Severity: diag.Warning, + Summary: fmt.Sprintf("invalid value %q for enum field. Valid values are %v", strValue, validValues), + Locations: v.Locations(), + Paths: []dyn.Path{cloneP}, + }) + } + + return nil + }) + if err != nil { + return diag.FromErr(err) + } + + // Sort diagnostics to make them deterministic + sort.Slice(diags, func(i, j int) bool { + // First sort by summary + if diags[i].Summary != diags[j].Summary { + return diags[i].Summary < diags[j].Summary + } + + // Then sort by locations as a tie breaker if summaries are the same. + iLocs := fmt.Sprintf("%v", diags[i].Locations) + jLocs := fmt.Sprintf("%v", diags[j].Locations) + return iLocs < jLocs + }) + + return diags +} diff --git a/bundle/internal/bundletest/benchmark.go b/bundle/internal/bundletest/benchmark.go index 6a6b40e4c7..1041995756 100644 --- a/bundle/internal/bundletest/benchmark.go +++ b/bundle/internal/bundletest/benchmark.go @@ -250,6 +250,11 @@ func Bundle(b *testing.B, numJobs int) *bundle.Bundle { myBundle := bundle.Bundle{ Config: config.Root{ + Artifacts: config.Artifacts{ + "artifact.whl": { + Type: "unsupported_type", + }, + }, Resources: config.Resources{ Jobs: allJobs, }, diff --git a/bundle/internal/bundletest/benchmark_test.go b/bundle/internal/bundletest/benchmark_test.go index 05999daf6f..75b121e095 100644 --- a/bundle/internal/bundletest/benchmark_test.go +++ b/bundle/internal/bundletest/benchmark_test.go @@ -3,6 +3,7 @@ package bundletest import ( "testing" + "github.com/databricks/cli/bundle/internal/validation/generated" "github.com/databricks/cli/libs/dyn" "github.com/stretchr/testify/assert" ) @@ -30,3 +31,33 @@ func BenchmarkWalk(b *testing.B) { assert.NoError(b, err) } } + +// This took 49 microseconds to run on 6th Aug 2025. +func BenchmarkEnumPrefixTree(b *testing.B) { + for b.Loop() { + // Generate prefix tree for all enum fields. + trie := &dyn.TrieNode{} + for k := range generated.EnumFields { + pattern, err := dyn.NewPatternFromString(k) + assert.NoError(b, err) + + err = trie.Insert(pattern) + assert.NoError(b, err) + } + } +} + +// This took 15 microseconds to run on 6th Aug 2025. +func BenchmarkRequiredPrefixTree(b *testing.B) { + for b.Loop() { + // Generate prefix tree for all required fields. + trie := &dyn.TrieNode{} + for k := range generated.RequiredFields { + pattern, err := dyn.NewPatternFromString(k) + assert.NoError(b, err) + + err = trie.Insert(pattern) + assert.NoError(b, err) + } + } +} diff --git a/bundle/internal/bundletest/mutator_benchmark_test.go b/bundle/internal/bundletest/mutator_benchmark_test.go index 035231c7d6..d49f644623 100644 --- a/bundle/internal/bundletest/mutator_benchmark_test.go +++ b/bundle/internal/bundletest/mutator_benchmark_test.go @@ -21,6 +21,16 @@ func benchmarkRequiredMutator(b *testing.B, numJobs int) { assert.NotEmpty(b, diags) } +func benchmarkEnumMutator(b *testing.B, numJobs int) { + myBundle := Bundle(b, numJobs) + + var diags diag.Diagnostics + for b.Loop() { + diags = bundle.Apply(context.Background(), myBundle, validate.Enum()) + } + assert.NotEmpty(b, diags) +} + func benchmarkWalkReadOnlyBaseline(b *testing.B, numJobs int) { myBundle := Bundle(b, numJobs) @@ -72,3 +82,13 @@ func BenchmarkValidateRequired100(b *testing.B) { func BenchmarkValidateRequired10(b *testing.B) { benchmarkRequiredMutator(b, 10) } + +// This benchmark took 840ms to run on 6th Aug 2025. +func BenchmarkValidateEnum10000(b *testing.B) { + benchmarkEnumMutator(b, 10000) +} + +// This benchmark took 84ms to run on 6th Aug 2025. +func BenchmarkValidateEnum1000(b *testing.B) { + benchmarkEnumMutator(b, 1000) +} diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 909a454a71..c916b667cf 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -149,6 +149,9 @@ func Initialize(ctx context.Context, b *bundle.Bundle) { // since they can also set and modify resources. validate.Required(), + // Validate that all fields with enum values specified are set to a valid value. + validate.Enum(), + // Reads (typed): b.Config.Permissions (checks if current user or their groups have CAN_MANAGE permissions) // Reads (typed): b.Config.Workspace.CurrentUser (gets current user information) // Provides diagnostic recommendations if the current deployment identity isn't explicitly granted CAN_MANAGE permissions