Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEXT_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,6 @@
### Bundles
* Changed logic for resolving `${resources...}` references. Previously this would be done by terraform at deploy time. Now if it references a field that is present in the config, it will be done by DABs during bundle loading ([#3370](https://github.com/databricks/cli/pull/3370))
* Add support for tagging pipelines ([#3086](https://github.com/databricks/cli/pull/3086))
* Add warning for when an invalid value is specified for an enum field ([#3050](https://github.com/databricks/cli/pull/3050))

### API Changes
4 changes: 4 additions & 0 deletions acceptance/bundle/artifacts/shell/invalid/output.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@

>>> [CLI] bundle deploy
Warning: invalid value "invalid" for enum field. Valid values are [bash sh cmd]
at artifacts.my_artifact.executable
in databricks.yml:6:17

Building my_artifact...
Error: invalid is not supported as an artifact executable, options are: bash, sh or cmd

Expand Down
1 change: 1 addition & 0 deletions acceptance/bundle/debug/direct/out.stderr.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(load_resources)
10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(apply_mutators)
10:07:59 Debug: Apply pid=12345 mutator=validate:required
10:07:59 Debug: Apply pid=12345 mutator=validate:enum
10:07:59 Debug: Apply pid=12345 mutator=CheckPermissions
10:07:59 Debug: Apply pid=12345 mutator=TranslatePaths
10:07:59 Debug: Apply pid=12345 mutator=PythonWrapperWarning
Expand Down
2 changes: 1 addition & 1 deletion acceptance/bundle/debug/direct/output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Validation OK!
+>>> [CLI] bundle validate --debug
10:07:59 Info: start pid=12345 version=[DEV_VERSION] args="[CLI], bundle, validate, --debug"
10:07:59 Debug: Found bundle root at [TEST_TMP_DIR] (file [TEST_TMP_DIR]/databricks.yml) pid=12345
@@ -62,8 +64,4 @@
@@ -63,8 +65,4 @@
10:07:59 Debug: Apply pid=12345 mutator=metadata.AnnotateJobs
10:07:59 Debug: Apply pid=12345 mutator=metadata.AnnotatePipelines
-10:07:59 Debug: Apply pid=12345 mutator=terraform.Initialize
Expand Down
1 change: 1 addition & 0 deletions acceptance/bundle/debug/tf/out.stderr.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(load_resources)
10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(apply_mutators)
10:07:59 Debug: Apply pid=12345 mutator=validate:required
10:07:59 Debug: Apply pid=12345 mutator=validate:enum
10:07:59 Debug: Apply pid=12345 mutator=CheckPermissions
10:07:59 Debug: Apply pid=12345 mutator=TranslatePaths
10:07:59 Debug: Apply pid=12345 mutator=PythonWrapperWarning
Expand Down
4 changes: 4 additions & 0 deletions acceptance/bundle/deploy/jobs/task-source/output.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@

>>> [CLI] bundle deploy
Warning: invalid value "github" for enum field. Valid values are [awsCodeCommit azureDevOpsServices bitbucketCloud bitbucketServer gitHub gitHubEnterprise gitLab gitLabEnterpriseEdition]
at resources.jobs.git_job.git_source.git_provider
in databricks.yml:11:23

Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/task-source/default/files...
Deploying resources...
Updating deployment state...
Expand Down
56 changes: 56 additions & 0 deletions acceptance/bundle/validate/enum/databricks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
variables:
my_variable:
type: "complex"
default:
key: "value"

my_variable_invalid:
type: "INVALID_TYPE"
default: "value"

my_variable_type_missing:
default: "value"

my_variable_type_empty:
type: ""
default: "value"

resources:
jobs:
my_job_valid:
tasks:
- task_key: "task1"
# Valid enum value
run_if: "ALL_SUCCESS"
notebook_task:
# Valid enum value
source: "GIT"
notebook_path: "/path/to/notebook"
new_cluster:
# Valid enum values
runtime_engine: "PHOTON"
data_security_mode: "SINGLE_USER"
aws_attributes:
availability: "ON_DEMAND"
ebs_volume_type: "GENERAL_PURPOSE_SSD"
node_type_id: "i3.xlarge"
num_workers: 1

my_job_invalid:
tasks:
- task_key: "task2"
# Invalid enum value - should trigger warning
run_if: "INVALID_CONDITION"
notebook_task:
# Invalid enum value - should trigger warning
source: "INVALID_SOURCE"
notebook_path: "/path/to/notebook"
new_cluster:
# Invalid enum values - should trigger warnings
runtime_engine: "INVALID_ENGINE"
data_security_mode: "INVALID_MODE"
aws_attributes:
availability: "INVALID_AVAILABILITY"
ebs_volume_type: "INVALID_VOLUME_TYPE"
node_type_id: "i3.xlarge"
num_workers: 1
5 changes: 5 additions & 0 deletions acceptance/bundle/validate/enum/out.test.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Local = true
Cloud = false

[EnvMatrix]
DATABRICKS_CLI_DEPLOYMENT = ["terraform", "direct-exp"]
37 changes: 37 additions & 0 deletions acceptance/bundle/validate/enum/output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@

>>> [CLI] bundle validate
Warning: invalid value "INVALID_AVAILABILITY" for enum field. Valid values are [ON_DEMAND SPOT SPOT_WITH_FALLBACK]
at resources.jobs.my_job_invalid.tasks[0].new_cluster.aws_attributes.availability
in databricks.yml:9:29

Warning: invalid value "INVALID_CONDITION" for enum field. Valid values are [ALL_DONE ALL_FAILED ALL_SUCCESS AT_LEAST_ONE_FAILED AT_LEAST_ONE_SUCCESS NONE_FAILED]
at resources.jobs.my_job_invalid.tasks[0].run_if
in databricks.yml:18:19

Warning: invalid value "INVALID_ENGINE" for enum field. Valid values are [NULL PHOTON STANDARD]
at resources.jobs.my_job_invalid.tasks[0].new_cluster.runtime_engine
in databricks.yml:14:29

Warning: invalid value "INVALID_MODE" for enum field. Valid values are [DATA_SECURITY_MODE_AUTO DATA_SECURITY_MODE_DEDICATED DATA_SECURITY_MODE_STANDARD LEGACY_PASSTHROUGH LEGACY_SINGLE_USER LEGACY_SINGLE_USER_STANDARD LEGACY_TABLE_ACL NONE SINGLE_USER USER_ISOLATION]
at resources.jobs.my_job_invalid.tasks[0].new_cluster.data_security_mode
in databricks.yml:11:33

Warning: invalid value "INVALID_SOURCE" for enum field. Valid values are [GIT WORKSPACE]
at resources.jobs.my_job_invalid.tasks[0].notebook_task.source
in databricks.yml:17:21

Warning: invalid value "INVALID_TYPE" for enum field. Valid values are [complex]
at variables.my_variable_invalid.type
in databricks.yml:42:11

Warning: invalid value "INVALID_VOLUME_TYPE" for enum field. Valid values are [GENERAL_PURPOSE_SSD THROUGHPUT_OPTIMIZED_HDD]
at resources.jobs.my_job_invalid.tasks[0].new_cluster.aws_attributes.ebs_volume_type
in databricks.yml:10:32

Name: test-bundle
Target: default
Workspace:
User: [USERNAME]
Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle/default

Found 7 warnings
1 change: 1 addition & 0 deletions acceptance/bundle/validate/enum/script
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
trace $CLI bundle validate
8 changes: 8 additions & 0 deletions acceptance/bundle/validate/volume_defaults/output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ Warning: required field "schema_name" is not set
at resources.volumes.v2
in databricks.yml:8:7

Warning: invalid value "" for enum field. Valid values are [EXTERNAL MANAGED]
at resources.volumes.v1.volume_type
in databricks.yml:6:20

Warning: invalid value "already-set" for enum field. Valid values are [EXTERNAL MANAGED]
at resources.volumes.v2.volume_type
in databricks.yml:8:20

{
"v1": {
"volume_type": ""
Expand Down
95 changes: 95 additions & 0 deletions bundle/config/validate/enum.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package validate

import (
"context"
"fmt"
"slices"
"sort"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/internal/validation/generated"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
)

type enum struct{}

func Enum() bundle.Mutator {
return &enum{}
}

func (f *enum) Name() string {
return "validate:enum"
}

func (f *enum) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
diags := diag.Diagnostics{}

// Generate prefix tree for all enum fields.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curious how much time does it take. Can we add debug message with stats? "Prefix tree with 125 nodes generated in 125ms"

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same for the actual validation below.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated the PR to include benchmarks for this. The times are pretty comparable to the required validation, with a really big bundle taking around 800ms.

The prefix tree itself takes 50 microseconds to generate.

We also now have actual hard data for per mutator times. From what I recall, the required validation rarely takes long enough to even register in the minimum 1ms threshold. We can monitor actual validation times that customers see to determine if this is something we need to optimize.

trie := &dyn.TrieNode{}
for k := range generated.EnumFields {
pattern, err := dyn.NewPatternFromString(k)
if err != nil {
return diag.FromErr(fmt.Errorf("invalid pattern %q for enum field validation: %w", k, err))
}

err = trie.Insert(pattern)
if err != nil {
return diag.FromErr(fmt.Errorf("failed to insert pattern %q into trie: %w", k, err))
}
}

err := dyn.WalkReadOnly(b.Config.Value(), func(p dyn.Path, v dyn.Value) error {
// If the path is not found in the prefix tree, we do not need to validate any enum
// fields in it.
pattern, ok := trie.SearchPath(p)
if !ok {
return nil
}

// Get the string value for comparison
strValue, ok := v.AsString()
if !ok {
return nil
}

// Get valid values for this pattern
validValues := generated.EnumFields[pattern.String()]

// Check if the value is in the list of valid enum values
validValue := slices.Contains(validValues, strValue)

if !validValue {
// p is a slice of path components. We need to clone it before using it in diagnostics
// since the WalkReadOnly function will mutate it while walking the config tree.
cloneP := slices.Clone(p)

diags = diags.Append(diag.Diagnostic{
Severity: diag.Warning,
Summary: fmt.Sprintf("invalid value %q for enum field. Valid values are %v", strValue, validValues),
Locations: v.Locations(),
Paths: []dyn.Path{cloneP},
})
}

return nil
})
if err != nil {
return diag.FromErr(err)
}

// Sort diagnostics to make them deterministic
sort.Slice(diags, func(i, j int) bool {
// First sort by summary
if diags[i].Summary != diags[j].Summary {
return diags[i].Summary < diags[j].Summary
}

// Then sort by locations as a tie breaker if summaries are the same.
iLocs := fmt.Sprintf("%v", diags[i].Locations)
jLocs := fmt.Sprintf("%v", diags[j].Locations)
return iLocs < jLocs
})

return diags
}
5 changes: 5 additions & 0 deletions bundle/internal/bundletest/benchmark.go
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,11 @@ func Bundle(b *testing.B, numJobs int) *bundle.Bundle {

myBundle := bundle.Bundle{
Config: config.Root{
Artifacts: config.Artifacts{
"artifact.whl": {
Type: "unsupported_type",
},
},
Resources: config.Resources{
Jobs: allJobs,
},
Expand Down
31 changes: 31 additions & 0 deletions bundle/internal/bundletest/benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package bundletest
import (
"testing"

"github.com/databricks/cli/bundle/internal/validation/generated"
"github.com/databricks/cli/libs/dyn"
"github.com/stretchr/testify/assert"
)
Expand Down Expand Up @@ -30,3 +31,33 @@ func BenchmarkWalk(b *testing.B) {
assert.NoError(b, err)
}
}

// This took 49 microseconds to run on 6th Aug 2025.
func BenchmarkEnumPrefixTree(b *testing.B) {
for b.Loop() {
// Generate prefix tree for all enum fields.
trie := &dyn.TrieNode{}
for k := range generated.EnumFields {
pattern, err := dyn.NewPatternFromString(k)
assert.NoError(b, err)

err = trie.Insert(pattern)
assert.NoError(b, err)
}
}
}

// This took 15 microseconds to run on 6th Aug 2025.
func BenchmarkRequiredPrefixTree(b *testing.B) {
for b.Loop() {
// Generate prefix tree for all required fields.
trie := &dyn.TrieNode{}
for k := range generated.RequiredFields {
pattern, err := dyn.NewPatternFromString(k)
assert.NoError(b, err)

err = trie.Insert(pattern)
assert.NoError(b, err)
}
}
}
20 changes: 20 additions & 0 deletions bundle/internal/bundletest/mutator_benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,16 @@ func benchmarkRequiredMutator(b *testing.B, numJobs int) {
assert.NotEmpty(b, diags)
}

func benchmarkEnumMutator(b *testing.B, numJobs int) {
myBundle := Bundle(b, numJobs)

var diags diag.Diagnostics
for b.Loop() {
diags = bundle.Apply(context.Background(), myBundle, validate.Enum())
}
assert.NotEmpty(b, diags)
}

func benchmarkWalkReadOnlyBaseline(b *testing.B, numJobs int) {
myBundle := Bundle(b, numJobs)

Expand Down Expand Up @@ -72,3 +82,13 @@ func BenchmarkValidateRequired100(b *testing.B) {
func BenchmarkValidateRequired10(b *testing.B) {
benchmarkRequiredMutator(b, 10)
}

// This benchmark took 840ms to run on 6th Aug 2025.
func BenchmarkValidateEnum10000(b *testing.B) {
benchmarkEnumMutator(b, 10000)
}

// This benchmark took 84ms to run on 6th Aug 2025.
func BenchmarkValidateEnum1000(b *testing.B) {
benchmarkEnumMutator(b, 1000)
}
3 changes: 3 additions & 0 deletions bundle/phases/initialize.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,9 @@ func Initialize(ctx context.Context, b *bundle.Bundle) {
// since they can also set and modify resources.
validate.Required(),

// Validate that all fields with enum values specified are set to a valid value.
validate.Enum(),

// Reads (typed): b.Config.Permissions (checks if current user or their groups have CAN_MANAGE permissions)
// Reads (typed): b.Config.Workspace.CurrentUser (gets current user information)
// Provides diagnostic recommendations if the current deployment identity isn't explicitly granted CAN_MANAGE permissions
Expand Down
Loading