Skip to content

Commit 2c6f68b

Browse files
authored
[Python] Initialize and normalize resources before/after PythonMutator (#2590)
## Changes Initialize and normalize resources before/after `PythonMutator`. Create `ResourceProcessor` that applies `initialize` and `normalize` for resources. It should be used by mutators after they have added or modified resources. - when a new resource its normalized and then initialized - each resource is initialized only once - each time a resource is updated, it's normalized Implementation takes a slice of bundle configuration containing added/modified resources and runs a set of mutators on it. After that, only resources are merged back. If initialize or normalize resource mutators change bundle configuration outside if "resources", such changes are discarded. Stacked on top of #2637 ## Why Using `ResourceProcessor,` we fix problems with `PythonMutator` where either input or output wasn't normalized, default configuration wasn't applied, or it was impossible to override the default configuration (e.g., remove the `[dev` prefix). ## Tests Acceptance and unit tests
1 parent 80a10b8 commit 2c6f68b

18 files changed

+660
-180
lines changed

acceptance/bundle/debug/out.stderr.txt

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -34,30 +34,17 @@
3434
10:07:59 Debug: Apply pid=12345 mutator=PrependWorkspacePrefix
3535
10:07:59 Debug: Apply pid=12345 mutator=RewriteWorkspacePrefix
3636
10:07:59 Debug: Apply pid=12345 mutator=SetVariables
37-
10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(load)
38-
10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(init)
39-
10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(load_resources)
40-
10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(apply_mutators)
4137
10:07:59 Debug: Apply pid=12345 mutator=ResolveVariableReferences
4238
10:07:59 Debug: Apply pid=12345 mutator=ResolveResourceReferences
4339
10:07:59 Debug: Apply pid=12345 mutator=ResolveVariableReferences
44-
10:07:59 Debug: Apply pid=12345 mutator=ResolveVariableReferences(resources)
40+
10:07:59 Debug: Apply pid=12345 mutator=ApplyTargetMode
41+
10:07:59 Debug: Apply pid=12345 mutator=ProcessStaticResources
4542
10:07:59 Debug: Apply pid=12345 mutator=NormalizePaths
46-
10:07:59 Debug: Apply pid=12345 mutator=ExpandPipelineGlobPaths
47-
10:07:59 Debug: Apply pid=12345 mutator=MergeJobClusters
48-
10:07:59 Debug: Apply pid=12345 mutator=MergeJobParameters
49-
10:07:59 Debug: Apply pid=12345 mutator=MergeJobTasks
50-
10:07:59 Debug: Apply pid=12345 mutator=MergePipelineClusters
51-
10:07:59 Debug: Apply pid=12345 mutator=MergeApps
52-
10:07:59 Debug: Apply pid=12345 mutator=CaptureSchemaDependency
43+
10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(load)
44+
10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(init)
45+
10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(load_resources)
46+
10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(apply_mutators)
5347
10:07:59 Debug: Apply pid=12345 mutator=CheckPermissions
54-
10:07:59 Debug: Apply pid=12345 mutator=SetRunAs
55-
10:07:59 Debug: Apply pid=12345 mutator=OverrideCompute
56-
10:07:59 Debug: Apply pid=12345 mutator=ConfigureDashboardDefaults
57-
10:07:59 Debug: Apply pid=12345 mutator=ConfigureVolumeDefaults
58-
10:07:59 Debug: Apply pid=12345 mutator=ApplyTargetMode
59-
10:07:59 Debug: Apply pid=12345 mutator=ApplyPresets
60-
10:07:59 Debug: Apply pid=12345 mutator=DefaultQueueing
6148
10:07:59 Debug: Apply pid=12345 mutator=ConfigureWSFS
6249
10:07:59 Debug: Apply pid=12345 mutator=TranslatePaths
6350
10:07:59 Debug: Apply pid=12345 mutator=PythonWrapperWarning
@@ -66,8 +53,6 @@
6653
10:07:59 Debug: Apply pid=12345 mutator=apps.Validate
6754
10:07:59 Debug: Apply pid=12345 mutator=ValidateTargetMode
6855
10:07:59 Debug: Apply pid=12345 mutator=ValidateSharedRootPermissions
69-
10:07:59 Debug: Apply pid=12345 mutator=ApplyBundlePermissions
70-
10:07:59 Debug: Apply pid=12345 mutator=FilterCurrentUserFromPermissions
7156
10:07:59 Debug: Apply pid=12345 mutator=metadata.AnnotateJobs
7257
10:07:59 Debug: Apply pid=12345 mutator=metadata.AnnotatePipelines
7358
10:07:59 Debug: Apply pid=12345 mutator=terraform.Initialize

acceptance/bundle/override/job_tasks/output.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
{
22
"name": "job",
3+
"permissions": [],
34
"queue": {
45
"enabled": true
56
},
@@ -35,6 +36,7 @@ Error: file test1.py not found
3536
Exit code: 1
3637
{
3738
"name": "job",
39+
"permissions": [],
3840
"queue": {
3941
"enabled": true
4042
},

acceptance/bundle/python/resolve-variable/databricks.yml

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,8 @@ variables:
1313
default: "abc"
1414
int_variable:
1515
default: 42
16-
# FIXME doesn't work, resolve variables before running PythonMutator
17-
#
18-
# > cannot interpolate non-string value: ${var.int_variable}
19-
#
20-
# nested_variable:
21-
# default: ${var.string_variable} ${var.int_variable}
16+
nested_variable:
17+
default: ${var.string_variable} ${var.string_variable}
2218
bool_variable_true:
2319
default: true
2420
bool_variable_false:
@@ -31,12 +27,12 @@ variables:
3127
default:
3228
task_key: "abc"
3329
notebook_task:
34-
notebook_path: "cde"
30+
notebook_path: "/Workspace/cde"
3531
complex_list_variable:
3632
default:
3733
- task_key: "abc"
3834
notebook_task:
39-
notebook_path: "cde"
35+
notebook_path: "/Workspace/cde"
4036
- task_key: "def"
4137
notebook_task:
42-
notebook_path: "ghi"
38+
notebook_path: "/Workspace/ghi"

acceptance/bundle/python/resolve-variable/output.txt

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,21 @@
2222
"queue": {
2323
"enabled": true
2424
},
25-
"tags": {}
25+
"tags": {},
26+
"tasks": [
27+
{
28+
"notebook_task": {
29+
"notebook_path": "/Workspace/cde"
30+
},
31+
"task_key": "abc"
32+
},
33+
{
34+
"notebook_task": {
35+
"notebook_path": "/Workspace/ghi"
36+
},
37+
"task_key": "def"
38+
}
39+
]
2640
}
2741
}
2842
}

acceptance/bundle/python/resolve-variable/resources.py

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,8 @@ def load_resources(bundle: Bundle) -> Resources:
2222
int_variable = bundle.resolve_variable(Variables.int_variable)
2323
assert int_variable == 42
2424

25-
# FIXME resolve variables before running PythonMutator
26-
# nested_variable = bundle.resolve_variable(Variables.nested_variable)
27-
# assert nested_variable == "abc 42"
25+
nested_variable = bundle.resolve_variable(Variables.nested_variable)
26+
assert nested_variable == "abc abc"
2827

2928
bool_variable_true = bundle.resolve_variable(Variables.bool_variable_true)
3029
assert bool_variable_true
@@ -39,26 +38,24 @@ def load_resources(bundle: Bundle) -> Resources:
3938
assert dict_variable == {"a": 1, "b": 2}
4039

4140
complex_variable = bundle.resolve_variable(Variables.complex_variable)
42-
assert complex_variable == Task(task_key="abc", notebook_task=NotebookTask(notebook_path="cde"))
41+
assert complex_variable == Task(
42+
task_key="abc",
43+
notebook_task=NotebookTask(notebook_path="/Workspace/cde"),
44+
)
4345

4446
complex_list_variable = bundle.resolve_variable(Variables.complex_list_variable)
4547
print(complex_list_variable)
4648
assert complex_list_variable == [
47-
Task(task_key="abc", notebook_task=NotebookTask(notebook_path="cde")),
48-
Task(task_key="def", notebook_task=NotebookTask(notebook_path="ghi")),
49+
Task(task_key="abc", notebook_task=NotebookTask(notebook_path="/Workspace/cde")),
50+
Task(task_key="def", notebook_task=NotebookTask(notebook_path="/Workspace/ghi")),
4951
]
5052

5153
resources = Resources()
5254
resources.add_job(
5355
"my_job",
5456
Job(
5557
name=Variables.string_variable,
56-
# FIXME we can't output complex variables in place where we override locations
57-
# with bundle root path. See loadOutput in python_mutator.go
58-
#
59-
# > failed to update locations: expected a sequence at "resources.jobs.my_job.tasks", found string
60-
#
61-
# tasks=Variables.complex_list_variable,
58+
tasks=Variables.complex_list_variable,
6259
),
6360
)
6461

bundle/config/mutator/python/python_locations_test.go

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -153,16 +153,6 @@ func TestLoadOutput(t *testing.T) {
153153
name, err := dyn.Get(value, "resources.jobs.my_job.name")
154154
require.NoError(t, err)
155155
require.Equal(t, []dyn.Location{location}, name.Locations())
156-
157-
// until we implement path normalization, we have to keep locations of values
158-
// that change semantic depending on their location
159-
//
160-
// note: it's important to have absolute path including 'bundleRoot'
161-
// because mutator pipeline already has expanded locations into absolute path
162-
notebookPath, err := dyn.Get(value, "resources.jobs.my_job.tasks[0].notebook_task.notebook_path")
163-
require.NoError(t, err)
164-
require.Len(t, notebookPath.Locations(), 1)
165-
require.Equal(t, filepath.Join(bundleRoot, generatedFileName), notebookPath.Locations()[0].File)
166156
}
167157

168158
func TestParsePythonLocations_absolutePath(t *testing.T) {

bundle/config/mutator/python/python_mutator.go

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ import (
1313
"reflect"
1414
"strings"
1515

16-
"github.com/databricks/cli/libs/log"
16+
"github.com/databricks/cli/bundle/config/mutator/resourcemutator"
1717

18-
"github.com/databricks/cli/bundle/config/mutator/paths"
18+
"github.com/databricks/cli/libs/log"
1919

2020
"github.com/databricks/databricks-sdk-go/logger"
2121
"github.com/fatih/color"
@@ -200,6 +200,7 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno
200200

201201
// mutateDiags is used because Mutate returns 'error' instead of 'diag.Diagnostics'
202202
var mutateDiags diag.Diagnostics
203+
var result applyPythonOutputResult
203204
mutateDiagsHasError := errors.New("unexpected error")
204205

205206
err = b.Config.Mutate(func(leftRoot dyn.Value) (dyn.Value, error) {
@@ -224,9 +225,10 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno
224225
return dyn.InvalidValue, mutateDiagsHasError
225226
}
226227

227-
newRoot, result, err := applyPythonOutput(leftRoot, rightRoot)
228+
newRoot, result0, err := applyPythonOutput(leftRoot, rightRoot)
229+
result = result0
228230
if err != nil {
229-
return dyn.InvalidValue, err
231+
return dyn.InvalidValue, fmt.Errorf("internal error when merging output of Python mutator: %w", err)
230232
}
231233

232234
for _, resourceKey := range result.AddedResources.ToArray() {
@@ -261,10 +263,21 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno
261263
panic("mutateDiags has no error, but error is expected")
262264
}
263265

266+
return mutateDiags
267+
} else {
268+
mutateDiags = mutateDiags.Extend(diag.FromErr(err))
269+
}
270+
271+
if mutateDiags.HasError() {
264272
return mutateDiags
265273
}
266274

267-
return mutateDiags.Extend(diag.FromErr(err))
275+
mutateDiags = mutateDiags.Extend(resourcemutator.NormalizeAndInitializeResources(ctx, b, result.AddedResources))
276+
if mutateDiags.HasError() {
277+
return mutateDiags
278+
}
279+
280+
return mutateDiags.Extend(resourcemutator.NormalizeResources(ctx, b, result.UpdatedResources))
268281
}
269282

270283
func createCacheDir(ctx context.Context) (string, error) {
@@ -453,21 +466,6 @@ func loadOutput(rootPath string, outputFile io.Reader, locations *pythonLocation
453466
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to parse output file: %w", err))
454467
}
455468

456-
// paths are resolved relative to locations of their values, if we change location
457-
// we have to update each path, until we simplify that, we don't update locations
458-
// for such values, so we don't change how paths are resolved
459-
//
460-
// we can remove this once we:
461-
// - add variable interpolation before and after PythonMutator
462-
// - implement path normalization (aka path normal form)
463-
_, err = paths.VisitJobPaths(generated, func(p dyn.Path, mode paths.TranslateMode, v dyn.Value) (dyn.Value, error) {
464-
putPythonLocation(locations, p, v.Location())
465-
return v, nil
466-
})
467-
if err != nil {
468-
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err))
469-
}
470-
471469
// generated has dyn.Location as if it comes from generated YAML file
472470
// earlier we loaded locations.json with source locations in Python code
473471
generatedWithLocations, err := mergePythonLocations(generated, locations)

bundle/config/mutator/apply_target_mode.go renamed to bundle/config/mutator/resourcemutator/apply_target_mode.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package mutator
1+
package resourcemutator
22

33
import (
44
"context"

0 commit comments

Comments
 (0)