From 415631e008d486f99eec9355f6110c4e73ab55d4 Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Tue, 13 May 2025 20:38:20 +0200 Subject: [PATCH 1/2] [Python] Fix unicode support --- .../python/unicode-support/databricks.yml | 19 ++++++++ .../bundle/python/unicode-support/output.txt | 45 +++++++++++++++++++ .../python/unicode-support/resources.py | 13 ++++++ .../bundle/python/unicode-support/script | 6 +++ .../bundle/python/unicode-support/test.toml | 8 ++++ .../python/databricks/bundles/build.py | 14 +++--- .../python/databricks_tests/test_build.py | 9 ++++ 7 files changed, 107 insertions(+), 7 deletions(-) create mode 100644 acceptance/bundle/python/unicode-support/databricks.yml create mode 100644 acceptance/bundle/python/unicode-support/output.txt create mode 100644 acceptance/bundle/python/unicode-support/resources.py create mode 100644 acceptance/bundle/python/unicode-support/script create mode 100644 acceptance/bundle/python/unicode-support/test.toml diff --git a/acceptance/bundle/python/unicode-support/databricks.yml b/acceptance/bundle/python/unicode-support/databricks.yml new file mode 100644 index 0000000000..17a0838231 --- /dev/null +++ b/acceptance/bundle/python/unicode-support/databricks.yml @@ -0,0 +1,19 @@ +bundle: + name: my_project + +sync: { paths: [] } # don't need to copy files + +experimental: + python: + resources: + - "resources:load_resources" + +resources: + jobs: + job_1: + name: "🔥🔥🔥" + +variables: + my_variable: + default: "my_variable" + description: "🔥🔥🔥" diff --git a/acceptance/bundle/python/unicode-support/output.txt b/acceptance/bundle/python/unicode-support/output.txt new file mode 100644 index 0000000000..fc57d570c3 --- /dev/null +++ b/acceptance/bundle/python/unicode-support/output.txt @@ -0,0 +1,45 @@ + +>>> uv run --with-requirements requirements-latest.txt --no-cache -q [CLI] bundle validate --output json +Warning: This is a warning message with unicode characters: 🔥🔥🔥 + +{ + "variables": { + "my_variable": { + "default": "my_variable", + "description": "🔥🔥🔥", + "value": "my_variable" + } + }, + "resources": { + "jobs": { + "job_1": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/my_project/default/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "format": "MULTI_TASK", + "max_concurrent_runs": 1, + "name": "🔥🔥🔥", + "permissions": [], + "queue": { + "enabled": true + } + }, + "job_2": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/my_project/default/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "format": "MULTI_TASK", + "max_concurrent_runs": 1, + "name": "🔥🔥🔥", + "permissions": [], + "queue": { + "enabled": true + } + } + } + } +} diff --git a/acceptance/bundle/python/unicode-support/resources.py b/acceptance/bundle/python/unicode-support/resources.py new file mode 100644 index 0000000000..c925a01ff6 --- /dev/null +++ b/acceptance/bundle/python/unicode-support/resources.py @@ -0,0 +1,13 @@ +from databricks.bundles.core import Resources, Diagnostics + + +def load_resources() -> Resources: + resources = Resources() + + resources.add_job("job_2", {"name": "🔥🔥🔥"}) + + resources.add_diagnostics( + Diagnostics.create_warning("This is a warning message with unicode characters: 🔥🔥🔥"), + ) + + return resources diff --git a/acceptance/bundle/python/unicode-support/script b/acceptance/bundle/python/unicode-support/script new file mode 100644 index 0000000000..dedbee0812 --- /dev/null +++ b/acceptance/bundle/python/unicode-support/script @@ -0,0 +1,6 @@ +echo "$DATABRICKS_BUNDLES_WHEEL" > "requirements-latest.txt" + +trace uv run $UV_ARGS -q $CLI bundle validate --output json | \ + jq "pick(.variables, .resources)" + +rm -fr .databricks __pycache__ diff --git a/acceptance/bundle/python/unicode-support/test.toml b/acceptance/bundle/python/unicode-support/test.toml new file mode 100644 index 0000000000..d0ad877fb5 --- /dev/null +++ b/acceptance/bundle/python/unicode-support/test.toml @@ -0,0 +1,8 @@ +Local = true +Cloud = false # tests don't interact with APIs + +[EnvMatrix] +UV_ARGS = [ + # only fixed in the latest version + "--with-requirements requirements-latest.txt --no-cache", +] diff --git a/experimental/python/databricks/bundles/build.py b/experimental/python/databricks/bundles/build.py index 4d7a119a98..3deadff2f6 100644 --- a/experimental/python/databricks/bundles/build.py +++ b/experimental/python/databricks/bundles/build.py @@ -169,7 +169,7 @@ def _apply_mutators_for_type( def python_mutator( args: _Args, ) -> tuple[dict, dict[tuple[str, ...], Location], Diagnostics]: - input = json.load(open(args.input)) + input = json.load(open(args.input, encoding="utf-8")) experimental = input.get("experimental", {}) if experimental.get("pydabs", {}) != {}: @@ -446,14 +446,14 @@ def main(argv: list[str]) -> int: logging.basicConfig(level=logging.DEBUG, stream=sys.stderr) new_bundle, locations, diagnostics = python_mutator(args) - with open(args.diagnostics, "w") as f: + with open(args.diagnostics, "w", encoding="utf-8") as f: _write_diagnostics(f, diagnostics) if locations_path := args.locations: - with open(locations_path, "w") as f: + with open(locations_path, "w", encoding="utf-8") as f: _write_locations(f, locations) - with open(args.output, "w") as f: + with open(args.output, "w", encoding="utf-8") as f: _write_output(f, new_bundle) return 1 if diagnostics.has_error() else 0 @@ -466,12 +466,12 @@ def _write_diagnostics(f: TextIO, diagnostics: Diagnostics) -> None: if obj.get("path"): obj["path"] = ".".join(obj["path"]) - json.dump(obj, f) + json.dump(obj, f, ensure_ascii=False) f.write("\n") def _write_output(f: TextIO, bundle: dict) -> None: - json.dump(bundle, f) + json.dump(bundle, f, ensure_ascii=False) def _relativize_locations( @@ -503,7 +503,7 @@ def _write_locations(f: TextIO, locations: dict[tuple[str, ...], Location]) -> N for path, location in locations.items(): obj = {"path": ".".join(path), **location.as_dict()} - json.dump(obj, f) + json.dump(obj, f, ensure_ascii=False) f.write("\n") diff --git a/experimental/python/databricks_tests/test_build.py b/experimental/python/databricks_tests/test_build.py index 5419d70d18..c0d57ff6a9 100644 --- a/experimental/python/databricks_tests/test_build.py +++ b/experimental/python/databricks_tests/test_build.py @@ -16,6 +16,7 @@ _relativize_location, _write_diagnostics, _write_locations, + _write_output, ) from databricks.bundles.core import ( Bundle, @@ -100,6 +101,14 @@ def test_write_location(): ) +def test_write_output_unicode(): + out = StringIO() + + _write_output(out, {"unicode": "🔥🔥🔥"}) + + assert out.getvalue() == '{"unicode": "🔥🔥🔥"}' + + def test_relativize_location(): file = Path("bar.py").absolute().as_posix() location = Location(file=file, line=42, column=1) From 69d17de66c66910c2b29de3ae71b4153081a42df Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Tue, 13 May 2025 20:59:20 +0200 Subject: [PATCH 2/2] Add changelog --- NEXT_CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 81fdfe430d..719e4724c6 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -13,5 +13,6 @@ * Preserve folder structure for app source code in bundle generate ([#2848](https://github.com/databricks/cli/pull/2848)) * Fixed normalising requirements file path in dependencies section ([#2861](https://github.com/databricks/cli/pull/2861)) * Fix default-python template not to add environments when serverless=yes and include\_python=no ([#2866](https://github.com/databricks/cli/pull/2866)) +* Fixed handling of Unicode characters in Python support ([#2873](https://github.com/databricks/cli/pull/2873)) ### API Changes