databricks · pietern · May 14, 2025 · May 13, 2025 · May 13, 2025 · denik
@@ -13,5 +13,6 @@
 * Preserve folder structure for app source code in bundle generate ([#2848](https://github.com/databricks/cli/pull/2848))
 * Fixed normalising requirements file path in dependencies section ([#2861](https://github.com/databricks/cli/pull/2861))
 * Fix default-python template not to add environments when serverless=yes and include\_python=no ([#2866](https://github.com/databricks/cli/pull/2866))
+* Fixed handling of Unicode characters in Python support ([#2873](https://github.com/databricks/cli/pull/2873))
 
 ### API Changes
@@ -0,0 +1,19 @@
+bundle:
+  name: my_project
+
+sync: { paths: [] } # don't need to copy files
+
+experimental:
+  python:
+    resources:
+      - "resources:load_resources" # presumably <module>:<function>; load_resources adds job_2 from Python - confirm
+
+resources:
+  jobs:
+    job_1:
+      name: "🔥🔥🔥" # non-ASCII job name defined directly in YAML
+
+variables:
+  my_variable:
+    default: "my_variable"
+    description: "🔥🔥🔥" # non-ASCII text in a variable description
@@ -0,0 +1,45 @@
+
+>>> uv run --with-requirements requirements-latest.txt --no-cache -q [CLI] bundle validate --output json
+Warning: This is a warning message with unicode characters: 🔥🔥🔥
+
+{
+  "variables": {
+    "my_variable": {
+      "default": "my_variable",
+      "description": "🔥🔥🔥",
+      "value": "my_variable"
+    }
+  },
+  "resources": {
+    "jobs": {
+      "job_1": {
+        "deployment": {
+          "kind": "BUNDLE",
+          "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/my_project/default/state/metadata.json"
+        },
+        "edit_mode": "UI_LOCKED",
+        "format": "MULTI_TASK",
+        "max_concurrent_runs": 1,
+        "name": "🔥🔥🔥",
+        "permissions": [],
+        "queue": {
+          "enabled": true
+        }
+      },
+      "job_2": {
+        "deployment": {
+          "kind": "BUNDLE",
+          "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/my_project/default/state/metadata.json"
+        },
+        "edit_mode": "UI_LOCKED",
+        "format": "MULTI_TASK",
+        "max_concurrent_runs": 1,
+        "name": "🔥🔥🔥",
+        "permissions": [],
+        "queue": {
+          "enabled": true
+        }
+      }
+    }
+  }
+}
@@ -0,0 +1,13 @@
+from databricks.bundles.core import Resources, Diagnostics
+
+
+def load_resources() -> Resources:  # builds the Python-defined resources; presumably the target of "resources:load_resources" - confirm
+    resources = Resources()
+
+    resources.add_job("job_2", {"name": "🔥🔥🔥"})  # job whose name is entirely non-ASCII
+
+    resources.add_diagnostics(
+        Diagnostics.create_warning("This is a warning message with unicode characters: 🔥🔥🔥"),  # warning carrying non-ASCII text
+    )
+
+    return resources
@@ -0,0 +1,6 @@
+echo "$DATABRICKS_BUNDLES_WHEEL" > "requirements-latest.txt"  # requirements file named by --with-requirements in UV_ARGS
+
+trace uv run $UV_ARGS -q $CLI bundle validate --output json | \
+  jq "pick(.variables, .resources)"  # keep only the two sections that contain the Unicode values
+
+rm -fr .databricks __pycache__  # remove these directories (presumably created by the run - confirm)
@@ -0,0 +1,8 @@
+Local = true
+Cloud = false # tests don't interact with APIs
+
+[EnvMatrix]
+UV_ARGS = [
+    # Unicode handling is only fixed in the latest version, so only that version is tested
+    "--with-requirements requirements-latest.txt --no-cache",
+]
@@ -169,7 +169,7 @@ def _apply_mutators_for_type(
 def python_mutator(
     args: _Args,
 ) -> tuple[dict, dict[tuple[str, ...], Location], Diagnostics]:
-    input = json.load(open(args.input))
+    input = json.load(open(args.input, encoding="utf-8"))
     experimental = input.get("experimental", {})
 
     if experimental.get("pydabs", {}) != {}:
@@ -446,14 +446,14 @@ def main(argv: list[str]) -> int:
     logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
     new_bundle, locations, diagnostics = python_mutator(args)
 
-    with open(args.diagnostics, "w") as f:
+    with open(args.diagnostics, "w", encoding="utf-8") as f:
         _write_diagnostics(f, diagnostics)
 
     if locations_path := args.locations:
-        with open(locations_path, "w") as f:
+        with open(locations_path, "w", encoding="utf-8") as f:
             _write_locations(f, locations)
 
-    with open(args.output, "w") as f:
+    with open(args.output, "w", encoding="utf-8") as f:
         _write_output(f, new_bundle)
 
     return 1 if diagnostics.has_error() else 0
@@ -466,12 +466,12 @@ def _write_diagnostics(f: TextIO, diagnostics: Diagnostics) -> None:
         if obj.get("path"):
             obj["path"] = ".".join(obj["path"])
 
-        json.dump(obj, f)
+        json.dump(obj, f, ensure_ascii=False)
         f.write("\n")
 
 
 def _write_output(f: TextIO, bundle: dict) -> None:  # serialize the bundle dict as JSON into f
-    json.dump(bundle, f)
+    json.dump(bundle, f, ensure_ascii=False)  # emit non-ASCII characters as-is instead of \uXXXX escapes
 
 
 def _relativize_locations(
@@ -503,7 +503,7 @@ def _write_locations(f: TextIO, locations: dict[tuple[str, ...], Location]) -> N
     for path, location in locations.items():
         obj = {"path": ".".join(path), **location.as_dict()}
 
-        json.dump(obj, f)
+        json.dump(obj, f, ensure_ascii=False)
         f.write("\n")
 
 

@@ -16,6 +16,7 @@
     _relativize_location,
     _write_diagnostics,
     _write_locations,
+    _write_output,
 )
 from databricks.bundles.core import (
     Bundle,
@@ -100,6 +101,14 @@ def test_write_location():
     )
 
 
+def test_write_output_unicode():  # _write_output must write non-ASCII characters verbatim
+    out = StringIO()
+
+    _write_output(out, {"unicode": "🔥🔥🔥"})
+
+    assert out.getvalue() == '{"unicode": "🔥🔥🔥"}'  # raw emoji expected, not \ud83d\udd25 escape sequences
+
+
 def test_relativize_location():
     file = Path("bar.py").absolute().as_posix()
     location = Location(file=file, line=42, column=1)