From 30934a30188c72ef14c5861fd7beecfc39434ecc Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Fri, 8 Aug 2025 13:54:01 +0200 Subject: [PATCH] [Python] Add volumes support --- NEXT_CHANGELOG.md | 3 + .../python/volumes-support/databricks.yml | 18 ++++ .../bundle/python/volumes-support/mutators.py | 11 +++ .../python/volumes-support/out.test.toml | 6 ++ .../bundle/python/volumes-support/output.txt | 30 ++++++ .../python/volumes-support/resources.py | 16 ++++ .../bundle/python/volumes-support/script | 6 ++ .../bundle/python/volumes-support/test.toml | 8 ++ .../python/codegen/codegen/packages.py | 2 + .../databricks/bundles/core/__init__.py | 4 +- .../bundles/core/_resource_mutator.py | 33 +++++++ .../databricks/bundles/core/_resource_type.py | 6 ++ .../databricks/bundles/core/_resources.py | 44 +++++++++ .../databricks/bundles/volumes/__init__.py | 25 +++++ .../bundles/volumes/_models/volume.py | 96 +++++++++++++++++++ .../bundles/volumes/_models/volume_grant.py | 40 ++++++++ .../volumes/_models/volume_grant_privilege.py | 16 ++++ .../bundles/volumes/_models/volume_type.py | 14 +++ .../databricks_tests/core/test_resources.py | 19 ++++ 19 files changed, 396 insertions(+), 1 deletion(-) create mode 100644 acceptance/bundle/python/volumes-support/databricks.yml create mode 100644 acceptance/bundle/python/volumes-support/mutators.py create mode 100644 acceptance/bundle/python/volumes-support/out.test.toml create mode 100644 acceptance/bundle/python/volumes-support/output.txt create mode 100644 acceptance/bundle/python/volumes-support/resources.py create mode 100644 acceptance/bundle/python/volumes-support/script create mode 100644 acceptance/bundle/python/volumes-support/test.toml create mode 100644 experimental/python/databricks/bundles/volumes/__init__.py create mode 100644 experimental/python/databricks/bundles/volumes/_models/volume.py create mode 100644 experimental/python/databricks/bundles/volumes/_models/volume_grant.py create mode 100644 
experimental/python/databricks/bundles/volumes/_models/volume_grant_privilege.py create mode 100644 experimental/python/databricks/bundles/volumes/_models/volume_type.py diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index d00c200725..d53d3a2816 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -2,6 +2,9 @@ ## Release v0.266.0 +### Notable Changes +* Add support for volumes in Python ([#3383](https://github.com/databricks/cli/pull/3383)) + ### CLI ### Dependency updates diff --git a/acceptance/bundle/python/volumes-support/databricks.yml b/acceptance/bundle/python/volumes-support/databricks.yml new file mode 100644 index 0000000000..93fb3c698e --- /dev/null +++ b/acceptance/bundle/python/volumes-support/databricks.yml @@ -0,0 +1,18 @@ +bundle: + name: my_project + +sync: {paths: []} # don't need to copy files + +experimental: + python: + resources: + - "resources:load_resources" + mutators: + - "mutators:update_volume" + +resources: + volumes: + my_volume_1: + name: "My Volume" + catalog_name: "my_catalog" + schema_name: "my_schema" diff --git a/acceptance/bundle/python/volumes-support/mutators.py b/acceptance/bundle/python/volumes-support/mutators.py new file mode 100644 index 0000000000..ff3eb1aabe --- /dev/null +++ b/acceptance/bundle/python/volumes-support/mutators.py @@ -0,0 +1,11 @@ +from dataclasses import replace + +from databricks.bundles.core import volume_mutator +from databricks.bundles.volumes import Volume + + +@volume_mutator +def update_volume(volume: Volume) -> Volume: + assert isinstance(volume.name, str) + + return replace(volume, name=f"{volume.name} (updated)") diff --git a/acceptance/bundle/python/volumes-support/out.test.toml b/acceptance/bundle/python/volumes-support/out.test.toml new file mode 100644 index 0000000000..b83fc9611b --- /dev/null +++ b/acceptance/bundle/python/volumes-support/out.test.toml @@ -0,0 +1,6 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_CLI_DEPLOYMENT = ["terraform", "direct-exp"] + 
UV_ARGS = ["--with-requirements requirements-latest.txt --no-cache"] diff --git a/acceptance/bundle/python/volumes-support/output.txt b/acceptance/bundle/python/volumes-support/output.txt new file mode 100644 index 0000000000..9320e9b78c --- /dev/null +++ b/acceptance/bundle/python/volumes-support/output.txt @@ -0,0 +1,30 @@ + +>>> uv run --with-requirements requirements-latest.txt --no-cache -q [CLI] bundle validate --output json +{ + "experimental": { + "python": { + "mutators": [ + "mutators:update_volume" + ], + "resources": [ + "resources:load_resources" + ] + } + }, + "resources": { + "volumes": { + "my_volume_1": { + "catalog_name": "my_catalog", + "name": "My Volume (updated)", + "schema_name": "my_schema", + "volume_type": "MANAGED" + }, + "my_volume_2": { + "catalog_name": "my_catalog_2", + "name": "My Volume (2) (updated)", + "schema_name": "my_schema_2", + "volume_type": "MANAGED" + } + } + } +} diff --git a/acceptance/bundle/python/volumes-support/resources.py b/acceptance/bundle/python/volumes-support/resources.py new file mode 100644 index 0000000000..8d91e99c43 --- /dev/null +++ b/acceptance/bundle/python/volumes-support/resources.py @@ -0,0 +1,16 @@ +from databricks.bundles.core import Resources + + +def load_resources() -> Resources: + resources = Resources() + + resources.add_volume( + "my_volume_2", + { + "name": "My Volume (2)", + "catalog_name": "my_catalog_2", + "schema_name": "my_schema_2", + }, + ) + + return resources diff --git a/acceptance/bundle/python/volumes-support/script b/acceptance/bundle/python/volumes-support/script new file mode 100644 index 0000000000..c94fd42a0c --- /dev/null +++ b/acceptance/bundle/python/volumes-support/script @@ -0,0 +1,6 @@ +echo "$DATABRICKS_BUNDLES_WHEEL" > "requirements-latest.txt" + +trace uv run $UV_ARGS -q $CLI bundle validate --output json | \ + jq "pick(.experimental.python, .resources)" + +rm -fr .databricks __pycache__ diff --git a/acceptance/bundle/python/volumes-support/test.toml 
b/acceptance/bundle/python/volumes-support/test.toml new file mode 100644 index 0000000000..b43fe72b07 --- /dev/null +++ b/acceptance/bundle/python/volumes-support/test.toml @@ -0,0 +1,8 @@ +Local = true +Cloud = false # tests don't interact with APIs + +[EnvMatrix] +UV_ARGS = [ + # volumes are only supported in the latest version of the wheel + "--with-requirements requirements-latest.txt --no-cache", +] diff --git a/experimental/python/codegen/codegen/packages.py b/experimental/python/codegen/codegen/packages.py index d0a14396f2..ef4e607ed0 100644 --- a/experimental/python/codegen/codegen/packages.py +++ b/experimental/python/codegen/codegen/packages.py @@ -5,6 +5,7 @@ RESOURCE_NAMESPACE = { "resources.Job": "jobs", "resources.Pipeline": "pipelines", + "resources.Volume": "volumes", } RESOURCE_TYPES = list(RESOURCE_NAMESPACE.keys()) @@ -17,6 +18,7 @@ "jobs", "pipelines", "resources", + "catalog", ] RENAMES = { diff --git a/experimental/python/databricks/bundles/core/__init__.py b/experimental/python/databricks/bundles/core/__init__.py index 3f7bb77038..0f5c3208cd 100644 --- a/experimental/python/databricks/bundles/core/__init__.py +++ b/experimental/python/databricks/bundles/core/__init__.py @@ -13,12 +13,13 @@ "VariableOrList", "VariableOrOptional", "job_mutator", - "pipeline_mutator", "load_resources_from_current_package_module", "load_resources_from_module", "load_resources_from_modules", "load_resources_from_package_module", + "pipeline_mutator", "variables", + "volume_mutator", ] from databricks.bundles.core._bundle import Bundle @@ -39,6 +40,7 @@ ResourceMutator, job_mutator, pipeline_mutator, + volume_mutator, ) from databricks.bundles.core._resources import Resources from databricks.bundles.core._variable import ( diff --git a/experimental/python/databricks/bundles/core/_resource_mutator.py b/experimental/python/databricks/bundles/core/_resource_mutator.py index 5f6a0a2fc4..da0c62691c 100644 --- 
a/experimental/python/databricks/bundles/core/_resource_mutator.py +++ b/experimental/python/databricks/bundles/core/_resource_mutator.py @@ -8,6 +8,7 @@ if TYPE_CHECKING: from databricks.bundles.jobs._models.job import Job from databricks.bundles.pipelines._models.pipeline import Pipeline + from databricks.bundles.volumes._models.volume import Volume _T = TypeVar("_T", bound=Resource) @@ -127,3 +128,35 @@ def my_pipeline_mutator(bundle: Bundle, pipeline: Pipeline) -> Pipeline: from databricks.bundles.pipelines._models.pipeline import Pipeline return ResourceMutator(resource_type=Pipeline, function=function) + + +@overload +def volume_mutator( + function: Callable[[Bundle, "Volume"], "Volume"], +) -> ResourceMutator["Volume"]: ... + + +@overload +def volume_mutator( + function: Callable[["Volume"], "Volume"], +) -> ResourceMutator["Volume"]: ... + + +def volume_mutator(function: Callable) -> ResourceMutator["Volume"]: + """ + Decorator for defining a volume mutator. Function should return a new instance of the volume with the desired changes, + instead of mutating the input volume. + + Example: + + .. code-block:: python + + @volume_mutator + def my_volume_mutator(bundle: Bundle, volume: Volume) -> Volume: + return replace(volume, name="my_volume") + + :param function: Function that mutates a volume. 
+ """ + from databricks.bundles.volumes._models.volume import Volume + + return ResourceMutator(resource_type=Volume, function=function) diff --git a/experimental/python/databricks/bundles/core/_resource_type.py b/experimental/python/databricks/bundles/core/_resource_type.py index 427ce552c6..1f06c62f4f 100644 --- a/experimental/python/databricks/bundles/core/_resource_type.py +++ b/experimental/python/databricks/bundles/core/_resource_type.py @@ -2,6 +2,7 @@ from typing import Type from databricks.bundles.core._resource import Resource +from databricks.bundles.volumes._models.volume import Volume @dataclass(kw_only=True, frozen=True) @@ -45,4 +46,9 @@ def all(cls) -> tuple["_ResourceType", ...]: plural_name="pipelines", singular_name="pipeline", ), + _ResourceType( + resource_type=Volume, + plural_name="volumes", + singular_name="volume", + ), ) diff --git a/experimental/python/databricks/bundles/core/_resources.py b/experimental/python/databricks/bundles/core/_resources.py index fd776b8852..dd231f3d3c 100644 --- a/experimental/python/databricks/bundles/core/_resources.py +++ b/experimental/python/databricks/bundles/core/_resources.py @@ -8,6 +8,7 @@ if TYPE_CHECKING: from databricks.bundles.jobs._models.job import Job, JobParam from databricks.bundles.pipelines._models.pipeline import Pipeline, PipelineParam + from databricks.bundles.volumes._models.volume import Volume, VolumeParam __all__ = ["Resources"] @@ -57,6 +58,7 @@ def load_resources(bundle: Bundle) -> Resources: def __init__(self): self._jobs = dict[str, "Job"]() self._pipelines = dict[str, "Pipeline"]() + self._volumes = dict[str, "Volume"]() self._locations = dict[tuple[str, ...], Location]() self._diagnostics = Diagnostics() @@ -68,6 +70,10 @@ def jobs(self) -> dict[str, "Job"]: def pipelines(self) -> dict[str, "Pipeline"]: return self._pipelines + @property + def volumes(self) -> dict[str, "Volume"]: + return self._volumes + @property def diagnostics(self) -> Diagnostics: """ @@ -93,6 +99,7 @@ def 
add_resource( from databricks.bundles.jobs import Job from databricks.bundles.pipelines import Pipeline + from databricks.bundles.volumes import Volume location = location or Location.from_stack_frame(depth=1) @@ -101,6 +108,8 @@ def add_resource( self.add_job(resource_name, resource, location=location) case Pipeline(): self.add_pipeline(resource_name, resource, location=location) + case Volume(): + self.add_volume(resource_name, resource, location=location) case _: raise ValueError(f"Unsupported resource type: {type(resource)}") @@ -168,6 +177,38 @@ def add_pipeline( self._pipelines[resource_name] = pipeline + def add_volume( + self, + resource_name: str, + volume: "VolumeParam", + *, + location: Optional[Location] = None, + ) -> None: + """ + Adds a volume to the collection of resources. Resource name must be unique across all volumes. + + :param resource_name: unique identifier for the volume + :param volume: the volume to add, can be Volume or dict + :param location: optional location of the volume in the source code + """ + from databricks.bundles.volumes import Volume + + volume = _transform(Volume, volume) + path = ("resources", "volumes", resource_name) + location = location or Location.from_stack_frame(depth=1) + + if self._volumes.get(resource_name): + self.add_diagnostic_error( + msg=f"Duplicate resource name '{resource_name}' for a volume. Resource names must be unique.", + location=location, + path=path, + ) + else: + if location: + self.add_location(path, location) + + self._volumes[resource_name] = volume + def add_location(self, path: tuple[str, ...], location: Location) -> None: """ Associate source code location with a path in the bundle configuration. 
@@ -244,6 +285,9 @@ def add_resources(self, other: "Resources") -> None: for name, pipeline in other.pipelines.items(): self.add_pipeline(name, pipeline) + for name, volume in other.volumes.items(): + self.add_volume(name, volume) + for path, location in other._locations.items(): self.add_location(path, location) diff --git a/experimental/python/databricks/bundles/volumes/__init__.py b/experimental/python/databricks/bundles/volumes/__init__.py new file mode 100644 index 0000000000..177e6480b4 --- /dev/null +++ b/experimental/python/databricks/bundles/volumes/__init__.py @@ -0,0 +1,25 @@ +__all__ = [ + "Volume", + "VolumeDict", + "VolumeGrant", + "VolumeGrantDict", + "VolumeGrantParam", + "VolumeGrantPrivilege", + "VolumeGrantPrivilegeParam", + "VolumeParam", + "VolumeType", + "VolumeTypeParam", +] + + +from databricks.bundles.volumes._models.volume import Volume, VolumeDict, VolumeParam +from databricks.bundles.volumes._models.volume_grant import ( + VolumeGrant, + VolumeGrantDict, + VolumeGrantParam, +) +from databricks.bundles.volumes._models.volume_grant_privilege import ( + VolumeGrantPrivilege, + VolumeGrantPrivilegeParam, +) +from databricks.bundles.volumes._models.volume_type import VolumeType, VolumeTypeParam diff --git a/experimental/python/databricks/bundles/volumes/_models/volume.py b/experimental/python/databricks/bundles/volumes/_models/volume.py new file mode 100644 index 0000000000..65c5c7ab2f --- /dev/null +++ b/experimental/python/databricks/bundles/volumes/_models/volume.py @@ -0,0 +1,96 @@ +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, TypedDict + +from databricks.bundles.core._resource import Resource +from databricks.bundles.core._transform import _transform +from databricks.bundles.core._transform_to_json import _transform_to_json_value +from databricks.bundles.core._variable import ( + VariableOr, + VariableOrList, + VariableOrOptional, +) +from databricks.bundles.volumes._models.volume_grant import ( + 
VolumeGrant, + VolumeGrantParam, +) +from databricks.bundles.volumes._models.volume_type import VolumeType, VolumeTypeParam + +if TYPE_CHECKING: + from typing_extensions import Self + + +@dataclass(kw_only=True) +class Volume(Resource): + """""" + + catalog_name: VariableOr[str] + """ + The name of the catalog where the schema and the volume are + """ + + name: VariableOr[str] + """ + The name of the volume + """ + + schema_name: VariableOr[str] + """ + The name of the schema where the volume is + """ + + comment: VariableOrOptional[str] = None + """ + The comment attached to the volume + """ + + grants: VariableOrList[VolumeGrant] = field(default_factory=list) + + storage_location: VariableOrOptional[str] = None + """ + The storage location on the cloud + """ + + volume_type: VariableOrOptional[VolumeType] = None + + @classmethod + def from_dict(cls, value: "VolumeDict") -> "Self": + return _transform(cls, value) + + def as_dict(self) -> "VolumeDict": + return _transform_to_json_value(self) # type:ignore + + +class VolumeDict(TypedDict, total=False): + """""" + + catalog_name: VariableOr[str] + """ + The name of the catalog where the schema and the volume are + """ + + name: VariableOr[str] + """ + The name of the volume + """ + + schema_name: VariableOr[str] + """ + The name of the schema where the volume is + """ + + comment: VariableOrOptional[str] + """ + The comment attached to the volume + """ + + grants: VariableOrList[VolumeGrantParam] + + storage_location: VariableOrOptional[str] + """ + The storage location on the cloud + """ + + volume_type: VariableOrOptional[VolumeTypeParam] + + +VolumeParam = VolumeDict | Volume diff --git a/experimental/python/databricks/bundles/volumes/_models/volume_grant.py b/experimental/python/databricks/bundles/volumes/_models/volume_grant.py new file mode 100644 index 0000000000..3bd580371e --- /dev/null +++ b/experimental/python/databricks/bundles/volumes/_models/volume_grant.py @@ -0,0 +1,40 @@ +from dataclasses import 
dataclass, field +from typing import TYPE_CHECKING, TypedDict + +from databricks.bundles.core._transform import _transform +from databricks.bundles.core._transform_to_json import _transform_to_json_value +from databricks.bundles.core._variable import VariableOr, VariableOrList +from databricks.bundles.volumes._models.volume_grant_privilege import ( + VolumeGrantPrivilege, + VolumeGrantPrivilegeParam, +) + +if TYPE_CHECKING: + from typing_extensions import Self + + +@dataclass(kw_only=True) +class VolumeGrant: + """""" + + principal: VariableOr[str] + + privileges: VariableOrList[VolumeGrantPrivilege] = field(default_factory=list) + + @classmethod + def from_dict(cls, value: "VolumeGrantDict") -> "Self": + return _transform(cls, value) + + def as_dict(self) -> "VolumeGrantDict": + return _transform_to_json_value(self) # type:ignore + + +class VolumeGrantDict(TypedDict, total=False): + """""" + + principal: VariableOr[str] + + privileges: VariableOrList[VolumeGrantPrivilegeParam] + + +VolumeGrantParam = VolumeGrantDict | VolumeGrant diff --git a/experimental/python/databricks/bundles/volumes/_models/volume_grant_privilege.py b/experimental/python/databricks/bundles/volumes/_models/volume_grant_privilege.py new file mode 100644 index 0000000000..d758ca6d82 --- /dev/null +++ b/experimental/python/databricks/bundles/volumes/_models/volume_grant_privilege.py @@ -0,0 +1,16 @@ +from enum import Enum +from typing import Literal + + +class VolumeGrantPrivilege(Enum): + ALL_PRIVILEGES = "ALL_PRIVILEGES" + APPLY_TAG = "APPLY_TAG" + MANAGE = "MANAGE" + READ_VOLUME = "READ_VOLUME" + WRITE_VOLUME = "WRITE_VOLUME" + + +VolumeGrantPrivilegeParam = ( + Literal["ALL_PRIVILEGES", "APPLY_TAG", "MANAGE", "READ_VOLUME", "WRITE_VOLUME"] + | VolumeGrantPrivilege +) diff --git a/experimental/python/databricks/bundles/volumes/_models/volume_type.py b/experimental/python/databricks/bundles/volumes/_models/volume_type.py new file mode 100644 index 0000000000..1b9bcd1089 --- /dev/null +++ 
b/experimental/python/databricks/bundles/volumes/_models/volume_type.py @@ -0,0 +1,14 @@ +from enum import Enum +from typing import Literal + + +class VolumeType(Enum): + """ + The type of the volume. An external volume is located in the specified external location. A managed volume is located in the default location which is specified by the parent schema, or the parent catalog, or the Metastore. [Learn more](https://docs.databricks.com/aws/en/volumes/managed-vs-external) + """ + + EXTERNAL = "EXTERNAL" + MANAGED = "MANAGED" + + +VolumeTypeParam = Literal["EXTERNAL", "MANAGED"] | VolumeType diff --git a/experimental/python/databricks_tests/core/test_resources.py b/experimental/python/databricks_tests/core/test_resources.py index 8cd721a2b7..a20fe5a4ae 100644 --- a/experimental/python/databricks_tests/core/test_resources.py +++ b/experimental/python/databricks_tests/core/test_resources.py @@ -10,10 +10,12 @@ ResourceMutator, job_mutator, pipeline_mutator, + volume_mutator, ) from databricks.bundles.core._resource_type import _ResourceType from databricks.bundles.jobs._models.job import Job from databricks.bundles.pipelines._models.pipeline import Pipeline +from databricks.bundles.volumes._models.volume import Volume @dataclass(kw_only=True) @@ -44,6 +46,23 @@ class TestCase: ), resource_types[Pipeline], ), + ( + TestCase( + add_resource=Resources.add_volume, + dict_example={ + "name": "My Volume", + "catalog_name": "my_catalog", + "schema_name": "my_schema", + }, + dataclass_example=Volume( + catalog_name="my_catalog", + name="My Volume", + schema_name="my_schema", + ), + mutator=volume_mutator, + ), + resource_types[Volume], + ), ] test_case_ids = [tpe.plural_name for _, tpe in test_cases]