diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md
index cc3cc33db5..ad2c0ea2ad 100644
--- a/NEXT_CHANGELOG.md
+++ b/NEXT_CHANGELOG.md
@@ -11,5 +11,6 @@
 ### Dependency updates
 
 ### Bundles
+* Add support for schemas in Python ([#3389](https://github.com/databricks/cli/pull/3389))
 
 ### API Changes
diff --git a/acceptance/bundle/python/schemas-support/databricks.yml b/acceptance/bundle/python/schemas-support/databricks.yml
new file mode 100644
index 0000000000..525f68000f
--- /dev/null
+++ b/acceptance/bundle/python/schemas-support/databricks.yml
@@ -0,0 +1,17 @@
+bundle:
+  name: my_project
+
+sync: {paths: []} # don't need to copy files
+
+experimental:
+  python:
+    resources:
+      - "resources:load_resources"
+    mutators:
+      - "mutators:update_schema"
+
+resources:
+  schemas:
+    my_schema_1:
+      name: "My Schema"
+      catalog_name: "my_catalog"
diff --git a/acceptance/bundle/python/schemas-support/mutators.py b/acceptance/bundle/python/schemas-support/mutators.py
new file mode 100644
index 0000000000..95eab74fa1
--- /dev/null
+++ b/acceptance/bundle/python/schemas-support/mutators.py
@@ -0,0 +1,11 @@
+from dataclasses import replace
+
+from databricks.bundles.core import schema_mutator
+from databricks.bundles.schemas import Schema
+
+
+@schema_mutator
+def update_schema(schema: Schema) -> Schema:
+    assert isinstance(schema.name, str)
+
+    return replace(schema, name=f"{schema.name} (updated)")
diff --git a/acceptance/bundle/python/schemas-support/out.test.toml b/acceptance/bundle/python/schemas-support/out.test.toml
new file mode 100644
index 0000000000..b83fc9611b
--- /dev/null
+++ b/acceptance/bundle/python/schemas-support/out.test.toml
@@ -0,0 +1,6 @@
+Local = true
+Cloud = false
+
+[EnvMatrix]
+  DATABRICKS_CLI_DEPLOYMENT = ["terraform", "direct-exp"]
+  UV_ARGS = ["--with-requirements requirements-latest.txt --no-cache"]
diff --git a/acceptance/bundle/python/schemas-support/output.txt b/acceptance/bundle/python/schemas-support/output.txt
new file mode 100644
index 0000000000..660bf2322b
--- /dev/null
+++ b/acceptance/bundle/python/schemas-support/output.txt
@@ -0,0 +1,26 @@
+
+>>> uv run --with-requirements requirements-latest.txt --no-cache -q [CLI] bundle validate --output json
+{
+  "experimental": {
+    "python": {
+      "mutators": [
+        "mutators:update_schema"
+      ],
+      "resources": [
+        "resources:load_resources"
+      ]
+    }
+  },
+  "resources": {
+    "schemas": {
+      "my_schema_1": {
+        "catalog_name": "my_catalog",
+        "name": "My Schema (updated)"
+      },
+      "my_schema_2": {
+        "catalog_name": "my_catalog_2",
+        "name": "My Schema (2) (updated)"
+      }
+    }
+  }
+}
diff --git a/acceptance/bundle/python/schemas-support/resources.py b/acceptance/bundle/python/schemas-support/resources.py
new file mode 100644
index 0000000000..4431428e04
--- /dev/null
+++ b/acceptance/bundle/python/schemas-support/resources.py
@@ -0,0 +1,15 @@
+from databricks.bundles.core import Resources
+
+
+def load_resources() -> Resources:
+    resources = Resources()
+
+    resources.add_schema(
+        "my_schema_2",
+        {
+            "name": "My Schema (2)",
+            "catalog_name": "my_catalog_2",
+        },
+    )
+
+    return resources
diff --git a/acceptance/bundle/python/schemas-support/script b/acceptance/bundle/python/schemas-support/script
new file mode 100644
index 0000000000..c94fd42a0c
--- /dev/null
+++ b/acceptance/bundle/python/schemas-support/script
@@ -0,0 +1,6 @@
+echo "$DATABRICKS_BUNDLES_WHEEL" > "requirements-latest.txt"
+
+trace uv run $UV_ARGS -q $CLI bundle validate --output json | \
+    jq "pick(.experimental.python, .resources)"
+
+rm -fr .databricks __pycache__
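For reference, a minimal sketch (not part of the patch) of what the acceptance test above exercises: load_resources() in resources.py registers "my_schema_2", and update_schema in mutators.py rewrites every schema name, which is why both schemas carry an "(updated)" suffix in the validate output.

# Sketch: the same rename that update_schema applies to each Schema resource.
from dataclasses import replace

from databricks.bundles.schemas import Schema

schema = Schema(catalog_name="my_catalog_2", name="My Schema (2)")
updated = replace(schema, name=f"{schema.name} (updated)")

assert updated.name == "My Schema (2) (updated)"
assert updated.catalog_name == "my_catalog_2"  # other fields carry over unchanged
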
diff --git a/acceptance/bundle/python/schemas-support/test.toml b/acceptance/bundle/python/schemas-support/test.toml
new file mode 100644
index 0000000000..b43fe72b07
--- /dev/null
+++ b/acceptance/bundle/python/schemas-support/test.toml
@@ -0,0 +1,8 @@
+Local = true
+Cloud = false # tests don't interact with APIs
+
+[EnvMatrix]
+UV_ARGS = [
+    # schemas are only supported in the latest version of the wheel
+    "--with-requirements requirements-latest.txt --no-cache",
+]
diff --git a/experimental/python/codegen/codegen/packages.py b/experimental/python/codegen/codegen/packages.py
index ef4e607ed0..6762097918 100644
--- a/experimental/python/codegen/codegen/packages.py
+++ b/experimental/python/codegen/codegen/packages.py
@@ -5,6 +5,7 @@
 RESOURCE_NAMESPACE = {
     "resources.Job": "jobs",
     "resources.Pipeline": "pipelines",
+    "resources.Schema": "schemas",
     "resources.Volume": "volumes",
 }
 
diff --git a/experimental/python/databricks/bundles/core/__init__.py b/experimental/python/databricks/bundles/core/__init__.py
index 0f5c3208cd..5c525861ac 100644
--- a/experimental/python/databricks/bundles/core/__init__.py
+++ b/experimental/python/databricks/bundles/core/__init__.py
@@ -18,6 +18,7 @@
     "load_resources_from_modules",
     "load_resources_from_package_module",
     "pipeline_mutator",
+    "schema_mutator",
     "variables",
     "volume_mutator",
 ]
@@ -40,6 +41,7 @@
     ResourceMutator,
     job_mutator,
     pipeline_mutator,
+    schema_mutator,
     volume_mutator,
 )
 from databricks.bundles.core._resources import Resources
diff --git a/experimental/python/databricks/bundles/core/_resource_mutator.py b/experimental/python/databricks/bundles/core/_resource_mutator.py
index da0c62691c..90e8987216 100644
--- a/experimental/python/databricks/bundles/core/_resource_mutator.py
+++ b/experimental/python/databricks/bundles/core/_resource_mutator.py
@@ -8,6 +8,7 @@
 if TYPE_CHECKING:
     from databricks.bundles.jobs._models.job import Job
     from databricks.bundles.pipelines._models.pipeline import Pipeline
+    from databricks.bundles.schemas._models.schema import Schema
     from databricks.bundles.volumes._models.volume import Volume
 
 _T = TypeVar("_T", bound=Resource)
@@ -130,6 +131,38 @@ def my_pipeline_mutator(bundle: Bundle, pipeline: Pipeline) -> Pipeline:
     return ResourceMutator(resource_type=Pipeline, function=function)
 
 
+@overload
+def schema_mutator(
+    function: Callable[[Bundle, "Schema"], "Schema"],
+) -> ResourceMutator["Schema"]: ...
+
+
+@overload
+def schema_mutator(
+    function: Callable[["Schema"], "Schema"],
+) -> ResourceMutator["Schema"]: ...
+
+
+def schema_mutator(function: Callable) -> ResourceMutator["Schema"]:
+    """
+    Decorator for defining a schema mutator. The function should return a new instance of the schema with the desired changes,
+    instead of mutating the input schema.
+
+    Example:
+
+    .. code-block:: python
+
+        @schema_mutator
+        def my_schema_mutator(bundle: Bundle, schema: Schema) -> Schema:
+            return replace(schema, name="my_schema")
+
+    :param function: Function that mutates a schema.
+ """ + from databricks.bundles.schemas._models.schema import Schema + + return ResourceMutator(resource_type=Schema, function=function) + + @overload def volume_mutator( function: Callable[[Bundle, "Volume"], "Volume"], diff --git a/experimental/python/databricks/bundles/core/_resource_type.py b/experimental/python/databricks/bundles/core/_resource_type.py index 1f06c62f4f..9e9bb1bdf8 100644 --- a/experimental/python/databricks/bundles/core/_resource_type.py +++ b/experimental/python/databricks/bundles/core/_resource_type.py @@ -2,7 +2,6 @@ from typing import Type from databricks.bundles.core._resource import Resource -from databricks.bundles.volumes._models.volume import Volume @dataclass(kw_only=True, frozen=True) @@ -34,6 +33,8 @@ def all(cls) -> tuple["_ResourceType", ...]: from databricks.bundles.jobs._models.job import Job from databricks.bundles.pipelines._models.pipeline import Pipeline + from databricks.bundles.schemas._models.schema import Schema + from databricks.bundles.volumes._models.volume import Volume return ( _ResourceType( @@ -51,4 +52,9 @@ def all(cls) -> tuple["_ResourceType", ...]: plural_name="volumes", singular_name="volume", ), + _ResourceType( + resource_type=Schema, + plural_name="schemas", + singular_name="schema", + ), ) diff --git a/experimental/python/databricks/bundles/core/_resources.py b/experimental/python/databricks/bundles/core/_resources.py index dd231f3d3c..698f2a5ca4 100644 --- a/experimental/python/databricks/bundles/core/_resources.py +++ b/experimental/python/databricks/bundles/core/_resources.py @@ -8,6 +8,7 @@ if TYPE_CHECKING: from databricks.bundles.jobs._models.job import Job, JobParam from databricks.bundles.pipelines._models.pipeline import Pipeline, PipelineParam + from databricks.bundles.schemas._models.schema import Schema, SchemaParam from databricks.bundles.volumes._models.volume import Volume, VolumeParam __all__ = ["Resources"] @@ -58,6 +59,7 @@ def load_resources(bundle: Bundle) -> Resources: def __init__(self): self._jobs = dict[str, "Job"]() self._pipelines = dict[str, "Pipeline"]() + self._schemas = dict[str, "Schema"]() self._volumes = dict[str, "Volume"]() self._locations = dict[tuple[str, ...], Location]() self._diagnostics = Diagnostics() @@ -70,6 +72,10 @@ def jobs(self) -> dict[str, "Job"]: def pipelines(self) -> dict[str, "Pipeline"]: return self._pipelines + @property + def schemas(self) -> dict[str, "Schema"]: + return self._schemas + @property def volumes(self) -> dict[str, "Volume"]: return self._volumes @@ -99,6 +105,7 @@ def add_resource( from databricks.bundles.jobs import Job from databricks.bundles.pipelines import Pipeline + from databricks.bundles.schemas import Schema from databricks.bundles.volumes import Volume location = location or Location.from_stack_frame(depth=1) @@ -108,6 +115,8 @@ def add_resource( self.add_job(resource_name, resource, location=location) case Pipeline(): self.add_pipeline(resource_name, resource, location=location) + case Schema(): + self.add_schema(resource_name, resource, location=location) case Volume(): self.add_volume(resource_name, resource, location=location) case _: @@ -177,6 +186,38 @@ def add_pipeline( self._pipelines[resource_name] = pipeline + def add_schema( + self, + resource_name: str, + schema: "SchemaParam", + *, + location: Optional[Location] = None, + ) -> None: + """ + Adds a schema to the collection of resources. Resource name must be unique across all schemas. 
+
+        :param resource_name: unique identifier for the schema
+        :param schema: the schema to add, can be Schema or dict
+        :param location: optional location of the schema in the source code
+        """
+        from databricks.bundles.schemas import Schema
+
+        schema = _transform(Schema, schema)
+        path = ("resources", "schemas", resource_name)
+        location = location or Location.from_stack_frame(depth=1)
+
+        if self._schemas.get(resource_name):
+            self.add_diagnostic_error(
+                msg=f"Duplicate resource name '{resource_name}' for a schema. Resource names must be unique.",
+                location=location,
+                path=path,
+            )
+        else:
+            if location:
+                self.add_location(path, location)
+
+        self._schemas[resource_name] = schema
+
     def add_volume(
         self,
         resource_name: str,
@@ -285,6 +326,9 @@ def add_resources(self, other: "Resources") -> None:
         for name, pipeline in other.pipelines.items():
             self.add_pipeline(name, pipeline)
 
+        for name, schema in other.schemas.items():
+            self.add_schema(name, schema)
+
         for name, volume in other.volumes.items():
             self.add_volume(name, volume)
 
diff --git a/experimental/python/databricks/bundles/schemas/__init__.py b/experimental/python/databricks/bundles/schemas/__init__.py
new file mode 100644
index 0000000000..69e9d4a279
--- /dev/null
+++ b/experimental/python/databricks/bundles/schemas/__init__.py
@@ -0,0 +1,22 @@
+__all__ = [
+    "Schema",
+    "SchemaDict",
+    "SchemaGrant",
+    "SchemaGrantDict",
+    "SchemaGrantParam",
+    "SchemaGrantPrivilege",
+    "SchemaGrantPrivilegeParam",
+    "SchemaParam",
+]
+
+
+from databricks.bundles.schemas._models.schema import Schema, SchemaDict, SchemaParam
+from databricks.bundles.schemas._models.schema_grant import (
+    SchemaGrant,
+    SchemaGrantDict,
+    SchemaGrantParam,
+)
+from databricks.bundles.schemas._models.schema_grant_privilege import (
+    SchemaGrantPrivilege,
+    SchemaGrantPrivilegeParam,
+)
diff --git a/experimental/python/databricks/bundles/schemas/_models/schema.py b/experimental/python/databricks/bundles/schemas/_models/schema.py
new file mode 100644
index 0000000000..f3de56b1e8
--- /dev/null
+++ b/experimental/python/databricks/bundles/schemas/_models/schema.py
@@ -0,0 +1,86 @@
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, TypedDict
+
+from databricks.bundles.core._resource import Resource
+from databricks.bundles.core._transform import _transform
+from databricks.bundles.core._transform_to_json import _transform_to_json_value
+from databricks.bundles.core._variable import (
+    VariableOr,
+    VariableOrDict,
+    VariableOrList,
+    VariableOrOptional,
+)
+from databricks.bundles.schemas._models.schema_grant import (
+    SchemaGrant,
+    SchemaGrantParam,
+)
+
+if TYPE_CHECKING:
+    from typing_extensions import Self
+
+
+@dataclass(kw_only=True)
+class Schema(Resource):
+    """"""
+
+    catalog_name: VariableOr[str]
+    """
+    Name of parent catalog.
+    """
+
+    name: VariableOr[str]
+    """
+    Name of schema, relative to parent catalog.
+    """
+
+    comment: VariableOrOptional[str] = None
+    """
+    User-provided free-form text description.
+    """
+
+    grants: VariableOrList[SchemaGrant] = field(default_factory=list)
+
+    properties: VariableOrDict[str] = field(default_factory=dict)
+
+    storage_root: VariableOrOptional[str] = None
+    """
+    Storage root URL for managed tables within schema.
+ """ + + @classmethod + def from_dict(cls, value: "SchemaDict") -> "Self": + return _transform(cls, value) + + def as_dict(self) -> "SchemaDict": + return _transform_to_json_value(self) # type:ignore + + +class SchemaDict(TypedDict, total=False): + """""" + + catalog_name: VariableOr[str] + """ + Name of parent catalog. + """ + + name: VariableOr[str] + """ + Name of schema, relative to parent catalog. + """ + + comment: VariableOrOptional[str] + """ + User-provided free-form text description. + """ + + grants: VariableOrList[SchemaGrantParam] + + properties: VariableOrDict[str] + + storage_root: VariableOrOptional[str] + """ + Storage root URL for managed tables within schema. + """ + + +SchemaParam = SchemaDict | Schema diff --git a/experimental/python/databricks/bundles/schemas/_models/schema_grant.py b/experimental/python/databricks/bundles/schemas/_models/schema_grant.py new file mode 100644 index 0000000000..4997db89c6 --- /dev/null +++ b/experimental/python/databricks/bundles/schemas/_models/schema_grant.py @@ -0,0 +1,40 @@ +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, TypedDict + +from databricks.bundles.core._transform import _transform +from databricks.bundles.core._transform_to_json import _transform_to_json_value +from databricks.bundles.core._variable import VariableOr, VariableOrList +from databricks.bundles.schemas._models.schema_grant_privilege import ( + SchemaGrantPrivilege, + SchemaGrantPrivilegeParam, +) + +if TYPE_CHECKING: + from typing_extensions import Self + + +@dataclass(kw_only=True) +class SchemaGrant: + """""" + + principal: VariableOr[str] + + privileges: VariableOrList[SchemaGrantPrivilege] = field(default_factory=list) + + @classmethod + def from_dict(cls, value: "SchemaGrantDict") -> "Self": + return _transform(cls, value) + + def as_dict(self) -> "SchemaGrantDict": + return _transform_to_json_value(self) # type:ignore + + +class SchemaGrantDict(TypedDict, total=False): + """""" + + principal: VariableOr[str] + + privileges: VariableOrList[SchemaGrantPrivilegeParam] + + +SchemaGrantParam = SchemaGrantDict | SchemaGrant diff --git a/experimental/python/databricks/bundles/schemas/_models/schema_grant_privilege.py b/experimental/python/databricks/bundles/schemas/_models/schema_grant_privilege.py new file mode 100644 index 0000000000..b0dba59b7d --- /dev/null +++ b/experimental/python/databricks/bundles/schemas/_models/schema_grant_privilege.py @@ -0,0 +1,38 @@ +from enum import Enum +from typing import Literal + + +class SchemaGrantPrivilege(Enum): + ALL_PRIVILEGES = "ALL_PRIVILEGES" + APPLY_TAG = "APPLY_TAG" + CREATE_FUNCTION = "CREATE_FUNCTION" + CREATE_TABLE = "CREATE_TABLE" + CREATE_VOLUME = "CREATE_VOLUME" + MANAGE = "MANAGE" + USE_SCHEMA = "USE_SCHEMA" + EXECUTE = "EXECUTE" + MODIFY = "MODIFY" + REFRESH = "REFRESH" + SELECT = "SELECT" + READ_VOLUME = "READ_VOLUME" + WRITE_VOLUME = "WRITE_VOLUME" + + +SchemaGrantPrivilegeParam = ( + Literal[ + "ALL_PRIVILEGES", + "APPLY_TAG", + "CREATE_FUNCTION", + "CREATE_TABLE", + "CREATE_VOLUME", + "MANAGE", + "USE_SCHEMA", + "EXECUTE", + "MODIFY", + "REFRESH", + "SELECT", + "READ_VOLUME", + "WRITE_VOLUME", + ] + | SchemaGrantPrivilege +) diff --git a/experimental/python/databricks_tests/core/test_resources.py b/experimental/python/databricks_tests/core/test_resources.py index a20fe5a4ae..ccdd7f1d86 100644 --- a/experimental/python/databricks_tests/core/test_resources.py +++ b/experimental/python/databricks_tests/core/test_resources.py @@ -10,11 +10,13 @@ ResourceMutator, job_mutator, 
     pipeline_mutator,
+    schema_mutator,
     volume_mutator,
 )
 from databricks.bundles.core._resource_type import _ResourceType
 from databricks.bundles.jobs._models.job import Job
 from databricks.bundles.pipelines._models.pipeline import Pipeline
+from databricks.bundles.schemas._models.schema import Schema
 from databricks.bundles.volumes._models.volume import Volume
 
 
@@ -63,6 +65,15 @@
         ),
         resource_types[Volume],
     ),
+    (
+        TestCase(
+            add_resource=Resources.add_schema,
+            dict_example={"catalog_name": "my_catalog", "name": "my_schema"},
+            dataclass_example=Schema(catalog_name="my_catalog", name="my_schema"),
+            mutator=schema_mutator,
+        ),
+        resource_types[Schema],
+    ),
 ]
 
 test_case_ids = [tpe.plural_name for _, tpe in test_cases]
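
For illustration, a minimal sketch (not part of the patch) of the public surface these tests cover: add_schema accepts either a dict or a Schema dataclass, duplicate resource names are reported as diagnostics rather than raised, and schema_mutator supports both the (schema) and (bundle, schema) signatures. The add_target_suffix mutator and the bundle.target attribute are illustrative assumptions, not part of this change.

# Sketch: registering schemas in both supported forms.
from dataclasses import replace

from databricks.bundles.core import Bundle, Resources, schema_mutator
from databricks.bundles.schemas import Schema, SchemaGrant, SchemaGrantPrivilege

resources = Resources()

# Dict form; converted to a Schema dataclass internally.
resources.add_schema("raw", {"catalog_name": "main", "name": "raw"})

# Dataclass form, including grants.
resources.add_schema(
    "curated",
    Schema(
        catalog_name="main",
        name="curated",
        grants=[
            SchemaGrant(
                principal="data-engineers",
                privileges=[
                    SchemaGrantPrivilege.USE_SCHEMA,
                    SchemaGrantPrivilege.SELECT,
                ],
            )
        ],
    ),
)

assert set(resources.schemas) == {"raw", "curated"}


# Hypothetical bundle-aware mutator using the two-argument overload;
# assumes Bundle exposes the deployment target as bundle.target.
@schema_mutator
def add_target_suffix(bundle: Bundle, schema: Schema) -> Schema:
    return replace(schema, name=f"{schema.name}_{bundle.target}")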