From 37cc41704988b0f4fe3141f0c50f42740bb89653 Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Fri, 25 Apr 2025 10:25:45 +0200 Subject: [PATCH] [Python] Handle deprecation and experimental annotations in codegen --- .github/workflows/push.yml | 3 +- .../codegen/codegen/generated_dataclass.py | 35 +++++--- .../python/codegen/codegen/generated_enum.py | 6 +- .../python/codegen/codegen/jsonschema.py | 17 ++++ .../codegen/codegen/jsonschema_patch.py | 36 +-------- experimental/python/codegen/codegen/main.py | 63 +++++++++++++++ .../codegen_tests/test_generated_dataclass.py | 2 + .../codegen_tests/test_generated_enum.py | 1 + .../bundles/compute/_models/adlsgen2_info.py | 4 +- .../bundles/compute/_models/aws_attributes.py | 16 ++-- .../compute/_models/aws_availability.py | 1 - .../compute/_models/azure_attributes.py | 4 +- .../compute/_models/azure_availability.py | 3 +- .../compute/_models/cluster_log_conf.py | 12 +-- .../bundles/compute/_models/cluster_spec.py | 36 ++++----- .../compute/_models/data_security_mode.py | 13 ++- .../compute/_models/dbfs_storage_info.py | 4 +- .../compute/_models/ebs_volume_type.py | 3 +- .../bundles/compute/_models/environment.py | 14 ++++ .../bundles/compute/_models/gcp_attributes.py | 40 +++++----- .../compute/_models/gcs_storage_info.py | 4 +- .../compute/_models/init_script_info.py | 49 +++++------- .../bundles/compute/_models/library.py | 10 --- .../bundles/compute/_models/runtime_engine.py | 11 --- .../compute/_models/s3_storage_info.py | 4 +- .../compute/_models/volumes_storage_info.py | 10 ++- .../bundles/compute/_models/workload_type.py | 8 +- .../compute/_models/workspace_storage_info.py | 8 +- .../bundles/jobs/_models/compute_config.py | 2 - .../jobs/_models/gen_ai_compute_task.py | 2 - .../databricks/bundles/jobs/_models/job.py | 18 +++-- .../jobs/_models/performance_target.py | 5 +- .../bundles/jobs/_models/permission.py | 80 ------------------- .../bundles/pipelines/_models/__init__.py | 0 .../_models/pipeline_permission_level.py | 4 +- 35 files changed, 255 insertions(+), 273 deletions(-) delete mode 100644 experimental/python/databricks/bundles/jobs/_models/permission.py delete mode 100644 experimental/python/databricks/bundles/pipelines/_models/__init__.py diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index a0fc2a91a5..65adc231d7 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -186,6 +186,5 @@ jobs: if ! ( git diff --exit-code ); then echo "Generated Python code is not up-to-date. Please run 'pushd experimental/python && make codegen' and commit the changes." - # TODO block PR if this fails once diffs are fixed - # exit 1 + exit 1 fi diff --git a/experimental/python/codegen/codegen/generated_dataclass.py b/experimental/python/codegen/codegen/generated_dataclass.py index 37c81a75a2..5a1d5a9d6d 100644 --- a/experimental/python/codegen/codegen/generated_dataclass.py +++ b/experimental/python/codegen/codegen/generated_dataclass.py @@ -6,7 +6,7 @@ import codegen.packages as packages from codegen.code_builder import CodeBuilder -from codegen.jsonschema import Property, Schema +from codegen.jsonschema import Property, Schema, Stage from codegen.packages import is_resource @@ -96,6 +96,12 @@ class GeneratedField: Factory method for creating a default value, used for lists and dicts. """ + experimental: bool + """ + If true, the field is experimental and should not be indexed in docs, and + be marked as experimental in docstring. + """ + def __post_init__(self): if self.default_factory is not None and self.default is not None: raise ValueError("Can't have both default and default_factory", self) @@ -124,6 +130,7 @@ class GeneratedDataclass: fields: list[GeneratedField] extends: list[GeneratedType] + experimental: bool def generate_field( @@ -147,6 +154,7 @@ def generate_field( default=None, default_factory="dict", create_func_default="None", + experimental=prop.stage == Stage.PRIVATE, ) elif field_type.name == "VariableOrList": return GeneratedField( @@ -158,6 +166,7 @@ def generate_field( default=None, default_factory="list", create_func_default="None", + experimental=prop.stage == Stage.PRIVATE, ) elif is_required: return GeneratedField( @@ -169,6 +178,7 @@ def generate_field( default=None, default_factory=None, create_func_default=None, + experimental=prop.stage == Stage.PRIVATE, ) else: return GeneratedField( @@ -180,6 +190,7 @@ def generate_field( default="None", default_factory=None, create_func_default="None", + experimental=prop.stage == Stage.PRIVATE, ) @@ -308,6 +319,7 @@ def generate_dataclass(schema_name: str, schema: Schema) -> GeneratedDataclass: description=schema.description, fields=fields, extends=extends, + experimental=schema.stage == Stage.PRIVATE, ) @@ -347,10 +359,10 @@ def _append_dataclass(b: CodeBuilder, generated: GeneratedDataclass): b.append(":").newline() # FIXME should contain class docstring - if not generated.description: + if not generated.description and not generated.experimental: b.indent().append_triple_quote().append_triple_quote().newline().newline() else: - _append_description(b, generated.description) + _append_description(b, generated.description, generated.experimental) def _append_field(b: CodeBuilder, field: GeneratedField): @@ -428,11 +440,16 @@ def _append_typed_dict(b: CodeBuilder, generated: GeneratedDataclass): b.indent().append_triple_quote().append_triple_quote().newline().newline() -def _append_description(b: CodeBuilder, description: Optional[str]): - if description: +def _append_description(b: CodeBuilder, description: Optional[str], experimental: bool): + if description or experimental: b.indent().append_triple_quote().newline() - for line in description.split("\n"): - b.indent().append(line).newline() + if experimental: + b.indent().append(":meta private: [EXPERIMENTAL]").newline() + if description: + b.indent().newline() + if description: + for line in description.split("\n"): + b.indent().append(line).newline() b.indent().append_triple_quote().newline() @@ -449,7 +466,7 @@ def get_code(generated: GeneratedDataclass) -> str: for field in generated.fields: _append_field(b, field) - _append_description(b, field.description) + _append_description(b, field.description, field.experimental) b.newline() @@ -462,7 +479,7 @@ def get_code(generated: GeneratedDataclass) -> str: for field in generated.fields: _append_typed_dict_field(b, field) - _append_description(b, field.description) + _append_description(b, field.description, field.experimental) b.newline() diff --git a/experimental/python/codegen/codegen/generated_enum.py b/experimental/python/codegen/codegen/generated_enum.py index 0ea33aa898..6b482a4072 100644 --- a/experimental/python/codegen/codegen/generated_enum.py +++ b/experimental/python/codegen/codegen/generated_enum.py @@ -5,7 +5,7 @@ import codegen.packages as packages from codegen.code_builder import CodeBuilder from codegen.generated_dataclass import _append_description -from codegen.jsonschema import Schema +from codegen.jsonschema import Schema, Stage @dataclass(kw_only=True) @@ -14,6 +14,7 @@ class GeneratedEnum: package: str values: dict[str, str] description: Optional[str] + experimental: bool def generate_enum(schema_name: str, schema: Schema) -> GeneratedEnum: @@ -33,6 +34,7 @@ def generate_enum(schema_name: str, schema: Schema) -> GeneratedEnum: package=package, values=values, description=schema.description, + experimental=schema.stage == Stage.PRIVATE, ) @@ -46,7 +48,7 @@ def get_code(generated: GeneratedEnum) -> str: b.append(f"class {generated.class_name}(Enum):") b.newline() - _append_description(b, generated.description) + _append_description(b, generated.description, generated.experimental) # Example: # diff --git a/experimental/python/codegen/codegen/jsonschema.py b/experimental/python/codegen/codegen/jsonschema.py index e230d6d0b3..f07f8d2d1d 100644 --- a/experimental/python/codegen/codegen/jsonschema.py +++ b/experimental/python/codegen/codegen/jsonschema.py @@ -7,10 +7,16 @@ import codegen.packages as packages +class Stage: + PRIVATE = "PRIVATE" + + @dataclass class Property: ref: str description: Optional[str] = None + deprecated: Optional[bool] = None + stage: Optional[str] = None class SchemaType(Enum): @@ -25,6 +31,8 @@ class Schema: properties: dict[str, Property] = field(default_factory=dict) required: list[str] = field(default_factory=list) description: Optional[str] = None + deprecated: Optional[bool] = None + stage: Optional[str] = None def __post_init__(self): match self.type: @@ -76,6 +84,11 @@ def _parse_schema(schema: dict) -> Schema: schema = _unwrap_variable(schema) or schema properties = {} + def _parse_bool(value) -> Optional[bool]: + assert value is None or isinstance(value, bool) + + return value + for k, v in schema.get("properties", {}).items(): assert v.get("type") is None assert v.get("anyOf") is None @@ -87,6 +100,8 @@ def _parse_schema(schema: dict) -> Schema: prop = Property( ref=v["$ref"], description=v.get("description"), + deprecated=_parse_bool(v.get("deprecated")), + stage=v.get("x-databricks-preview"), ) properties[k] = prop @@ -102,6 +117,8 @@ def _parse_schema(schema: dict) -> Schema: properties=properties, required=schema.get("required", []), description=schema.get("description"), + deprecated=_parse_bool(schema.get("deprecated")), + stage=schema.get("x-databricks-preview"), ) diff --git a/experimental/python/codegen/codegen/jsonschema_patch.py b/experimental/python/codegen/codegen/jsonschema_patch.py index 1f0dfdf944..c1b4b9c0bd 100644 --- a/experimental/python/codegen/codegen/jsonschema_patch.py +++ b/experimental/python/codegen/codegen/jsonschema_patch.py @@ -3,43 +3,15 @@ from codegen.jsonschema import Schema REMOVED_FIELDS = { - "jobs.RunJobTask": { - # all params except job_parameters should be deprecated and should not be supported - "jar_params", - "notebook_params", - "python_params", - "spark_submit_params", - "python_named_params", - "sql_params", - "dbt_commands", - # except pipeline_params, that is not deprecated - }, - "jobs.TriggerSettings": { - # Old table trigger settings name. Deprecated in favor of `table_update` - "table", + # TODO remove as a follow-up + "jobs.Task": { + "dashboard_task", + "power_bi_task", }, "compute.ClusterSpec": { # doesn't work, openapi schema needs to be updated to be enum "kind", }, - "jobs.TaskEmailNotifications": { - # Deprecated - "no_alert_for_skipped_runs", - }, - "jobs.SparkJarTask": { - # Deprecated. A value of `false` is no longer supported. - "run_as_repl", - # Deprecated - "jar_uri", - }, - "resources.Pipeline": { - # Deprecated - "trigger", - }, - "pipelines.PipelineLibrary": { - # Deprecated - "whl", - }, } EXTRA_REQUIRED_FIELDS: dict[str, list[str]] = { diff --git a/experimental/python/codegen/codegen/main.py b/experimental/python/codegen/codegen/main.py index 735309ff4c..3ef2b5d2ee 100644 --- a/experimental/python/codegen/codegen/main.py +++ b/experimental/python/codegen/codegen/main.py @@ -1,4 +1,5 @@ import argparse +from dataclasses import replace from pathlib import Path from textwrap import dedent @@ -19,6 +20,12 @@ def main(output: str): schemas = openapi.get_schemas() schemas = openapi_patch.add_extra_required_fields(schemas) schemas = openapi_patch.remove_unsupported_fields(schemas) + + schemas = _transitively_mark_deprecated_and_private( + packages.RESOURCE_TYPES, schemas + ) + # first remove deprecated fields so there are more unused schemas + schemas = _remove_deprecated_fields(schemas) schemas = _remove_unused_schemas(packages.RESOURCE_TYPES, schemas) dataclasses, enums = _generate_code(schemas) @@ -37,6 +44,56 @@ def main(output: str): _write_exports(resource, resource_dataclasses, resource_enums, output) +def _transitively_mark_deprecated_and_private( + roots: list[str], + schemas: dict[str, openapi.Schema], +) -> dict[str, openapi.Schema]: + """ + If schema is only used through deprecated (private) fields, make it as deprecated (private). + + For example, if a field is marked as private, and is excluded from documentation, corresponding + dataclasses and enums should be private as well. + """ + + not_private = _collect_reachable_schemas(roots, schemas, include_private=False) + not_deprecated = _collect_reachable_schemas( + roots, schemas, include_deprecated=False + ) + new_schemas = {} + + for schema_name, schema in schemas.items(): + if schema_name not in not_private: + schema.stage = openapi.Stage.PRIVATE + + if schema_name not in not_deprecated: + schema.deprecated = True + + new_schemas[schema_name] = schema + + return new_schemas + + +def _remove_deprecated_fields( + schemas: dict[str, openapi.Schema], +) -> dict[str, openapi.Schema]: + new_schemas = {} + + for name, schema in schemas.items(): + if schema.type == openapi.SchemaType.OBJECT: + new_properties = {} + for field_name, field in schema.properties.items(): + if field.deprecated: + continue + + new_properties[field_name] = field + + new_schemas[name] = replace(schema, properties=new_properties) + else: + new_schemas[name] = schema + + return new_schemas + + def _generate_code( schemas: dict[str, openapi.Schema], ) -> tuple[dict[str, GeneratedDataclass], dict[str, GeneratedEnum]]: @@ -182,6 +239,12 @@ def _collect_reachable_schemas( if field.ref: name = field.ref.split("/")[-1] + if not include_private and field.stage == openapi.Stage.PRIVATE: + continue + + if not include_deprecated and field.deprecated: + continue + if name not in reachable: stack.append(name) diff --git a/experimental/python/codegen/codegen_tests/test_generated_dataclass.py b/experimental/python/codegen/codegen_tests/test_generated_dataclass.py index aa9a9b1a33..f93eb84341 100644 --- a/experimental/python/codegen/codegen_tests/test_generated_dataclass.py +++ b/experimental/python/codegen/codegen_tests/test_generated_dataclass.py @@ -65,8 +65,10 @@ def test_generate_dataclass(): field_name="task_key", param_type_name=variable_or_type(str_type(), is_required=True), type_name=variable_or_type(str_type(), is_required=True), + experimental=False, ), ], + experimental=False, ) diff --git a/experimental/python/codegen/codegen_tests/test_generated_enum.py b/experimental/python/codegen/codegen_tests/test_generated_enum.py index 8340e456e0..5035e05834 100644 --- a/experimental/python/codegen/codegen_tests/test_generated_enum.py +++ b/experimental/python/codegen/codegen_tests/test_generated_enum.py @@ -17,4 +17,5 @@ def test_generate_enum(): package="databricks.bundles.jobs._models.my_enum", values={"MY_ENUM_VALUE": "myEnumValue"}, description="enum description", + experimental=False, ) diff --git a/experimental/python/databricks/bundles/compute/_models/adlsgen2_info.py b/experimental/python/databricks/bundles/compute/_models/adlsgen2_info.py index 2a3203f7da..206c8b676e 100644 --- a/experimental/python/databricks/bundles/compute/_models/adlsgen2_info.py +++ b/experimental/python/databricks/bundles/compute/_models/adlsgen2_info.py @@ -11,7 +11,9 @@ @dataclass(kw_only=True) class Adlsgen2Info: - """""" + """ + A storage location in Adls Gen2 + """ destination: VariableOr[str] """ diff --git a/experimental/python/databricks/bundles/compute/_models/aws_attributes.py b/experimental/python/databricks/bundles/compute/_models/aws_attributes.py index 8494f47933..d61c2b2090 100644 --- a/experimental/python/databricks/bundles/compute/_models/aws_attributes.py +++ b/experimental/python/databricks/bundles/compute/_models/aws_attributes.py @@ -19,7 +19,9 @@ @dataclass(kw_only=True) class AwsAttributes: - """""" + """ + Attributes set during cluster creation which are related to Amazon Web Services. + """ availability: VariableOrOptional[AwsAvailability] = None @@ -81,8 +83,6 @@ class AwsAttributes: administrator. This feature may only be available to certain customer plans. - - If this field is ommitted, we will pull in the default from the conf if it exists. """ spot_bid_price_percent: VariableOrOptional[int] = None @@ -96,9 +96,6 @@ class AwsAttributes: When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. - - The default value and documentation here should be kept consistent with - CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent. """ zone_id: VariableOrOptional[str] = None @@ -110,6 +107,7 @@ class AwsAttributes: This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. + The list of available zones as well as the default value can be found by using the `List Zones` method. """ @@ -185,8 +183,6 @@ class AwsAttributesDict(TypedDict, total=False): administrator. This feature may only be available to certain customer plans. - - If this field is ommitted, we will pull in the default from the conf if it exists. """ spot_bid_price_percent: VariableOrOptional[int] @@ -200,9 +196,6 @@ class AwsAttributesDict(TypedDict, total=False): When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. - - The default value and documentation here should be kept consistent with - CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent. """ zone_id: VariableOrOptional[str] @@ -214,6 +207,7 @@ class AwsAttributesDict(TypedDict, total=False): This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. + The list of available zones as well as the default value can be found by using the `List Zones` method. """ diff --git a/experimental/python/databricks/bundles/compute/_models/aws_availability.py b/experimental/python/databricks/bundles/compute/_models/aws_availability.py index 62353ce15b..5d87ffafba 100644 --- a/experimental/python/databricks/bundles/compute/_models/aws_availability.py +++ b/experimental/python/databricks/bundles/compute/_models/aws_availability.py @@ -7,7 +7,6 @@ class AwsAvailability(Enum): Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. - """ SPOT = "SPOT" diff --git a/experimental/python/databricks/bundles/compute/_models/azure_attributes.py b/experimental/python/databricks/bundles/compute/_models/azure_attributes.py index 5bd72b6728..3b7a3b2aff 100644 --- a/experimental/python/databricks/bundles/compute/_models/azure_attributes.py +++ b/experimental/python/databricks/bundles/compute/_models/azure_attributes.py @@ -19,7 +19,9 @@ @dataclass(kw_only=True) class AzureAttributes: - """""" + """ + Attributes set during cluster creation which are related to Microsoft Azure. + """ availability: VariableOrOptional[AzureAvailability] = None diff --git a/experimental/python/databricks/bundles/compute/_models/azure_availability.py b/experimental/python/databricks/bundles/compute/_models/azure_availability.py index c6bb0ecb33..72d461d5d7 100644 --- a/experimental/python/databricks/bundles/compute/_models/azure_availability.py +++ b/experimental/python/databricks/bundles/compute/_models/azure_availability.py @@ -5,8 +5,7 @@ class AzureAvailability(Enum): """ Availability type used for all subsequent nodes past the `first_on_demand` ones. - Note: If `first_on_demand` is zero (which only happens on pool clusters), this availability - type will be used for the entire cluster. + Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. """ SPOT_AZURE = "SPOT_AZURE" diff --git a/experimental/python/databricks/bundles/compute/_models/cluster_log_conf.py b/experimental/python/databricks/bundles/compute/_models/cluster_log_conf.py index 6abf15e7c5..6795d49db1 100644 --- a/experimental/python/databricks/bundles/compute/_models/cluster_log_conf.py +++ b/experimental/python/databricks/bundles/compute/_models/cluster_log_conf.py @@ -23,7 +23,9 @@ @dataclass(kw_only=True) class ClusterLogConf: - """""" + """ + Cluster log delivery config + """ dbfs: VariableOrOptional[DbfsStorageInfo] = None """ @@ -41,8 +43,8 @@ class ClusterLogConf: volumes: VariableOrOptional[VolumesStorageInfo] = None """ - destination needs to be provided. e.g. - `{ "volumes" : { "destination" : "/Volumes/catalog/schema/volume/cluster_log" } }` + destination needs to be provided, e.g. + `{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }` """ @classmethod @@ -72,8 +74,8 @@ class ClusterLogConfDict(TypedDict, total=False): volumes: VariableOrOptional[VolumesStorageInfoParam] """ - destination needs to be provided. e.g. - `{ "volumes" : { "destination" : "/Volumes/catalog/schema/volume/cluster_log" } }` + destination needs to be provided, e.g. + `{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }` """ diff --git a/experimental/python/databricks/bundles/compute/_models/cluster_spec.py b/experimental/python/databricks/bundles/compute/_models/cluster_spec.py index 9b37322933..5394c194ef 100644 --- a/experimental/python/databricks/bundles/compute/_models/cluster_spec.py +++ b/experimental/python/databricks/bundles/compute/_models/cluster_spec.py @@ -55,7 +55,9 @@ @dataclass(kw_only=True) class ClusterSpec: - """""" + """ + Contains a snapshot of the latest user specified settings that were used to create/edit the cluster. + """ apply_policy_default_values: VariableOrOptional[bool] = None """ @@ -101,7 +103,6 @@ class ClusterSpec: """ Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. - """ custom_tags: VariableOrDict[str] = field(default_factory=dict) @@ -127,10 +128,12 @@ class ClusterSpec: driver_node_type_id: VariableOrOptional[str] = None """ - The node type of the Spark driver. Note that this field is optional; - if unset, the driver node type will be set as the same value + The node type of the Spark driver. + Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. + This field, along with node_type_id, should not be set if virtual_cluster_size is set. + If both driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id and node_type_id take precedence. """ enable_elastic_disk: VariableOrOptional[bool] = None @@ -153,7 +156,9 @@ class ClusterSpec: init_scripts: VariableOrList[InitScriptInfo] = field(default_factory=list) """ - The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. + The configuration for storing init scripts. Any number of destinations can be specified. + The scripts are executed sequentially in the order provided. + If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. """ instance_pool_id: VariableOrOptional[str] = None @@ -166,7 +171,6 @@ class ClusterSpec: This field can only be used when `kind = CLASSIC_PREVIEW`. When set to true, Databricks will automatically set single node related `custom_tags`, `spark_conf`, and `num_workers` - """ node_type_id: VariableOrOptional[str] = None @@ -175,7 +179,6 @@ class ClusterSpec: the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. - """ num_workers: VariableOrOptional[int] = None @@ -207,7 +210,6 @@ class ClusterSpec: An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. - """ spark_env_vars: VariableOrDict[str] = field(default_factory=dict) @@ -230,7 +232,6 @@ class ClusterSpec: The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. - """ ssh_public_keys: VariableOrList[str] = field(default_factory=list) @@ -245,7 +246,6 @@ class ClusterSpec: This field can only be used when `kind = CLASSIC_PREVIEW`. `effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not. - """ workload_type: VariableOrOptional[WorkloadType] = None @@ -305,7 +305,6 @@ class ClusterSpecDict(TypedDict, total=False): """ Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. - """ custom_tags: VariableOrDict[str] @@ -331,10 +330,12 @@ class ClusterSpecDict(TypedDict, total=False): driver_node_type_id: VariableOrOptional[str] """ - The node type of the Spark driver. Note that this field is optional; - if unset, the driver node type will be set as the same value + The node type of the Spark driver. + Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. + This field, along with node_type_id, should not be set if virtual_cluster_size is set. + If both driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id and node_type_id take precedence. """ enable_elastic_disk: VariableOrOptional[bool] @@ -357,7 +358,9 @@ class ClusterSpecDict(TypedDict, total=False): init_scripts: VariableOrList[InitScriptInfoParam] """ - The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. + The configuration for storing init scripts. Any number of destinations can be specified. + The scripts are executed sequentially in the order provided. + If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. """ instance_pool_id: VariableOrOptional[str] @@ -370,7 +373,6 @@ class ClusterSpecDict(TypedDict, total=False): This field can only be used when `kind = CLASSIC_PREVIEW`. When set to true, Databricks will automatically set single node related `custom_tags`, `spark_conf`, and `num_workers` - """ node_type_id: VariableOrOptional[str] @@ -379,7 +381,6 @@ class ClusterSpecDict(TypedDict, total=False): the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. - """ num_workers: VariableOrOptional[int] @@ -411,7 +412,6 @@ class ClusterSpecDict(TypedDict, total=False): An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. - """ spark_env_vars: VariableOrDict[str] @@ -434,7 +434,6 @@ class ClusterSpecDict(TypedDict, total=False): The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. - """ ssh_public_keys: VariableOrList[str] @@ -449,7 +448,6 @@ class ClusterSpecDict(TypedDict, total=False): This field can only be used when `kind = CLASSIC_PREVIEW`. `effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not. - """ workload_type: VariableOrOptional[WorkloadTypeParam] diff --git a/experimental/python/databricks/bundles/compute/_models/data_security_mode.py b/experimental/python/databricks/bundles/compute/_models/data_security_mode.py index 6f642a13ba..e195d0cfc8 100644 --- a/experimental/python/databricks/bundles/compute/_models/data_security_mode.py +++ b/experimental/python/databricks/bundles/compute/_models/data_security_mode.py @@ -24,12 +24,8 @@ class DataSecurityMode(Enum): * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters. * `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled. - """ - DATA_SECURITY_MODE_AUTO = "DATA_SECURITY_MODE_AUTO" - DATA_SECURITY_MODE_STANDARD = "DATA_SECURITY_MODE_STANDARD" - DATA_SECURITY_MODE_DEDICATED = "DATA_SECURITY_MODE_DEDICATED" NONE = "NONE" SINGLE_USER = "SINGLE_USER" USER_ISOLATION = "USER_ISOLATION" @@ -37,13 +33,13 @@ class DataSecurityMode(Enum): LEGACY_PASSTHROUGH = "LEGACY_PASSTHROUGH" LEGACY_SINGLE_USER = "LEGACY_SINGLE_USER" LEGACY_SINGLE_USER_STANDARD = "LEGACY_SINGLE_USER_STANDARD" + DATA_SECURITY_MODE_STANDARD = "DATA_SECURITY_MODE_STANDARD" + DATA_SECURITY_MODE_DEDICATED = "DATA_SECURITY_MODE_DEDICATED" + DATA_SECURITY_MODE_AUTO = "DATA_SECURITY_MODE_AUTO" DataSecurityModeParam = ( Literal[ - "DATA_SECURITY_MODE_AUTO", - "DATA_SECURITY_MODE_STANDARD", - "DATA_SECURITY_MODE_DEDICATED", "NONE", "SINGLE_USER", "USER_ISOLATION", @@ -51,6 +47,9 @@ class DataSecurityMode(Enum): "LEGACY_PASSTHROUGH", "LEGACY_SINGLE_USER", "LEGACY_SINGLE_USER_STANDARD", + "DATA_SECURITY_MODE_STANDARD", + "DATA_SECURITY_MODE_DEDICATED", + "DATA_SECURITY_MODE_AUTO", ] | DataSecurityMode ) diff --git a/experimental/python/databricks/bundles/compute/_models/dbfs_storage_info.py b/experimental/python/databricks/bundles/compute/_models/dbfs_storage_info.py index e01a90a835..81fe319a65 100644 --- a/experimental/python/databricks/bundles/compute/_models/dbfs_storage_info.py +++ b/experimental/python/databricks/bundles/compute/_models/dbfs_storage_info.py @@ -11,7 +11,9 @@ @dataclass(kw_only=True) class DbfsStorageInfo: - """""" + """ + A storage location in DBFS + """ destination: VariableOr[str] """ diff --git a/experimental/python/databricks/bundles/compute/_models/ebs_volume_type.py b/experimental/python/databricks/bundles/compute/_models/ebs_volume_type.py index 5d53b1f9b5..b67853f8cb 100644 --- a/experimental/python/databricks/bundles/compute/_models/ebs_volume_type.py +++ b/experimental/python/databricks/bundles/compute/_models/ebs_volume_type.py @@ -4,7 +4,8 @@ class EbsVolumeType(Enum): """ - The type of EBS volumes that will be launched with this cluster. + All EBS volume types that Databricks supports. + See https://aws.amazon.com/ebs/details/ for details. """ GENERAL_PURPOSE_SSD = "GENERAL_PURPOSE_SSD" diff --git a/experimental/python/databricks/bundles/compute/_models/environment.py b/experimental/python/databricks/bundles/compute/_models/environment.py index 7cca7ae87c..32a731c31f 100644 --- a/experimental/python/databricks/bundles/compute/_models/environment.py +++ b/experimental/python/databricks/bundles/compute/_models/environment.py @@ -29,6 +29,13 @@ class Environment: List of pip dependencies, as supported by the version of pip in this environment. """ + jar_dependencies: VariableOrList[str] = field(default_factory=list) + """ + :meta private: [EXPERIMENTAL] + + List of jar dependencies, should be string representing volume paths. For example: `/Volumes/path/to/test.jar`. + """ + @classmethod def from_dict(cls, value: "EnvironmentDict") -> "Self": return _transform(cls, value) @@ -53,5 +60,12 @@ class EnvironmentDict(TypedDict, total=False): List of pip dependencies, as supported by the version of pip in this environment. """ + jar_dependencies: VariableOrList[str] + """ + :meta private: [EXPERIMENTAL] + + List of jar dependencies, should be string representing volume paths. For example: `/Volumes/path/to/test.jar`. + """ + EnvironmentParam = EnvironmentDict | Environment diff --git a/experimental/python/databricks/bundles/compute/_models/gcp_attributes.py b/experimental/python/databricks/bundles/compute/_models/gcp_attributes.py index 9ae209238d..f4d800fe1a 100644 --- a/experimental/python/databricks/bundles/compute/_models/gcp_attributes.py +++ b/experimental/python/databricks/bundles/compute/_models/gcp_attributes.py @@ -15,13 +15,15 @@ @dataclass(kw_only=True) class GcpAttributes: - """""" + """ + Attributes set during cluster creation which are related to GCP. + """ availability: VariableOrOptional[GcpAvailability] = None boot_disk_size: VariableOrOptional[int] = None """ - boot disk size in GB + Boot disk size in GB """ google_service_account: VariableOrOptional[str] = None @@ -34,22 +36,20 @@ class GcpAttributes: local_ssd_count: VariableOrOptional[int] = None """ - If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. - """ - - use_preemptible_executors: VariableOrOptional[bool] = None - """ - This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default). - Note: Soon to be deprecated, use the availability field instead. + If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. + Each local SSD is 375GB in size. + Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) + for the supported number of local SSDs for each instance type. """ zone_id: VariableOrOptional[str] = None """ Identifier for the availability zone in which the cluster resides. This can be one of the following: - - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default] + - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default]. - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. + - A GCP availability zone => Pick One of the available zones for (machine type + region) from + https://cloud.google.com/compute/docs/regions-zones. """ @classmethod @@ -67,7 +67,7 @@ class GcpAttributesDict(TypedDict, total=False): boot_disk_size: VariableOrOptional[int] """ - boot disk size in GB + Boot disk size in GB """ google_service_account: VariableOrOptional[str] @@ -80,22 +80,20 @@ class GcpAttributesDict(TypedDict, total=False): local_ssd_count: VariableOrOptional[int] """ - If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. - """ - - use_preemptible_executors: VariableOrOptional[bool] - """ - This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default). - Note: Soon to be deprecated, use the availability field instead. + If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. + Each local SSD is 375GB in size. + Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) + for the supported number of local SSDs for each instance type. """ zone_id: VariableOrOptional[str] """ Identifier for the availability zone in which the cluster resides. This can be one of the following: - - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default] + - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default]. - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. + - A GCP availability zone => Pick One of the available zones for (machine type + region) from + https://cloud.google.com/compute/docs/regions-zones. """ diff --git a/experimental/python/databricks/bundles/compute/_models/gcs_storage_info.py b/experimental/python/databricks/bundles/compute/_models/gcs_storage_info.py index 42e42714c0..a5e6d51e6e 100644 --- a/experimental/python/databricks/bundles/compute/_models/gcs_storage_info.py +++ b/experimental/python/databricks/bundles/compute/_models/gcs_storage_info.py @@ -11,7 +11,9 @@ @dataclass(kw_only=True) class GcsStorageInfo: - """""" + """ + A storage location in Google Cloud Platform's GCS + """ destination: VariableOr[str] """ diff --git a/experimental/python/databricks/bundles/compute/_models/init_script_info.py b/experimental/python/databricks/bundles/compute/_models/init_script_info.py index 47efb2e271..f8bd4de0f7 100644 --- a/experimental/python/databricks/bundles/compute/_models/init_script_info.py +++ b/experimental/python/databricks/bundles/compute/_models/init_script_info.py @@ -5,10 +5,6 @@ Adlsgen2Info, Adlsgen2InfoParam, ) -from databricks.bundles.compute._models.dbfs_storage_info import ( - DbfsStorageInfo, - DbfsStorageInfoParam, -) from databricks.bundles.compute._models.gcs_storage_info import ( GcsStorageInfo, GcsStorageInfoParam, @@ -39,35 +35,32 @@ @dataclass(kw_only=True) class InitScriptInfo: - """""" - - abfss: VariableOrOptional[Adlsgen2Info] = None """ - Contains the Azure Data Lake Storage destination path + Config for an individual init script + Next ID: 11 """ - dbfs: VariableOrOptional[DbfsStorageInfo] = None + abfss: VariableOrOptional[Adlsgen2Info] = None """ - destination needs to be provided. e.g. - `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + Contains the Azure Data Lake Storage destination path """ file: VariableOrOptional[LocalFileInfo] = None """ - destination needs to be provided. e.g. - `{ "file" : { "destination" : "file:/my/local/file.sh" } }` + destination needs to be provided, e.g. + `{ "file": { "destination": "file:/my/local/file.sh" } }` """ gcs: VariableOrOptional[GcsStorageInfo] = None """ - destination needs to be provided. e.g. + destination needs to be provided, e.g. `{ "gcs": { "destination": "gs://my-bucket/file.sh" } }` """ s3: VariableOrOptional[S3StorageInfo] = None """ destination and either the region or endpoint need to be provided. e.g. - `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` + `{ \"s3\": { \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. """ @@ -75,13 +68,13 @@ class InitScriptInfo: volumes: VariableOrOptional[VolumesStorageInfo] = None """ destination needs to be provided. e.g. - `{ "volumes" : { "destination" : "/Volumes/my-init.sh" } }` + `{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }` """ workspace: VariableOrOptional[WorkspaceStorageInfo] = None """ - destination needs to be provided. e.g. - `{ "workspace" : { "destination" : "/Users/user1@databricks.com/my-init.sh" } }` + destination needs to be provided, e.g. + `{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }` """ @classmethod @@ -100,28 +93,22 @@ class InitScriptInfoDict(TypedDict, total=False): Contains the Azure Data Lake Storage destination path """ - dbfs: VariableOrOptional[DbfsStorageInfoParam] - """ - destination needs to be provided. e.g. - `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` - """ - file: VariableOrOptional[LocalFileInfoParam] """ - destination needs to be provided. e.g. - `{ "file" : { "destination" : "file:/my/local/file.sh" } }` + destination needs to be provided, e.g. + `{ "file": { "destination": "file:/my/local/file.sh" } }` """ gcs: VariableOrOptional[GcsStorageInfoParam] """ - destination needs to be provided. e.g. + destination needs to be provided, e.g. `{ "gcs": { "destination": "gs://my-bucket/file.sh" } }` """ s3: VariableOrOptional[S3StorageInfoParam] """ destination and either the region or endpoint need to be provided. e.g. - `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` + `{ \"s3\": { \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. """ @@ -129,13 +116,13 @@ class InitScriptInfoDict(TypedDict, total=False): volumes: VariableOrOptional[VolumesStorageInfoParam] """ destination needs to be provided. e.g. - `{ "volumes" : { "destination" : "/Volumes/my-init.sh" } }` + `{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }` """ workspace: VariableOrOptional[WorkspaceStorageInfoParam] """ - destination needs to be provided. e.g. - `{ "workspace" : { "destination" : "/Users/user1@databricks.com/my-init.sh" } }` + destination needs to be provided, e.g. + `{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }` """ diff --git a/experimental/python/databricks/bundles/compute/_models/library.py b/experimental/python/databricks/bundles/compute/_models/library.py index 4935fffc07..77a16240c6 100644 --- a/experimental/python/databricks/bundles/compute/_models/library.py +++ b/experimental/python/databricks/bundles/compute/_models/library.py @@ -30,11 +30,6 @@ class Library: Specification of a CRAN library to be installed as part of the library """ - egg: VariableOrOptional[str] = None - """ - Deprecated. URI of the egg library to install. Installing Python egg files is deprecated and is not supported in Databricks Runtime 14.0 and above. - """ - jar: VariableOrOptional[str] = None """ URI of the JAR library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs. @@ -87,11 +82,6 @@ class LibraryDict(TypedDict, total=False): Specification of a CRAN library to be installed as part of the library """ - egg: VariableOrOptional[str] - """ - Deprecated. URI of the egg library to install. Installing Python egg files is deprecated and is not supported in Databricks Runtime 14.0 and above. - """ - jar: VariableOrOptional[str] """ URI of the JAR library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs. diff --git a/experimental/python/databricks/bundles/compute/_models/runtime_engine.py b/experimental/python/databricks/bundles/compute/_models/runtime_engine.py index 0e858bd98e..1829c507e7 100644 --- a/experimental/python/databricks/bundles/compute/_models/runtime_engine.py +++ b/experimental/python/databricks/bundles/compute/_models/runtime_engine.py @@ -3,17 +3,6 @@ class RuntimeEngine(Enum): - """ - Determines the cluster's runtime engine, either standard or Photon. - - This field is not compatible with legacy `spark_version` values that contain `-photon-`. - Remove `-photon-` from the `spark_version` and set `runtime_engine` to `PHOTON`. - - If left unspecified, the runtime engine defaults to standard unless the spark_version - contains -photon-, in which case Photon will be used. - - """ - NULL = "NULL" STANDARD = "STANDARD" PHOTON = "PHOTON" diff --git a/experimental/python/databricks/bundles/compute/_models/s3_storage_info.py b/experimental/python/databricks/bundles/compute/_models/s3_storage_info.py index 25a5566f22..b5e09063e5 100644 --- a/experimental/python/databricks/bundles/compute/_models/s3_storage_info.py +++ b/experimental/python/databricks/bundles/compute/_models/s3_storage_info.py @@ -11,7 +11,9 @@ @dataclass(kw_only=True) class S3StorageInfo: - """""" + """ + A storage location in Amazon S3 + """ destination: VariableOr[str] """ diff --git a/experimental/python/databricks/bundles/compute/_models/volumes_storage_info.py b/experimental/python/databricks/bundles/compute/_models/volumes_storage_info.py index 46cec7ab6c..cbe74758fc 100644 --- a/experimental/python/databricks/bundles/compute/_models/volumes_storage_info.py +++ b/experimental/python/databricks/bundles/compute/_models/volumes_storage_info.py @@ -11,11 +11,14 @@ @dataclass(kw_only=True) class VolumesStorageInfo: - """""" + """ + A storage location back by UC Volumes. + """ destination: VariableOr[str] """ - Unity Catalog volumes file destination, e.g. `/Volumes/catalog/schema/volume/dir/file` + UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` + or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` """ @classmethod @@ -31,7 +34,8 @@ class VolumesStorageInfoDict(TypedDict, total=False): destination: VariableOr[str] """ - Unity Catalog volumes file destination, e.g. `/Volumes/catalog/schema/volume/dir/file` + UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` + or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` """ diff --git a/experimental/python/databricks/bundles/compute/_models/workload_type.py b/experimental/python/databricks/bundles/compute/_models/workload_type.py index 5ec44ab41f..9582fbfc75 100644 --- a/experimental/python/databricks/bundles/compute/_models/workload_type.py +++ b/experimental/python/databricks/bundles/compute/_models/workload_type.py @@ -15,11 +15,13 @@ @dataclass(kw_only=True) class WorkloadType: - """""" + """ + Cluster Attributes showing for clusters workload types. + """ clients: VariableOr[ClientsTypes] """ - defined what type of clients can use the cluster. E.g. Notebooks, Jobs + defined what type of clients can use the cluster. E.g. Notebooks, Jobs """ @classmethod @@ -35,7 +37,7 @@ class WorkloadTypeDict(TypedDict, total=False): clients: VariableOr[ClientsTypesParam] """ - defined what type of clients can use the cluster. E.g. Notebooks, Jobs + defined what type of clients can use the cluster. E.g. Notebooks, Jobs """ diff --git a/experimental/python/databricks/bundles/compute/_models/workspace_storage_info.py b/experimental/python/databricks/bundles/compute/_models/workspace_storage_info.py index bb004162c7..29c075ac2f 100644 --- a/experimental/python/databricks/bundles/compute/_models/workspace_storage_info.py +++ b/experimental/python/databricks/bundles/compute/_models/workspace_storage_info.py @@ -11,11 +11,13 @@ @dataclass(kw_only=True) class WorkspaceStorageInfo: - """""" + """ + A storage location in Workspace Filesystem (WSFS) + """ destination: VariableOr[str] """ - workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh` + wsfs destination, e.g. `workspace:/cluster-init-scripts/setup-datadog.sh` """ @classmethod @@ -31,7 +33,7 @@ class WorkspaceStorageInfoDict(TypedDict, total=False): destination: VariableOr[str] """ - workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh` + wsfs destination, e.g. `workspace:/cluster-init-scripts/setup-datadog.sh` """ diff --git a/experimental/python/databricks/bundles/jobs/_models/compute_config.py b/experimental/python/databricks/bundles/jobs/_models/compute_config.py index 72c0e24160..a3d47305dd 100644 --- a/experimental/python/databricks/bundles/jobs/_models/compute_config.py +++ b/experimental/python/databricks/bundles/jobs/_models/compute_config.py @@ -13,8 +13,6 @@ class ComputeConfig: """ :meta private: [EXPERIMENTAL] - - Next field: 4 """ gpu_node_pool_id: VariableOr[str] diff --git a/experimental/python/databricks/bundles/jobs/_models/gen_ai_compute_task.py b/experimental/python/databricks/bundles/jobs/_models/gen_ai_compute_task.py index 81b8fdb78e..4d2c4b972c 100644 --- a/experimental/python/databricks/bundles/jobs/_models/gen_ai_compute_task.py +++ b/experimental/python/databricks/bundles/jobs/_models/gen_ai_compute_task.py @@ -18,8 +18,6 @@ class GenAiComputeTask: """ :meta private: [EXPERIMENTAL] - - Next field: 9 """ dl_runtime_image: VariableOr[str] diff --git a/experimental/python/databricks/bundles/jobs/_models/job.py b/experimental/python/databricks/bundles/jobs/_models/job.py index df8e7fe393..6d66279e04 100644 --- a/experimental/python/databricks/bundles/jobs/_models/job.py +++ b/experimental/python/databricks/bundles/jobs/_models/job.py @@ -114,7 +114,6 @@ class Job(Resource): job_clusters: VariableOrList[JobCluster] = field(default_factory=list) """ A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. - If more than 100 job clusters are available, you can paginate through them using :method:jobs/get. """ max_concurrent_runs: VariableOrOptional[int] = None @@ -146,7 +145,10 @@ class Job(Resource): """ :meta private: [EXPERIMENTAL] - PerformanceTarget defines how performant or cost efficient the execution of run on serverless should be. + The performance mode on a serverless job. The performance target determines the level of compute performance or cost-efficiency for the run. + + * `STANDARD`: Enables cost-efficient execution of serverless workloads. + * `PERFORMANCE_OPTIMIZED`: Prioritizes fast startup and execution times through rapid scaling and optimized cluster performance. """ permissions: VariableOrList[JobPermission] = field(default_factory=list) @@ -171,7 +173,8 @@ class Job(Resource): tasks: VariableOrList[Task] = field(default_factory=list) """ A list of task specifications to be executed by this job. - If more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available. + It supports up to 1000 elements in write endpoints (:method:jobs/create, :method:jobs/reset, :method:jobs/update, :method:jobs/submit). + Read endpoints return only 100 tasks. If more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available. """ timeout_seconds: VariableOrOptional[int] = None @@ -244,7 +247,6 @@ class JobDict(TypedDict, total=False): job_clusters: VariableOrList[JobClusterParam] """ A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. - If more than 100 job clusters are available, you can paginate through them using :method:jobs/get. """ max_concurrent_runs: VariableOrOptional[int] @@ -276,7 +278,10 @@ class JobDict(TypedDict, total=False): """ :meta private: [EXPERIMENTAL] - PerformanceTarget defines how performant or cost efficient the execution of run on serverless should be. + The performance mode on a serverless job. The performance target determines the level of compute performance or cost-efficiency for the run. + + * `STANDARD`: Enables cost-efficient execution of serverless workloads. + * `PERFORMANCE_OPTIMIZED`: Prioritizes fast startup and execution times through rapid scaling and optimized cluster performance. """ permissions: VariableOrList[JobPermissionParam] @@ -301,7 +306,8 @@ class JobDict(TypedDict, total=False): tasks: VariableOrList[TaskParam] """ A list of task specifications to be executed by this job. - If more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available. + It supports up to 1000 elements in write endpoints (:method:jobs/create, :method:jobs/reset, :method:jobs/update, :method:jobs/submit). + Read endpoints return only 100 tasks. If more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available. """ timeout_seconds: VariableOrOptional[int] diff --git a/experimental/python/databricks/bundles/jobs/_models/performance_target.py b/experimental/python/databricks/bundles/jobs/_models/performance_target.py index 8fde1dd4c2..eb805ca5ef 100644 --- a/experimental/python/databricks/bundles/jobs/_models/performance_target.py +++ b/experimental/python/databricks/bundles/jobs/_models/performance_target.py @@ -12,10 +12,9 @@ class PerformanceTarget(Enum): """ PERFORMANCE_OPTIMIZED = "PERFORMANCE_OPTIMIZED" - COST_OPTIMIZED = "COST_OPTIMIZED" - BALANCED = "BALANCED" + STANDARD = "STANDARD" PerformanceTargetParam = ( - Literal["PERFORMANCE_OPTIMIZED", "COST_OPTIMIZED", "BALANCED"] | PerformanceTarget + Literal["PERFORMANCE_OPTIMIZED", "STANDARD"] | PerformanceTarget ) diff --git a/experimental/python/databricks/bundles/jobs/_models/permission.py b/experimental/python/databricks/bundles/jobs/_models/permission.py deleted file mode 100644 index c0204944a7..0000000000 --- a/experimental/python/databricks/bundles/jobs/_models/permission.py +++ /dev/null @@ -1,80 +0,0 @@ -from dataclasses import dataclass -from typing import TYPE_CHECKING, TypedDict - -from databricks.bundles.core._transform import _transform -from databricks.bundles.core._transform_to_json import _transform_to_json_value -from databricks.bundles.core._variable import VariableOr, VariableOrOptional - -if TYPE_CHECKING: - from typing_extensions import Self - - -@dataclass(kw_only=True) -class Permission: - """""" - - level: VariableOr[str] - """ - The allowed permission for user, group, service principal defined for this permission. - """ - - group_name: VariableOrOptional[str] = None - """ - The name of the group that has the permission set in level. - """ - - service_principal_name: VariableOrOptional[str] = None - """ - The name of the service principal that has the permission set in level. - """ - - user_name: VariableOrOptional[str] = None - """ - The name of the user that has the permission set in level. - """ - - def __post_init__(self): - union_fields = [ - self.user_name, - self.service_principal_name, - self.group_name, - ] - - if sum(f is not None for f in union_fields) != 1: - raise ValueError( - "Permission must specify exactly one of 'user_name', 'service_principal_name', 'group_name'" - ) - - @classmethod - def from_dict(cls, value: "PermissionDict") -> "Self": - return _transform(cls, value) - - def as_dict(self) -> "PermissionDict": - return _transform_to_json_value(self) # type:ignore - - -class PermissionDict(TypedDict, total=False): - """""" - - level: VariableOr[str] - """ - The allowed permission for user, group, service principal defined for this permission. - """ - - group_name: VariableOrOptional[str] - """ - The name of the group that has the permission set in level. - """ - - service_principal_name: VariableOrOptional[str] - """ - The name of the service principal that has the permission set in level. - """ - - user_name: VariableOrOptional[str] - """ - The name of the user that has the permission set in level. - """ - - -PermissionParam = PermissionDict | Permission diff --git a/experimental/python/databricks/bundles/pipelines/_models/__init__.py b/experimental/python/databricks/bundles/pipelines/_models/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/experimental/python/databricks/bundles/pipelines/_models/pipeline_permission_level.py b/experimental/python/databricks/bundles/pipelines/_models/pipeline_permission_level.py index 2c7845fef3..3e6edf3080 100644 --- a/experimental/python/databricks/bundles/pipelines/_models/pipeline_permission_level.py +++ b/experimental/python/databricks/bundles/pipelines/_models/pipeline_permission_level.py @@ -4,11 +4,11 @@ class PipelinePermissionLevel(Enum): CAN_MANAGE = "CAN_MANAGE" + IS_OWNER = "IS_OWNER" CAN_RUN = "CAN_RUN" CAN_VIEW = "CAN_VIEW" - IS_OWNER = "IS_OWNER" PipelinePermissionLevelParam = ( - Literal["CAN_MANAGE", "CAN_RUN", "CAN_VIEW", "IS_OWNER"] | PipelinePermissionLevel + Literal["CAN_MANAGE", "IS_OWNER", "CAN_RUN", "CAN_VIEW"] | PipelinePermissionLevel )