@@ -0,0 +1,38 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict

from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr

if TYPE_CHECKING:
from typing_extensions import Self


@dataclass(kw_only=True)
class Adlsgen2Info:
""""""

destination: VariableOr[str]
"""
abfss destination, e.g. `abfss://<container-name>@<storage-account-name>.dfs.core.windows.net/<directory-name>`.
"""

@classmethod
def from_dict(cls, value: "Adlsgen2InfoDict") -> "Self":
return _transform(cls, value)

def as_dict(self) -> "Adlsgen2InfoDict":
return _transform_to_json_value(self) # type:ignore


class Adlsgen2InfoDict(TypedDict, total=False):
""""""

destination: VariableOr[str]
"""
abfss destination, e.g. `abfss://<container-name>@<storage-account-name>.dfs.core.windows.net/<directory-name>`.
"""


Adlsgen2InfoParam = Adlsgen2InfoDict | Adlsgen2Info
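
A minimal usage sketch (not part of the generated file above): `Adlsgen2Info` can be constructed directly as a dataclass or from its `TypedDict` form via `from_dict`, and both serialize back to the same dict. The import path and the storage account/container names below are assumptions for illustration.

# Usage sketch, assuming the model is re-exported from databricks.bundles.compute.
from databricks.bundles.compute import Adlsgen2Info

# Hypothetical container/account names; replace with your own.
destination = "abfss://logs@mystorageaccount.dfs.core.windows.net/cluster-logs"

info = Adlsgen2Info(destination=destination)
info_from_dict = Adlsgen2Info.from_dict({"destination": destination})

# Both forms round-trip to the same JSON-compatible dict.
assert info.as_dict() == info_from_dict.as_dict()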
@@ -0,0 +1,52 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict

from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrOptional

if TYPE_CHECKING:
from typing_extensions import Self


@dataclass(kw_only=True)
class AutoScale:
""""""

max_workers: VariableOrOptional[int] = None
"""
The maximum number of workers to which the cluster can scale up when overloaded.
Note that `max_workers` must be strictly greater than `min_workers`.
"""

min_workers: VariableOrOptional[int] = None
"""
The minimum number of workers to which the cluster can scale down when underutilized.
It is also the initial number of workers the cluster will have after creation.
"""

@classmethod
def from_dict(cls, value: "AutoScaleDict") -> "Self":
return _transform(cls, value)

def as_dict(self) -> "AutoScaleDict":
return _transform_to_json_value(self) # type:ignore


class AutoScaleDict(TypedDict, total=False):
""""""

max_workers: VariableOrOptional[int]
"""
The maximum number of workers to which the cluster can scale up when overloaded.
Note that `max_workers` must be strictly greater than `min_workers`.
"""

min_workers: VariableOrOptional[int]
"""
The minimum number of workers to which the cluster can scale down when underutilized.
It is also the initial number of workers the cluster will have after creation.
"""


AutoScaleParam = AutoScaleDict | AutoScale
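
A minimal usage sketch (not part of the generated file above): `AutoScale` captures the autoscaling bounds, where `min_workers` is also the initial cluster size and `max_workers` must be strictly greater. The import path is an assumption.

# Usage sketch, assuming the model is re-exported from databricks.bundles.compute.
from databricks.bundles.compute import AutoScale

# Start with 1 worker and allow scaling up to 4 (max_workers > min_workers).
autoscale = AutoScale(min_workers=1, max_workers=4)

# The TypedDict form is interchangeable wherever AutoScaleParam is accepted.
same_autoscale = AutoScale.from_dict({"min_workers": 1, "max_workers": 4})
assert autoscale.as_dict() == same_autoscale.as_dict()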
@@ -0,0 +1,222 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict

from databricks.bundles.compute._models.aws_availability import (
AwsAvailability,
AwsAvailabilityParam,
)
from databricks.bundles.compute._models.ebs_volume_type import (
EbsVolumeType,
EbsVolumeTypeParam,
)
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrOptional

if TYPE_CHECKING:
from typing_extensions import Self


@dataclass(kw_only=True)
class AwsAttributes:
""""""

availability: VariableOrOptional[AwsAvailability] = None

ebs_volume_count: VariableOrOptional[int] = None
"""
The number of volumes launched for each instance. Users can choose up to 10 volumes.
This feature is only enabled for supported node types. Legacy node types cannot specify
custom EBS volumes.
For node types with no instance store, at least one EBS volume needs to be specified;
otherwise, cluster creation will fail.

These EBS volumes will be mounted at `/ebs0`, `/ebs1`, etc.
Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, etc.

If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for
scratch storage because heterogeneously sized scratch devices can lead to inefficient disk
utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance
store volumes.

Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`
will be overridden.
"""

ebs_volume_iops: VariableOrOptional[int] = None
"""
If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used.
"""

ebs_volume_size: VariableOrOptional[int] = None
"""
The size of each EBS volume (in GiB) launched for each instance. For general purpose
SSD, this value must be within the range 100 - 4096. For throughput optimized HDD,
this value must be within the range 500 - 4096.
"""

ebs_volume_throughput: VariableOrOptional[int] = None
"""
If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used.
"""

ebs_volume_type: VariableOrOptional[EbsVolumeType] = None

first_on_demand: VariableOrOptional[int] = None
"""
The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.
If this value is greater than 0, the cluster driver node in particular will be placed on an
on-demand instance. If this value is greater than or equal to the current cluster size, all
nodes will be placed on on-demand instances. If this value is less than the current cluster
size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will
be placed on `availability` instances. Note that this value does not affect
cluster size and cannot currently be mutated over the lifetime of a cluster.
"""

instance_profile_arn: VariableOrOptional[str] = None
"""
Nodes for this cluster will only be placed on AWS instances with this instance profile. If
omitted, nodes will be placed on instances without an IAM instance profile. The instance
profile must have previously been added to the Databricks environment by an account
administrator.

This feature may only be available to certain customer plans.

If this field is omitted, the default from the configuration is used, if one exists.
"""

spot_bid_price_percent: VariableOrOptional[int] = None
"""
The bid price for AWS spot instances, as a percentage of the corresponding instance type's
on-demand price.
For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot
instance, then the bid price is half of the price of
on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice
the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100.
When spot instances are requested for this cluster, only spot instances whose bid price
percentage matches this field will be considered.
Note that, for safety, this field is limited to a maximum of 10000.

The default value and documentation here should be kept consistent with
CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent.
"""

zone_id: VariableOrOptional[str] = None
"""
Identifier for the availability zone/datacenter in which the cluster resides.
This string is of the form "us-west-2a". The provided availability
zone must be in the same region as the Databricks deployment. For example, "us-west-2a"
is not a valid zone id if the Databricks deployment resides in the "us-east-1" region.
This is an optional field at cluster creation, and if not specified, a default zone will be used.
If the zone specified is "auto", Databricks will try to place the cluster in a zone with high
availability and will retry placement in a different AZ if there is not enough capacity.
The list of available zones as well as the default value can be found by using the
`List Zones` method.
"""

@classmethod
def from_dict(cls, value: "AwsAttributesDict") -> "Self":
return _transform(cls, value)

def as_dict(self) -> "AwsAttributesDict":
return _transform_to_json_value(self) # type:ignore


class AwsAttributesDict(TypedDict, total=False):
""""""

availability: VariableOrOptional[AwsAvailabilityParam]

ebs_volume_count: VariableOrOptional[int]
"""
The number of volumes launched for each instance. Users can choose up to 10 volumes.
This feature is only enabled for supported node types. Legacy node types cannot specify
custom EBS volumes.
For node types with no instance store, at least one EBS volume needs to be specified;
otherwise, cluster creation will fail.

These EBS volumes will be mounted at `/ebs0`, `/ebs1`, etc.
Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, etc.

If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for
scratch storage because heterogeneously sized scratch devices can lead to inefficient disk
utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance
store volumes.

Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`
will be overridden.
"""

ebs_volume_iops: VariableOrOptional[int]
"""
If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used.
"""

ebs_volume_size: VariableOrOptional[int]
"""
The size of each EBS volume (in GiB) launched for each instance. For general purpose
SSD, this value must be within the range 100 - 4096. For throughput optimized HDD,
this value must be within the range 500 - 4096.
"""

ebs_volume_throughput: VariableOrOptional[int]
"""
If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used.
"""

ebs_volume_type: VariableOrOptional[EbsVolumeTypeParam]

first_on_demand: VariableOrOptional[int]
"""
The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.
If this value is greater than 0, the cluster driver node in particular will be placed on an
on-demand instance. If this value is greater than or equal to the current cluster size, all
nodes will be placed on on-demand instances. If this value is less than the current cluster
size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will
be placed on `availability` instances. Note that this value does not affect
cluster size and cannot currently be mutated over the lifetime of a cluster.
"""

instance_profile_arn: VariableOrOptional[str]
"""
Nodes for this cluster will only be placed on AWS instances with this instance profile. If
omitted, nodes will be placed on instances without an IAM instance profile. The instance
profile must have previously been added to the Databricks environment by an account
administrator.

This feature may only be available to certain customer plans.

If this field is omitted, the default from the configuration is used, if one exists.
"""

spot_bid_price_percent: VariableOrOptional[int]
"""
The bid price for AWS spot instances, as a percentage of the corresponding instance type's
on-demand price.
For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot
instance, then the bid price is half of the price of
on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice
the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100.
When spot instances are requested for this cluster, only spot instances whose bid price
percentage matches this field will be considered.
Note that, for safety, this field is limited to a maximum of 10000.

The default value and documentation here should be kept consistent with
CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent.
"""

zone_id: VariableOrOptional[str]
"""
Identifier for the availability zone/datacenter in which the cluster resides.
This string is of the form "us-west-2a". The provided availability
zone must be in the same region as the Databricks deployment. For example, "us-west-2a"
is not a valid zone id if the Databricks deployment resides in the "us-east-1" region.
This is an optional field at cluster creation, and if not specified, a default zone will be used.
If the zone specified is "auto", Databricks will try to place the cluster in a zone with high
availability and will retry placement in a different AZ if there is not enough capacity.
The list of available zones as well as the default value can be found by using the
`List Zones` method.
"""


AwsAttributesParam = AwsAttributesDict | AwsAttributes
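
A minimal usage sketch (not part of the generated file above): a typical `AwsAttributes` value keeps the driver on an on-demand instance and uses spot capacity with on-demand fallback for the rest. The import path and the specific field values are illustrative assumptions, not recommendations.

# Usage sketch, assuming the models are re-exported from databricks.bundles.compute.
from databricks.bundles.compute import AwsAttributes, AwsAvailability

aws_attributes = AwsAttributes(
    # Keep the driver (the first node) on an on-demand instance.
    first_on_demand=1,
    # Use spot instances for the remaining nodes, falling back to on-demand.
    availability=AwsAvailability.SPOT_WITH_FALLBACK,
    # Bid up to 100% of the on-demand price (the documented default).
    spot_bid_price_percent=100,
    # Let Databricks pick an availability zone with enough capacity.
    zone_id="auto",
    # Attach one 100 GiB EBS volume per node for scratch storage
    # (this overrides spark.local.dir, as documented above).
    ebs_volume_count=1,
    ebs_volume_size=100,
)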
@@ -0,0 +1,20 @@
from enum import Enum
from typing import Literal


class AwsAvailability(Enum):
"""
Availability type used for all subsequent nodes past the `first_on_demand` ones.

Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster.

"""

SPOT = "SPOT"
ON_DEMAND = "ON_DEMAND"
SPOT_WITH_FALLBACK = "SPOT_WITH_FALLBACK"


AwsAvailabilityParam = (
Literal["SPOT", "ON_DEMAND", "SPOT_WITH_FALLBACK"] | AwsAvailability
)
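
A minimal usage sketch (not part of the generated file above): a field typed as `AwsAvailabilityParam` accepts either the enum member or the equivalent string literal, so the two spellings below describe the same availability type. The import path is an assumption.

# Usage sketch, assuming the models are re-exported from databricks.bundles.compute.
from databricks.bundles.compute import AwsAttributes, AwsAvailability

with_enum = AwsAttributes(availability=AwsAvailability.SPOT_WITH_FALLBACK)
with_literal = AwsAttributes.from_dict({"availability": "SPOT_WITH_FALLBACK"})

# from_dict normalizes the literal to the enum, so both serialize identically.
assert with_enum.as_dict() == with_literal.as_dict()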