diff --git a/.codegen.json b/.codegen.json index 03e092db7b..54c108218f 100644 --- a/.codegen.json +++ b/.codegen.json @@ -21,8 +21,6 @@ "go" ], "post_generate": [ - "[ ! -f tagging.py ] || mv tagging.py internal/genkit/tagging.py", - "rm .github/workflows/next-changelog.yml", "go test -timeout 240s -run TestConsistentDatabricksSdkVersion github.com/databricks/cli/internal/build", "make schema", "echo 'bundle/internal/tf/schema/\\*.go linguist-generated=true' >> ./.gitattributes", diff --git a/experimental/python/databricks/bundles/compute/_models/environment.py b/experimental/python/databricks/bundles/compute/_models/environment.py index 32a731c31f..3e0d1cc916 100644 --- a/experimental/python/databricks/bundles/compute/_models/environment.py +++ b/experimental/python/databricks/bundles/compute/_models/environment.py @@ -12,7 +12,8 @@ @dataclass(kw_only=True) class Environment: """ - The environment entity used to preserve serverless environment side panel and jobs' environment for non-notebook task. + The environment entity used to preserve serverless environment side panel, jobs' environment for non-notebook task, and DLT's environment for classic and serverless pipelines. + (Note: DLT uses a copied version of the Environment proto below, at //spark/pipelines/api/protos/copied/libraries-environments-copy.proto) In this minimal environment spec, only pip dependencies are supported. """ diff --git a/experimental/python/databricks/bundles/jobs/_models/job.py b/experimental/python/databricks/bundles/jobs/_models/job.py index 6d66279e04..ca40311133 100644 --- a/experimental/python/databricks/bundles/jobs/_models/job.py +++ b/experimental/python/databricks/bundles/jobs/_models/job.py @@ -143,9 +143,7 @@ class Job(Resource): performance_target: VariableOrOptional[PerformanceTarget] = None """ - :meta private: [EXPERIMENTAL] - - The performance mode on a serverless job. The performance target determines the level of compute performance or cost-efficiency for the run. + The performance mode on a serverless job. This field determines the level of compute performance or cost-efficiency for the run. * `STANDARD`: Enables cost-efficient execution of serverless workloads. * `PERFORMANCE_OPTIMIZED`: Prioritizes fast startup and execution times through rapid scaling and optimized cluster performance. @@ -276,9 +274,7 @@ class JobDict(TypedDict, total=False): performance_target: VariableOrOptional[PerformanceTargetParam] """ - :meta private: [EXPERIMENTAL] - - The performance mode on a serverless job. The performance target determines the level of compute performance or cost-efficiency for the run. + The performance mode on a serverless job. This field determines the level of compute performance or cost-efficiency for the run. * `STANDARD`: Enables cost-efficient execution of serverless workloads. * `PERFORMANCE_OPTIMIZED`: Prioritizes fast startup and execution times through rapid scaling and optimized cluster performance. diff --git a/experimental/python/databricks/bundles/jobs/_models/performance_target.py b/experimental/python/databricks/bundles/jobs/_models/performance_target.py index eb805ca5ef..5e7b83ae61 100644 --- a/experimental/python/databricks/bundles/jobs/_models/performance_target.py +++ b/experimental/python/databricks/bundles/jobs/_models/performance_target.py @@ -4,8 +4,6 @@ class PerformanceTarget(Enum): """ - :meta private: [EXPERIMENTAL] - PerformanceTarget defines how performant (lower latency) or cost efficient the execution of run on serverless compute should be. 
The performance mode on the job or pipeline should map to a performance setting that is passed to Cluster Manager (see cluster-common PerformanceTarget). diff --git a/experimental/python/databricks/bundles/pipelines/_models/file_library.py b/experimental/python/databricks/bundles/pipelines/_models/file_library.py index 9f57a72cb9..612c1e05bd 100644 --- a/experimental/python/databricks/bundles/pipelines/_models/file_library.py +++ b/experimental/python/databricks/bundles/pipelines/_models/file_library.py @@ -15,7 +15,7 @@ class FileLibrary: path: VariableOrOptional[str] = None """ - The absolute path of the file. + The absolute path of the source code. """ @classmethod @@ -31,7 +31,7 @@ class FileLibraryDict(TypedDict, total=False): path: VariableOrOptional[str] """ - The absolute path of the file. + The absolute path of the source code. """ diff --git a/experimental/python/databricks/bundles/pipelines/_models/ingestion_gateway_pipeline_definition.py b/experimental/python/databricks/bundles/pipelines/_models/ingestion_gateway_pipeline_definition.py index d490a0bef2..9793c02fe7 100644 --- a/experimental/python/databricks/bundles/pipelines/_models/ingestion_gateway_pipeline_definition.py +++ b/experimental/python/databricks/bundles/pipelines/_models/ingestion_gateway_pipeline_definition.py @@ -3,7 +3,7 @@ from databricks.bundles.core._transform import _transform from databricks.bundles.core._transform_to_json import _transform_to_json_value -from databricks.bundles.core._variable import VariableOrOptional +from databricks.bundles.core._variable import VariableOr, VariableOrOptional if TYPE_CHECKING: from typing_extensions import Self @@ -15,27 +15,26 @@ class IngestionGatewayPipelineDefinition: :meta private: [EXPERIMENTAL] """ - connection_name: VariableOrOptional[str] = None + connection_name: VariableOr[str] """ Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. """ - gateway_storage_catalog: VariableOrOptional[str] = None + gateway_storage_catalog: VariableOr[str] """ Required, Immutable. The name of the catalog for the gateway pipeline's storage location. """ + gateway_storage_schema: VariableOr[str] + """ + Required, Immutable. The name of the schema for the gateway pipelines's storage location. + """ + gateway_storage_name: VariableOrOptional[str] = None """ Optional. The Unity Catalog-compatible name for the gateway storage location. This is the destination to use for the data that is extracted by the gateway. Delta Live Tables system will automatically create the storage location under the catalog and schema. - - """ - - gateway_storage_schema: VariableOrOptional[str] = None - """ - Required, Immutable. The name of the schema for the gateway pipelines's storage location. """ @classmethod @@ -49,27 +48,26 @@ def as_dict(self) -> "IngestionGatewayPipelineDefinitionDict": class IngestionGatewayPipelineDefinitionDict(TypedDict, total=False): """""" - connection_name: VariableOrOptional[str] + connection_name: VariableOr[str] """ Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. """ - gateway_storage_catalog: VariableOrOptional[str] + gateway_storage_catalog: VariableOr[str] """ Required, Immutable. The name of the catalog for the gateway pipeline's storage location. """ + gateway_storage_schema: VariableOr[str] + """ + Required, Immutable. The name of the schema for the gateway pipelines's storage location. + """ + gateway_storage_name: VariableOrOptional[str] """ Optional. 
The Unity Catalog-compatible name for the gateway storage location. This is the destination to use for the data that is extracted by the gateway. Delta Live Tables system will automatically create the storage location under the catalog and schema. - - """ - - gateway_storage_schema: VariableOrOptional[str] - """ - Required, Immutable. The name of the schema for the gateway pipelines's storage location. """ diff --git a/experimental/python/databricks/bundles/pipelines/_models/notebook_library.py b/experimental/python/databricks/bundles/pipelines/_models/notebook_library.py index b8ac89205f..5cc6ae9ee3 100644 --- a/experimental/python/databricks/bundles/pipelines/_models/notebook_library.py +++ b/experimental/python/databricks/bundles/pipelines/_models/notebook_library.py @@ -15,7 +15,7 @@ class NotebookLibrary: path: VariableOrOptional[str] = None """ - The absolute path of the notebook. + The absolute path of the source code. """ @classmethod @@ -31,7 +31,7 @@ class NotebookLibraryDict(TypedDict, total=False): path: VariableOrOptional[str] """ - The absolute path of the notebook. + The absolute path of the source code. """ diff --git a/experimental/python/databricks/bundles/pipelines/_models/notifications.py b/experimental/python/databricks/bundles/pipelines/_models/notifications.py index 06ecfb8dbe..22dbec8d70 100644 --- a/experimental/python/databricks/bundles/pipelines/_models/notifications.py +++ b/experimental/python/databricks/bundles/pipelines/_models/notifications.py @@ -22,13 +22,11 @@ class Notifications: * `on-update-failure`: Each time a pipeline update fails. * `on-update-fatal-failure`: A pipeline update fails with a non-retryable (fatal) error. * `on-flow-failure`: A single data flow fails. - """ email_recipients: VariableOrList[str] = field(default_factory=list) """ A list of email addresses notified when a configured alert is triggered. - """ @classmethod @@ -51,13 +49,11 @@ class NotificationsDict(TypedDict, total=False): * `on-update-failure`: Each time a pipeline update fails. * `on-update-fatal-failure`: A pipeline update fails with a non-retryable (fatal) error. * `on-flow-failure`: A single data flow fails. - """ email_recipients: VariableOrList[str] """ A list of email addresses notified when a configured alert is triggered. - """ diff --git a/experimental/python/databricks/bundles/pipelines/_models/pipeline_cluster.py b/experimental/python/databricks/bundles/pipelines/_models/pipeline_cluster.py index 3eb57baaf2..70de76b20d 100644 --- a/experimental/python/databricks/bundles/pipelines/_models/pipeline_cluster.py +++ b/experimental/python/databricks/bundles/pipelines/_models/pipeline_cluster.py @@ -71,7 +71,6 @@ class PipelineCluster: for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. - """ custom_tags: VariableOrDict[str] = field(default_factory=dict) @@ -130,7 +129,6 @@ class PipelineCluster: the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. - """ num_workers: VariableOrOptional[int] = None @@ -154,7 +152,6 @@ class PipelineCluster: """ An object containing a set of optional, user-specified Spark configuration key-value pairs. See :method:clusters/create for more details. 
- """ spark_env_vars: VariableOrDict[str] = field(default_factory=dict) @@ -220,7 +217,6 @@ class PipelineClusterDict(TypedDict, total=False): for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. - """ custom_tags: VariableOrDict[str] @@ -279,7 +275,6 @@ class PipelineClusterDict(TypedDict, total=False): the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. - """ num_workers: VariableOrOptional[int] @@ -303,7 +298,6 @@ class PipelineClusterDict(TypedDict, total=False): """ An object containing a set of optional, user-specified Spark configuration key-value pairs. See :method:clusters/create for more details. - """ spark_env_vars: VariableOrDict[str] diff --git a/experimental/python/databricks/bundles/pipelines/_models/pipeline_cluster_autoscale.py b/experimental/python/databricks/bundles/pipelines/_models/pipeline_cluster_autoscale.py index 973e66a67b..58afdbefab 100644 --- a/experimental/python/databricks/bundles/pipelines/_models/pipeline_cluster_autoscale.py +++ b/experimental/python/databricks/bundles/pipelines/_models/pipeline_cluster_autoscale.py @@ -35,7 +35,6 @@ class PipelineClusterAutoscale: the data processing latency of your pipelines. Enhanced Autoscaling is available for `updates` clusters only. The legacy autoscaling feature is used for `maintenance` clusters. - """ @classmethod @@ -67,7 +66,6 @@ class PipelineClusterAutoscaleDict(TypedDict, total=False): the data processing latency of your pipelines. Enhanced Autoscaling is available for `updates` clusters only. The legacy autoscaling feature is used for `maintenance` clusters. - """ diff --git a/experimental/python/databricks/bundles/pipelines/_models/pipeline_cluster_autoscale_mode.py b/experimental/python/databricks/bundles/pipelines/_models/pipeline_cluster_autoscale_mode.py index 16f46dbffd..21d2e054cb 100644 --- a/experimental/python/databricks/bundles/pipelines/_models/pipeline_cluster_autoscale_mode.py +++ b/experimental/python/databricks/bundles/pipelines/_models/pipeline_cluster_autoscale_mode.py @@ -9,7 +9,6 @@ class PipelineClusterAutoscaleMode(Enum): the data processing latency of your pipelines. Enhanced Autoscaling is available for `updates` clusters only. The legacy autoscaling feature is used for `maintenance` clusters. - """ ENHANCED = "ENHANCED" diff --git a/experimental/python/databricks/bundles/pipelines/_models/pipeline_library.py b/experimental/python/databricks/bundles/pipelines/_models/pipeline_library.py index 3e9b7094cb..6a15df15f1 100644 --- a/experimental/python/databricks/bundles/pipelines/_models/pipeline_library.py +++ b/experimental/python/databricks/bundles/pipelines/_models/pipeline_library.py @@ -28,7 +28,6 @@ class PipelineLibrary: file: VariableOrOptional[FileLibrary] = None """ The path to a file that defines a pipeline and is stored in the Databricks Repos. - """ jar: VariableOrOptional[str] = None @@ -36,7 +35,6 @@ class PipelineLibrary: :meta private: [EXPERIMENTAL] URI of the jar to be installed. Currently only DBFS is supported. - """ maven: VariableOrOptional[MavenLibrary] = None @@ -44,13 +42,11 @@ class PipelineLibrary: :meta private: [EXPERIMENTAL] Specification of a maven library to be installed. 
- """ notebook: VariableOrOptional[NotebookLibrary] = None """ The path to a notebook that defines a pipeline and is stored in the Databricks workspace. - """ @classmethod @@ -67,7 +63,6 @@ class PipelineLibraryDict(TypedDict, total=False): file: VariableOrOptional[FileLibraryParam] """ The path to a file that defines a pipeline and is stored in the Databricks Repos. - """ jar: VariableOrOptional[str] @@ -75,7 +70,6 @@ class PipelineLibraryDict(TypedDict, total=False): :meta private: [EXPERIMENTAL] URI of the jar to be installed. Currently only DBFS is supported. - """ maven: VariableOrOptional[MavenLibraryParam] @@ -83,13 +77,11 @@ class PipelineLibraryDict(TypedDict, total=False): :meta private: [EXPERIMENTAL] Specification of a maven library to be installed. - """ notebook: VariableOrOptional[NotebookLibraryParam] """ The path to a notebook that defines a pipeline and is stored in the Databricks workspace. - """ diff --git a/experimental/python/databricks/bundles/pipelines/_models/report_spec.py b/experimental/python/databricks/bundles/pipelines/_models/report_spec.py index 4e32d9f51d..02d4a8760a 100644 --- a/experimental/python/databricks/bundles/pipelines/_models/report_spec.py +++ b/experimental/python/databricks/bundles/pipelines/_models/report_spec.py @@ -3,7 +3,7 @@ from databricks.bundles.core._transform import _transform from databricks.bundles.core._transform_to_json import _transform_to_json_value -from databricks.bundles.core._variable import VariableOrOptional +from databricks.bundles.core._variable import VariableOr, VariableOrOptional from databricks.bundles.pipelines._models.table_specific_config import ( TableSpecificConfig, TableSpecificConfigParam, @@ -17,24 +17,24 @@ class ReportSpec: """""" - destination_catalog: VariableOrOptional[str] = None + destination_catalog: VariableOr[str] """ Required. Destination catalog to store table. """ - destination_schema: VariableOrOptional[str] = None + destination_schema: VariableOr[str] """ Required. Destination schema to store table. """ - destination_table: VariableOrOptional[str] = None + source_url: VariableOr[str] """ - Required. Destination table name. The pipeline fails if a table with that name already exists. + Required. Report URL in the source system. """ - source_url: VariableOrOptional[str] = None + destination_table: VariableOrOptional[str] = None """ - Required. Report URL in the source system. + Required. Destination table name. The pipeline fails if a table with that name already exists. """ table_configuration: VariableOrOptional[TableSpecificConfig] = None @@ -53,24 +53,24 @@ def as_dict(self) -> "ReportSpecDict": class ReportSpecDict(TypedDict, total=False): """""" - destination_catalog: VariableOrOptional[str] + destination_catalog: VariableOr[str] """ Required. Destination catalog to store table. """ - destination_schema: VariableOrOptional[str] + destination_schema: VariableOr[str] """ Required. Destination schema to store table. """ - destination_table: VariableOrOptional[str] + source_url: VariableOr[str] """ - Required. Destination table name. The pipeline fails if a table with that name already exists. + Required. Report URL in the source system. """ - source_url: VariableOrOptional[str] + destination_table: VariableOrOptional[str] """ - Required. Report URL in the source system. + Required. Destination table name. The pipeline fails if a table with that name already exists. 
""" table_configuration: VariableOrOptional[TableSpecificConfigParam] diff --git a/experimental/python/databricks/bundles/pipelines/_models/schema_spec.py b/experimental/python/databricks/bundles/pipelines/_models/schema_spec.py index dd47de000a..1307409fb4 100644 --- a/experimental/python/databricks/bundles/pipelines/_models/schema_spec.py +++ b/experimental/python/databricks/bundles/pipelines/_models/schema_spec.py @@ -3,7 +3,7 @@ from databricks.bundles.core._transform import _transform from databricks.bundles.core._transform_to_json import _transform_to_json_value -from databricks.bundles.core._variable import VariableOrOptional +from databricks.bundles.core._variable import VariableOr, VariableOrOptional from databricks.bundles.pipelines._models.table_specific_config import ( TableSpecificConfig, TableSpecificConfigParam, @@ -17,24 +17,24 @@ class SchemaSpec: """""" - destination_catalog: VariableOrOptional[str] = None + destination_catalog: VariableOr[str] """ Required. Destination catalog to store tables. """ - destination_schema: VariableOrOptional[str] = None + destination_schema: VariableOr[str] """ Required. Destination schema to store tables in. Tables with the same name as the source tables are created in this destination schema. The pipeline fails If a table with the same name already exists. """ - source_catalog: VariableOrOptional[str] = None + source_schema: VariableOr[str] """ - The source catalog name. Might be optional depending on the type of source. + Required. Schema name in the source database. """ - source_schema: VariableOrOptional[str] = None + source_catalog: VariableOrOptional[str] = None """ - Required. Schema name in the source database. + The source catalog name. Might be optional depending on the type of source. """ table_configuration: VariableOrOptional[TableSpecificConfig] = None @@ -53,24 +53,24 @@ def as_dict(self) -> "SchemaSpecDict": class SchemaSpecDict(TypedDict, total=False): """""" - destination_catalog: VariableOrOptional[str] + destination_catalog: VariableOr[str] """ Required. Destination catalog to store tables. """ - destination_schema: VariableOrOptional[str] + destination_schema: VariableOr[str] """ Required. Destination schema to store tables in. Tables with the same name as the source tables are created in this destination schema. The pipeline fails If a table with the same name already exists. """ - source_catalog: VariableOrOptional[str] + source_schema: VariableOr[str] """ - The source catalog name. Might be optional depending on the type of source. + Required. Schema name in the source database. """ - source_schema: VariableOrOptional[str] + source_catalog: VariableOrOptional[str] """ - Required. Schema name in the source database. + The source catalog name. Might be optional depending on the type of source. 
""" table_configuration: VariableOrOptional[TableSpecificConfigParam] diff --git a/experimental/python/databricks/bundles/pipelines/_models/table_spec.py b/experimental/python/databricks/bundles/pipelines/_models/table_spec.py index 28e00c3c12..03155edf6a 100644 --- a/experimental/python/databricks/bundles/pipelines/_models/table_spec.py +++ b/experimental/python/databricks/bundles/pipelines/_models/table_spec.py @@ -3,7 +3,7 @@ from databricks.bundles.core._transform import _transform from databricks.bundles.core._transform_to_json import _transform_to_json_value -from databricks.bundles.core._variable import VariableOrOptional +from databricks.bundles.core._variable import VariableOr, VariableOrOptional from databricks.bundles.pipelines._models.table_specific_config import ( TableSpecificConfig, TableSpecificConfigParam, @@ -17,16 +17,21 @@ class TableSpec: """""" - destination_catalog: VariableOrOptional[str] = None + destination_catalog: VariableOr[str] """ Required. Destination catalog to store table. """ - destination_schema: VariableOrOptional[str] = None + destination_schema: VariableOr[str] """ Required. Destination schema to store table. """ + source_table: VariableOr[str] + """ + Required. Table name in the source database. + """ + destination_table: VariableOrOptional[str] = None """ Optional. Destination table name. The pipeline fails if a table with that name already exists. If not set, the source table name is used. @@ -42,11 +47,6 @@ class TableSpec: Schema name in the source database. Might be optional depending on the type of source. """ - source_table: VariableOrOptional[str] = None - """ - Required. Table name in the source database. - """ - table_configuration: VariableOrOptional[TableSpecificConfig] = None """ Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object and the SchemaSpec. @@ -63,16 +63,21 @@ def as_dict(self) -> "TableSpecDict": class TableSpecDict(TypedDict, total=False): """""" - destination_catalog: VariableOrOptional[str] + destination_catalog: VariableOr[str] """ Required. Destination catalog to store table. """ - destination_schema: VariableOrOptional[str] + destination_schema: VariableOr[str] """ Required. Destination schema to store table. """ + source_table: VariableOr[str] + """ + Required. Table name in the source database. + """ + destination_table: VariableOrOptional[str] """ Optional. Destination table name. The pipeline fails if a table with that name already exists. If not set, the source table name is used. @@ -88,11 +93,6 @@ class TableSpecDict(TypedDict, total=False): Schema name in the source database. Might be optional depending on the type of source. """ - source_table: VariableOrOptional[str] - """ - Required. Table name in the source database. - """ - table_configuration: VariableOrOptional[TableSpecificConfigParam] """ Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object and the SchemaSpec. 
diff --git a/experimental/python/databricks/bundles/pipelines/_models/table_specific_config.py b/experimental/python/databricks/bundles/pipelines/_models/table_specific_config.py
index 204447de62..c354ee74ae 100644
--- a/experimental/python/databricks/bundles/pipelines/_models/table_specific_config.py
+++ b/experimental/python/databricks/bundles/pipelines/_models/table_specific_config.py
@@ -17,6 +17,23 @@ class TableSpecificConfig:
     """"""
 
+    exclude_columns: VariableOrList[str] = field(default_factory=list)
+    """
+    A list of column names to be excluded from the ingestion.
+    When not specified, include_columns fully controls which columns are ingested.
+    When specified, all other columns, including future ones, will be automatically included for ingestion.
+    This field is mutually exclusive with `include_columns`.
+    """
+
+    include_columns: VariableOrList[str] = field(default_factory=list)
+    """
+    A list of column names to be included in the ingestion.
+    When not specified, all columns except those in exclude_columns will be included. Future
+    columns will be automatically included.
+    When specified, all other future columns will be automatically excluded from ingestion.
+    This field is mutually exclusive with `exclude_columns`.
+    """
+
     primary_keys: VariableOrList[str] = field(default_factory=list)
     """
     The primary key of the table used to apply changes.
@@ -52,6 +69,23 @@ class TableSpecificConfigDict(TypedDict, total=False):
     """"""
 
+    exclude_columns: VariableOrList[str]
+    """
+    A list of column names to be excluded from the ingestion.
+    When not specified, include_columns fully controls which columns are ingested.
+    When specified, all other columns, including future ones, will be automatically included for ingestion.
+    This field is mutually exclusive with `include_columns`.
+    """
+
+    include_columns: VariableOrList[str]
+    """
+    A list of column names to be included in the ingestion.
+    When not specified, all columns except those in exclude_columns will be included. Future
+    columns will be automatically included.
+    When specified, all other future columns will be automatically excluded from ingestion.
+    This field is mutually exclusive with `exclude_columns`.
+    """
+
     primary_keys: VariableOrList[str]
     """
     The primary key of the table used to apply changes.
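The new `include_columns` / `exclude_columns` filters are mutually exclusive and, per the docstrings, plug into the `table_configuration` field of the spec classes above (ReportSpec, SchemaSpec, TableSpec) or the ingestion pipeline definition. A hedged sketch of the two modes, with illustrative column names:

```python
# Sketch only: the two mutually exclusive column-filtering modes added to
# TableSpecificConfig. Column names are illustrative; import path is from this diff.
from databricks.bundles.pipelines._models.table_specific_config import TableSpecificConfig

# Allow-list mode: ingest only the listed columns; columns added to the source
# later are NOT picked up automatically.
orders_config = TableSpecificConfig(
    primary_keys=["order_id"],
    include_columns=["order_id", "customer_id", "amount"],
)

# Deny-list mode: ingest everything except the listed columns; columns added to
# the source later ARE picked up automatically.
customers_config = TableSpecificConfig(
    primary_keys=["customer_id"],
    exclude_columns=["ssn"],
)

# Setting both filters on the same config is invalid, since the fields are mutually exclusive.
```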