46 changes: 38 additions & 8 deletions experimental/python/databricks/bundles/jobs/__init__.py
@@ -15,6 +15,9 @@
"AzureAttributesParam",
"AzureAvailability",
"AzureAvailabilityParam",
"CleanRoomsNotebookTask",
"CleanRoomsNotebookTaskDict",
"CleanRoomsNotebookTaskParam",
"ClientsTypes",
"ClientsTypesDict",
"ClientsTypesParam",
@@ -24,6 +27,9 @@
"ClusterSpec",
"ClusterSpecDict",
"ClusterSpecParam",
"ComputeConfig",
"ComputeConfigDict",
"ComputeConfigParam",
"Condition",
"ConditionParam",
"ConditionTask",
@@ -70,6 +76,9 @@
"GcsStorageInfo",
"GcsStorageInfoDict",
"GcsStorageInfoParam",
"GenAiComputeTask",
"GenAiComputeTaskDict",
"GenAiComputeTaskParam",
"GitProvider",
"GitProviderParam",
"GitSource",
@@ -96,6 +105,11 @@
"JobParameterDefinition",
"JobParameterDefinitionDict",
"JobParameterDefinitionParam",
"JobPermission",
Contributor:

From your summary:

    Rename jobs.Permission into jobs.JobPermission to avoid confusion, following the spec.

I don't fully understand the mechanics here yet. How is this generated? And is there a README covering that for maintainers somewhere?

@kanterov (Collaborator, Author), Apr 7, 2025:

JobPermission is fixed in #2642; I've just generated the code using the effective jsonschema.

"JobPermissionDict",
"JobPermissionLevel",
"JobPermissionLevelParam",
"JobPermissionParam",
"JobRunAs",
"JobRunAsDict",
"JobRunAsParam",
@@ -133,9 +147,6 @@
"PeriodicTriggerConfigurationParam",
"PeriodicTriggerConfigurationTimeUnit",
"PeriodicTriggerConfigurationTimeUnitParam",
"Permission",
"PermissionDict",
"PermissionParam",
"PipelineParams",
"PipelineParamsDict",
"PipelineParamsParam",
@@ -373,6 +384,16 @@
WorkspaceStorageInfoDict,
WorkspaceStorageInfoParam,
)
from databricks.bundles.jobs._models.clean_rooms_notebook_task import (
CleanRoomsNotebookTask,
CleanRoomsNotebookTaskDict,
CleanRoomsNotebookTaskParam,
)
from databricks.bundles.jobs._models.compute_config import (
ComputeConfig,
ComputeConfigDict,
ComputeConfigParam,
)
from databricks.bundles.jobs._models.condition import Condition, ConditionParam
from databricks.bundles.jobs._models.condition_task import (
ConditionTask,
@@ -404,6 +425,11 @@
ForEachTaskDict,
ForEachTaskParam,
)
from databricks.bundles.jobs._models.gen_ai_compute_task import (
GenAiComputeTask,
GenAiComputeTaskDict,
GenAiComputeTaskParam,
)
from databricks.bundles.jobs._models.git_provider import GitProvider, GitProviderParam
from databricks.bundles.jobs._models.git_source import (
GitSource,
@@ -436,6 +462,15 @@
JobParameterDefinitionDict,
JobParameterDefinitionParam,
)
from databricks.bundles.jobs._models.job_permission import (
JobPermission,
JobPermissionDict,
JobPermissionParam,
)
from databricks.bundles.jobs._models.job_permission_level import (
JobPermissionLevel,
JobPermissionLevelParam,
)
from databricks.bundles.jobs._models.job_run_as import (
JobRunAs,
JobRunAsDict,
@@ -478,11 +513,6 @@
PeriodicTriggerConfigurationTimeUnit,
PeriodicTriggerConfigurationTimeUnitParam,
)
from databricks.bundles.jobs._models.permission import (
Permission,
PermissionDict,
PermissionParam,
)
from databricks.bundles.jobs._models.pipeline_params import (
PipelineParams,
PipelineParamsDict,
experimental/python/databricks/bundles/jobs/_models/clean_rooms_notebook_task.py (new file)
@@ -0,0 +1,74 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, TypedDict

from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import (
VariableOr,
VariableOrDict,
VariableOrOptional,
)

if TYPE_CHECKING:
from typing_extensions import Self
Contributor:

Btw, tangential question: what error do we show for Python <3.11, where things like these extensions are not available?

Collaborator (Author):

This branch will only be evaluated by tools like pyright/mypy that come with typing_extensions. We could have added typing_extensions as a dependency, but we don't need it at runtime.

Contributor:

That's fine, this is just a tangential question, but I'm still interested in the answer: what happens if you try to use jobs as code with Python <3.11?

Collaborator (Author):

It works; our acceptance tests run with Python 3.10.

Contributor:

Discussed offline: there's metadata preventing installation for <3.10.
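
To illustrate the mechanics discussed in the thread above, here is a minimal hypothetical sketch (not part of this PR) of the TYPE_CHECKING guard pattern the generated files use: typing.TYPE_CHECKING is False at runtime, so the typing_extensions import is only evaluated by type checkers such as pyright or mypy, and the module still runs on Python 3.10 without typing_extensions installed.

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Only type checkers evaluate this branch; typing_extensions is not needed at runtime.
    from typing_extensions import Self


class Example:
    @classmethod
    def build(cls) -> "Self":
        # "Self" is a string annotation, so nothing is resolved at runtime.
        return cls()


print(Example.build())  # runs on Python 3.10 even without typing_extensions installed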



@dataclass(kw_only=True)
class CleanRoomsNotebookTask:
""""""

clean_room_name: VariableOr[str]
"""
The clean room that the notebook belongs to.
"""

notebook_name: VariableOr[str]
"""
Name of the notebook being run.
"""

etag: VariableOrOptional[str] = None
"""
Checksum to validate the freshness of the notebook resource (i.e. the notebook being run is the latest version).
It can be fetched by calling the :method:cleanroomassets/get API.
"""

notebook_base_parameters: VariableOrDict[str] = field(default_factory=dict)
"""
Base parameters to be used for the clean room notebook job.
"""

@classmethod
def from_dict(cls, value: "CleanRoomsNotebookTaskDict") -> "Self":
return _transform(cls, value)

def as_dict(self) -> "CleanRoomsNotebookTaskDict":
return _transform_to_json_value(self) # type:ignore


class CleanRoomsNotebookTaskDict(TypedDict, total=False):
""""""

clean_room_name: VariableOr[str]
"""
The clean room that the notebook belongs to.
"""

notebook_name: VariableOr[str]
"""
Name of the notebook being run.
"""

etag: VariableOrOptional[str]
"""
Checksum to validate the freshness of the notebook resource (i.e. the notebook being run is the latest version).
It can be fetched by calling the :method:cleanroomassets/get API.
"""

notebook_base_parameters: VariableOrDict[str]
"""
Base parameters to be used for the clean room notebook job.
"""


CleanRoomsNotebookTaskParam = CleanRoomsNotebookTaskDict | CleanRoomsNotebookTask
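
For orientation, a hypothetical usage sketch (not part of the PR; all field values are invented, and it assumes from_dict/as_dict round-trip field values unchanged) showing how the dataclass and its TypedDict counterpart fit together:

from databricks.bundles.jobs import CleanRoomsNotebookTask

# Construct the task directly; etag and notebook_base_parameters are optional.
task = CleanRoomsNotebookTask(
    clean_room_name="my_clean_room",
    notebook_name="analysis_notebook",
    notebook_base_parameters={"run_date": "2025-04-07"},
)

# The same task expressed as a CleanRoomsNotebookTaskDict, converted via from_dict.
task_from_dict = CleanRoomsNotebookTask.from_dict(
    {
        "clean_room_name": "my_clean_room",
        "notebook_name": "analysis_notebook",
        "notebook_base_parameters": {"run_date": "2025-04-07"},
    }
)

assert task == task_from_dict
assert task.as_dict()["notebook_name"] == "analysis_notebook"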
experimental/python/databricks/bundles/jobs/_models/compute_config.py (new file)
@@ -0,0 +1,62 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict

from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr, VariableOrOptional

if TYPE_CHECKING:
from typing_extensions import Self


@dataclass(kw_only=True)
class ComputeConfig:
"""
:meta private: [EXPERIMENTAL]

Next field: 4
"""

gpu_node_pool_id: VariableOr[str]
"""
ID of the GPU pool to use.
"""

num_gpus: VariableOr[int]
"""
Number of GPUs.
"""

gpu_type: VariableOrOptional[str] = None
"""
GPU type.
"""

@classmethod
def from_dict(cls, value: "ComputeConfigDict") -> "Self":
return _transform(cls, value)

def as_dict(self) -> "ComputeConfigDict":
return _transform_to_json_value(self) # type:ignore


class ComputeConfigDict(TypedDict, total=False):
""""""

gpu_node_pool_id: VariableOr[str]
"""
ID of the GPU pool to use.
"""

num_gpus: VariableOr[int]
"""
Number of GPUs.
"""

gpu_type: VariableOrOptional[str]
"""
GPU type.
"""


ComputeConfigParam = ComputeConfigDict | ComputeConfig
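
A brief hypothetical construction example (not part of the PR; ComputeConfig is marked experimental above, and the values are invented) to show which fields are required:

from databricks.bundles.jobs import ComputeConfig

# gpu_node_pool_id and num_gpus are required; gpu_type is optional and defaults to None.
compute = ComputeConfig(
    gpu_node_pool_id="pool-0123456789abcdef",
    num_gpus=8,
)

# Round-trip through the TypedDict form; assumes as_dict/from_dict preserve field values.
assert ComputeConfig.from_dict(compute.as_dict()) == compute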
experimental/python/databricks/bundles/jobs/_models/condition.py
@@ -3,6 +3,10 @@


class Condition(Enum):
"""
:meta private: [EXPERIMENTAL]
"""

ANY_UPDATED = "ANY_UPDATED"
ALL_UPDATED = "ALL_UPDATED"

experimental/python/databricks/bundles/jobs/_models/gen_ai_compute_task.py (new file)
@@ -0,0 +1,121 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict

from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr, VariableOrOptional
from databricks.bundles.jobs._models.compute_config import (
ComputeConfig,
ComputeConfigParam,
)
from databricks.bundles.jobs._models.source import Source, SourceParam

if TYPE_CHECKING:
from typing_extensions import Self


@dataclass(kw_only=True)
class GenAiComputeTask:
"""
:meta private: [EXPERIMENTAL]

Next field: 9
"""

dl_runtime_image: VariableOr[str]
"""
Runtime image
"""

command: VariableOrOptional[str] = None
"""
Command launcher to run the actual script, e.g. bash, python etc.
"""

compute: VariableOrOptional[ComputeConfig] = None

mlflow_experiment_name: VariableOrOptional[str] = None
"""
Optional string containing the name of the MLflow experiment to log the run to. If the name is not
found, the backend will create the MLflow experiment using that name.
"""

source: VariableOrOptional[Source] = None
"""
Optional location type of the training script. When set to `WORKSPACE`, the script will be retrieved from the local Databricks workspace. When set to `GIT`, the script will be retrieved from a Git repository
defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.
* `WORKSPACE`: Script is located in Databricks workspace.
* `GIT`: Script is located in cloud Git provider.
"""

training_script_path: VariableOrOptional[str] = None
"""
The training script file path to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required.
"""

yaml_parameters: VariableOrOptional[str] = None
"""
Optional string containing model parameters passed to the training script in yaml format.
If present, then the content in yaml_parameters_file_path will be ignored.
"""

yaml_parameters_file_path: VariableOrOptional[str] = None
"""
Optional path to a YAML file containing model parameters passed to the training script.
"""

@classmethod
def from_dict(cls, value: "GenAiComputeTaskDict") -> "Self":
return _transform(cls, value)

def as_dict(self) -> "GenAiComputeTaskDict":
return _transform_to_json_value(self) # type:ignore


class GenAiComputeTaskDict(TypedDict, total=False):
""""""

dl_runtime_image: VariableOr[str]
"""
Runtime image
"""

command: VariableOrOptional[str]
"""
Command launcher to run the actual script, e.g. bash, python etc.
"""

compute: VariableOrOptional[ComputeConfigParam]

mlflow_experiment_name: VariableOrOptional[str]
"""
Optional string containing the name of the MLflow experiment to log the run to. If the name is not
found, the backend will create the MLflow experiment using that name.
"""

source: VariableOrOptional[SourceParam]
"""
Optional location type of the training script. When set to `WORKSPACE`, the script will be retrieved from the local Databricks workspace. When set to `GIT`, the script will be retrieved from a Git repository
defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.
* `WORKSPACE`: Script is located in Databricks workspace.
* `GIT`: Script is located in cloud Git provider.
"""

training_script_path: VariableOrOptional[str]
"""
The training script file path to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required.
"""

yaml_parameters: VariableOrOptional[str]
"""
Optional string containing model parameters passed to the training script in yaml format.
If present, then the content in yaml_parameters_file_path will be ignored.
"""

yaml_parameters_file_path: VariableOrOptional[str]
"""
Optional path to a YAML file containing model parameters passed to the training script.
"""


GenAiComputeTaskParam = GenAiComputeTaskDict | GenAiComputeTask
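
Finally, a hypothetical sketch (not part of the PR; the image name, paths, and pool ID are invented) showing that the nested compute field can be passed as a ComputeConfig dataclass, or as a plain dict in the dict form, per the ComputeConfigParam union:

from databricks.bundles.jobs import ComputeConfig, GenAiComputeTask

task = GenAiComputeTask(
    dl_runtime_image="databricks/dl-runtime:example-tag",
    training_script_path="/Workspace/Users/someone@example.com/train.py",
    compute=ComputeConfig(gpu_node_pool_id="pool-0123456789abcdef", num_gpus=8),
)

# Equivalent dict form; the nested compute config is itself a ComputeConfigDict here.
task_from_dict = GenAiComputeTask.from_dict(
    {
        "dl_runtime_image": "databricks/dl-runtime:example-tag",
        "training_script_path": "/Workspace/Users/someone@example.com/train.py",
        "compute": {"gpu_node_pool_id": "pool-0123456789abcdef", "num_gpus": 8},
    }
)

assert task == task_from_dict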