3 changes: 1 addition & 2 deletions .github/workflows/push.yml
@@ -186,6 +186,5 @@ jobs:
if ! ( git diff --exit-code ); then
echo "Generated Python code is not up-to-date. Please run 'pushd experimental/python && make codegen' and commit the changes."

# TODO block PR if this fails once diffs are fixed
# exit 1
exit 1
fi
35 changes: 26 additions & 9 deletions experimental/python/codegen/codegen/generated_dataclass.py
@@ -6,7 +6,7 @@

import codegen.packages as packages
from codegen.code_builder import CodeBuilder
from codegen.jsonschema import Property, Schema
from codegen.jsonschema import Property, Schema, Stage
from codegen.packages import is_resource


@@ -96,6 +96,12 @@ class GeneratedField:
Factory method for creating a default value, used for lists and dicts.
"""

experimental: bool
"""
If true, the field is experimental: it should not be indexed in docs and
should be marked as experimental in the docstring.
"""

def __post_init__(self):
if self.default_factory is not None and self.default is not None:
raise ValueError("Can't have both default and default_factory", self)
@@ -124,6 +130,7 @@ class GeneratedDataclass:

fields: list[GeneratedField]
extends: list[GeneratedType]
experimental: bool


def generate_field(
@@ -147,6 +154,7 @@ def generate_field(
default=None,
default_factory="dict",
create_func_default="None",
experimental=prop.stage == Stage.PRIVATE,
)
elif field_type.name == "VariableOrList":
return GeneratedField(
@@ -158,6 +166,7 @@
default=None,
default_factory="list",
create_func_default="None",
experimental=prop.stage == Stage.PRIVATE,
)
elif is_required:
return GeneratedField(
@@ -169,6 +178,7 @@
default=None,
default_factory=None,
create_func_default=None,
experimental=prop.stage == Stage.PRIVATE,
)
else:
return GeneratedField(
@@ -180,6 +190,7 @@
default="None",
default_factory=None,
create_func_default="None",
experimental=prop.stage == Stage.PRIVATE,
)


@@ -308,6 +319,7 @@ def generate_dataclass(schema_name: str, schema: Schema) -> GeneratedDataclass:
description=schema.description,
fields=fields,
extends=extends,
experimental=schema.stage == Stage.PRIVATE,
)


@@ -347,10 +359,10 @@ def _append_dataclass(b: CodeBuilder, generated: GeneratedDataclass):
b.append(":").newline()

# FIXME should contain class docstring
if not generated.description:
if not generated.description and not generated.experimental:
b.indent().append_triple_quote().append_triple_quote().newline().newline()
else:
_append_description(b, generated.description)
_append_description(b, generated.description, generated.experimental)


def _append_field(b: CodeBuilder, field: GeneratedField):
@@ -428,11 +440,16 @@ def _append_typed_dict(b: CodeBuilder, generated: GeneratedDataclass):
b.indent().append_triple_quote().append_triple_quote().newline().newline()


def _append_description(b: CodeBuilder, description: Optional[str]):
if description:
def _append_description(b: CodeBuilder, description: Optional[str], experimental: bool):
if description or experimental:
b.indent().append_triple_quote().newline()
for line in description.split("\n"):
b.indent().append(line).newline()
if experimental:
b.indent().append(":meta private: [EXPERIMENTAL]").newline()
if description:
b.indent().newline()
if description:
for line in description.split("\n"):
b.indent().append(line).newline()
b.indent().append_triple_quote().newline()


@@ -449,7 +466,7 @@ def get_code(generated: GeneratedDataclass) -> str:

for field in generated.fields:
_append_field(b, field)
_append_description(b, field.description)
_append_description(b, field.description, field.experimental)

b.newline()

@@ -462,7 +479,7 @@

for field in generated.fields:
_append_typed_dict_field(b, field)
_append_description(b, field.description)
_append_description(b, field.description, field.experimental)

b.newline()

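For reference, a minimal sketch of the output this change produces: for a field whose property carries x-databricks-preview: PRIVATE, the updated _append_description would emit a docstring like the one below. The field name and description are hypothetical; :meta private: is the Sphinx autodoc directive that excludes a member from rendered docs.

    some_preview_field: VariableOr[str]
    """
    :meta private: [EXPERIMENTAL]

    Description carried over from the OpenAPI schema.
    """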
6 changes: 4 additions & 2 deletions experimental/python/codegen/codegen/generated_enum.py
@@ -5,7 +5,7 @@
import codegen.packages as packages
from codegen.code_builder import CodeBuilder
from codegen.generated_dataclass import _append_description
from codegen.jsonschema import Schema
from codegen.jsonschema import Schema, Stage


@dataclass(kw_only=True)
@@ -14,6 +14,7 @@ class GeneratedEnum:
package: str
values: dict[str, str]
description: Optional[str]
experimental: bool


def generate_enum(schema_name: str, schema: Schema) -> GeneratedEnum:
@@ -33,6 +34,7 @@ def generate_enum(schema_name: str, schema: Schema) -> GeneratedEnum:
package=package,
values=values,
description=schema.description,
experimental=schema.stage == Stage.PRIVATE,
)


@@ -46,7 +48,7 @@ def get_code(generated: GeneratedEnum) -> str:
b.append(f"class {generated.class_name}(Enum):")
b.newline()

_append_description(b, generated.description)
_append_description(b, generated.description, generated.experimental)

# Example:
#
17 changes: 17 additions & 0 deletions experimental/python/codegen/codegen/jsonschema.py
@@ -7,10 +7,16 @@
import codegen.packages as packages


class Stage:
PRIVATE = "PRIVATE"


@dataclass
class Property:
ref: str
description: Optional[str] = None
deprecated: Optional[bool] = None
stage: Optional[str] = None


class SchemaType(Enum):
@@ -25,6 +31,8 @@ class Schema:
properties: dict[str, Property] = field(default_factory=dict)
required: list[str] = field(default_factory=list)
description: Optional[str] = None
deprecated: Optional[bool] = None
stage: Optional[str] = None

def __post_init__(self):
match self.type:
@@ -76,6 +84,11 @@ def _parse_schema(schema: dict) -> Schema:
schema = _unwrap_variable(schema) or schema
properties = {}

def _parse_bool(value) -> Optional[bool]:
assert value is None or isinstance(value, bool)

return value

for k, v in schema.get("properties", {}).items():
assert v.get("type") is None
assert v.get("anyOf") is None
@@ -87,6 +100,8 @@ def _parse_schema(schema: dict) -> Schema:
prop = Property(
ref=v["$ref"],
description=v.get("description"),
deprecated=_parse_bool(v.get("deprecated")),
stage=v.get("x-databricks-preview"),
)

properties[k] = prop
@@ -102,6 +117,8 @@ def _parse_schema(schema: dict) -> Schema:
properties=properties,
required=schema.get("required", []),
description=schema.get("description"),
deprecated=_parse_bool(schema.get("deprecated")),
stage=schema.get("x-databricks-preview"),
)


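To illustrate how the new fields are populated (the schema fragment below is hypothetical, not taken from the actual OpenAPI spec): _parse_schema copies deprecated and x-databricks-preview straight from the raw property entry into Property.

from codegen.jsonschema import Property

# Hypothetical entry under schema["properties"] in the OpenAPI document.
raw = {
    "$ref": "#/components/schemas/jobs.SomePreviewSetting",
    "description": "Preview-only setting.",
    "x-databricks-preview": "PRIVATE",
}

# _parse_schema maps it onto Property like this; a missing "deprecated" key
# stays None (validated by _parse_bool), and stage is later compared against
# Stage.PRIVATE by the dataclass and enum generators.
prop = Property(
    ref=raw["$ref"],
    description=raw.get("description"),
    deprecated=raw.get("deprecated"),
    stage=raw.get("x-databricks-preview"),
)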
36 changes: 4 additions & 32 deletions experimental/python/codegen/codegen/jsonschema_patch.py
@@ -3,43 +3,15 @@
from codegen.jsonschema import Schema

REMOVED_FIELDS = {
"jobs.RunJobTask": {
# all params except job_parameters should be deprecated and should not be supported
"jar_params",
"notebook_params",
"python_params",
"spark_submit_params",
"python_named_params",
"sql_params",
"dbt_commands",
# except pipeline_params, that is not deprecated
},
"jobs.TriggerSettings": {
# Old table trigger settings name. Deprecated in favor of `table_update`
"table",
# TODO remove as a follow-up
"jobs.Task": {
"dashboard_task",
"power_bi_task",
Review thread on this hunk:
Contributor: Why remove?
Author (kanterov): It's mostly to reduce the diff because these fields were never there.
Author (kanterov, Apr 29, 2025): Can add them once we merge this PR, or can change this PR.
Contributor: Separately is fine, keeps this one focused. The comment didn't suggest diff minimization, hence the question.
},
"compute.ClusterSpec": {
# doesn't work, openapi schema needs to be updated to be enum
"kind",
},
"jobs.TaskEmailNotifications": {
# Deprecated
"no_alert_for_skipped_runs",
},
"jobs.SparkJarTask": {
# Deprecated. A value of `false` is no longer supported.
"run_as_repl",
# Deprecated
"jar_uri",
},
"resources.Pipeline": {
# Deprecated
"trigger",
},
"pipelines.PipelineLibrary": {
# Deprecated
"whl",
},
}

EXTRA_REQUIRED_FIELDS: dict[str, list[str]] = {
63 changes: 63 additions & 0 deletions experimental/python/codegen/codegen/main.py
@@ -1,4 +1,5 @@
import argparse
from dataclasses import replace
from pathlib import Path
from textwrap import dedent

@@ -19,6 +20,12 @@ def main(output: str):
schemas = openapi.get_schemas()
schemas = openapi_patch.add_extra_required_fields(schemas)
schemas = openapi_patch.remove_unsupported_fields(schemas)

schemas = _transitively_mark_deprecated_and_private(
packages.RESOURCE_TYPES, schemas
)
# remove deprecated fields first so that more schemas become unused and get pruned
schemas = _remove_deprecated_fields(schemas)
schemas = _remove_unused_schemas(packages.RESOURCE_TYPES, schemas)

dataclasses, enums = _generate_code(schemas)
@@ -37,6 +44,56 @@ def main(output: str):
_write_exports(resource, resource_dataclasses, resource_enums, output)


def _transitively_mark_deprecated_and_private(
roots: list[str],
schemas: dict[str, openapi.Schema],
) -> dict[str, openapi.Schema]:
"""
If a schema is only used through deprecated (private) fields, mark it as deprecated (private).

For example, if a field is marked as private, and is excluded from documentation, corresponding
dataclasses and enums should be private as well.
"""

not_private = _collect_reachable_schemas(roots, schemas, include_private=False)
not_deprecated = _collect_reachable_schemas(
roots, schemas, include_deprecated=False
)
new_schemas = {}

for schema_name, schema in schemas.items():
if schema_name not in not_private:
schema.stage = openapi.Stage.PRIVATE

if schema_name not in not_deprecated:
schema.deprecated = True

new_schemas[schema_name] = schema

return new_schemas


def _remove_deprecated_fields(
schemas: dict[str, openapi.Schema],
) -> dict[str, openapi.Schema]:
new_schemas = {}

for name, schema in schemas.items():
if schema.type == openapi.SchemaType.OBJECT:
new_properties = {}
for field_name, field in schema.properties.items():
if field.deprecated:
continue

new_properties[field_name] = field

new_schemas[name] = replace(schema, properties=new_properties)
else:
new_schemas[name] = schema

return new_schemas


def _generate_code(
schemas: dict[str, openapi.Schema],
) -> tuple[dict[str, GeneratedDataclass], dict[str, GeneratedEnum]]:
@@ -182,6 +239,12 @@ def _collect_reachable_schemas(
if field.ref:
name = field.ref.split("/")[-1]

if not include_private and field.stage == openapi.Stage.PRIVATE:
continue

if not include_deprecated and field.deprecated:
continue

if name not in reachable:
stack.append(name)

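To make the transitive marking concrete, here is a small self-contained sketch of the reachability idea (simplified data model, made-up schema names; it does not import the actual codegen modules). A schema that is reachable from the resource roots only through PRIVATE fields ends up PRIVATE itself; the real code runs the same walk twice, once skipping private fields and once skipping deprecated fields, then writes the result onto schema.stage and schema.deprecated.

from dataclasses import dataclass, field

@dataclass
class Prop:
    ref: str               # name of the referenced schema
    private: bool = False  # stands in for stage == Stage.PRIVATE

@dataclass
class Node:
    props: list[Prop] = field(default_factory=list)

# Hypothetical schema graph: the root reaches B only through a private field,
# and C through a regular field.
schemas = {
    "Root": Node([Prop("B", private=True), Prop("C")]),
    "B": Node(),
    "C": Node(),
}

def reachable(roots: list[str], include_private: bool) -> set[str]:
    seen, stack = set(roots), list(roots)
    while stack:
        for prop in schemas[stack.pop()].props:
            if not include_private and prop.private:
                continue
            if prop.ref not in seen:
                seen.add(prop.ref)
                stack.append(prop.ref)
    return seen

public = reachable(["Root"], include_private=False)
print({name for name in schemas if name not in public})  # {'B'}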
@@ -65,8 +65,10 @@ def test_generate_dataclass():
field_name="task_key",
param_type_name=variable_or_type(str_type(), is_required=True),
type_name=variable_or_type(str_type(), is_required=True),
experimental=False,
),
],
experimental=False,
)


@@ -17,4 +17,5 @@ def test_generate_enum():
package="databricks.bundles.jobs._models.my_enum",
values={"MY_ENUM_VALUE": "myEnumValue"},
description="enum description",
experimental=False,
)
@@ -11,7 +11,9 @@

@dataclass(kw_only=True)
class Adlsgen2Info:
""""""
"""
A storage location in Adls Gen2
"""

destination: VariableOr[str]
"""