Skip to content

Commit a9cdc71

Browse files
authored
[Python] Handle deprecation and private annotations in codegen (#2773)
## Changes

Handle deprecated and private annotations in codegen:

- Deprecated fields are excluded from codegen
- Private fields are excluded from documentation

The deprecated (private) status is propagated transitively to referenced schemas, because a JSON schema normally marks only the field as deprecated (private) and does not repeat the annotation on the schemas that field references.

## Why

With that, we can continuously check that the Python code is consistent with the latest JSON schema.
1 parent 60e5da1 commit a9cdc71

35 files changed

+255
-273
lines changed

.github/workflows/push.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,5 @@ jobs:
186186
if ! ( git diff --exit-code ); then
187187
echo "Generated Python code is not up-to-date. Please run 'pushd experimental/python && make codegen' and commit the changes."
188188
189-
# TODO block PR if this fails once diffs are fixed
190-
# exit 1
189+
exit 1
191190
fi

experimental/python/codegen/codegen/generated_dataclass.py

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import codegen.packages as packages
88
from codegen.code_builder import CodeBuilder
9-
from codegen.jsonschema import Property, Schema
9+
from codegen.jsonschema import Property, Schema, Stage
1010
from codegen.packages import is_resource
1111

1212

@@ -96,6 +96,12 @@ class GeneratedField:
9696
Factory method for creating a default value, used for lists and dicts.
9797
"""
9898

99+
experimental: bool
100+
"""
101+
If true, the field is experimental and should not be indexed in docs, and
102+
should be marked as experimental in its docstring.
103+
"""
104+
99105
def __post_init__(self):
100106
if self.default_factory is not None and self.default is not None:
101107
raise ValueError("Can't have both default and default_factory", self)
@@ -124,6 +130,7 @@ class GeneratedDataclass:
124130

125131
fields: list[GeneratedField]
126132
extends: list[GeneratedType]
133+
experimental: bool
127134

128135

129136
def generate_field(
@@ -147,6 +154,7 @@ def generate_field(
147154
default=None,
148155
default_factory="dict",
149156
create_func_default="None",
157+
experimental=prop.stage == Stage.PRIVATE,
150158
)
151159
elif field_type.name == "VariableOrList":
152160
return GeneratedField(
@@ -158,6 +166,7 @@ def generate_field(
158166
default=None,
159167
default_factory="list",
160168
create_func_default="None",
169+
experimental=prop.stage == Stage.PRIVATE,
161170
)
162171
elif is_required:
163172
return GeneratedField(
@@ -169,6 +178,7 @@ def generate_field(
169178
default=None,
170179
default_factory=None,
171180
create_func_default=None,
181+
experimental=prop.stage == Stage.PRIVATE,
172182
)
173183
else:
174184
return GeneratedField(
@@ -180,6 +190,7 @@ def generate_field(
180190
default="None",
181191
default_factory=None,
182192
create_func_default="None",
193+
experimental=prop.stage == Stage.PRIVATE,
183194
)
184195

185196

@@ -308,6 +319,7 @@ def generate_dataclass(schema_name: str, schema: Schema) -> GeneratedDataclass:
308319
description=schema.description,
309320
fields=fields,
310321
extends=extends,
322+
experimental=schema.stage == Stage.PRIVATE,
311323
)
312324

313325

@@ -347,10 +359,10 @@ def _append_dataclass(b: CodeBuilder, generated: GeneratedDataclass):
347359
b.append(":").newline()
348360

349361
# FIXME should contain class docstring
350-
if not generated.description:
362+
if not generated.description and not generated.experimental:
351363
b.indent().append_triple_quote().append_triple_quote().newline().newline()
352364
else:
353-
_append_description(b, generated.description)
365+
_append_description(b, generated.description, generated.experimental)
354366

355367

356368
def _append_field(b: CodeBuilder, field: GeneratedField):
@@ -428,11 +440,16 @@ def _append_typed_dict(b: CodeBuilder, generated: GeneratedDataclass):
428440
b.indent().append_triple_quote().append_triple_quote().newline().newline()
429441

430442

431-
def _append_description(b: CodeBuilder, description: Optional[str]):
432-
if description:
443+
def _append_description(b: CodeBuilder, description: Optional[str], experimental: bool):
444+
if description or experimental:
433445
b.indent().append_triple_quote().newline()
434-
for line in description.split("\n"):
435-
b.indent().append(line).newline()
446+
if experimental:
447+
b.indent().append(":meta private: [EXPERIMENTAL]").newline()
448+
if description:
449+
b.indent().newline()
450+
if description:
451+
for line in description.split("\n"):
452+
b.indent().append(line).newline()
436453
b.indent().append_triple_quote().newline()
437454

438455

@@ -449,7 +466,7 @@ def get_code(generated: GeneratedDataclass) -> str:
449466

450467
for field in generated.fields:
451468
_append_field(b, field)
452-
_append_description(b, field.description)
469+
_append_description(b, field.description, field.experimental)
453470

454471
b.newline()
455472

@@ -462,7 +479,7 @@ def get_code(generated: GeneratedDataclass) -> str:
462479

463480
for field in generated.fields:
464481
_append_typed_dict_field(b, field)
465-
_append_description(b, field.description)
482+
_append_description(b, field.description, field.experimental)
466483

467484
b.newline()
468485

experimental/python/codegen/codegen/generated_enum.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import codegen.packages as packages
66
from codegen.code_builder import CodeBuilder
77
from codegen.generated_dataclass import _append_description
8-
from codegen.jsonschema import Schema
8+
from codegen.jsonschema import Schema, Stage
99

1010

1111
@dataclass(kw_only=True)
@@ -14,6 +14,7 @@ class GeneratedEnum:
1414
package: str
1515
values: dict[str, str]
1616
description: Optional[str]
17+
experimental: bool
1718

1819

1920
def generate_enum(schema_name: str, schema: Schema) -> GeneratedEnum:
@@ -33,6 +34,7 @@ def generate_enum(schema_name: str, schema: Schema) -> GeneratedEnum:
3334
package=package,
3435
values=values,
3536
description=schema.description,
37+
experimental=schema.stage == Stage.PRIVATE,
3638
)
3739

3840

@@ -46,7 +48,7 @@ def get_code(generated: GeneratedEnum) -> str:
4648
b.append(f"class {generated.class_name}(Enum):")
4749
b.newline()
4850

49-
_append_description(b, generated.description)
51+
_append_description(b, generated.description, generated.experimental)
5052

5153
# Example:
5254
#

experimental/python/codegen/codegen/jsonschema.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,16 @@
77
import codegen.packages as packages
88

99

10+
class Stage:
11+
PRIVATE = "PRIVATE"
12+
13+
1014
@dataclass
1115
class Property:
1216
ref: str
1317
description: Optional[str] = None
18+
deprecated: Optional[bool] = None
19+
stage: Optional[str] = None
1420

1521

1622
class SchemaType(Enum):
@@ -25,6 +31,8 @@ class Schema:
2531
properties: dict[str, Property] = field(default_factory=dict)
2632
required: list[str] = field(default_factory=list)
2733
description: Optional[str] = None
34+
deprecated: Optional[bool] = None
35+
stage: Optional[str] = None
2836

2937
def __post_init__(self):
3038
match self.type:
@@ -76,6 +84,11 @@ def _parse_schema(schema: dict) -> Schema:
7684
schema = _unwrap_variable(schema) or schema
7785
properties = {}
7886

87+
def _parse_bool(value) -> Optional[bool]:
88+
assert value is None or isinstance(value, bool)
89+
90+
return value
91+
7992
for k, v in schema.get("properties", {}).items():
8093
assert v.get("type") is None
8194
assert v.get("anyOf") is None
@@ -87,6 +100,8 @@ def _parse_schema(schema: dict) -> Schema:
87100
prop = Property(
88101
ref=v["$ref"],
89102
description=v.get("description"),
103+
deprecated=_parse_bool(v.get("deprecated")),
104+
stage=v.get("x-databricks-preview"),
90105
)
91106

92107
properties[k] = prop
@@ -102,6 +117,8 @@ def _parse_schema(schema: dict) -> Schema:
102117
properties=properties,
103118
required=schema.get("required", []),
104119
description=schema.get("description"),
120+
deprecated=_parse_bool(schema.get("deprecated")),
121+
stage=schema.get("x-databricks-preview"),
105122
)
106123

107124

experimental/python/codegen/codegen/jsonschema_patch.py

Lines changed: 4 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3,43 +3,15 @@
33
from codegen.jsonschema import Schema
44

55
REMOVED_FIELDS = {
6-
"jobs.RunJobTask": {
7-
# all params except job_parameters should be deprecated and should not be supported
8-
"jar_params",
9-
"notebook_params",
10-
"python_params",
11-
"spark_submit_params",
12-
"python_named_params",
13-
"sql_params",
14-
"dbt_commands",
15-
# except pipeline_params, that is not deprecated
16-
},
17-
"jobs.TriggerSettings": {
18-
# Old table trigger settings name. Deprecated in favor of `table_update`
19-
"table",
6+
# TODO remove as a follow-up
7+
"jobs.Task": {
8+
"dashboard_task",
9+
"power_bi_task",
2010
},
2111
"compute.ClusterSpec": {
2212
# doesn't work, openapi schema needs to be updated to be enum
2313
"kind",
2414
},
25-
"jobs.TaskEmailNotifications": {
26-
# Deprecated
27-
"no_alert_for_skipped_runs",
28-
},
29-
"jobs.SparkJarTask": {
30-
# Deprecated. A value of `false` is no longer supported.
31-
"run_as_repl",
32-
# Deprecated
33-
"jar_uri",
34-
},
35-
"resources.Pipeline": {
36-
# Deprecated
37-
"trigger",
38-
},
39-
"pipelines.PipelineLibrary": {
40-
# Deprecated
41-
"whl",
42-
},
4315
}
4416

4517
EXTRA_REQUIRED_FIELDS: dict[str, list[str]] = {

experimental/python/codegen/codegen/main.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import argparse
2+
from dataclasses import replace
23
from pathlib import Path
34
from textwrap import dedent
45

@@ -19,6 +20,12 @@ def main(output: str):
1920
schemas = openapi.get_schemas()
2021
schemas = openapi_patch.add_extra_required_fields(schemas)
2122
schemas = openapi_patch.remove_unsupported_fields(schemas)
23+
24+
schemas = _transitively_mark_deprecated_and_private(
25+
packages.RESOURCE_TYPES, schemas
26+
)
27+
# first remove deprecated fields so there are more unused schemas
28+
schemas = _remove_deprecated_fields(schemas)
2229
schemas = _remove_unused_schemas(packages.RESOURCE_TYPES, schemas)
2330

2431
dataclasses, enums = _generate_code(schemas)
@@ -37,6 +44,56 @@ def main(output: str):
3744
_write_exports(resource, resource_dataclasses, resource_enums, output)
3845

3946

47+
def _transitively_mark_deprecated_and_private(
    roots: list[str],
    schemas: dict[str, openapi.Schema],
) -> dict[str, openapi.Schema]:
    """
    If a schema is only used through deprecated (private) fields, mark it as deprecated (private).

    For example, if a field is marked as private, and is excluded from documentation, corresponding
    dataclasses and enums should be private as well.

    :param roots: names of the schemas to start the reachability walk from
    :param schemas: all known schemas, keyed by schema name
    :return: a new dict with the same keys; schemas that had to be marked are
        copies, so the objects passed in ``schemas`` are left unmodified
    """

    # Schemas still reachable from the roots when private fields are not followed.
    not_private = _collect_reachable_schemas(roots, schemas, include_private=False)
    # Schemas still reachable from the roots when deprecated fields are not followed.
    not_deprecated = _collect_reachable_schemas(
        roots, schemas, include_deprecated=False
    )

    new_schemas = {}

    for schema_name, schema in schemas.items():
        changes = {}

        if schema_name not in not_private:
            changes["stage"] = openapi.Stage.PRIVATE

        if schema_name not in not_deprecated:
            changes["deprecated"] = True

        # Copy instead of assigning attributes on the caller's object, the same
        # way _remove_deprecated_fields builds its result.
        new_schemas[schema_name] = replace(schema, **changes) if changes else schema

    return new_schemas
74+
75+
76+
def _remove_deprecated_fields(
    schemas: dict[str, openapi.Schema],
) -> dict[str, openapi.Schema]:
    """
    Drop every property flagged as deprecated from object schemas.

    Object schemas are returned as copies carrying only the non-deprecated
    properties; schemas of any other type are passed through as-is.
    """

    def _strip(candidate: openapi.Schema) -> openapi.Schema:
        # Only object schemas get their properties filtered.
        if candidate.type != openapi.SchemaType.OBJECT:
            return candidate

        kept = {
            prop_name: prop
            for prop_name, prop in candidate.properties.items()
            if not prop.deprecated
        }

        return replace(candidate, properties=kept)

    return {name: _strip(schema) for name, schema in schemas.items()}
95+
96+
4097
def _generate_code(
4198
schemas: dict[str, openapi.Schema],
4299
) -> tuple[dict[str, GeneratedDataclass], dict[str, GeneratedEnum]]:
@@ -182,6 +239,12 @@ def _collect_reachable_schemas(
182239
if field.ref:
183240
name = field.ref.split("/")[-1]
184241

242+
if not include_private and field.stage == openapi.Stage.PRIVATE:
243+
continue
244+
245+
if not include_deprecated and field.deprecated:
246+
continue
247+
185248
if name not in reachable:
186249
stack.append(name)
187250

experimental/python/codegen/codegen_tests/test_generated_dataclass.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,10 @@ def test_generate_dataclass():
6565
field_name="task_key",
6666
param_type_name=variable_or_type(str_type(), is_required=True),
6767
type_name=variable_or_type(str_type(), is_required=True),
68+
experimental=False,
6869
),
6970
],
71+
experimental=False,
7072
)
7173

7274

experimental/python/codegen/codegen_tests/test_generated_enum.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,5 @@ def test_generate_enum():
1717
package="databricks.bundles.jobs._models.my_enum",
1818
values={"MY_ENUM_VALUE": "myEnumValue"},
1919
description="enum description",
20+
experimental=False,
2021
)

experimental/python/databricks/bundles/compute/_models/adlsgen2_info.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@
1111

1212
@dataclass(kw_only=True)
1313
class Adlsgen2Info:
14-
""""""
14+
"""
15+
A storage location in Adls Gen2
16+
"""
1517

1618
destination: VariableOr[str]
1719
"""

0 commit comments

Comments
 (0)