From b7e0bed38ff6ccb4666933c7e94b44b28ddd0f5f Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 11 Jun 2025 19:27:51 +0200 Subject: [PATCH 01/30] Generate required fields for DABs --- .../validation/generated/required_fields.go | 570 ++++++++++++++++++ bundle/internal/validation/main.go | 13 + bundle/internal/validation/required.go | 204 +++++++ libs/structdiff/structpath/path.go | 7 + 4 files changed, 794 insertions(+) create mode 100644 bundle/internal/validation/generated/required_fields.go create mode 100644 bundle/internal/validation/main.go create mode 100644 bundle/internal/validation/required.go diff --git a/bundle/internal/validation/generated/required_fields.go b/bundle/internal/validation/generated/required_fields.go new file mode 100644 index 0000000000..a4ce44ab77 --- /dev/null +++ b/bundle/internal/validation/generated/required_fields.go @@ -0,0 +1,570 @@ +package generated + +// THIS FILE IS AUTOGENERATED. +// DO NOT EDIT THIS FILE DIRECTLY. + +import ( + _ "github.com/databricks/cli/libs/dyn" +) + +// RequiredFields maps [dyn.Pattern] to required fields they should have. +var RequiredFields = map[string][]string{ + + "artifacts.*.files[*]": {"source"}, + + "bundle": {"name"}, + + "environments.*.artifacts.*.files[*]": {"source"}, + "environments.*.bundle": {"name"}, + "environments.*.permissions[*]": {"level"}, + "environments.*.resources.apps.*": {"source_code_path", "name"}, + "environments.*.resources.apps.*.permissions[*]": {"level"}, + "environments.*.resources.apps.*.resources[*]": {"name"}, + "environments.*.resources.apps.*.resources[*].job": {"id", "permission"}, + "environments.*.resources.apps.*.resources[*].secret": {"key", "permission", "scope"}, + "environments.*.resources.apps.*.resources[*].serving_endpoint": {"name", "permission"}, + "environments.*.resources.apps.*.resources[*].sql_warehouse": {"id", "permission"}, + "environments.*.resources.apps.*.resources[*].uc_securable": {"permission", "securable_full_name", "securable_type"}, + "environments.*.resources.clusters.*.cluster_log_conf.dbfs": {"destination"}, + "environments.*.resources.clusters.*.cluster_log_conf.s3": {"destination"}, + "environments.*.resources.clusters.*.cluster_log_conf.volumes": {"destination"}, + "environments.*.resources.clusters.*.init_scripts[*].abfss": {"destination"}, + "environments.*.resources.clusters.*.init_scripts[*].dbfs": {"destination"}, + "environments.*.resources.clusters.*.init_scripts[*].file": {"destination"}, + "environments.*.resources.clusters.*.init_scripts[*].gcs": {"destination"}, + "environments.*.resources.clusters.*.init_scripts[*].s3": {"destination"}, + "environments.*.resources.clusters.*.init_scripts[*].volumes": {"destination"}, + "environments.*.resources.clusters.*.init_scripts[*].workspace": {"destination"}, + "environments.*.resources.clusters.*.permissions[*]": {"level"}, + "environments.*.resources.clusters.*.workload_type": {"clients"}, + "environments.*.resources.dashboards.*.permissions[*]": {"level"}, + "environments.*.resources.experiments.*.permissions[*]": {"level"}, + "environments.*.resources.jobs.*.deployment": {"kind"}, + "environments.*.resources.jobs.*.environments[*]": {"environment_key"}, + "environments.*.resources.jobs.*.environments[*].spec": {"client"}, + "environments.*.resources.jobs.*.git_source": {"git_provider", "git_url"}, + "environments.*.resources.jobs.*.git_source.job_source": {"import_from_git_branch", "job_config_path"}, + "environments.*.resources.jobs.*.health.rules[*]": {"metric", "op", "value"}, + 
"environments.*.resources.jobs.*.job_clusters[*]": {"job_cluster_key", "new_cluster"}, + "environments.*.resources.jobs.*.job_clusters[*].new_cluster.cluster_log_conf.dbfs": {"destination"}, + "environments.*.resources.jobs.*.job_clusters[*].new_cluster.cluster_log_conf.s3": {"destination"}, + "environments.*.resources.jobs.*.job_clusters[*].new_cluster.cluster_log_conf.volumes": {"destination"}, + "environments.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].abfss": {"destination"}, + "environments.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].dbfs": {"destination"}, + "environments.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].file": {"destination"}, + "environments.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].gcs": {"destination"}, + "environments.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].s3": {"destination"}, + "environments.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].volumes": {"destination"}, + "environments.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].workspace": {"destination"}, + "environments.*.resources.jobs.*.job_clusters[*].new_cluster.workload_type": {"clients"}, + "environments.*.resources.jobs.*.parameters[*]": {"default", "name"}, + "environments.*.resources.jobs.*.permissions[*]": {"level"}, + "environments.*.resources.jobs.*.queue": {"enabled"}, + "environments.*.resources.jobs.*.schedule": {"quartz_cron_expression", "timezone_id"}, + "environments.*.resources.jobs.*.tasks[*]": {"task_key"}, + "environments.*.resources.jobs.*.tasks[*].clean_rooms_notebook_task": {"clean_room_name", "notebook_name"}, + "environments.*.resources.jobs.*.tasks[*].condition_task": {"left", "op", "right"}, + "environments.*.resources.jobs.*.tasks[*].dbt_task": {"commands"}, + "environments.*.resources.jobs.*.tasks[*].depends_on[*]": {"task_key"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task": {"inputs", "task"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task": {"task_key"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.clean_rooms_notebook_task": {"clean_room_name", "notebook_name"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.condition_task": {"left", "op", "right"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.dbt_task": {"commands"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.depends_on[*]": {"task_key"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.for_each_task": {"inputs", "task"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.gen_ai_compute_task": {"dl_runtime_image"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.gen_ai_compute_task.compute": {"num_gpus"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.health.rules[*]": {"metric", "op", "value"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.libraries[*].cran": {"package"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.libraries[*].maven": {"coordinates"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.libraries[*].pypi": {"package"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.cluster_log_conf.dbfs": {"destination"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.cluster_log_conf.s3": {"destination"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.cluster_log_conf.volumes": {"destination"}, + 
"environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].abfss": {"destination"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].dbfs": {"destination"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].file": {"destination"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].gcs": {"destination"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].s3": {"destination"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].volumes": {"destination"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].workspace": {"destination"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.workload_type": {"clients"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.notebook_task": {"notebook_path"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.pipeline_task": {"pipeline_id"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.python_wheel_task": {"entry_point", "package_name"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.run_job_task": {"job_id"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.spark_python_task": {"python_file"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.sql_task": {"warehouse_id"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.sql_task.alert": {"alert_id"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.sql_task.dashboard": {"dashboard_id"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.sql_task.file": {"path"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.sql_task.query": {"query_id"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_duration_warning_threshold_exceeded[*]": {"id"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_failure[*]": {"id"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_start[*]": {"id"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_streaming_backlog_exceeded[*]": {"id"}, + "environments.*.resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_success[*]": {"id"}, + "environments.*.resources.jobs.*.tasks[*].gen_ai_compute_task": {"dl_runtime_image"}, + "environments.*.resources.jobs.*.tasks[*].gen_ai_compute_task.compute": {"num_gpus"}, + "environments.*.resources.jobs.*.tasks[*].health.rules[*]": {"metric", "op", "value"}, + "environments.*.resources.jobs.*.tasks[*].libraries[*].cran": {"package"}, + "environments.*.resources.jobs.*.tasks[*].libraries[*].maven": {"coordinates"}, + "environments.*.resources.jobs.*.tasks[*].libraries[*].pypi": {"package"}, + "environments.*.resources.jobs.*.tasks[*].new_cluster.cluster_log_conf.dbfs": {"destination"}, + "environments.*.resources.jobs.*.tasks[*].new_cluster.cluster_log_conf.s3": {"destination"}, + "environments.*.resources.jobs.*.tasks[*].new_cluster.cluster_log_conf.volumes": {"destination"}, + "environments.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].abfss": {"destination"}, + "environments.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].dbfs": {"destination"}, + "environments.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].file": 
{"destination"}, + "environments.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].gcs": {"destination"}, + "environments.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].s3": {"destination"}, + "environments.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].volumes": {"destination"}, + "environments.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].workspace": {"destination"}, + "environments.*.resources.jobs.*.tasks[*].new_cluster.workload_type": {"clients"}, + "environments.*.resources.jobs.*.tasks[*].notebook_task": {"notebook_path"}, + "environments.*.resources.jobs.*.tasks[*].pipeline_task": {"pipeline_id"}, + "environments.*.resources.jobs.*.tasks[*].python_wheel_task": {"entry_point", "package_name"}, + "environments.*.resources.jobs.*.tasks[*].run_job_task": {"job_id"}, + "environments.*.resources.jobs.*.tasks[*].spark_python_task": {"python_file"}, + "environments.*.resources.jobs.*.tasks[*].sql_task": {"warehouse_id"}, + "environments.*.resources.jobs.*.tasks[*].sql_task.alert": {"alert_id"}, + "environments.*.resources.jobs.*.tasks[*].sql_task.dashboard": {"dashboard_id"}, + "environments.*.resources.jobs.*.tasks[*].sql_task.file": {"path"}, + "environments.*.resources.jobs.*.tasks[*].sql_task.query": {"query_id"}, + "environments.*.resources.jobs.*.tasks[*].webhook_notifications.on_duration_warning_threshold_exceeded[*]": {"id"}, + "environments.*.resources.jobs.*.tasks[*].webhook_notifications.on_failure[*]": {"id"}, + "environments.*.resources.jobs.*.tasks[*].webhook_notifications.on_start[*]": {"id"}, + "environments.*.resources.jobs.*.tasks[*].webhook_notifications.on_streaming_backlog_exceeded[*]": {"id"}, + "environments.*.resources.jobs.*.tasks[*].webhook_notifications.on_success[*]": {"id"}, + "environments.*.resources.jobs.*.trigger.file_arrival": {"url"}, + "environments.*.resources.jobs.*.trigger.periodic": {"interval", "unit"}, + "environments.*.resources.jobs.*.webhook_notifications.on_duration_warning_threshold_exceeded[*]": {"id"}, + "environments.*.resources.jobs.*.webhook_notifications.on_failure[*]": {"id"}, + "environments.*.resources.jobs.*.webhook_notifications.on_start[*]": {"id"}, + "environments.*.resources.jobs.*.webhook_notifications.on_streaming_backlog_exceeded[*]": {"id"}, + "environments.*.resources.jobs.*.webhook_notifications.on_success[*]": {"id"}, + "environments.*.resources.model_serving_endpoints.*": {"name"}, + "environments.*.resources.model_serving_endpoints.*.ai_gateway.fallback_config": {"enabled"}, + "environments.*.resources.model_serving_endpoints.*.ai_gateway.rate_limits[*]": {"calls", "renewal_period"}, + "environments.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model": {"name", "provider", "task"}, + "environments.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model.amazon_bedrock_config": {"aws_region", "bedrock_provider"}, + "environments.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model.custom_provider_config": {"custom_provider_url"}, + "environments.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model.custom_provider_config.api_key_auth": {"key"}, + "environments.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model.databricks_model_serving_config": {"databricks_workspace_url"}, + "environments.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model.google_cloud_vertex_ai_config": {"project_id", "region"}, + 
"environments.*.resources.model_serving_endpoints.*.config.served_models[*]": {"model_name", "model_version", "scale_to_zero_enabled"}, + "environments.*.resources.model_serving_endpoints.*.config.traffic_config.routes[*]": {"served_model_name", "traffic_percentage"}, + "environments.*.resources.model_serving_endpoints.*.permissions[*]": {"level"}, + "environments.*.resources.model_serving_endpoints.*.rate_limits[*]": {"calls", "renewal_period"}, + "environments.*.resources.model_serving_endpoints.*.tags[*]": {"key"}, + "environments.*.resources.models.*": {"name"}, + "environments.*.resources.models.*.permissions[*]": {"level"}, + "environments.*.resources.pipelines.*.clusters[*].autoscale": {"max_workers", "min_workers"}, + "environments.*.resources.pipelines.*.clusters[*].cluster_log_conf.dbfs": {"destination"}, + "environments.*.resources.pipelines.*.clusters[*].cluster_log_conf.s3": {"destination"}, + "environments.*.resources.pipelines.*.clusters[*].cluster_log_conf.volumes": {"destination"}, + "environments.*.resources.pipelines.*.clusters[*].init_scripts[*].abfss": {"destination"}, + "environments.*.resources.pipelines.*.clusters[*].init_scripts[*].dbfs": {"destination"}, + "environments.*.resources.pipelines.*.clusters[*].init_scripts[*].file": {"destination"}, + "environments.*.resources.pipelines.*.clusters[*].init_scripts[*].gcs": {"destination"}, + "environments.*.resources.pipelines.*.clusters[*].init_scripts[*].s3": {"destination"}, + "environments.*.resources.pipelines.*.clusters[*].init_scripts[*].volumes": {"destination"}, + "environments.*.resources.pipelines.*.clusters[*].init_scripts[*].workspace": {"destination"}, + "environments.*.resources.pipelines.*.deployment": {"kind"}, + "environments.*.resources.pipelines.*.gateway_definition": {"connection_name", "gateway_storage_catalog", "gateway_storage_schema"}, + "environments.*.resources.pipelines.*.ingestion_definition.objects[*].report": {"destination_catalog", "destination_schema", "source_url"}, + "environments.*.resources.pipelines.*.ingestion_definition.objects[*].schema": {"destination_catalog", "destination_schema", "source_schema"}, + "environments.*.resources.pipelines.*.ingestion_definition.objects[*].table": {"destination_catalog", "destination_schema", "source_table"}, + "environments.*.resources.pipelines.*.libraries[*].maven": {"coordinates"}, + "environments.*.resources.pipelines.*.permissions[*]": {"level"}, + "environments.*.resources.pipelines.*.restart_window": {"start_hour"}, + "environments.*.resources.quality_monitors.*": {"table_name", "assets_dir", "output_schema_name"}, + "environments.*.resources.quality_monitors.*.custom_metrics[*]": {"definition", "input_columns", "name", "output_data_type", "type"}, + "environments.*.resources.quality_monitors.*.inference_log": {"granularities", "model_id_col", "prediction_col", "problem_type", "timestamp_col"}, + "environments.*.resources.quality_monitors.*.schedule": {"quartz_cron_expression", "timezone_id"}, + "environments.*.resources.quality_monitors.*.time_series": {"granularities", "timestamp_col"}, + "environments.*.resources.registered_models.*": {"catalog_name", "name", "schema_name"}, + "environments.*.resources.registered_models.*.grants[*]": {"privileges", "principal"}, + "environments.*.resources.schemas.*": {"catalog_name", "name"}, + "environments.*.resources.schemas.*.grants[*]": {"privileges", "principal"}, + "environments.*.resources.secret_scopes.*": {"name"}, + "environments.*.resources.secret_scopes.*.keyvault_metadata": {"dns_name", 
"resource_id"}, + "environments.*.resources.secret_scopes.*.permissions[*]": {"level"}, + "environments.*.resources.volumes.*": {"catalog_name", "name", "schema_name", "volume_type"}, + "environments.*.resources.volumes.*.grants[*]": {"privileges", "principal"}, + + "permissions[*]": {"level"}, + + "resources.apps.*": {"source_code_path", "name"}, + "resources.apps.*.permissions[*]": {"level"}, + "resources.apps.*.resources[*]": {"name"}, + "resources.apps.*.resources[*].job": {"id", "permission"}, + "resources.apps.*.resources[*].secret": {"key", "permission", "scope"}, + "resources.apps.*.resources[*].serving_endpoint": {"name", "permission"}, + "resources.apps.*.resources[*].sql_warehouse": {"id", "permission"}, + "resources.apps.*.resources[*].uc_securable": {"permission", "securable_full_name", "securable_type"}, + + "resources.clusters.*.cluster_log_conf.dbfs": {"destination"}, + "resources.clusters.*.cluster_log_conf.s3": {"destination"}, + "resources.clusters.*.cluster_log_conf.volumes": {"destination"}, + "resources.clusters.*.init_scripts[*].abfss": {"destination"}, + "resources.clusters.*.init_scripts[*].dbfs": {"destination"}, + "resources.clusters.*.init_scripts[*].file": {"destination"}, + "resources.clusters.*.init_scripts[*].gcs": {"destination"}, + "resources.clusters.*.init_scripts[*].s3": {"destination"}, + "resources.clusters.*.init_scripts[*].volumes": {"destination"}, + "resources.clusters.*.init_scripts[*].workspace": {"destination"}, + "resources.clusters.*.permissions[*]": {"level"}, + "resources.clusters.*.workload_type": {"clients"}, + + "resources.dashboards.*.permissions[*]": {"level"}, + + "resources.experiments.*.permissions[*]": {"level"}, + + "resources.jobs.*.deployment": {"kind"}, + "resources.jobs.*.environments[*]": {"environment_key"}, + "resources.jobs.*.environments[*].spec": {"client"}, + "resources.jobs.*.git_source": {"git_provider", "git_url"}, + "resources.jobs.*.git_source.job_source": {"import_from_git_branch", "job_config_path"}, + "resources.jobs.*.health.rules[*]": {"metric", "op", "value"}, + "resources.jobs.*.job_clusters[*]": {"job_cluster_key", "new_cluster"}, + "resources.jobs.*.job_clusters[*].new_cluster.cluster_log_conf.dbfs": {"destination"}, + "resources.jobs.*.job_clusters[*].new_cluster.cluster_log_conf.s3": {"destination"}, + "resources.jobs.*.job_clusters[*].new_cluster.cluster_log_conf.volumes": {"destination"}, + "resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].abfss": {"destination"}, + "resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].dbfs": {"destination"}, + "resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].file": {"destination"}, + "resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].gcs": {"destination"}, + "resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].s3": {"destination"}, + "resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].volumes": {"destination"}, + "resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].workspace": {"destination"}, + "resources.jobs.*.job_clusters[*].new_cluster.workload_type": {"clients"}, + "resources.jobs.*.parameters[*]": {"default", "name"}, + "resources.jobs.*.permissions[*]": {"level"}, + "resources.jobs.*.queue": {"enabled"}, + "resources.jobs.*.schedule": {"quartz_cron_expression", "timezone_id"}, + "resources.jobs.*.tasks[*]": {"task_key"}, + "resources.jobs.*.tasks[*].clean_rooms_notebook_task": {"clean_room_name", "notebook_name"}, + "resources.jobs.*.tasks[*].condition_task": {"left", "op", "right"}, + 
"resources.jobs.*.tasks[*].dbt_task": {"commands"}, + "resources.jobs.*.tasks[*].depends_on[*]": {"task_key"}, + "resources.jobs.*.tasks[*].for_each_task": {"inputs", "task"}, + "resources.jobs.*.tasks[*].for_each_task.task": {"task_key"}, + "resources.jobs.*.tasks[*].for_each_task.task.clean_rooms_notebook_task": {"clean_room_name", "notebook_name"}, + "resources.jobs.*.tasks[*].for_each_task.task.condition_task": {"left", "op", "right"}, + "resources.jobs.*.tasks[*].for_each_task.task.dbt_task": {"commands"}, + "resources.jobs.*.tasks[*].for_each_task.task.depends_on[*]": {"task_key"}, + "resources.jobs.*.tasks[*].for_each_task.task.for_each_task": {"inputs", "task"}, + "resources.jobs.*.tasks[*].for_each_task.task.gen_ai_compute_task": {"dl_runtime_image"}, + "resources.jobs.*.tasks[*].for_each_task.task.gen_ai_compute_task.compute": {"num_gpus"}, + "resources.jobs.*.tasks[*].for_each_task.task.health.rules[*]": {"metric", "op", "value"}, + "resources.jobs.*.tasks[*].for_each_task.task.libraries[*].cran": {"package"}, + "resources.jobs.*.tasks[*].for_each_task.task.libraries[*].maven": {"coordinates"}, + "resources.jobs.*.tasks[*].for_each_task.task.libraries[*].pypi": {"package"}, + "resources.jobs.*.tasks[*].for_each_task.task.new_cluster.cluster_log_conf.dbfs": {"destination"}, + "resources.jobs.*.tasks[*].for_each_task.task.new_cluster.cluster_log_conf.s3": {"destination"}, + "resources.jobs.*.tasks[*].for_each_task.task.new_cluster.cluster_log_conf.volumes": {"destination"}, + "resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].abfss": {"destination"}, + "resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].dbfs": {"destination"}, + "resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].file": {"destination"}, + "resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].gcs": {"destination"}, + "resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].s3": {"destination"}, + "resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].volumes": {"destination"}, + "resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].workspace": {"destination"}, + "resources.jobs.*.tasks[*].for_each_task.task.new_cluster.workload_type": {"clients"}, + "resources.jobs.*.tasks[*].for_each_task.task.notebook_task": {"notebook_path"}, + "resources.jobs.*.tasks[*].for_each_task.task.pipeline_task": {"pipeline_id"}, + "resources.jobs.*.tasks[*].for_each_task.task.python_wheel_task": {"entry_point", "package_name"}, + "resources.jobs.*.tasks[*].for_each_task.task.run_job_task": {"job_id"}, + "resources.jobs.*.tasks[*].for_each_task.task.spark_python_task": {"python_file"}, + "resources.jobs.*.tasks[*].for_each_task.task.sql_task": {"warehouse_id"}, + "resources.jobs.*.tasks[*].for_each_task.task.sql_task.alert": {"alert_id"}, + "resources.jobs.*.tasks[*].for_each_task.task.sql_task.dashboard": {"dashboard_id"}, + "resources.jobs.*.tasks[*].for_each_task.task.sql_task.file": {"path"}, + "resources.jobs.*.tasks[*].for_each_task.task.sql_task.query": {"query_id"}, + "resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_duration_warning_threshold_exceeded[*]": {"id"}, + "resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_failure[*]": {"id"}, + "resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_start[*]": {"id"}, + "resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_streaming_backlog_exceeded[*]": {"id"}, + 
"resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_success[*]": {"id"}, + "resources.jobs.*.tasks[*].gen_ai_compute_task": {"dl_runtime_image"}, + "resources.jobs.*.tasks[*].gen_ai_compute_task.compute": {"num_gpus"}, + "resources.jobs.*.tasks[*].health.rules[*]": {"metric", "op", "value"}, + "resources.jobs.*.tasks[*].libraries[*].cran": {"package"}, + "resources.jobs.*.tasks[*].libraries[*].maven": {"coordinates"}, + "resources.jobs.*.tasks[*].libraries[*].pypi": {"package"}, + "resources.jobs.*.tasks[*].new_cluster.cluster_log_conf.dbfs": {"destination"}, + "resources.jobs.*.tasks[*].new_cluster.cluster_log_conf.s3": {"destination"}, + "resources.jobs.*.tasks[*].new_cluster.cluster_log_conf.volumes": {"destination"}, + "resources.jobs.*.tasks[*].new_cluster.init_scripts[*].abfss": {"destination"}, + "resources.jobs.*.tasks[*].new_cluster.init_scripts[*].dbfs": {"destination"}, + "resources.jobs.*.tasks[*].new_cluster.init_scripts[*].file": {"destination"}, + "resources.jobs.*.tasks[*].new_cluster.init_scripts[*].gcs": {"destination"}, + "resources.jobs.*.tasks[*].new_cluster.init_scripts[*].s3": {"destination"}, + "resources.jobs.*.tasks[*].new_cluster.init_scripts[*].volumes": {"destination"}, + "resources.jobs.*.tasks[*].new_cluster.init_scripts[*].workspace": {"destination"}, + "resources.jobs.*.tasks[*].new_cluster.workload_type": {"clients"}, + "resources.jobs.*.tasks[*].notebook_task": {"notebook_path"}, + "resources.jobs.*.tasks[*].pipeline_task": {"pipeline_id"}, + "resources.jobs.*.tasks[*].python_wheel_task": {"entry_point", "package_name"}, + "resources.jobs.*.tasks[*].run_job_task": {"job_id"}, + "resources.jobs.*.tasks[*].spark_python_task": {"python_file"}, + "resources.jobs.*.tasks[*].sql_task": {"warehouse_id"}, + "resources.jobs.*.tasks[*].sql_task.alert": {"alert_id"}, + "resources.jobs.*.tasks[*].sql_task.dashboard": {"dashboard_id"}, + "resources.jobs.*.tasks[*].sql_task.file": {"path"}, + "resources.jobs.*.tasks[*].sql_task.query": {"query_id"}, + "resources.jobs.*.tasks[*].webhook_notifications.on_duration_warning_threshold_exceeded[*]": {"id"}, + "resources.jobs.*.tasks[*].webhook_notifications.on_failure[*]": {"id"}, + "resources.jobs.*.tasks[*].webhook_notifications.on_start[*]": {"id"}, + "resources.jobs.*.tasks[*].webhook_notifications.on_streaming_backlog_exceeded[*]": {"id"}, + "resources.jobs.*.tasks[*].webhook_notifications.on_success[*]": {"id"}, + "resources.jobs.*.trigger.file_arrival": {"url"}, + "resources.jobs.*.trigger.periodic": {"interval", "unit"}, + "resources.jobs.*.webhook_notifications.on_duration_warning_threshold_exceeded[*]": {"id"}, + "resources.jobs.*.webhook_notifications.on_failure[*]": {"id"}, + "resources.jobs.*.webhook_notifications.on_start[*]": {"id"}, + "resources.jobs.*.webhook_notifications.on_streaming_backlog_exceeded[*]": {"id"}, + "resources.jobs.*.webhook_notifications.on_success[*]": {"id"}, + + "resources.model_serving_endpoints.*": {"name"}, + "resources.model_serving_endpoints.*.ai_gateway.fallback_config": {"enabled"}, + "resources.model_serving_endpoints.*.ai_gateway.rate_limits[*]": {"calls", "renewal_period"}, + "resources.model_serving_endpoints.*.config.served_entities[*].external_model": {"name", "provider", "task"}, + "resources.model_serving_endpoints.*.config.served_entities[*].external_model.amazon_bedrock_config": {"aws_region", "bedrock_provider"}, + "resources.model_serving_endpoints.*.config.served_entities[*].external_model.custom_provider_config": {"custom_provider_url"}, + 
"resources.model_serving_endpoints.*.config.served_entities[*].external_model.custom_provider_config.api_key_auth": {"key"}, + "resources.model_serving_endpoints.*.config.served_entities[*].external_model.databricks_model_serving_config": {"databricks_workspace_url"}, + "resources.model_serving_endpoints.*.config.served_entities[*].external_model.google_cloud_vertex_ai_config": {"project_id", "region"}, + "resources.model_serving_endpoints.*.config.served_models[*]": {"model_name", "model_version", "scale_to_zero_enabled"}, + "resources.model_serving_endpoints.*.config.traffic_config.routes[*]": {"served_model_name", "traffic_percentage"}, + "resources.model_serving_endpoints.*.permissions[*]": {"level"}, + "resources.model_serving_endpoints.*.rate_limits[*]": {"calls", "renewal_period"}, + "resources.model_serving_endpoints.*.tags[*]": {"key"}, + + "resources.models.*": {"name"}, + "resources.models.*.permissions[*]": {"level"}, + + "resources.pipelines.*.clusters[*].autoscale": {"max_workers", "min_workers"}, + "resources.pipelines.*.clusters[*].cluster_log_conf.dbfs": {"destination"}, + "resources.pipelines.*.clusters[*].cluster_log_conf.s3": {"destination"}, + "resources.pipelines.*.clusters[*].cluster_log_conf.volumes": {"destination"}, + "resources.pipelines.*.clusters[*].init_scripts[*].abfss": {"destination"}, + "resources.pipelines.*.clusters[*].init_scripts[*].dbfs": {"destination"}, + "resources.pipelines.*.clusters[*].init_scripts[*].file": {"destination"}, + "resources.pipelines.*.clusters[*].init_scripts[*].gcs": {"destination"}, + "resources.pipelines.*.clusters[*].init_scripts[*].s3": {"destination"}, + "resources.pipelines.*.clusters[*].init_scripts[*].volumes": {"destination"}, + "resources.pipelines.*.clusters[*].init_scripts[*].workspace": {"destination"}, + "resources.pipelines.*.deployment": {"kind"}, + "resources.pipelines.*.gateway_definition": {"connection_name", "gateway_storage_catalog", "gateway_storage_schema"}, + "resources.pipelines.*.ingestion_definition.objects[*].report": {"destination_catalog", "destination_schema", "source_url"}, + "resources.pipelines.*.ingestion_definition.objects[*].schema": {"destination_catalog", "destination_schema", "source_schema"}, + "resources.pipelines.*.ingestion_definition.objects[*].table": {"destination_catalog", "destination_schema", "source_table"}, + "resources.pipelines.*.libraries[*].maven": {"coordinates"}, + "resources.pipelines.*.permissions[*]": {"level"}, + "resources.pipelines.*.restart_window": {"start_hour"}, + + "resources.quality_monitors.*": {"table_name", "assets_dir", "output_schema_name"}, + "resources.quality_monitors.*.custom_metrics[*]": {"definition", "input_columns", "name", "output_data_type", "type"}, + "resources.quality_monitors.*.inference_log": {"granularities", "model_id_col", "prediction_col", "problem_type", "timestamp_col"}, + "resources.quality_monitors.*.schedule": {"quartz_cron_expression", "timezone_id"}, + "resources.quality_monitors.*.time_series": {"granularities", "timestamp_col"}, + + "resources.registered_models.*": {"catalog_name", "name", "schema_name"}, + "resources.registered_models.*.grants[*]": {"privileges", "principal"}, + + "resources.schemas.*": {"catalog_name", "name"}, + "resources.schemas.*.grants[*]": {"privileges", "principal"}, + + "resources.secret_scopes.*": {"name"}, + "resources.secret_scopes.*.keyvault_metadata": {"dns_name", "resource_id"}, + "resources.secret_scopes.*.permissions[*]": {"level"}, + + "resources.volumes.*": {"catalog_name", "name", 
"schema_name", "volume_type"}, + "resources.volumes.*.grants[*]": {"privileges", "principal"}, + + "targets.*.artifacts.*.files[*]": {"source"}, + + "targets.*.bundle": {"name"}, + + "targets.*.permissions[*]": {"level"}, + + "targets.*.resources.apps.*": {"source_code_path", "name"}, + "targets.*.resources.apps.*.permissions[*]": {"level"}, + "targets.*.resources.apps.*.resources[*]": {"name"}, + "targets.*.resources.apps.*.resources[*].job": {"id", "permission"}, + "targets.*.resources.apps.*.resources[*].secret": {"key", "permission", "scope"}, + "targets.*.resources.apps.*.resources[*].serving_endpoint": {"name", "permission"}, + "targets.*.resources.apps.*.resources[*].sql_warehouse": {"id", "permission"}, + "targets.*.resources.apps.*.resources[*].uc_securable": {"permission", "securable_full_name", "securable_type"}, + "targets.*.resources.clusters.*.cluster_log_conf.dbfs": {"destination"}, + "targets.*.resources.clusters.*.cluster_log_conf.s3": {"destination"}, + "targets.*.resources.clusters.*.cluster_log_conf.volumes": {"destination"}, + "targets.*.resources.clusters.*.init_scripts[*].abfss": {"destination"}, + "targets.*.resources.clusters.*.init_scripts[*].dbfs": {"destination"}, + "targets.*.resources.clusters.*.init_scripts[*].file": {"destination"}, + "targets.*.resources.clusters.*.init_scripts[*].gcs": {"destination"}, + "targets.*.resources.clusters.*.init_scripts[*].s3": {"destination"}, + "targets.*.resources.clusters.*.init_scripts[*].volumes": {"destination"}, + "targets.*.resources.clusters.*.init_scripts[*].workspace": {"destination"}, + "targets.*.resources.clusters.*.permissions[*]": {"level"}, + "targets.*.resources.clusters.*.workload_type": {"clients"}, + "targets.*.resources.dashboards.*.permissions[*]": {"level"}, + "targets.*.resources.experiments.*.permissions[*]": {"level"}, + "targets.*.resources.jobs.*.deployment": {"kind"}, + "targets.*.resources.jobs.*.environments[*]": {"environment_key"}, + "targets.*.resources.jobs.*.environments[*].spec": {"client"}, + "targets.*.resources.jobs.*.git_source": {"git_provider", "git_url"}, + "targets.*.resources.jobs.*.git_source.job_source": {"import_from_git_branch", "job_config_path"}, + "targets.*.resources.jobs.*.health.rules[*]": {"metric", "op", "value"}, + "targets.*.resources.jobs.*.job_clusters[*]": {"job_cluster_key", "new_cluster"}, + "targets.*.resources.jobs.*.job_clusters[*].new_cluster.cluster_log_conf.dbfs": {"destination"}, + "targets.*.resources.jobs.*.job_clusters[*].new_cluster.cluster_log_conf.s3": {"destination"}, + "targets.*.resources.jobs.*.job_clusters[*].new_cluster.cluster_log_conf.volumes": {"destination"}, + "targets.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].abfss": {"destination"}, + "targets.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].dbfs": {"destination"}, + "targets.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].file": {"destination"}, + "targets.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].gcs": {"destination"}, + "targets.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].s3": {"destination"}, + "targets.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].volumes": {"destination"}, + "targets.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].workspace": {"destination"}, + "targets.*.resources.jobs.*.job_clusters[*].new_cluster.workload_type": {"clients"}, + "targets.*.resources.jobs.*.parameters[*]": {"default", "name"}, + "targets.*.resources.jobs.*.permissions[*]": 
{"level"}, + "targets.*.resources.jobs.*.queue": {"enabled"}, + "targets.*.resources.jobs.*.schedule": {"quartz_cron_expression", "timezone_id"}, + "targets.*.resources.jobs.*.tasks[*]": {"task_key"}, + "targets.*.resources.jobs.*.tasks[*].clean_rooms_notebook_task": {"clean_room_name", "notebook_name"}, + "targets.*.resources.jobs.*.tasks[*].condition_task": {"left", "op", "right"}, + "targets.*.resources.jobs.*.tasks[*].dbt_task": {"commands"}, + "targets.*.resources.jobs.*.tasks[*].depends_on[*]": {"task_key"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task": {"inputs", "task"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task": {"task_key"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.clean_rooms_notebook_task": {"clean_room_name", "notebook_name"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.condition_task": {"left", "op", "right"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.dbt_task": {"commands"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.depends_on[*]": {"task_key"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.for_each_task": {"inputs", "task"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.gen_ai_compute_task": {"dl_runtime_image"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.gen_ai_compute_task.compute": {"num_gpus"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.health.rules[*]": {"metric", "op", "value"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.libraries[*].cran": {"package"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.libraries[*].maven": {"coordinates"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.libraries[*].pypi": {"package"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.cluster_log_conf.dbfs": {"destination"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.cluster_log_conf.s3": {"destination"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.cluster_log_conf.volumes": {"destination"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].abfss": {"destination"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].dbfs": {"destination"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].file": {"destination"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].gcs": {"destination"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].s3": {"destination"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].volumes": {"destination"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].workspace": {"destination"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.workload_type": {"clients"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.notebook_task": {"notebook_path"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.pipeline_task": {"pipeline_id"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.python_wheel_task": {"entry_point", "package_name"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.run_job_task": {"job_id"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.spark_python_task": {"python_file"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.sql_task": {"warehouse_id"}, + 
"targets.*.resources.jobs.*.tasks[*].for_each_task.task.sql_task.alert": {"alert_id"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.sql_task.dashboard": {"dashboard_id"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.sql_task.file": {"path"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.sql_task.query": {"query_id"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_duration_warning_threshold_exceeded[*]": {"id"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_failure[*]": {"id"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_start[*]": {"id"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_streaming_backlog_exceeded[*]": {"id"}, + "targets.*.resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_success[*]": {"id"}, + "targets.*.resources.jobs.*.tasks[*].gen_ai_compute_task": {"dl_runtime_image"}, + "targets.*.resources.jobs.*.tasks[*].gen_ai_compute_task.compute": {"num_gpus"}, + "targets.*.resources.jobs.*.tasks[*].health.rules[*]": {"metric", "op", "value"}, + "targets.*.resources.jobs.*.tasks[*].libraries[*].cran": {"package"}, + "targets.*.resources.jobs.*.tasks[*].libraries[*].maven": {"coordinates"}, + "targets.*.resources.jobs.*.tasks[*].libraries[*].pypi": {"package"}, + "targets.*.resources.jobs.*.tasks[*].new_cluster.cluster_log_conf.dbfs": {"destination"}, + "targets.*.resources.jobs.*.tasks[*].new_cluster.cluster_log_conf.s3": {"destination"}, + "targets.*.resources.jobs.*.tasks[*].new_cluster.cluster_log_conf.volumes": {"destination"}, + "targets.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].abfss": {"destination"}, + "targets.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].dbfs": {"destination"}, + "targets.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].file": {"destination"}, + "targets.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].gcs": {"destination"}, + "targets.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].s3": {"destination"}, + "targets.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].volumes": {"destination"}, + "targets.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].workspace": {"destination"}, + "targets.*.resources.jobs.*.tasks[*].new_cluster.workload_type": {"clients"}, + "targets.*.resources.jobs.*.tasks[*].notebook_task": {"notebook_path"}, + "targets.*.resources.jobs.*.tasks[*].pipeline_task": {"pipeline_id"}, + "targets.*.resources.jobs.*.tasks[*].python_wheel_task": {"entry_point", "package_name"}, + "targets.*.resources.jobs.*.tasks[*].run_job_task": {"job_id"}, + "targets.*.resources.jobs.*.tasks[*].spark_python_task": {"python_file"}, + "targets.*.resources.jobs.*.tasks[*].sql_task": {"warehouse_id"}, + "targets.*.resources.jobs.*.tasks[*].sql_task.alert": {"alert_id"}, + "targets.*.resources.jobs.*.tasks[*].sql_task.dashboard": {"dashboard_id"}, + "targets.*.resources.jobs.*.tasks[*].sql_task.file": {"path"}, + "targets.*.resources.jobs.*.tasks[*].sql_task.query": {"query_id"}, + "targets.*.resources.jobs.*.tasks[*].webhook_notifications.on_duration_warning_threshold_exceeded[*]": {"id"}, + "targets.*.resources.jobs.*.tasks[*].webhook_notifications.on_failure[*]": {"id"}, + "targets.*.resources.jobs.*.tasks[*].webhook_notifications.on_start[*]": {"id"}, + "targets.*.resources.jobs.*.tasks[*].webhook_notifications.on_streaming_backlog_exceeded[*]": {"id"}, + 
"targets.*.resources.jobs.*.tasks[*].webhook_notifications.on_success[*]": {"id"}, + "targets.*.resources.jobs.*.trigger.file_arrival": {"url"}, + "targets.*.resources.jobs.*.trigger.periodic": {"interval", "unit"}, + "targets.*.resources.jobs.*.webhook_notifications.on_duration_warning_threshold_exceeded[*]": {"id"}, + "targets.*.resources.jobs.*.webhook_notifications.on_failure[*]": {"id"}, + "targets.*.resources.jobs.*.webhook_notifications.on_start[*]": {"id"}, + "targets.*.resources.jobs.*.webhook_notifications.on_streaming_backlog_exceeded[*]": {"id"}, + "targets.*.resources.jobs.*.webhook_notifications.on_success[*]": {"id"}, + "targets.*.resources.model_serving_endpoints.*": {"name"}, + "targets.*.resources.model_serving_endpoints.*.ai_gateway.fallback_config": {"enabled"}, + "targets.*.resources.model_serving_endpoints.*.ai_gateway.rate_limits[*]": {"calls", "renewal_period"}, + "targets.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model": {"name", "provider", "task"}, + "targets.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model.amazon_bedrock_config": {"aws_region", "bedrock_provider"}, + "targets.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model.custom_provider_config": {"custom_provider_url"}, + "targets.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model.custom_provider_config.api_key_auth": {"key"}, + "targets.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model.databricks_model_serving_config": {"databricks_workspace_url"}, + "targets.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model.google_cloud_vertex_ai_config": {"project_id", "region"}, + "targets.*.resources.model_serving_endpoints.*.config.served_models[*]": {"model_name", "model_version", "scale_to_zero_enabled"}, + "targets.*.resources.model_serving_endpoints.*.config.traffic_config.routes[*]": {"served_model_name", "traffic_percentage"}, + "targets.*.resources.model_serving_endpoints.*.permissions[*]": {"level"}, + "targets.*.resources.model_serving_endpoints.*.rate_limits[*]": {"calls", "renewal_period"}, + "targets.*.resources.model_serving_endpoints.*.tags[*]": {"key"}, + "targets.*.resources.models.*": {"name"}, + "targets.*.resources.models.*.permissions[*]": {"level"}, + "targets.*.resources.pipelines.*.clusters[*].autoscale": {"max_workers", "min_workers"}, + "targets.*.resources.pipelines.*.clusters[*].cluster_log_conf.dbfs": {"destination"}, + "targets.*.resources.pipelines.*.clusters[*].cluster_log_conf.s3": {"destination"}, + "targets.*.resources.pipelines.*.clusters[*].cluster_log_conf.volumes": {"destination"}, + "targets.*.resources.pipelines.*.clusters[*].init_scripts[*].abfss": {"destination"}, + "targets.*.resources.pipelines.*.clusters[*].init_scripts[*].dbfs": {"destination"}, + "targets.*.resources.pipelines.*.clusters[*].init_scripts[*].file": {"destination"}, + "targets.*.resources.pipelines.*.clusters[*].init_scripts[*].gcs": {"destination"}, + "targets.*.resources.pipelines.*.clusters[*].init_scripts[*].s3": {"destination"}, + "targets.*.resources.pipelines.*.clusters[*].init_scripts[*].volumes": {"destination"}, + "targets.*.resources.pipelines.*.clusters[*].init_scripts[*].workspace": {"destination"}, + "targets.*.resources.pipelines.*.deployment": {"kind"}, + "targets.*.resources.pipelines.*.gateway_definition": {"connection_name", "gateway_storage_catalog", "gateway_storage_schema"}, + 
"targets.*.resources.pipelines.*.ingestion_definition.objects[*].report": {"destination_catalog", "destination_schema", "source_url"}, + "targets.*.resources.pipelines.*.ingestion_definition.objects[*].schema": {"destination_catalog", "destination_schema", "source_schema"}, + "targets.*.resources.pipelines.*.ingestion_definition.objects[*].table": {"destination_catalog", "destination_schema", "source_table"}, + "targets.*.resources.pipelines.*.libraries[*].maven": {"coordinates"}, + "targets.*.resources.pipelines.*.permissions[*]": {"level"}, + "targets.*.resources.pipelines.*.restart_window": {"start_hour"}, + "targets.*.resources.quality_monitors.*": {"table_name", "assets_dir", "output_schema_name"}, + "targets.*.resources.quality_monitors.*.custom_metrics[*]": {"definition", "input_columns", "name", "output_data_type", "type"}, + "targets.*.resources.quality_monitors.*.inference_log": {"granularities", "model_id_col", "prediction_col", "problem_type", "timestamp_col"}, + "targets.*.resources.quality_monitors.*.schedule": {"quartz_cron_expression", "timezone_id"}, + "targets.*.resources.quality_monitors.*.time_series": {"granularities", "timestamp_col"}, + "targets.*.resources.registered_models.*": {"catalog_name", "name", "schema_name"}, + "targets.*.resources.registered_models.*.grants[*]": {"privileges", "principal"}, + "targets.*.resources.schemas.*": {"catalog_name", "name"}, + "targets.*.resources.schemas.*.grants[*]": {"privileges", "principal"}, + "targets.*.resources.secret_scopes.*": {"name"}, + "targets.*.resources.secret_scopes.*.keyvault_metadata": {"dns_name", "resource_id"}, + "targets.*.resources.secret_scopes.*.permissions[*]": {"level"}, + "targets.*.resources.volumes.*": {"catalog_name", "name", "schema_name", "volume_type"}, + "targets.*.resources.volumes.*.grants[*]": {"privileges", "principal"}, +} diff --git a/bundle/internal/validation/main.go b/bundle/internal/validation/main.go new file mode 100644 index 0000000000..8f5d64a393 --- /dev/null +++ b/bundle/internal/validation/main.go @@ -0,0 +1,13 @@ +package main + +import ( + "log" +) + +// This package is meant to be run from the root of the CLI repo. +func main() { + err := generateRequiredFields("bundle/internal/validation/generated") + if err != nil { + log.Fatalf("Error generating code: %v", err) + } +} diff --git a/bundle/internal/validation/required.go b/bundle/internal/validation/required.go new file mode 100644 index 0000000000..8163432bcc --- /dev/null +++ b/bundle/internal/validation/required.go @@ -0,0 +1,204 @@ +package main + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "reflect" + "sort" + "strings" + "text/template" + + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/libs/structdiff/structpath" + "github.com/databricks/cli/libs/structwalk" +) + +// PatternInfo represents validation requirements for a specific configuration pattern +type PatternInfo struct { + // The pattern for which the fields in Required are applicable. + // This is a string representation of [dyn.Parent]. + Parent string + + // List of required fields that should be set for every path in the + // config tree that matches the pattern. This field be a string of the + // form `{field1, field2, ...}`. 
+ RequiredFields string +} + +// formatRequiredFields formats a list of field names into a Go slice literal string +func formatRequiredFields(fields []string) string { + if len(fields) == 0 { + return "{}" + } + + var quoted []string + for _, field := range fields { + quoted = append(quoted, fmt.Sprintf("%q", field)) + } + + return "{" + strings.Join(quoted, ", ") + "}" +} + +// extractRequiredFields walks through a struct type and extracts required field patterns +func extractRequiredFields(typ reflect.Type) []PatternInfo { + fieldsByPattern := make(map[string][]string) + + structwalk.WalkType(typ, func(path *structpath.PathNode, _ reflect.Type) bool { + if path == nil { + return true + } + + // Do not perform required validation on fields that are deprecated, internal, or readonly. + bundleTag := path.BundleTag() + if bundleTag.Internal() || bundleTag.ReadOnly() { + return false + } + + // The "omitempty" tag indicates the field is optional in bundle config. + if path.JSONTag().OmitEmpty() { + return true + } + + // Only perform required validation for struct fields. + field, ok := path.Field() + if !ok { + return true + } + + parentPath := path.Parent().DynPath() + fieldsByPattern[parentPath] = append(fieldsByPattern[parentPath], field) + return true + }) + + return buildPatternInfos(fieldsByPattern) +} + +// buildPatternInfos converts the field map to PatternInfo slice +func buildPatternInfos(fieldsByPattern map[string][]string) []PatternInfo { + patterns := make([]PatternInfo, 0, len(fieldsByPattern)) + + for parentPath, fields := range fieldsByPattern { + patterns = append(patterns, PatternInfo{ + Parent: parentPath, + RequiredFields: formatRequiredFields(fields), + }) + } + + return patterns +} + +// getGroupingKey determines the grouping key for organizing patterns +func getGroupingKey(parentPath string) string { + parts := strings.Split(parentPath, ".") + + switch parts[0] { + case "resources": + // Group resources by their type (e.g., "resources.jobs") + if len(parts) > 1 { + return parts[0] + "." 
+ parts[1] + } + case "targets": + // Group target overrides by their first 3 keys + if len(parts) > 2 { + return strings.Join(parts[:3], ".") + } + } + + // Use the top level key for other fields + return parts[0] +} + +// groupPatternsByKey groups patterns by their logical grouping key +func groupPatternsByKey(patterns []PatternInfo) map[string][]PatternInfo { + groupedPatterns := make(map[string][]PatternInfo) + + for _, pattern := range patterns { + key := getGroupingKey(pattern.Parent) + groupedPatterns[key] = append(groupedPatterns[key], pattern) + } + + return groupedPatterns +} + +// sortGroupedPatterns sorts patterns within each group and returns them as a sorted slice +func sortGroupedPatterns(groupedPatterns map[string][]PatternInfo) [][]PatternInfo { + // Get sorted group keys + groupKeys := make([]string, 0, len(groupedPatterns)) + for key := range groupedPatterns { + groupKeys = append(groupKeys, key) + } + sort.Strings(groupKeys) + + // Build sorted result + result := make([][]PatternInfo, 0, len(groupKeys)) + for _, key := range groupKeys { + patterns := groupedPatterns[key] + + // Sort patterns within each group by parent path + sort.Slice(patterns, func(i, j int) bool { + return patterns[i].Parent < patterns[j].Parent + }) + + result = append(result, patterns) + } + + return result +} + +// RequiredFields returns grouped required field patterns for validation +func requiredFields() [][]PatternInfo { + patterns := extractRequiredFields(reflect.TypeOf(config.Root{})) + groupedPatterns := groupPatternsByKey(patterns) + return sortGroupedPatterns(groupedPatterns) +} + +// Generate creates a Go source file with required field validation rules +func generateRequiredFields(outPath string) error { + requiredFields := requiredFields() + + // Ensure output directory exists + if err := os.MkdirAll(outPath, 0o755); err != nil { + return fmt.Errorf("failed to create output directory: %w", err) + } + + // Parse and execute template + tmpl, err := template.New("validation").Parse(validationTemplate) + if err != nil { + return fmt.Errorf("failed to parse template: %w", err) + } + + var generatedCode bytes.Buffer + if err := tmpl.Execute(&generatedCode, requiredFields); err != nil { + return fmt.Errorf("failed to execute template: %w", err) + } + + // Write generated code to file + filePath := filepath.Join(outPath, "required_fields.go") + if err := os.WriteFile(filePath, generatedCode.Bytes(), 0o644); err != nil { + return fmt.Errorf("failed to write generated code: %w", err) + } + + return nil +} + +// validationTemplate is the Go text template for generating the validation map +const validationTemplate = `package generated + +// THIS FILE IS AUTOGENERATED. +// DO NOT EDIT THIS FILE DIRECTLY. + +import ( + _ "github.com/databricks/cli/libs/dyn" +) + +// RequiredFields maps [dyn.Pattern] to required fields they should have. +var RequiredFields = map[string][]string{ +{{- range . }} +{{ range . }} + "{{ .Parent }}": {{ .RequiredFields }}, +{{- end }} +{{- end }} +} +` diff --git a/libs/structdiff/structpath/path.go b/libs/structdiff/structpath/path.go index 29f3b59577..07c06d256a 100644 --- a/libs/structdiff/structpath/path.go +++ b/libs/structdiff/structpath/path.go @@ -97,6 +97,13 @@ func (p *PathNode) Field() (string, bool) { return "", false } +func (p *PathNode) Parent() *PathNode { + if p == nil { + return nil + } + return p.prev +} + // NewIndex creates a new PathNode for an array/slice index. 
func NewIndex(prev *PathNode, index int) *PathNode { if index < 0 { From 4c327c55828d1bf561634087a88fc0319c73eaef Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 11 Jun 2025 19:31:34 +0200 Subject: [PATCH 02/30] some cleanup --- bundle/internal/validation/required.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/bundle/internal/validation/required.go b/bundle/internal/validation/required.go index 8163432bcc..f8879347b8 100644 --- a/bundle/internal/validation/required.go +++ b/bundle/internal/validation/required.go @@ -18,16 +18,17 @@ import ( // PatternInfo represents validation requirements for a specific configuration pattern type PatternInfo struct { // The pattern for which the fields in Required are applicable. - // This is a string representation of [dyn.Parent]. + // This is a string representation of [dyn.Pattern]. Parent string // List of required fields that should be set for every path in the - // config tree that matches the pattern. This field be a string of the + // config tree that matches the pattern. This field will be a string of the // form `{field1, field2, ...}`. RequiredFields string } -// formatRequiredFields formats a list of field names into a Go slice literal string +// formatRequiredFields formats a list of field names into string of the form `{field1, field2, ...}` +// representing a Go slice literal. func formatRequiredFields(fields []string) string { if len(fields) == 0 { return "{}" @@ -50,7 +51,7 @@ func extractRequiredFields(typ reflect.Type) []PatternInfo { return true } - // Do not perform required validation on fields that are deprecated, internal, or readonly. + // Do not generate required validation code for fields that are internal or readonly. bundleTag := path.BundleTag() if bundleTag.Internal() || bundleTag.ReadOnly() { return false From f612f5a90e804b7a915022e14a1060b3d7f8949a Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 12 Jun 2025 11:11:21 +0200 Subject: [PATCH 03/30] some cleanup --- .../validation/generated/required_fields.go | 364 ------------------ bundle/internal/validation/required.go | 52 +-- 2 files changed, 30 insertions(+), 386 deletions(-) diff --git a/bundle/internal/validation/generated/required_fields.go b/bundle/internal/validation/generated/required_fields.go index a4ce44ab77..58f3dec056 100644 --- a/bundle/internal/validation/generated/required_fields.go +++ b/bundle/internal/validation/generated/required_fields.go @@ -9,191 +9,10 @@ import ( // RequiredFields maps [dyn.Pattern] to required fields they should have. 
var RequiredFields = map[string][]string{ - "artifacts.*.files[*]": {"source"}, "bundle": {"name"}, - "environments.*.artifacts.*.files[*]": {"source"}, - "environments.*.bundle": {"name"}, - "environments.*.permissions[*]": {"level"}, - "environments.*.resources.apps.*": {"source_code_path", "name"}, - "environments.*.resources.apps.*.permissions[*]": {"level"}, - "environments.*.resources.apps.*.resources[*]": {"name"}, - "environments.*.resources.apps.*.resources[*].job": {"id", "permission"}, - "environments.*.resources.apps.*.resources[*].secret": {"key", "permission", "scope"}, - "environments.*.resources.apps.*.resources[*].serving_endpoint": {"name", "permission"}, - "environments.*.resources.apps.*.resources[*].sql_warehouse": {"id", "permission"}, - "environments.*.resources.apps.*.resources[*].uc_securable": {"permission", "securable_full_name", "securable_type"}, - "environments.*.resources.clusters.*.cluster_log_conf.dbfs": {"destination"}, - "environments.*.resources.clusters.*.cluster_log_conf.s3": {"destination"}, - "environments.*.resources.clusters.*.cluster_log_conf.volumes": {"destination"}, - "environments.*.resources.clusters.*.init_scripts[*].abfss": {"destination"}, - "environments.*.resources.clusters.*.init_scripts[*].dbfs": {"destination"}, - "environments.*.resources.clusters.*.init_scripts[*].file": {"destination"}, - "environments.*.resources.clusters.*.init_scripts[*].gcs": {"destination"}, - "environments.*.resources.clusters.*.init_scripts[*].s3": {"destination"}, - "environments.*.resources.clusters.*.init_scripts[*].volumes": {"destination"}, - "environments.*.resources.clusters.*.init_scripts[*].workspace": {"destination"}, - "environments.*.resources.clusters.*.permissions[*]": {"level"}, - "environments.*.resources.clusters.*.workload_type": {"clients"}, - "environments.*.resources.dashboards.*.permissions[*]": {"level"}, - "environments.*.resources.experiments.*.permissions[*]": {"level"}, - "environments.*.resources.jobs.*.deployment": {"kind"}, - "environments.*.resources.jobs.*.environments[*]": {"environment_key"}, - "environments.*.resources.jobs.*.environments[*].spec": {"client"}, - "environments.*.resources.jobs.*.git_source": {"git_provider", "git_url"}, - "environments.*.resources.jobs.*.git_source.job_source": {"import_from_git_branch", "job_config_path"}, - "environments.*.resources.jobs.*.health.rules[*]": {"metric", "op", "value"}, - "environments.*.resources.jobs.*.job_clusters[*]": {"job_cluster_key", "new_cluster"}, - "environments.*.resources.jobs.*.job_clusters[*].new_cluster.cluster_log_conf.dbfs": {"destination"}, - "environments.*.resources.jobs.*.job_clusters[*].new_cluster.cluster_log_conf.s3": {"destination"}, - "environments.*.resources.jobs.*.job_clusters[*].new_cluster.cluster_log_conf.volumes": {"destination"}, - "environments.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].abfss": {"destination"}, - "environments.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].dbfs": {"destination"}, - "environments.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].file": {"destination"}, - "environments.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].gcs": {"destination"}, - "environments.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].s3": {"destination"}, - "environments.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].volumes": {"destination"}, - "environments.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].workspace": 
{"destination"}, - "environments.*.resources.jobs.*.job_clusters[*].new_cluster.workload_type": {"clients"}, - "environments.*.resources.jobs.*.parameters[*]": {"default", "name"}, - "environments.*.resources.jobs.*.permissions[*]": {"level"}, - "environments.*.resources.jobs.*.queue": {"enabled"}, - "environments.*.resources.jobs.*.schedule": {"quartz_cron_expression", "timezone_id"}, - "environments.*.resources.jobs.*.tasks[*]": {"task_key"}, - "environments.*.resources.jobs.*.tasks[*].clean_rooms_notebook_task": {"clean_room_name", "notebook_name"}, - "environments.*.resources.jobs.*.tasks[*].condition_task": {"left", "op", "right"}, - "environments.*.resources.jobs.*.tasks[*].dbt_task": {"commands"}, - "environments.*.resources.jobs.*.tasks[*].depends_on[*]": {"task_key"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task": {"inputs", "task"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task": {"task_key"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.clean_rooms_notebook_task": {"clean_room_name", "notebook_name"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.condition_task": {"left", "op", "right"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.dbt_task": {"commands"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.depends_on[*]": {"task_key"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.for_each_task": {"inputs", "task"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.gen_ai_compute_task": {"dl_runtime_image"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.gen_ai_compute_task.compute": {"num_gpus"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.health.rules[*]": {"metric", "op", "value"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.libraries[*].cran": {"package"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.libraries[*].maven": {"coordinates"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.libraries[*].pypi": {"package"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.cluster_log_conf.dbfs": {"destination"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.cluster_log_conf.s3": {"destination"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.cluster_log_conf.volumes": {"destination"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].abfss": {"destination"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].dbfs": {"destination"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].file": {"destination"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].gcs": {"destination"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].s3": {"destination"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].volumes": {"destination"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].workspace": {"destination"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.workload_type": {"clients"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.notebook_task": {"notebook_path"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.pipeline_task": {"pipeline_id"}, - 
"environments.*.resources.jobs.*.tasks[*].for_each_task.task.python_wheel_task": {"entry_point", "package_name"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.run_job_task": {"job_id"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.spark_python_task": {"python_file"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.sql_task": {"warehouse_id"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.sql_task.alert": {"alert_id"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.sql_task.dashboard": {"dashboard_id"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.sql_task.file": {"path"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.sql_task.query": {"query_id"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_duration_warning_threshold_exceeded[*]": {"id"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_failure[*]": {"id"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_start[*]": {"id"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_streaming_backlog_exceeded[*]": {"id"}, - "environments.*.resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_success[*]": {"id"}, - "environments.*.resources.jobs.*.tasks[*].gen_ai_compute_task": {"dl_runtime_image"}, - "environments.*.resources.jobs.*.tasks[*].gen_ai_compute_task.compute": {"num_gpus"}, - "environments.*.resources.jobs.*.tasks[*].health.rules[*]": {"metric", "op", "value"}, - "environments.*.resources.jobs.*.tasks[*].libraries[*].cran": {"package"}, - "environments.*.resources.jobs.*.tasks[*].libraries[*].maven": {"coordinates"}, - "environments.*.resources.jobs.*.tasks[*].libraries[*].pypi": {"package"}, - "environments.*.resources.jobs.*.tasks[*].new_cluster.cluster_log_conf.dbfs": {"destination"}, - "environments.*.resources.jobs.*.tasks[*].new_cluster.cluster_log_conf.s3": {"destination"}, - "environments.*.resources.jobs.*.tasks[*].new_cluster.cluster_log_conf.volumes": {"destination"}, - "environments.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].abfss": {"destination"}, - "environments.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].dbfs": {"destination"}, - "environments.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].file": {"destination"}, - "environments.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].gcs": {"destination"}, - "environments.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].s3": {"destination"}, - "environments.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].volumes": {"destination"}, - "environments.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].workspace": {"destination"}, - "environments.*.resources.jobs.*.tasks[*].new_cluster.workload_type": {"clients"}, - "environments.*.resources.jobs.*.tasks[*].notebook_task": {"notebook_path"}, - "environments.*.resources.jobs.*.tasks[*].pipeline_task": {"pipeline_id"}, - "environments.*.resources.jobs.*.tasks[*].python_wheel_task": {"entry_point", "package_name"}, - "environments.*.resources.jobs.*.tasks[*].run_job_task": {"job_id"}, - "environments.*.resources.jobs.*.tasks[*].spark_python_task": {"python_file"}, - "environments.*.resources.jobs.*.tasks[*].sql_task": {"warehouse_id"}, - "environments.*.resources.jobs.*.tasks[*].sql_task.alert": {"alert_id"}, - "environments.*.resources.jobs.*.tasks[*].sql_task.dashboard": 
{"dashboard_id"}, - "environments.*.resources.jobs.*.tasks[*].sql_task.file": {"path"}, - "environments.*.resources.jobs.*.tasks[*].sql_task.query": {"query_id"}, - "environments.*.resources.jobs.*.tasks[*].webhook_notifications.on_duration_warning_threshold_exceeded[*]": {"id"}, - "environments.*.resources.jobs.*.tasks[*].webhook_notifications.on_failure[*]": {"id"}, - "environments.*.resources.jobs.*.tasks[*].webhook_notifications.on_start[*]": {"id"}, - "environments.*.resources.jobs.*.tasks[*].webhook_notifications.on_streaming_backlog_exceeded[*]": {"id"}, - "environments.*.resources.jobs.*.tasks[*].webhook_notifications.on_success[*]": {"id"}, - "environments.*.resources.jobs.*.trigger.file_arrival": {"url"}, - "environments.*.resources.jobs.*.trigger.periodic": {"interval", "unit"}, - "environments.*.resources.jobs.*.webhook_notifications.on_duration_warning_threshold_exceeded[*]": {"id"}, - "environments.*.resources.jobs.*.webhook_notifications.on_failure[*]": {"id"}, - "environments.*.resources.jobs.*.webhook_notifications.on_start[*]": {"id"}, - "environments.*.resources.jobs.*.webhook_notifications.on_streaming_backlog_exceeded[*]": {"id"}, - "environments.*.resources.jobs.*.webhook_notifications.on_success[*]": {"id"}, - "environments.*.resources.model_serving_endpoints.*": {"name"}, - "environments.*.resources.model_serving_endpoints.*.ai_gateway.fallback_config": {"enabled"}, - "environments.*.resources.model_serving_endpoints.*.ai_gateway.rate_limits[*]": {"calls", "renewal_period"}, - "environments.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model": {"name", "provider", "task"}, - "environments.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model.amazon_bedrock_config": {"aws_region", "bedrock_provider"}, - "environments.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model.custom_provider_config": {"custom_provider_url"}, - "environments.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model.custom_provider_config.api_key_auth": {"key"}, - "environments.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model.databricks_model_serving_config": {"databricks_workspace_url"}, - "environments.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model.google_cloud_vertex_ai_config": {"project_id", "region"}, - "environments.*.resources.model_serving_endpoints.*.config.served_models[*]": {"model_name", "model_version", "scale_to_zero_enabled"}, - "environments.*.resources.model_serving_endpoints.*.config.traffic_config.routes[*]": {"served_model_name", "traffic_percentage"}, - "environments.*.resources.model_serving_endpoints.*.permissions[*]": {"level"}, - "environments.*.resources.model_serving_endpoints.*.rate_limits[*]": {"calls", "renewal_period"}, - "environments.*.resources.model_serving_endpoints.*.tags[*]": {"key"}, - "environments.*.resources.models.*": {"name"}, - "environments.*.resources.models.*.permissions[*]": {"level"}, - "environments.*.resources.pipelines.*.clusters[*].autoscale": {"max_workers", "min_workers"}, - "environments.*.resources.pipelines.*.clusters[*].cluster_log_conf.dbfs": {"destination"}, - "environments.*.resources.pipelines.*.clusters[*].cluster_log_conf.s3": {"destination"}, - "environments.*.resources.pipelines.*.clusters[*].cluster_log_conf.volumes": {"destination"}, - "environments.*.resources.pipelines.*.clusters[*].init_scripts[*].abfss": {"destination"}, - 
"environments.*.resources.pipelines.*.clusters[*].init_scripts[*].dbfs": {"destination"}, - "environments.*.resources.pipelines.*.clusters[*].init_scripts[*].file": {"destination"}, - "environments.*.resources.pipelines.*.clusters[*].init_scripts[*].gcs": {"destination"}, - "environments.*.resources.pipelines.*.clusters[*].init_scripts[*].s3": {"destination"}, - "environments.*.resources.pipelines.*.clusters[*].init_scripts[*].volumes": {"destination"}, - "environments.*.resources.pipelines.*.clusters[*].init_scripts[*].workspace": {"destination"}, - "environments.*.resources.pipelines.*.deployment": {"kind"}, - "environments.*.resources.pipelines.*.gateway_definition": {"connection_name", "gateway_storage_catalog", "gateway_storage_schema"}, - "environments.*.resources.pipelines.*.ingestion_definition.objects[*].report": {"destination_catalog", "destination_schema", "source_url"}, - "environments.*.resources.pipelines.*.ingestion_definition.objects[*].schema": {"destination_catalog", "destination_schema", "source_schema"}, - "environments.*.resources.pipelines.*.ingestion_definition.objects[*].table": {"destination_catalog", "destination_schema", "source_table"}, - "environments.*.resources.pipelines.*.libraries[*].maven": {"coordinates"}, - "environments.*.resources.pipelines.*.permissions[*]": {"level"}, - "environments.*.resources.pipelines.*.restart_window": {"start_hour"}, - "environments.*.resources.quality_monitors.*": {"table_name", "assets_dir", "output_schema_name"}, - "environments.*.resources.quality_monitors.*.custom_metrics[*]": {"definition", "input_columns", "name", "output_data_type", "type"}, - "environments.*.resources.quality_monitors.*.inference_log": {"granularities", "model_id_col", "prediction_col", "problem_type", "timestamp_col"}, - "environments.*.resources.quality_monitors.*.schedule": {"quartz_cron_expression", "timezone_id"}, - "environments.*.resources.quality_monitors.*.time_series": {"granularities", "timestamp_col"}, - "environments.*.resources.registered_models.*": {"catalog_name", "name", "schema_name"}, - "environments.*.resources.registered_models.*.grants[*]": {"privileges", "principal"}, - "environments.*.resources.schemas.*": {"catalog_name", "name"}, - "environments.*.resources.schemas.*.grants[*]": {"privileges", "principal"}, - "environments.*.resources.secret_scopes.*": {"name"}, - "environments.*.resources.secret_scopes.*.keyvault_metadata": {"dns_name", "resource_id"}, - "environments.*.resources.secret_scopes.*.permissions[*]": {"level"}, - "environments.*.resources.volumes.*": {"catalog_name", "name", "schema_name", "volume_type"}, - "environments.*.resources.volumes.*.grants[*]": {"privileges", "principal"}, - "permissions[*]": {"level"}, "resources.apps.*": {"source_code_path", "name"}, @@ -384,187 +203,4 @@ var RequiredFields = map[string][]string{ "resources.volumes.*": {"catalog_name", "name", "schema_name", "volume_type"}, "resources.volumes.*.grants[*]": {"privileges", "principal"}, - - "targets.*.artifacts.*.files[*]": {"source"}, - - "targets.*.bundle": {"name"}, - - "targets.*.permissions[*]": {"level"}, - - "targets.*.resources.apps.*": {"source_code_path", "name"}, - "targets.*.resources.apps.*.permissions[*]": {"level"}, - "targets.*.resources.apps.*.resources[*]": {"name"}, - "targets.*.resources.apps.*.resources[*].job": {"id", "permission"}, - "targets.*.resources.apps.*.resources[*].secret": {"key", "permission", "scope"}, - "targets.*.resources.apps.*.resources[*].serving_endpoint": {"name", "permission"}, - 
"targets.*.resources.apps.*.resources[*].sql_warehouse": {"id", "permission"}, - "targets.*.resources.apps.*.resources[*].uc_securable": {"permission", "securable_full_name", "securable_type"}, - "targets.*.resources.clusters.*.cluster_log_conf.dbfs": {"destination"}, - "targets.*.resources.clusters.*.cluster_log_conf.s3": {"destination"}, - "targets.*.resources.clusters.*.cluster_log_conf.volumes": {"destination"}, - "targets.*.resources.clusters.*.init_scripts[*].abfss": {"destination"}, - "targets.*.resources.clusters.*.init_scripts[*].dbfs": {"destination"}, - "targets.*.resources.clusters.*.init_scripts[*].file": {"destination"}, - "targets.*.resources.clusters.*.init_scripts[*].gcs": {"destination"}, - "targets.*.resources.clusters.*.init_scripts[*].s3": {"destination"}, - "targets.*.resources.clusters.*.init_scripts[*].volumes": {"destination"}, - "targets.*.resources.clusters.*.init_scripts[*].workspace": {"destination"}, - "targets.*.resources.clusters.*.permissions[*]": {"level"}, - "targets.*.resources.clusters.*.workload_type": {"clients"}, - "targets.*.resources.dashboards.*.permissions[*]": {"level"}, - "targets.*.resources.experiments.*.permissions[*]": {"level"}, - "targets.*.resources.jobs.*.deployment": {"kind"}, - "targets.*.resources.jobs.*.environments[*]": {"environment_key"}, - "targets.*.resources.jobs.*.environments[*].spec": {"client"}, - "targets.*.resources.jobs.*.git_source": {"git_provider", "git_url"}, - "targets.*.resources.jobs.*.git_source.job_source": {"import_from_git_branch", "job_config_path"}, - "targets.*.resources.jobs.*.health.rules[*]": {"metric", "op", "value"}, - "targets.*.resources.jobs.*.job_clusters[*]": {"job_cluster_key", "new_cluster"}, - "targets.*.resources.jobs.*.job_clusters[*].new_cluster.cluster_log_conf.dbfs": {"destination"}, - "targets.*.resources.jobs.*.job_clusters[*].new_cluster.cluster_log_conf.s3": {"destination"}, - "targets.*.resources.jobs.*.job_clusters[*].new_cluster.cluster_log_conf.volumes": {"destination"}, - "targets.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].abfss": {"destination"}, - "targets.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].dbfs": {"destination"}, - "targets.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].file": {"destination"}, - "targets.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].gcs": {"destination"}, - "targets.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].s3": {"destination"}, - "targets.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].volumes": {"destination"}, - "targets.*.resources.jobs.*.job_clusters[*].new_cluster.init_scripts[*].workspace": {"destination"}, - "targets.*.resources.jobs.*.job_clusters[*].new_cluster.workload_type": {"clients"}, - "targets.*.resources.jobs.*.parameters[*]": {"default", "name"}, - "targets.*.resources.jobs.*.permissions[*]": {"level"}, - "targets.*.resources.jobs.*.queue": {"enabled"}, - "targets.*.resources.jobs.*.schedule": {"quartz_cron_expression", "timezone_id"}, - "targets.*.resources.jobs.*.tasks[*]": {"task_key"}, - "targets.*.resources.jobs.*.tasks[*].clean_rooms_notebook_task": {"clean_room_name", "notebook_name"}, - "targets.*.resources.jobs.*.tasks[*].condition_task": {"left", "op", "right"}, - "targets.*.resources.jobs.*.tasks[*].dbt_task": {"commands"}, - "targets.*.resources.jobs.*.tasks[*].depends_on[*]": {"task_key"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task": {"inputs", "task"}, - 
"targets.*.resources.jobs.*.tasks[*].for_each_task.task": {"task_key"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.clean_rooms_notebook_task": {"clean_room_name", "notebook_name"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.condition_task": {"left", "op", "right"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.dbt_task": {"commands"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.depends_on[*]": {"task_key"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.for_each_task": {"inputs", "task"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.gen_ai_compute_task": {"dl_runtime_image"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.gen_ai_compute_task.compute": {"num_gpus"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.health.rules[*]": {"metric", "op", "value"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.libraries[*].cran": {"package"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.libraries[*].maven": {"coordinates"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.libraries[*].pypi": {"package"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.cluster_log_conf.dbfs": {"destination"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.cluster_log_conf.s3": {"destination"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.cluster_log_conf.volumes": {"destination"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].abfss": {"destination"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].dbfs": {"destination"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].file": {"destination"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].gcs": {"destination"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].s3": {"destination"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].volumes": {"destination"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.init_scripts[*].workspace": {"destination"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.new_cluster.workload_type": {"clients"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.notebook_task": {"notebook_path"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.pipeline_task": {"pipeline_id"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.python_wheel_task": {"entry_point", "package_name"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.run_job_task": {"job_id"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.spark_python_task": {"python_file"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.sql_task": {"warehouse_id"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.sql_task.alert": {"alert_id"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.sql_task.dashboard": {"dashboard_id"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.sql_task.file": {"path"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.sql_task.query": {"query_id"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_duration_warning_threshold_exceeded[*]": {"id"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_failure[*]": {"id"}, - 
"targets.*.resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_start[*]": {"id"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_streaming_backlog_exceeded[*]": {"id"}, - "targets.*.resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications.on_success[*]": {"id"}, - "targets.*.resources.jobs.*.tasks[*].gen_ai_compute_task": {"dl_runtime_image"}, - "targets.*.resources.jobs.*.tasks[*].gen_ai_compute_task.compute": {"num_gpus"}, - "targets.*.resources.jobs.*.tasks[*].health.rules[*]": {"metric", "op", "value"}, - "targets.*.resources.jobs.*.tasks[*].libraries[*].cran": {"package"}, - "targets.*.resources.jobs.*.tasks[*].libraries[*].maven": {"coordinates"}, - "targets.*.resources.jobs.*.tasks[*].libraries[*].pypi": {"package"}, - "targets.*.resources.jobs.*.tasks[*].new_cluster.cluster_log_conf.dbfs": {"destination"}, - "targets.*.resources.jobs.*.tasks[*].new_cluster.cluster_log_conf.s3": {"destination"}, - "targets.*.resources.jobs.*.tasks[*].new_cluster.cluster_log_conf.volumes": {"destination"}, - "targets.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].abfss": {"destination"}, - "targets.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].dbfs": {"destination"}, - "targets.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].file": {"destination"}, - "targets.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].gcs": {"destination"}, - "targets.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].s3": {"destination"}, - "targets.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].volumes": {"destination"}, - "targets.*.resources.jobs.*.tasks[*].new_cluster.init_scripts[*].workspace": {"destination"}, - "targets.*.resources.jobs.*.tasks[*].new_cluster.workload_type": {"clients"}, - "targets.*.resources.jobs.*.tasks[*].notebook_task": {"notebook_path"}, - "targets.*.resources.jobs.*.tasks[*].pipeline_task": {"pipeline_id"}, - "targets.*.resources.jobs.*.tasks[*].python_wheel_task": {"entry_point", "package_name"}, - "targets.*.resources.jobs.*.tasks[*].run_job_task": {"job_id"}, - "targets.*.resources.jobs.*.tasks[*].spark_python_task": {"python_file"}, - "targets.*.resources.jobs.*.tasks[*].sql_task": {"warehouse_id"}, - "targets.*.resources.jobs.*.tasks[*].sql_task.alert": {"alert_id"}, - "targets.*.resources.jobs.*.tasks[*].sql_task.dashboard": {"dashboard_id"}, - "targets.*.resources.jobs.*.tasks[*].sql_task.file": {"path"}, - "targets.*.resources.jobs.*.tasks[*].sql_task.query": {"query_id"}, - "targets.*.resources.jobs.*.tasks[*].webhook_notifications.on_duration_warning_threshold_exceeded[*]": {"id"}, - "targets.*.resources.jobs.*.tasks[*].webhook_notifications.on_failure[*]": {"id"}, - "targets.*.resources.jobs.*.tasks[*].webhook_notifications.on_start[*]": {"id"}, - "targets.*.resources.jobs.*.tasks[*].webhook_notifications.on_streaming_backlog_exceeded[*]": {"id"}, - "targets.*.resources.jobs.*.tasks[*].webhook_notifications.on_success[*]": {"id"}, - "targets.*.resources.jobs.*.trigger.file_arrival": {"url"}, - "targets.*.resources.jobs.*.trigger.periodic": {"interval", "unit"}, - "targets.*.resources.jobs.*.webhook_notifications.on_duration_warning_threshold_exceeded[*]": {"id"}, - "targets.*.resources.jobs.*.webhook_notifications.on_failure[*]": {"id"}, - "targets.*.resources.jobs.*.webhook_notifications.on_start[*]": {"id"}, - "targets.*.resources.jobs.*.webhook_notifications.on_streaming_backlog_exceeded[*]": {"id"}, - "targets.*.resources.jobs.*.webhook_notifications.on_success[*]": 
{"id"}, - "targets.*.resources.model_serving_endpoints.*": {"name"}, - "targets.*.resources.model_serving_endpoints.*.ai_gateway.fallback_config": {"enabled"}, - "targets.*.resources.model_serving_endpoints.*.ai_gateway.rate_limits[*]": {"calls", "renewal_period"}, - "targets.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model": {"name", "provider", "task"}, - "targets.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model.amazon_bedrock_config": {"aws_region", "bedrock_provider"}, - "targets.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model.custom_provider_config": {"custom_provider_url"}, - "targets.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model.custom_provider_config.api_key_auth": {"key"}, - "targets.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model.databricks_model_serving_config": {"databricks_workspace_url"}, - "targets.*.resources.model_serving_endpoints.*.config.served_entities[*].external_model.google_cloud_vertex_ai_config": {"project_id", "region"}, - "targets.*.resources.model_serving_endpoints.*.config.served_models[*]": {"model_name", "model_version", "scale_to_zero_enabled"}, - "targets.*.resources.model_serving_endpoints.*.config.traffic_config.routes[*]": {"served_model_name", "traffic_percentage"}, - "targets.*.resources.model_serving_endpoints.*.permissions[*]": {"level"}, - "targets.*.resources.model_serving_endpoints.*.rate_limits[*]": {"calls", "renewal_period"}, - "targets.*.resources.model_serving_endpoints.*.tags[*]": {"key"}, - "targets.*.resources.models.*": {"name"}, - "targets.*.resources.models.*.permissions[*]": {"level"}, - "targets.*.resources.pipelines.*.clusters[*].autoscale": {"max_workers", "min_workers"}, - "targets.*.resources.pipelines.*.clusters[*].cluster_log_conf.dbfs": {"destination"}, - "targets.*.resources.pipelines.*.clusters[*].cluster_log_conf.s3": {"destination"}, - "targets.*.resources.pipelines.*.clusters[*].cluster_log_conf.volumes": {"destination"}, - "targets.*.resources.pipelines.*.clusters[*].init_scripts[*].abfss": {"destination"}, - "targets.*.resources.pipelines.*.clusters[*].init_scripts[*].dbfs": {"destination"}, - "targets.*.resources.pipelines.*.clusters[*].init_scripts[*].file": {"destination"}, - "targets.*.resources.pipelines.*.clusters[*].init_scripts[*].gcs": {"destination"}, - "targets.*.resources.pipelines.*.clusters[*].init_scripts[*].s3": {"destination"}, - "targets.*.resources.pipelines.*.clusters[*].init_scripts[*].volumes": {"destination"}, - "targets.*.resources.pipelines.*.clusters[*].init_scripts[*].workspace": {"destination"}, - "targets.*.resources.pipelines.*.deployment": {"kind"}, - "targets.*.resources.pipelines.*.gateway_definition": {"connection_name", "gateway_storage_catalog", "gateway_storage_schema"}, - "targets.*.resources.pipelines.*.ingestion_definition.objects[*].report": {"destination_catalog", "destination_schema", "source_url"}, - "targets.*.resources.pipelines.*.ingestion_definition.objects[*].schema": {"destination_catalog", "destination_schema", "source_schema"}, - "targets.*.resources.pipelines.*.ingestion_definition.objects[*].table": {"destination_catalog", "destination_schema", "source_table"}, - "targets.*.resources.pipelines.*.libraries[*].maven": {"coordinates"}, - "targets.*.resources.pipelines.*.permissions[*]": {"level"}, - "targets.*.resources.pipelines.*.restart_window": {"start_hour"}, - "targets.*.resources.quality_monitors.*": 
{"table_name", "assets_dir", "output_schema_name"}, - "targets.*.resources.quality_monitors.*.custom_metrics[*]": {"definition", "input_columns", "name", "output_data_type", "type"}, - "targets.*.resources.quality_monitors.*.inference_log": {"granularities", "model_id_col", "prediction_col", "problem_type", "timestamp_col"}, - "targets.*.resources.quality_monitors.*.schedule": {"quartz_cron_expression", "timezone_id"}, - "targets.*.resources.quality_monitors.*.time_series": {"granularities", "timestamp_col"}, - "targets.*.resources.registered_models.*": {"catalog_name", "name", "schema_name"}, - "targets.*.resources.registered_models.*.grants[*]": {"privileges", "principal"}, - "targets.*.resources.schemas.*": {"catalog_name", "name"}, - "targets.*.resources.schemas.*.grants[*]": {"privileges", "principal"}, - "targets.*.resources.secret_scopes.*": {"name"}, - "targets.*.resources.secret_scopes.*.keyvault_metadata": {"dns_name", "resource_id"}, - "targets.*.resources.secret_scopes.*.permissions[*]": {"level"}, - "targets.*.resources.volumes.*": {"catalog_name", "name", "schema_name", "volume_type"}, - "targets.*.resources.volumes.*.grants[*]": {"privileges", "principal"}, } diff --git a/bundle/internal/validation/required.go b/bundle/internal/validation/required.go index f8879347b8..f5acb2b5e4 100644 --- a/bundle/internal/validation/required.go +++ b/bundle/internal/validation/required.go @@ -15,7 +15,6 @@ import ( "github.com/databricks/cli/libs/structwalk" ) -// PatternInfo represents validation requirements for a specific configuration pattern type PatternInfo struct { // The pattern for which the fields in Required are applicable. // This is a string representation of [dyn.Pattern]. @@ -43,10 +42,10 @@ func formatRequiredFields(fields []string) string { } // extractRequiredFields walks through a struct type and extracts required field patterns -func extractRequiredFields(typ reflect.Type) []PatternInfo { +func extractRequiredFields(typ reflect.Type) ([]PatternInfo, error) { fieldsByPattern := make(map[string][]string) - structwalk.WalkType(typ, func(path *structpath.PathNode, _ reflect.Type) bool { + err := structwalk.WalkType(typ, func(path *structpath.PathNode, _ reflect.Type) bool { if path == nil { return true } @@ -62,7 +61,6 @@ func extractRequiredFields(typ reflect.Type) []PatternInfo { return true } - // Only perform required validation for struct fields. field, ok := path.Field() if !ok { return true @@ -73,7 +71,7 @@ func extractRequiredFields(typ reflect.Type) []PatternInfo { return true }) - return buildPatternInfos(fieldsByPattern) + return buildPatternInfos(fieldsByPattern), err } // buildPatternInfos converts the field map to PatternInfo slice @@ -94,17 +92,9 @@ func buildPatternInfos(fieldsByPattern map[string][]string) []PatternInfo { func getGroupingKey(parentPath string) string { parts := strings.Split(parentPath, ".") - switch parts[0] { - case "resources": - // Group resources by their type (e.g., "resources.jobs") - if len(parts) > 1 { - return parts[0] + "." + parts[1] - } - case "targets": - // Group target overrides by their first 3 keys - if len(parts) > 2 { - return strings.Join(parts[:3], ".") - } + // Group resources by their resource type (e.g., "resources.jobs") + if parts[0] == "resources" && len(parts) > 1 { + return parts[0] + "." 
+ parts[1] } // Use the top level key for other fields @@ -123,6 +113,17 @@ func groupPatternsByKey(patterns []PatternInfo) map[string][]PatternInfo { return groupedPatterns } +func filterTargetsAndEnvironments(patterns map[string][]PatternInfo) map[string][]PatternInfo { + filtered := make(map[string][]PatternInfo) + for key, patterns := range patterns { + if key == "targets" || key == "environments" { + continue + } + filtered[key] = patterns + } + return filtered +} + // sortGroupedPatterns sorts patterns within each group and returns them as a sorted slice func sortGroupedPatterns(groupedPatterns map[string][]PatternInfo) [][]PatternInfo { // Get sorted group keys @@ -149,15 +150,22 @@ func sortGroupedPatterns(groupedPatterns map[string][]PatternInfo) [][]PatternIn } // RequiredFields returns grouped required field patterns for validation -func requiredFields() [][]PatternInfo { - patterns := extractRequiredFields(reflect.TypeOf(config.Root{})) +func requiredFields() ([][]PatternInfo, error) { + patterns, err := extractRequiredFields(reflect.TypeOf(config.Root{})) + if err != nil { + return nil, err + } groupedPatterns := groupPatternsByKey(patterns) - return sortGroupedPatterns(groupedPatterns) + filteredPatterns := filterTargetsAndEnvironments(groupedPatterns) + return sortGroupedPatterns(filteredPatterns), nil } // Generate creates a Go source file with required field validation rules func generateRequiredFields(outPath string) error { - requiredFields := requiredFields() + requiredFields, err := requiredFields() + if err != nil { + return fmt.Errorf("failed to generate required fields: %w", err) + } // Ensure output directory exists if err := os.MkdirAll(outPath, 0o755); err != nil { @@ -197,9 +205,9 @@ import ( // RequiredFields maps [dyn.Pattern] to required fields they should have. var RequiredFields = map[string][]string{ {{- range . }} -{{ range . }} +{{- range . }} "{{ .Parent }}": {{ .RequiredFields }}, {{- end }} -{{- end }} +{{ end -}} } ` From 9199d25f4cc819ff3b99de876b4047b968aaae1f Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 12 Jun 2025 13:21:33 +0200 Subject: [PATCH 04/30] add generate --- .codegen.json | 4 +++- Makefile | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.codegen.json b/.codegen.json index 54c108218f..616909c0f7 100644 --- a/.codegen.json +++ b/.codegen.json @@ -23,9 +23,11 @@ "post_generate": [ "go test -timeout 240s -run TestConsistentDatabricksSdkVersion github.com/databricks/cli/internal/build", "make schema", + "make generate-validation", "echo 'bundle/internal/tf/schema/\\*.go linguist-generated=true' >> ./.gitattributes", "echo 'go.sum linguist-generated=true' >> ./.gitattributes", - "echo 'bundle/schema/jsonschema.json linguist-generated=true' >> ./.gitattributes" + "echo 'bundle/schema/jsonschema.json linguist-generated=true' >> ./.gitattributes", + "echo 'bundle/internal/validation/generated/required_fields.go linguist-generated=true' >> ./.gitattributes" ] } } diff --git a/Makefile b/Makefile index 137f9f86b1..b7084eebf9 100644 --- a/Makefile +++ b/Makefile @@ -82,6 +82,9 @@ integration: integration-short: VERBOSE_TEST=1 $(INTEGRATION) -short +generate-validation: + go run ./bundle/internal/validation/main.go + generate: genkit update-sdk [ ! -f tagging.py ] || mv tagging.py internal/genkit/tagging.py @@ -91,4 +94,4 @@ generate: [ ! 
-f .github/workflows/next-changelog.yml ] || rm .github/workflows/next-changelog.yml pushd experimental/python && make codegen -.PHONY: lint lintfull tidy lintcheck fmt fmtfull test cover showcover build snapshot schema integration integration-short acc-cover acc-showcover docs ws links checks +.PHONY: lint lintfull tidy lintcheck fmt fmtfull test cover showcover build snapshot schema integration integration-short acc-cover acc-showcover docs ws links checks generate-validation From de8477e830cd785f2c5b3aa75aea19f8ab5961d2 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 12 Jun 2025 13:36:07 +0200 Subject: [PATCH 05/30] fix and generate --- .gitattributes | 1 + Makefile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitattributes b/.gitattributes index 629be14230..3c7e54d7f6 100755 --- a/.gitattributes +++ b/.gitattributes @@ -149,3 +149,4 @@ cmd/workspace/workspace/workspace.go linguist-generated=true bundle/internal/tf/schema/\*.go linguist-generated=true go.sum linguist-generated=true bundle/schema/jsonschema.json linguist-generated=true +bundle/internal/validation/generated/required_fields.go linguist-generated=true diff --git a/Makefile b/Makefile index b7084eebf9..d120b4144e 100644 --- a/Makefile +++ b/Makefile @@ -83,7 +83,7 @@ integration-short: VERBOSE_TEST=1 $(INTEGRATION) -short generate-validation: - go run ./bundle/internal/validation/main.go + go run ./bundle/internal/validation/. generate: genkit update-sdk From 427ae3bec7270b7f4a7e212c720ac16ee9c0b278 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 12 Jun 2025 14:59:13 +0200 Subject: [PATCH 06/30] minor rename to type --- bundle/internal/validation/required.go | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/bundle/internal/validation/required.go b/bundle/internal/validation/required.go index f5acb2b5e4..e7fa4ae365 100644 --- a/bundle/internal/validation/required.go +++ b/bundle/internal/validation/required.go @@ -15,7 +15,7 @@ import ( "github.com/databricks/cli/libs/structwalk" ) -type PatternInfo struct { +type RequiredPatternInfo struct { // The pattern for which the fields in Required are applicable. // This is a string representation of [dyn.Pattern]. 
Parent string @@ -42,7 +42,7 @@ func formatRequiredFields(fields []string) string { } // extractRequiredFields walks through a struct type and extracts required field patterns -func extractRequiredFields(typ reflect.Type) ([]PatternInfo, error) { +func extractRequiredFields(typ reflect.Type) ([]RequiredPatternInfo, error) { fieldsByPattern := make(map[string][]string) err := structwalk.WalkType(typ, func(path *structpath.PathNode, _ reflect.Type) bool { @@ -75,11 +75,11 @@ func extractRequiredFields(typ reflect.Type) ([]PatternInfo, error) { } // buildPatternInfos converts the field map to PatternInfo slice -func buildPatternInfos(fieldsByPattern map[string][]string) []PatternInfo { - patterns := make([]PatternInfo, 0, len(fieldsByPattern)) +func buildPatternInfos(fieldsByPattern map[string][]string) []RequiredPatternInfo { + patterns := make([]RequiredPatternInfo, 0, len(fieldsByPattern)) for parentPath, fields := range fieldsByPattern { - patterns = append(patterns, PatternInfo{ + patterns = append(patterns, RequiredPatternInfo{ Parent: parentPath, RequiredFields: formatRequiredFields(fields), }) @@ -102,8 +102,8 @@ func getGroupingKey(parentPath string) string { } // groupPatternsByKey groups patterns by their logical grouping key -func groupPatternsByKey(patterns []PatternInfo) map[string][]PatternInfo { - groupedPatterns := make(map[string][]PatternInfo) +func groupPatternsByKey(patterns []RequiredPatternInfo) map[string][]RequiredPatternInfo { + groupedPatterns := make(map[string][]RequiredPatternInfo) for _, pattern := range patterns { key := getGroupingKey(pattern.Parent) @@ -113,8 +113,8 @@ func groupPatternsByKey(patterns []PatternInfo) map[string][]PatternInfo { return groupedPatterns } -func filterTargetsAndEnvironments(patterns map[string][]PatternInfo) map[string][]PatternInfo { - filtered := make(map[string][]PatternInfo) +func filterTargetsAndEnvironments(patterns map[string][]RequiredPatternInfo) map[string][]RequiredPatternInfo { + filtered := make(map[string][]RequiredPatternInfo) for key, patterns := range patterns { if key == "targets" || key == "environments" { continue @@ -125,7 +125,7 @@ func filterTargetsAndEnvironments(patterns map[string][]PatternInfo) map[string] } // sortGroupedPatterns sorts patterns within each group and returns them as a sorted slice -func sortGroupedPatterns(groupedPatterns map[string][]PatternInfo) [][]PatternInfo { +func sortGroupedPatterns(groupedPatterns map[string][]RequiredPatternInfo) [][]RequiredPatternInfo { // Get sorted group keys groupKeys := make([]string, 0, len(groupedPatterns)) for key := range groupedPatterns { @@ -134,7 +134,7 @@ func sortGroupedPatterns(groupedPatterns map[string][]PatternInfo) [][]PatternIn sort.Strings(groupKeys) // Build sorted result - result := make([][]PatternInfo, 0, len(groupKeys)) + result := make([][]RequiredPatternInfo, 0, len(groupKeys)) for _, key := range groupKeys { patterns := groupedPatterns[key] @@ -150,7 +150,7 @@ func sortGroupedPatterns(groupedPatterns map[string][]PatternInfo) [][]PatternIn } // RequiredFields returns grouped required field patterns for validation -func requiredFields() ([][]PatternInfo, error) { +func requiredFields() ([][]RequiredPatternInfo, error) { patterns, err := extractRequiredFields(reflect.TypeOf(config.Root{})) if err != nil { return nil, err From bcb614e493dc85f68197afa5bcb9146ab30d201c Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 12 Jun 2025 13:51:37 +0200 Subject: [PATCH 07/30] Validate required bundle fields --- 
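Notes: the new validate:required mutator consumes the generated RequiredFields
map. Each map key is compiled into a dyn.Pattern, matched against the bundle
configuration, and every matched value that is missing one of its required
fields produces a warning diagnostic at that value's location. That is why the
acceptance test expectations below gain new "required field ... is not set"
lines.

A minimal, illustrative sketch of the check for a single pattern follows. The
package name, the warnMissing helper, and the Printf reporting are assumptions
made for the sketch only; the real mutator in bundle/config/validate/required.go
appends diag.Diagnostic values and handles patterns that match nothing.

package example

import (
	"fmt"

	"github.com/databricks/cli/libs/dyn"
)

// warnMissing prints a warning for every configuration value matching
// patternStr that does not set all of the fields listed in required.
// cfg is assumed to be the bundle configuration as a dyn.Value
// (the mutator obtains it via b.Config.Value()).
func warnMissing(cfg dyn.Value, patternStr string, required []string) error {
	pattern, err := dyn.NewPatternFromString(patternStr)
	if err != nil {
		return err
	}
	_, err = dyn.MapByPattern(cfg, pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
		for _, field := range required {
			vv := v.Get(field)
			if vv.Kind() == dyn.KindInvalid || vv.Kind() == dyn.KindNil {
				fmt.Printf("required field %q is not set at %v\n", field, p)
			}
		}
		return v, nil
	})
	return err
}
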
acceptance/bundle/debug/tf/out.stderr.txt | 1 + acceptance/bundle/paths/fallback/output.txt | 5 ++ acceptance/bundle/paths/nominal/output.txt | 25 ++++++++ acceptance/bundle/quality_monitor/output.txt | 4 ++ .../empty_resources/empty_dict/output.txt | 20 +++++++ .../empty_resources/with_grants/output.txt | 20 +++++++ .../with_permissions/output.txt | 20 +++++++ .../validate/models/missing_name/output.txt | 6 +- .../bundle/validate/models/user_id/output.txt | 6 +- .../validate/presets_name_prefix/output.txt | 12 ++++ .../validate/volume_defaults/output.txt | 12 ++++ .../prepend-workspace-var/output.txt | 7 +++ bundle/config/validate/required.go | 58 +++++++++++++++++++ bundle/phases/initialize.go | 4 ++ 14 files changed, 198 insertions(+), 2 deletions(-) create mode 100644 bundle/config/validate/required.go diff --git a/acceptance/bundle/debug/tf/out.stderr.txt b/acceptance/bundle/debug/tf/out.stderr.txt index 0fe8151633..6d9d7a1b1b 100644 --- a/acceptance/bundle/debug/tf/out.stderr.txt +++ b/acceptance/bundle/debug/tf/out.stderr.txt @@ -57,6 +57,7 @@ 10:07:59 Debug: Apply pid=12345 mutator=apps.Validate 10:07:59 Debug: Apply pid=12345 mutator=ValidateTargetMode 10:07:59 Debug: Apply pid=12345 mutator=ValidateSharedRootPermissions +10:07:59 Debug: Apply pid=12345 mutator=validate:required 10:07:59 Debug: Apply pid=12345 mutator=metadata.AnnotateJobs 10:07:59 Debug: Apply pid=12345 mutator=metadata.AnnotatePipelines 10:07:59 Debug: Apply pid=12345 mutator=terraform.Initialize diff --git a/acceptance/bundle/paths/fallback/output.txt b/acceptance/bundle/paths/fallback/output.txt index ee6d2c4acc..41ef87f9b3 100644 --- a/acceptance/bundle/paths/fallback/output.txt +++ b/acceptance/bundle/paths/fallback/output.txt @@ -8,6 +8,11 @@ Warn: path ../dist/wheel2.whl is defined relative to the [TEST_TMP_DIR]/resource Warn: path ../target/jar2.jar is defined relative to the [TEST_TMP_DIR]/resources directory ([TEST_TMP_DIR]/override_job.yml:41:24). Please update the path to be relative to the file where it is defined. The current value will no longer be valid in the next release. Warn: path ../src/notebook2.py is defined relative to the [TEST_TMP_DIR]/resources directory ([TEST_TMP_DIR]/override_pipeline.yml:13:23). Please update the path to be relative to the file where it is defined. The current value will no longer be valid in the next release. Warn: path ../src/file2.py is defined relative to the [TEST_TMP_DIR]/resources directory ([TEST_TMP_DIR]/override_pipeline.yml:10:23). Please update the path to be relative to the file where it is defined. The current value will no longer be valid in the next release. +Warning: required field "entry_point" is not set + at resources.jobs.my_job.tasks[4].python_wheel_task + in resources/my_job.yml:33:13 + override_job.yml:29:17 + >>> [CLI] bundle validate -t error Error: notebook "resources/this value is overridden" not found. 
Local notebook references are expected diff --git a/acceptance/bundle/paths/nominal/output.txt b/acceptance/bundle/paths/nominal/output.txt index 432169fddb..c7f7b08e61 100644 --- a/acceptance/bundle/paths/nominal/output.txt +++ b/acceptance/bundle/paths/nominal/output.txt @@ -1,5 +1,30 @@ >>> [CLI] bundle validate -t development -o json +Warning: required field "task_key" is not set + at resources.jobs.my_job.tasks[6].for_each_task.task + in resources/my_job.yml:48:15 + override_job.yml:46:19 + +Warning: required field "task_key" is not set + at resources.jobs.my_job.tasks[7].for_each_task.task + in resources/my_job.yml:54:15 + override_job.yml:52:19 + +Warning: required field "entry_point" is not set + at resources.jobs.my_job.tasks[4].python_wheel_task + in resources/my_job.yml:33:13 + override_job.yml:29:17 + +Warning: required field "inputs" is not set + at resources.jobs.my_job.tasks[6].for_each_task + in resources/my_job.yml:47:13 + override_job.yml:45:17 + +Warning: required field "inputs" is not set + at resources.jobs.my_job.tasks[7].for_each_task + in resources/my_job.yml:53:13 + override_job.yml:51:17 + >>> [CLI] bundle validate -t error Error: notebook "resources/this value is overridden" not found. Local notebook references are expected diff --git a/acceptance/bundle/quality_monitor/output.txt b/acceptance/bundle/quality_monitor/output.txt index 8a7f64ef23..3dec589065 100644 --- a/acceptance/bundle/quality_monitor/output.txt +++ b/acceptance/bundle/quality_monitor/output.txt @@ -1,5 +1,9 @@ >>> [CLI] bundle validate -o json -t development +Warning: required field "quartz_cron_expression" is not set + at resources.quality_monitors.my_monitor.schedule + in databricks.yml:17:9 + { "mode": "development", "quality_monitors": { diff --git a/acceptance/bundle/validate/empty_resources/empty_dict/output.txt b/acceptance/bundle/validate/empty_resources/empty_dict/output.txt index 732a124809..924861c6f1 100644 --- a/acceptance/bundle/validate/empty_resources/empty_dict/output.txt +++ b/acceptance/bundle/validate/empty_resources/empty_dict/output.txt @@ -35,6 +35,10 @@ } === resources.models.rname === +Warning: required field "name" is not set + at resources.models.rname + in databricks.yml:6:12 + { "models": { "rname": { @@ -54,6 +58,10 @@ } === resources.registered_models.rname === +Warning: required field "catalog_name" is not set + at resources.registered_models.rname + in databricks.yml:6:12 + { "registered_models": { "rname": {} @@ -61,6 +69,10 @@ } === resources.quality_monitors.rname === +Warning: required field "table_name" is not set + at resources.quality_monitors.rname + in databricks.yml:6:12 + { "quality_monitors": { "rname": {} @@ -68,6 +80,10 @@ } === resources.schemas.rname === +Warning: required field "catalog_name" is not set + at resources.schemas.rname + in databricks.yml:6:12 + { "schemas": { "rname": {} @@ -75,6 +91,10 @@ } === resources.volumes.rname === +Warning: required field "catalog_name" is not set + at resources.volumes.rname + in databricks.yml:6:12 + { "volumes": { "rname": { diff --git a/acceptance/bundle/validate/empty_resources/with_grants/output.txt b/acceptance/bundle/validate/empty_resources/with_grants/output.txt index 276de7b3a6..4a6b9f22e2 100644 --- a/acceptance/bundle/validate/empty_resources/with_grants/output.txt +++ b/acceptance/bundle/validate/empty_resources/with_grants/output.txt @@ -47,6 +47,10 @@ Warning: unknown field: grants at resources.models.rname in databricks.yml:7:7 +Warning: required field "name" is not set + at 
resources.models.rname + in databricks.yml:7:7 + { "models": { "rname": { @@ -70,6 +74,10 @@ Warning: unknown field: grants } === resources.registered_models.rname === +Warning: required field "catalog_name" is not set + at resources.registered_models.rname + in databricks.yml:7:7 + { "registered_models": { "rname": { @@ -83,6 +91,10 @@ Warning: unknown field: grants at resources.quality_monitors.rname in databricks.yml:7:7 +Warning: required field "table_name" is not set + at resources.quality_monitors.rname + in databricks.yml:7:7 + { "quality_monitors": { "rname": {} @@ -90,6 +102,10 @@ Warning: unknown field: grants } === resources.schemas.rname === +Warning: required field "catalog_name" is not set + at resources.schemas.rname + in databricks.yml:7:7 + { "schemas": { "rname": { @@ -99,6 +115,10 @@ Warning: unknown field: grants } === resources.volumes.rname === +Warning: required field "catalog_name" is not set + at resources.volumes.rname + in databricks.yml:7:7 + { "volumes": { "rname": { diff --git a/acceptance/bundle/validate/empty_resources/with_permissions/output.txt b/acceptance/bundle/validate/empty_resources/with_permissions/output.txt index 9510671a8d..c68d74af2a 100644 --- a/acceptance/bundle/validate/empty_resources/with_permissions/output.txt +++ b/acceptance/bundle/validate/empty_resources/with_permissions/output.txt @@ -35,6 +35,10 @@ } === resources.models.rname === +Warning: required field "name" is not set + at resources.models.rname + in databricks.yml:7:7 + { "models": { "rname": { @@ -58,6 +62,10 @@ Warning: unknown field: permissions at resources.registered_models.rname in databricks.yml:7:7 +Warning: required field "catalog_name" is not set + at resources.registered_models.rname + in databricks.yml:7:7 + { "registered_models": { "rname": {} @@ -69,6 +77,10 @@ Warning: unknown field: permissions at resources.quality_monitors.rname in databricks.yml:7:7 +Warning: required field "table_name" is not set + at resources.quality_monitors.rname + in databricks.yml:7:7 + { "quality_monitors": { "rname": {} @@ -80,6 +92,10 @@ Warning: unknown field: permissions at resources.schemas.rname in databricks.yml:7:7 +Warning: required field "catalog_name" is not set + at resources.schemas.rname + in databricks.yml:7:7 + { "schemas": { "rname": {} @@ -91,6 +107,10 @@ Warning: unknown field: permissions at resources.volumes.rname in databricks.yml:7:7 +Warning: required field "catalog_name" is not set + at resources.volumes.rname + in databricks.yml:7:7 + { "volumes": { "rname": { diff --git a/acceptance/bundle/validate/models/missing_name/output.txt b/acceptance/bundle/validate/models/missing_name/output.txt index 4f189effdf..77dfd61284 100644 --- a/acceptance/bundle/validate/models/missing_name/output.txt +++ b/acceptance/bundle/validate/models/missing_name/output.txt @@ -1,7 +1,11 @@ +Warning: required field "name" is not set + at resources.models.mymodel + in databricks.yml:5:14 + Name: test-bundle Target: default Workspace: User: [USERNAME] Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle/default -Validation OK! 
+Found 1 warning diff --git a/acceptance/bundle/validate/models/user_id/output.txt b/acceptance/bundle/validate/models/user_id/output.txt index 8c0d8db730..95c7ddde0e 100644 --- a/acceptance/bundle/validate/models/user_id/output.txt +++ b/acceptance/bundle/validate/models/user_id/output.txt @@ -2,10 +2,14 @@ Warning: unknown field: user_id at resources.models.mymodel in databricks.yml:7:7 +Warning: required field "name" is not set + at resources.models.mymodel + in databricks.yml:7:7 + Name: test-bundle Target: default Workspace: User: [USERNAME] Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle/default -Found 1 warning +Found 2 warnings diff --git a/acceptance/bundle/validate/presets_name_prefix/output.txt b/acceptance/bundle/validate/presets_name_prefix/output.txt index e81ea99190..986b72a823 100644 --- a/acceptance/bundle/validate/presets_name_prefix/output.txt +++ b/acceptance/bundle/validate/presets_name_prefix/output.txt @@ -3,6 +3,10 @@ name_prefix: "prefix-" >>> [CLI] bundle validate -o json +Warning: required field "catalog_name" is not set + at resources.schemas.schema1 + in databricks.yml:14:7 + { "jobs": { "job1": { @@ -39,6 +43,10 @@ name_prefix: "[prefix]" >>> [CLI] bundle validate -o json +Warning: required field "catalog_name" is not set + at resources.schemas.schema1 + in databricks.yml:14:7 + { "jobs": { "job1": { @@ -75,6 +83,10 @@ name_prefix: "" >>> [CLI] bundle validate -o json +Warning: required field "catalog_name" is not set + at resources.schemas.schema1 + in databricks.yml:14:7 + { "jobs": { "job1": { diff --git a/acceptance/bundle/validate/volume_defaults/output.txt b/acceptance/bundle/validate/volume_defaults/output.txt index f128a32958..e6cd44bf83 100644 --- a/acceptance/bundle/validate/volume_defaults/output.txt +++ b/acceptance/bundle/validate/volume_defaults/output.txt @@ -1,3 +1,15 @@ +Warning: required field "catalog_name" is not set + at resources.volumes.v2 + in databricks.yml:8:7 + +Warning: required field "catalog_name" is not set + at resources.volumes.v3 + in databricks.yml:10:7 + +Warning: required field "catalog_name" is not set + at resources.volumes.v1 + in databricks.yml:6:7 + { "v1": { "volume_type": "" diff --git a/acceptance/bundle/variables/prepend-workspace-var/output.txt b/acceptance/bundle/variables/prepend-workspace-var/output.txt index bed722a3b3..99bca80722 100644 --- a/acceptance/bundle/variables/prepend-workspace-var/output.txt +++ b/acceptance/bundle/variables/prepend-workspace-var/output.txt @@ -1,4 +1,11 @@ /Workspace should be prepended on all paths, but it is not the case: +Warning: required field "name" is not set + at bundle + +Warning: required field "entry_point" is not set + at resources.jobs.my_job.tasks[0].python_wheel_task + in databricks.yml:23:13 + { "bundle": { "environment": "dev", diff --git a/bundle/config/validate/required.go b/bundle/config/validate/required.go new file mode 100644 index 0000000000..fd9e3fc28d --- /dev/null +++ b/bundle/config/validate/required.go @@ -0,0 +1,58 @@ +package validate + +import ( + "context" + "fmt" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/internal/validation/generated" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" +) + +type required struct{} + +func Required() bundle.Mutator { + return &required{} +} + +func (f *required) Name() string { + return "validate:required" +} + +func (f *required) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + diags := diag.Diagnostics{} + + for k, requiredFields 
:= range generated.RequiredFields { + pattern, err := dyn.NewPatternFromString(k) + if err != nil { + return diag.FromErr(fmt.Errorf("invalid pattern %q for required field validation: %w", k, err)) + } + + _, err = dyn.MapByPattern(b.Config.Value(), pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + for _, requiredField := range requiredFields { + vv := v.Get(requiredField) + if vv.Kind() == dyn.KindInvalid || vv.Kind() == dyn.KindNil { + diags = diags.Append(diag.Diagnostic{ + Severity: diag.Warning, + Summary: fmt.Sprintf("required field %q is not set", requiredField), + Locations: v.Locations(), + Paths: []dyn.Path{p}, + }) + + return dyn.NilValue, nil + } + } + return v, nil + }) + if dyn.IsExpectedMapError(err) || dyn.IsExpectedSequenceError(err) || dyn.IsExpectedMapToIndexError(err) || dyn.IsExpectedSequenceToIndexError(err) { + // No map or sequence defined for this pattern, so we ignore it. + continue + } + if err != nil { + return diag.FromErr(err) + } + } + + return diags +} diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index d499f19de9..cc18fc1266 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -181,6 +181,10 @@ func Initialize(ctx context.Context, b *bundle.Bundle) { // Validates that when using a shared workspace path, appropriate permissions are configured permissions.ValidateSharedRootPermissions(), + // Validate all required fields are set. This is run after variable interpolation and PyDABs mutators + // since they can also set and modify resources. + validate.Required(), + // Annotate resources with "deployment" metadata. // // We don't include this step into initializeResources because these mutators set fields that are From 0c988db37c267d5f633b85064f2dfc1635a1f2d3 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 12 Jun 2025 14:08:06 +0200 Subject: [PATCH 08/30] additional validation things --- acceptance/bundle/quality_monitor/output.txt | 4 +++ .../empty_resources/empty_dict/output.txt | 28 +++++++++++++++++++ .../empty_resources/with_grants/output.txt | 28 +++++++++++++++++++ .../with_permissions/output.txt | 28 +++++++++++++++++++ .../validate/volume_defaults/databricks.yml | 11 +++++--- .../validate/volume_defaults/output.txt | 20 ++++--------- .../prepend-workspace-var/output.txt | 10 +++++-- bundle/config/validate/required.go | 2 -- 8 files changed, 107 insertions(+), 24 deletions(-) diff --git a/acceptance/bundle/quality_monitor/output.txt b/acceptance/bundle/quality_monitor/output.txt index 3dec589065..278e116606 100644 --- a/acceptance/bundle/quality_monitor/output.txt +++ b/acceptance/bundle/quality_monitor/output.txt @@ -4,6 +4,10 @@ Warning: required field "quartz_cron_expression" is not set at resources.quality_monitors.my_monitor.schedule in databricks.yml:17:9 +Warning: required field "timezone_id" is not set + at resources.quality_monitors.my_monitor.schedule + in databricks.yml:17:9 + { "mode": "development", "quality_monitors": { diff --git a/acceptance/bundle/validate/empty_resources/empty_dict/output.txt b/acceptance/bundle/validate/empty_resources/empty_dict/output.txt index 924861c6f1..15bc20acbf 100644 --- a/acceptance/bundle/validate/empty_resources/empty_dict/output.txt +++ b/acceptance/bundle/validate/empty_resources/empty_dict/output.txt @@ -62,6 +62,14 @@ Warning: required field "catalog_name" is not set at resources.registered_models.rname in databricks.yml:6:12 +Warning: required field "name" is not set + at resources.registered_models.rname + in databricks.yml:6:12 + 
+Warning: required field "schema_name" is not set + at resources.registered_models.rname + in databricks.yml:6:12 + { "registered_models": { "rname": {} @@ -73,6 +81,14 @@ Warning: required field "table_name" is not set at resources.quality_monitors.rname in databricks.yml:6:12 +Warning: required field "assets_dir" is not set + at resources.quality_monitors.rname + in databricks.yml:6:12 + +Warning: required field "output_schema_name" is not set + at resources.quality_monitors.rname + in databricks.yml:6:12 + { "quality_monitors": { "rname": {} @@ -84,6 +100,10 @@ Warning: required field "catalog_name" is not set at resources.schemas.rname in databricks.yml:6:12 +Warning: required field "name" is not set + at resources.schemas.rname + in databricks.yml:6:12 + { "schemas": { "rname": {} @@ -95,6 +115,14 @@ Warning: required field "catalog_name" is not set at resources.volumes.rname in databricks.yml:6:12 +Warning: required field "name" is not set + at resources.volumes.rname + in databricks.yml:6:12 + +Warning: required field "schema_name" is not set + at resources.volumes.rname + in databricks.yml:6:12 + { "volumes": { "rname": { diff --git a/acceptance/bundle/validate/empty_resources/with_grants/output.txt b/acceptance/bundle/validate/empty_resources/with_grants/output.txt index 4a6b9f22e2..d049da8f80 100644 --- a/acceptance/bundle/validate/empty_resources/with_grants/output.txt +++ b/acceptance/bundle/validate/empty_resources/with_grants/output.txt @@ -78,6 +78,14 @@ Warning: required field "catalog_name" is not set at resources.registered_models.rname in databricks.yml:7:7 +Warning: required field "name" is not set + at resources.registered_models.rname + in databricks.yml:7:7 + +Warning: required field "schema_name" is not set + at resources.registered_models.rname + in databricks.yml:7:7 + { "registered_models": { "rname": { @@ -95,6 +103,14 @@ Warning: required field "table_name" is not set at resources.quality_monitors.rname in databricks.yml:7:7 +Warning: required field "assets_dir" is not set + at resources.quality_monitors.rname + in databricks.yml:7:7 + +Warning: required field "output_schema_name" is not set + at resources.quality_monitors.rname + in databricks.yml:7:7 + { "quality_monitors": { "rname": {} @@ -106,6 +122,10 @@ Warning: required field "catalog_name" is not set at resources.schemas.rname in databricks.yml:7:7 +Warning: required field "name" is not set + at resources.schemas.rname + in databricks.yml:7:7 + { "schemas": { "rname": { @@ -119,6 +139,14 @@ Warning: required field "catalog_name" is not set at resources.volumes.rname in databricks.yml:7:7 +Warning: required field "name" is not set + at resources.volumes.rname + in databricks.yml:7:7 + +Warning: required field "schema_name" is not set + at resources.volumes.rname + in databricks.yml:7:7 + { "volumes": { "rname": { diff --git a/acceptance/bundle/validate/empty_resources/with_permissions/output.txt b/acceptance/bundle/validate/empty_resources/with_permissions/output.txt index c68d74af2a..3626b86ba7 100644 --- a/acceptance/bundle/validate/empty_resources/with_permissions/output.txt +++ b/acceptance/bundle/validate/empty_resources/with_permissions/output.txt @@ -66,6 +66,14 @@ Warning: required field "catalog_name" is not set at resources.registered_models.rname in databricks.yml:7:7 +Warning: required field "name" is not set + at resources.registered_models.rname + in databricks.yml:7:7 + +Warning: required field "schema_name" is not set + at resources.registered_models.rname + in databricks.yml:7:7 + { 
"registered_models": { "rname": {} @@ -81,6 +89,14 @@ Warning: required field "table_name" is not set at resources.quality_monitors.rname in databricks.yml:7:7 +Warning: required field "assets_dir" is not set + at resources.quality_monitors.rname + in databricks.yml:7:7 + +Warning: required field "output_schema_name" is not set + at resources.quality_monitors.rname + in databricks.yml:7:7 + { "quality_monitors": { "rname": {} @@ -96,6 +112,10 @@ Warning: required field "catalog_name" is not set at resources.schemas.rname in databricks.yml:7:7 +Warning: required field "name" is not set + at resources.schemas.rname + in databricks.yml:7:7 + { "schemas": { "rname": {} @@ -111,6 +131,14 @@ Warning: required field "catalog_name" is not set at resources.volumes.rname in databricks.yml:7:7 +Warning: required field "name" is not set + at resources.volumes.rname + in databricks.yml:7:7 + +Warning: required field "schema_name" is not set + at resources.volumes.rname + in databricks.yml:7:7 + { "volumes": { "rname": { diff --git a/acceptance/bundle/validate/volume_defaults/databricks.yml b/acceptance/bundle/validate/volume_defaults/databricks.yml index 5a93e15ec0..3200e2b1e8 100644 --- a/acceptance/bundle/validate/volume_defaults/databricks.yml +++ b/acceptance/bundle/validate/volume_defaults/databricks.yml @@ -2,9 +2,12 @@ resources: volumes: v1: volume_type: "" + catalog_name: "main" - v2: - volume_type: "already-set" + # v2: + # volume_type: "already-set" + # catalog_name: "main" - v3: - comment: hello + # v3: + # comment: hello + # catalog_name: "main" diff --git a/acceptance/bundle/validate/volume_defaults/output.txt b/acceptance/bundle/validate/volume_defaults/output.txt index e6cd44bf83..fa238a76e8 100644 --- a/acceptance/bundle/validate/volume_defaults/output.txt +++ b/acceptance/bundle/validate/volume_defaults/output.txt @@ -1,24 +1,14 @@ -Warning: required field "catalog_name" is not set - at resources.volumes.v2 - in databricks.yml:8:7 - -Warning: required field "catalog_name" is not set - at resources.volumes.v3 - in databricks.yml:10:7 +Warning: required field "name" is not set + at resources.volumes.v1 + in databricks.yml:6:7 -Warning: required field "catalog_name" is not set +Warning: required field "schema_name" is not set at resources.volumes.v1 in databricks.yml:6:7 { "v1": { + "catalog_name": "main", "volume_type": "" - }, - "v2": { - "volume_type": "already-set" - }, - "v3": { - "comment": "hello", - "volume_type": "MANAGED" } } diff --git a/acceptance/bundle/variables/prepend-workspace-var/output.txt b/acceptance/bundle/variables/prepend-workspace-var/output.txt index 99bca80722..979767b037 100644 --- a/acceptance/bundle/variables/prepend-workspace-var/output.txt +++ b/acceptance/bundle/variables/prepend-workspace-var/output.txt @@ -1,11 +1,15 @@ /Workspace should be prepended on all paths, but it is not the case: -Warning: required field "name" is not set - at bundle - Warning: required field "entry_point" is not set at resources.jobs.my_job.tasks[0].python_wheel_task in databricks.yml:23:13 +Warning: required field "package_name" is not set + at resources.jobs.my_job.tasks[0].python_wheel_task + in databricks.yml:23:13 + +Warning: required field "name" is not set + at bundle + { "bundle": { "environment": "dev", diff --git a/bundle/config/validate/required.go b/bundle/config/validate/required.go index fd9e3fc28d..0cbd57a07e 100644 --- a/bundle/config/validate/required.go +++ b/bundle/config/validate/required.go @@ -39,8 +39,6 @@ func (f *required) Apply(ctx context.Context, b 
*bundle.Bundle) diag.Diagnostics Locations: v.Locations(), Paths: []dyn.Path{p}, }) - - return dyn.NilValue, nil } } return v, nil From 539bd714106be35e612f955dfc91902586cc7b45 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 12 Jun 2025 14:16:16 +0200 Subject: [PATCH 09/30] again --- .../bundle/validate/volume_defaults/databricks.yml | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/acceptance/bundle/validate/volume_defaults/databricks.yml b/acceptance/bundle/validate/volume_defaults/databricks.yml index 3200e2b1e8..5a93e15ec0 100644 --- a/acceptance/bundle/validate/volume_defaults/databricks.yml +++ b/acceptance/bundle/validate/volume_defaults/databricks.yml @@ -2,12 +2,9 @@ resources: volumes: v1: volume_type: "" - catalog_name: "main" - # v2: - # volume_type: "already-set" - # catalog_name: "main" + v2: + volume_type: "already-set" - # v3: - # comment: hello - # catalog_name: "main" + v3: + comment: hello From 5c8d934bbc19100abee9ce206092321b469abb4f Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 12 Jun 2025 14:16:40 +0200 Subject: [PATCH 10/30] - --- .../validate/volume_defaults/output.txt | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/acceptance/bundle/validate/volume_defaults/output.txt b/acceptance/bundle/validate/volume_defaults/output.txt index fa238a76e8..986acc18c3 100644 --- a/acceptance/bundle/validate/volume_defaults/output.txt +++ b/acceptance/bundle/validate/volume_defaults/output.txt @@ -1,3 +1,7 @@ +Warning: required field "catalog_name" is not set + at resources.volumes.v1 + in databricks.yml:6:7 + Warning: required field "name" is not set at resources.volumes.v1 in databricks.yml:6:7 @@ -6,9 +10,39 @@ Warning: required field "schema_name" is not set at resources.volumes.v1 in databricks.yml:6:7 +Warning: required field "catalog_name" is not set + at resources.volumes.v2 + in databricks.yml:8:7 + +Warning: required field "name" is not set + at resources.volumes.v2 + in databricks.yml:8:7 + +Warning: required field "schema_name" is not set + at resources.volumes.v2 + in databricks.yml:8:7 + +Warning: required field "catalog_name" is not set + at resources.volumes.v3 + in databricks.yml:10:7 + +Warning: required field "name" is not set + at resources.volumes.v3 + in databricks.yml:10:7 + +Warning: required field "schema_name" is not set + at resources.volumes.v3 + in databricks.yml:10:7 + { "v1": { - "catalog_name": "main", "volume_type": "" + }, + "v2": { + "volume_type": "already-set" + }, + "v3": { + "comment": "hello", + "volume_type": "MANAGED" } } From e4124ae0c44a64a896d87114b7abeb8389677fa4 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 12 Jun 2025 14:18:54 +0200 Subject: [PATCH 11/30] - --- bundle/config/validate/required.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bundle/config/validate/required.go b/bundle/config/validate/required.go index 0cbd57a07e..1dc6cc0172 100644 --- a/bundle/config/validate/required.go +++ b/bundle/config/validate/required.go @@ -44,7 +44,7 @@ func (f *required) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics return v, nil }) if dyn.IsExpectedMapError(err) || dyn.IsExpectedSequenceError(err) || dyn.IsExpectedMapToIndexError(err) || dyn.IsExpectedSequenceToIndexError(err) { - // No map or sequence defined for this pattern, so we ignore it. + // No map or sequence value is set at this pattern, so we ignore it. 
continue } if err != nil { From 6ed086850eb1814e37990f85079780229c56d2cc Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 12 Jun 2025 14:42:23 +0200 Subject: [PATCH 12/30] deterministic output --- acceptance/acceptance_test.go | 2 +- .../bundle/validate/required/databricks.yml | 13 ++++++++++ .../bundle/validate/required/output.txt | 25 +++++++++++++++++++ acceptance/bundle/validate/required/script | 1 + bundle/config/validate/required.go | 14 +++++++++++ bundle/phases/initialize.go | 8 +++--- 6 files changed, 58 insertions(+), 5 deletions(-) create mode 100644 acceptance/bundle/validate/required/databricks.yml create mode 100644 acceptance/bundle/validate/required/output.txt create mode 100644 acceptance/bundle/validate/required/script diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index 1e565d8a7a..336182f35b 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -105,7 +105,7 @@ var Ignored = map[string]bool{ } func TestAccept(t *testing.T) { - testAccept(t, InprocessMode, "") + testAccept(t, true, "bundle/validate/required") } func TestInprocessMode(t *testing.T) { diff --git a/acceptance/bundle/validate/required/databricks.yml b/acceptance/bundle/validate/required/databricks.yml new file mode 100644 index 0000000000..18947a0e77 --- /dev/null +++ b/acceptance/bundle/validate/required/databricks.yml @@ -0,0 +1,13 @@ +artifacts: + zab: + files: + - {} + +resources: + models: + foo: + description: "hello" + + volumes: + baz: + name: "baz" diff --git a/acceptance/bundle/validate/required/output.txt b/acceptance/bundle/validate/required/output.txt new file mode 100644 index 0000000000..5d7d275998 --- /dev/null +++ b/acceptance/bundle/validate/required/output.txt @@ -0,0 +1,25 @@ + +>>> [CLI] bundle validate +Warning: required field "catalog_name" is not set + at resources.volumes.baz + in databricks.yml:13:7 + +Warning: required field "name" is not set + at resources.models.foo + in databricks.yml:10:7 + +Warning: required field "schema_name" is not set + at resources.volumes.baz + in databricks.yml:13:7 + +Warning: required field "source" is not set + at artifacts.zab.files[0] + in databricks.yml:4:9 + +Name: test-bundle +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle/default + +Found 4 warnings diff --git a/acceptance/bundle/validate/required/script b/acceptance/bundle/validate/required/script new file mode 100644 index 0000000000..5350876150 --- /dev/null +++ b/acceptance/bundle/validate/required/script @@ -0,0 +1 @@ +trace $CLI bundle validate diff --git a/bundle/config/validate/required.go b/bundle/config/validate/required.go index 1dc6cc0172..30324e9521 100644 --- a/bundle/config/validate/required.go +++ b/bundle/config/validate/required.go @@ -3,6 +3,7 @@ package validate import ( "context" "fmt" + "sort" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/internal/validation/generated" @@ -52,5 +53,18 @@ func (f *required) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics } } + // Sort diagnostics to make them deterministic + sort.Slice(diags, func(i, j int) bool { + // First sort by summary + if diags[i].Summary != diags[j].Summary { + return diags[i].Summary < diags[j].Summary + } + + // Then sort by locations as a tie breaker if summaries are the same. 
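+	// For example, the three "catalog_name" warnings in the volume_defaults
+	// acceptance test share a summary and are therefore ordered by the rendered
+	// location string ("databricks.yml:10:7" sorts before "databricks.yml:6:7"),
+	// which keeps the expected output stable across runs.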
+ iLocs := fmt.Sprintf("%v", diags[i].Locations) + jLocs := fmt.Sprintf("%v", diags[j].Locations) + return iLocs < jLocs + }) + return diags } diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index cc18fc1266..597302c38d 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -148,6 +148,10 @@ func Initialize(ctx context.Context, b *bundle.Bundle) { // After PythonMutator, mutators must not change bundle resources, or such changes are not // going to be visible in Python code. + // Validate all required fields are set. This is run after variable interpolation and PyDABs mutators + // since they can also set and modify resources. + validate.Required(), + // Reads (typed): b.Config.Permissions (checks if current user or their groups have CAN_MANAGE permissions) // Reads (typed): b.Config.Workspace.CurrentUser (gets current user information) // Provides diagnostic recommendations if the current deployment identity isn't explicitly granted CAN_MANAGE permissions @@ -181,10 +185,6 @@ func Initialize(ctx context.Context, b *bundle.Bundle) { // Validates that when using a shared workspace path, appropriate permissions are configured permissions.ValidateSharedRootPermissions(), - // Validate all required fields are set. This is run after variable interpolation and PyDABs mutators - // since they can also set and modify resources. - validate.Required(), - // Annotate resources with "deployment" metadata. // // We don't include this step into initializeResources because these mutators set fields that are From 7516ec1e7887b18e298bc658f48c5dfbcc1ca52a Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 12 Jun 2025 14:44:32 +0200 Subject: [PATCH 13/30] deterministic output --- acceptance/acceptance_test.go | 2 +- acceptance/bundle/debug/tf/out.stderr.txt | 2 +- acceptance/bundle/paths/fallback/output.txt | 6 +++- acceptance/bundle/paths/nominal/output.txt | 30 ++++++++++++++--- .../empty_resources/empty_dict/output.txt | 14 ++++++-- .../empty_resources/with_grants/output.txt | 14 ++++++-- .../with_permissions/output.txt | 14 ++++++-- .../validate/volume_defaults/output.txt | 32 +++++++++---------- .../prepend-workspace-var/output.txt | 6 ++-- 9 files changed, 84 insertions(+), 36 deletions(-) diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index 336182f35b..1e565d8a7a 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -105,7 +105,7 @@ var Ignored = map[string]bool{ } func TestAccept(t *testing.T) { - testAccept(t, true, "bundle/validate/required") + testAccept(t, InprocessMode, "") } func TestInprocessMode(t *testing.T) { diff --git a/acceptance/bundle/debug/tf/out.stderr.txt b/acceptance/bundle/debug/tf/out.stderr.txt index 6d9d7a1b1b..18c6a2cfb5 100644 --- a/acceptance/bundle/debug/tf/out.stderr.txt +++ b/acceptance/bundle/debug/tf/out.stderr.txt @@ -48,6 +48,7 @@ 10:07:59 Debug: Apply pid=12345 mutator=ProcessStaticResources mutator=TranslatePathsDashboards 10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(load_resources) 10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(apply_mutators) +10:07:59 Debug: Apply pid=12345 mutator=validate:required 10:07:59 Debug: Apply pid=12345 mutator=CheckPermissions 10:07:59 Debug: Apply pid=12345 mutator=TranslatePaths 10:07:59 Debug: Apply pid=12345 mutator=PythonWrapperWarning @@ -57,7 +58,6 @@ 10:07:59 Debug: Apply pid=12345 mutator=apps.Validate 10:07:59 Debug: Apply pid=12345 mutator=ValidateTargetMode 10:07:59 Debug: Apply pid=12345 
mutator=ValidateSharedRootPermissions -10:07:59 Debug: Apply pid=12345 mutator=validate:required 10:07:59 Debug: Apply pid=12345 mutator=metadata.AnnotateJobs 10:07:59 Debug: Apply pid=12345 mutator=metadata.AnnotatePipelines 10:07:59 Debug: Apply pid=12345 mutator=terraform.Initialize diff --git a/acceptance/bundle/paths/fallback/output.txt b/acceptance/bundle/paths/fallback/output.txt index 41ef87f9b3..67f011a194 100644 --- a/acceptance/bundle/paths/fallback/output.txt +++ b/acceptance/bundle/paths/fallback/output.txt @@ -15,6 +15,10 @@ Warning: required field "entry_point" is not set >>> [CLI] bundle validate -t error +Warning: required field "entry_point" is not set + at resources.jobs.my_job.tasks[4].python_wheel_task + in resources/my_job.yml:33:13 + Error: notebook "resources/this value is overridden" not found. Local notebook references are expected to contain one of the following file extensions: [.py, .r, .scala, .sql, .ipynb] @@ -24,6 +28,6 @@ Workspace: User: [USERNAME] Path: /Workspace/Users/[USERNAME]/.bundle/fallback/error -Found 1 error +Found 1 error and 1 warning Exit code: 1 diff --git a/acceptance/bundle/paths/nominal/output.txt b/acceptance/bundle/paths/nominal/output.txt index c7f7b08e61..7c44604d25 100644 --- a/acceptance/bundle/paths/nominal/output.txt +++ b/acceptance/bundle/paths/nominal/output.txt @@ -1,5 +1,20 @@ >>> [CLI] bundle validate -t development -o json +Warning: required field "entry_point" is not set + at resources.jobs.my_job.tasks[4].python_wheel_task + in resources/my_job.yml:33:13 + override_job.yml:29:17 + +Warning: required field "inputs" is not set + at resources.jobs.my_job.tasks[6].for_each_task + in resources/my_job.yml:47:13 + override_job.yml:45:17 + +Warning: required field "inputs" is not set + at resources.jobs.my_job.tasks[7].for_each_task + in resources/my_job.yml:53:13 + override_job.yml:51:17 + Warning: required field "task_key" is not set at resources.jobs.my_job.tasks[6].for_each_task.task in resources/my_job.yml:48:15 @@ -10,23 +25,28 @@ Warning: required field "task_key" is not set in resources/my_job.yml:54:15 override_job.yml:52:19 + +>>> [CLI] bundle validate -t error Warning: required field "entry_point" is not set at resources.jobs.my_job.tasks[4].python_wheel_task in resources/my_job.yml:33:13 - override_job.yml:29:17 Warning: required field "inputs" is not set at resources.jobs.my_job.tasks[6].for_each_task in resources/my_job.yml:47:13 - override_job.yml:45:17 Warning: required field "inputs" is not set at resources.jobs.my_job.tasks[7].for_each_task in resources/my_job.yml:53:13 - override_job.yml:51:17 +Warning: required field "task_key" is not set + at resources.jobs.my_job.tasks[6].for_each_task.task + in resources/my_job.yml:48:15 + +Warning: required field "task_key" is not set + at resources.jobs.my_job.tasks[7].for_each_task.task + in resources/my_job.yml:54:15 ->>> [CLI] bundle validate -t error Error: notebook "resources/this value is overridden" not found. 
Local notebook references are expected to contain one of the following file extensions: [.py, .r, .scala, .sql, .ipynb] @@ -36,6 +56,6 @@ Workspace: User: [USERNAME] Path: /Workspace/Users/[USERNAME]/.bundle/nominal/error -Found 1 error +Found 1 error and 5 warnings Exit code: 1 diff --git a/acceptance/bundle/validate/empty_resources/empty_dict/output.txt b/acceptance/bundle/validate/empty_resources/empty_dict/output.txt index 15bc20acbf..34b50a12e9 100644 --- a/acceptance/bundle/validate/empty_resources/empty_dict/output.txt +++ b/acceptance/bundle/validate/empty_resources/empty_dict/output.txt @@ -77,15 +77,15 @@ Warning: required field "schema_name" is not set } === resources.quality_monitors.rname === -Warning: required field "table_name" is not set +Warning: required field "assets_dir" is not set at resources.quality_monitors.rname in databricks.yml:6:12 -Warning: required field "assets_dir" is not set +Warning: required field "output_schema_name" is not set at resources.quality_monitors.rname in databricks.yml:6:12 -Warning: required field "output_schema_name" is not set +Warning: required field "table_name" is not set at resources.quality_monitors.rname in databricks.yml:6:12 @@ -150,6 +150,14 @@ Warning: required field "schema_name" is not set } === resources.apps.rname === +Warning: required field "name" is not set + at resources.apps.rname + in databricks.yml:6:12 + +Warning: required field "source_code_path" is not set + at resources.apps.rname + in databricks.yml:6:12 + Error: Missing app source code path in databricks.yml:6:12 diff --git a/acceptance/bundle/validate/empty_resources/with_grants/output.txt b/acceptance/bundle/validate/empty_resources/with_grants/output.txt index d049da8f80..ecf1ca2f75 100644 --- a/acceptance/bundle/validate/empty_resources/with_grants/output.txt +++ b/acceptance/bundle/validate/empty_resources/with_grants/output.txt @@ -99,15 +99,15 @@ Warning: unknown field: grants at resources.quality_monitors.rname in databricks.yml:7:7 -Warning: required field "table_name" is not set +Warning: required field "assets_dir" is not set at resources.quality_monitors.rname in databricks.yml:7:7 -Warning: required field "assets_dir" is not set +Warning: required field "output_schema_name" is not set at resources.quality_monitors.rname in databricks.yml:7:7 -Warning: required field "output_schema_name" is not set +Warning: required field "table_name" is not set at resources.quality_monitors.rname in databricks.yml:7:7 @@ -187,6 +187,14 @@ Warning: unknown field: grants at resources.apps.rname in databricks.yml:7:7 +Warning: required field "name" is not set + at resources.apps.rname + in databricks.yml:7:7 + +Warning: required field "source_code_path" is not set + at resources.apps.rname + in databricks.yml:7:7 + Error: Missing app source code path in databricks.yml:7:7 diff --git a/acceptance/bundle/validate/empty_resources/with_permissions/output.txt b/acceptance/bundle/validate/empty_resources/with_permissions/output.txt index 3626b86ba7..6ffecee1af 100644 --- a/acceptance/bundle/validate/empty_resources/with_permissions/output.txt +++ b/acceptance/bundle/validate/empty_resources/with_permissions/output.txt @@ -85,15 +85,15 @@ Warning: unknown field: permissions at resources.quality_monitors.rname in databricks.yml:7:7 -Warning: required field "table_name" is not set +Warning: required field "assets_dir" is not set at resources.quality_monitors.rname in databricks.yml:7:7 -Warning: required field "assets_dir" is not set +Warning: required field 
"output_schema_name" is not set at resources.quality_monitors.rname in databricks.yml:7:7 -Warning: required field "output_schema_name" is not set +Warning: required field "table_name" is not set at resources.quality_monitors.rname in databricks.yml:7:7 @@ -168,6 +168,14 @@ Warning: required field "schema_name" is not set } === resources.apps.rname === +Warning: required field "name" is not set + at resources.apps.rname + in databricks.yml:7:7 + +Warning: required field "source_code_path" is not set + at resources.apps.rname + in databricks.yml:7:7 + Error: Missing app source code path in databricks.yml:7:7 diff --git a/acceptance/bundle/validate/volume_defaults/output.txt b/acceptance/bundle/validate/volume_defaults/output.txt index 986acc18c3..ca01eb5457 100644 --- a/acceptance/bundle/validate/volume_defaults/output.txt +++ b/acceptance/bundle/validate/volume_defaults/output.txt @@ -1,12 +1,8 @@ Warning: required field "catalog_name" is not set - at resources.volumes.v1 - in databricks.yml:6:7 - -Warning: required field "name" is not set - at resources.volumes.v1 - in databricks.yml:6:7 + at resources.volumes.v3 + in databricks.yml:10:7 -Warning: required field "schema_name" is not set +Warning: required field "catalog_name" is not set at resources.volumes.v1 in databricks.yml:6:7 @@ -15,24 +11,28 @@ Warning: required field "catalog_name" is not set in databricks.yml:8:7 Warning: required field "name" is not set - at resources.volumes.v2 - in databricks.yml:8:7 + at resources.volumes.v3 + in databricks.yml:10:7 -Warning: required field "schema_name" is not set +Warning: required field "name" is not set + at resources.volumes.v1 + in databricks.yml:6:7 + +Warning: required field "name" is not set at resources.volumes.v2 in databricks.yml:8:7 -Warning: required field "catalog_name" is not set +Warning: required field "schema_name" is not set at resources.volumes.v3 in databricks.yml:10:7 -Warning: required field "name" is not set - at resources.volumes.v3 - in databricks.yml:10:7 +Warning: required field "schema_name" is not set + at resources.volumes.v1 + in databricks.yml:6:7 Warning: required field "schema_name" is not set - at resources.volumes.v3 - in databricks.yml:10:7 + at resources.volumes.v2 + in databricks.yml:8:7 { "v1": { diff --git a/acceptance/bundle/variables/prepend-workspace-var/output.txt b/acceptance/bundle/variables/prepend-workspace-var/output.txt index 979767b037..201dc769af 100644 --- a/acceptance/bundle/variables/prepend-workspace-var/output.txt +++ b/acceptance/bundle/variables/prepend-workspace-var/output.txt @@ -3,13 +3,13 @@ Warning: required field "entry_point" is not set at resources.jobs.my_job.tasks[0].python_wheel_task in databricks.yml:23:13 +Warning: required field "name" is not set + at bundle + Warning: required field "package_name" is not set at resources.jobs.my_job.tasks[0].python_wheel_task in databricks.yml:23:13 -Warning: required field "name" is not set - at bundle - { "bundle": { "environment": "dev", From 126dfb8f8c1e83492df31afa2dcebdd87658a669 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 12 Jun 2025 14:50:09 +0200 Subject: [PATCH 14/30] -gs --- bundle/config/validate/required.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bundle/config/validate/required.go b/bundle/config/validate/required.go index 30324e9521..de739bbd5b 100644 --- a/bundle/config/validate/required.go +++ b/bundle/config/validate/required.go @@ -30,6 +30,9 @@ func (f *required) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics return 
diag.FromErr(fmt.Errorf("invalid pattern %q for required field validation: %w", k, err)) } + // Note we only emit diagnostics for fields that are not set. If a field is set to a zero + // value we don't emit a diagnostic. This is so that we defer the interpretation of zero values + // to the server. _, err = dyn.MapByPattern(b.Config.Value(), pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { for _, requiredField := range requiredFields { vv := v.Get(requiredField) From 1ab0efb1bfbd5b36cbb9932848d2580d03f7bafb Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 13 Jun 2025 14:18:09 +0200 Subject: [PATCH 15/30] Add benchmark --- bundle/config/validate/required_test.go | 239 ++++++++++++++++++++++++ 1 file changed, 239 insertions(+) create mode 100644 bundle/config/validate/required_test.go diff --git a/bundle/config/validate/required_test.go b/bundle/config/validate/required_test.go new file mode 100644 index 0000000000..0ccf40ba16 --- /dev/null +++ b/bundle/config/validate/required_test.go @@ -0,0 +1,239 @@ +package validate + +import ( + "context" + "encoding/json" + "fmt" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const jobExample = ` +{ + "budget_policy_id": "550e8400-e29b-41d4-a716-446655440000", + "continuous": { + "pause_status": "UNPAUSED" + }, + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "string" + }, + "description": "This job contain multiple tasks that are required to produce the weekly shark sightings report.", + "edit_mode": "UI_LOCKED", + "email_notifications": { + "no_alert_for_skipped_runs": false, + "on_duration_warning_threshold_exceeded": [ + "user.name@databricks.com" + ], + "on_failure": [ + "user.name@databricks.com" + ], + "on_start": [ + "user.name@databricks.com" + ], + "on_streaming_backlog_exceeded": [ + "user.name@databricks.com" + ], + "on_success": [ + "user.name@databricks.com" + ] + }, + "environments": [ + { + "environment_key": "string", + "spec": { + "client": "1", + "dependencies": [ + "string" + ] + } + } + ], + "format": "SINGLE_TASK", + "git_source": { + "git_branch": "main", + "git_provider": "gitHub", + "git_url": "https://github.com/databricks/databricks-cli" + }, + "health": { + "rules": [ + { + "metric": "RUN_DURATION_SECONDS", + "op": "GREATER_THAN", + "value": 10 + } + ] + }, + "job_clusters": [ + { + "job_cluster_key": "auto_scaling_cluster", + "new_cluster": { + "autoscale": { + "max_workers": 16, + "min_workers": 2 + }, + "node_type_id": null, + "spark_conf": { + "spark.speculation": "true" + }, + "spark_version": "7.3.x-scala2.12" + } + } + ], + "max_concurrent_runs": 10, + "name": "A multitask job", + "notification_settings": { + "no_alert_for_canceled_runs": false, + "no_alert_for_skipped_runs": false + }, + "parameters": [ + { + "default": "users", + "name": "table" + } + ], + "performance_target": "PERFORMANCE_OPTIMIZED", + "queue": { + "enabled": true + }, + "run_as": { + "service_principal_name": "692bc6d0-ffa3-11ed-be56-0242ac120002", + "user_name": "user@databricks.com" + }, + "schedule": { + "pause_status": "UNPAUSED", + "quartz_cron_expression": "20 30 * * * ?", + "timezone_id": "Europe/London" + }, + "tags": { + "cost-center": "engineering", + "team": "jobs" + }, + "tasks": [ + { + "depends_on": [], + "description": "Extracts session data from events", + 
"existing_cluster_id": "0923-164208-meows279", + "libraries": [ + { + "jar": "dbfs:/mnt/databricks/Sessionize.jar" + } + ], + "max_retries": 3, + "min_retry_interval_millis": 2000, + "retry_on_timeout": false, + "spark_jar_task": { + "main_class_name": "com.databricks.Sessionize", + "parameters": [ + "--data", + "dbfs:/path/to/data.json" + ] + }, + "task_key": "Sessionize", + "timeout_seconds": 86400 + }, + { + "depends_on": [], + "description": "Ingests order data", + "job_cluster_key": "auto_scaling_cluster", + "libraries": [ + { + "jar": "dbfs:/mnt/databricks/OrderIngest.jar" + } + ], + "max_retries": 3, + "min_retry_interval_millis": 2000, + "retry_on_timeout": false, + "spark_jar_task": { + "main_class_name": "com.databricks.OrdersIngest", + "parameters": [ + "--data", + "dbfs:/path/to/order-data.json" + ] + }, + "task_key": "Orders_Ingest", + "timeout_seconds": 86400 + }, + { + "depends_on": [ + { + "task_key": "Orders_Ingest" + }, + { + "task_key": "Sessionize" + } + ], + "description": "Matches orders with user sessions", + "max_retries": 3, + "min_retry_interval_millis": 2000, + "new_cluster": { + "autoscale": { + "max_workers": 16, + "min_workers": 2 + }, + "node_type_id": null, + "spark_conf": { + "spark.speculation": "true" + }, + "spark_version": "7.3.x-scala2.12" + }, + "notebook_task": { + "base_parameters": { + "age": "35", + "name": "John Doe" + }, + "notebook_path": "/Users/user.name@databricks.com/Match" + }, + "retry_on_timeout": false, + "run_if": "ALL_SUCCESS", + "timeout_seconds": 86400 + } + ], + "timeout_seconds": 86400, + "trigger": { + "file_arrival": { + "min_time_between_triggers_seconds": 0, + "url": "string", + "wait_after_last_change_seconds": 0 + }, + "pause_status": "UNPAUSED", + "periodic": { + "interval": 0, + "unit": "HOURS" + } + } +}` + +// This benchmark took 3.6 seconds to run on on 13th June 2025. +func BenchmarkValidateRequired(b *testing.B) { + allJobs := map[string]*resources.Job{} + for i := 0; i < 10000; i++ { + // we repeated unmarshal the job to ensure each job occupies distinct memory. + job := jobs.JobSettings{} + err := json.Unmarshal([]byte(jobExample), &job) + require.NoError(b, err) + + allJobs[fmt.Sprintf("s%d", i)] = &resources.Job{ + JobSettings: job, + } + } + + myBundle := bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: allJobs, + }, + }, + } + + for b.Loop() { + diags := bundle.Apply(context.Background(), &myBundle, Required()) + assert.Len(b, diags, 20000) + } +} From 3607bb7c607bbbc9d988f49686c5707ab3eea697 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 13 Jun 2025 14:22:03 +0200 Subject: [PATCH 16/30] add more benchmarks --- bundle/config/validate/required_test.go | 28 ++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/bundle/config/validate/required_test.go b/bundle/config/validate/required_test.go index 0ccf40ba16..b9cf211412 100644 --- a/bundle/config/validate/required_test.go +++ b/bundle/config/validate/required_test.go @@ -210,11 +210,9 @@ const jobExample = ` } }` -// This benchmark took 3.6 seconds to run on on 13th June 2025. -func BenchmarkValidateRequired(b *testing.B) { +func benchmarkRequired(b *testing.B, numJobs int) { allJobs := map[string]*resources.Job{} - for i := 0; i < 10000; i++ { - // we repeated unmarshal the job to ensure each job occupies distinct memory. 
+ for i := 0; i < numJobs; i++ { job := jobs.JobSettings{} err := json.Unmarshal([]byte(jobExample), &job) require.NoError(b, err) @@ -234,6 +232,26 @@ func BenchmarkValidateRequired(b *testing.B) { for b.Loop() { diags := bundle.Apply(context.Background(), &myBundle, Required()) - assert.Len(b, diags, 20000) + assert.Len(b, diags, 2*numJobs) } } + +// This benchmark took 3.5 seconds to run on 13th June 2025. +func BenchmarkValidateRequired10000(b *testing.B) { + benchmarkRequired(b, 10000) +} + +// This benchmark took 0.28 seconds to run on 13th June 2025. +func BenchmarkValidateRequired1000(b *testing.B) { + benchmarkRequired(b, 1000) +} + +// This benchmark took 26 ms to run on 13th June 2025. +func BenchmarkValidateRequired100(b *testing.B) { + benchmarkRequired(b, 100) +} + +// This benchmark took 3 ms to run on 13th June 2025. +func BenchmarkValidateRequired10(b *testing.B) { + benchmarkRequired(b, 10) +} From b112506fdbddefad6cf7f781033ade40b3702310 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 13 Jun 2025 14:26:43 +0200 Subject: [PATCH 17/30] lint --- bundle/config/validate/required_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bundle/config/validate/required_test.go b/bundle/config/validate/required_test.go index b9cf211412..c73604ebe2 100644 --- a/bundle/config/validate/required_test.go +++ b/bundle/config/validate/required_test.go @@ -212,7 +212,7 @@ const jobExample = ` func benchmarkRequired(b *testing.B, numJobs int) { allJobs := map[string]*resources.Job{} - for i := 0; i < numJobs; i++ { + for i := range numJobs { job := jobs.JobSettings{} err := json.Unmarshal([]byte(jobExample), &job) require.NoError(b, err) From 5c85284d8fce6333babc41d9cf5e2dde49813e7d Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 13 Jun 2025 16:28:45 +0200 Subject: [PATCH 18/30] baseline --- bundle/config/validate/required_test.go | 42 +++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/bundle/config/validate/required_test.go b/bundle/config/validate/required_test.go index c73604ebe2..477b215cb9 100644 --- a/bundle/config/validate/required_test.go +++ b/bundle/config/validate/required_test.go @@ -9,6 +9,8 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -236,6 +238,46 @@ func benchmarkRequired(b *testing.B, numJobs int) { } } +func benchmarkBaseline(b *testing.B, numJobs int) { + allJobs := map[string]*resources.Job{} + for i := range numJobs { + job := jobs.JobSettings{} + err := json.Unmarshal([]byte(jobExample), &job) + require.NoError(b, err) + + allJobs[fmt.Sprintf("s%d", i)] = &resources.Job{ + JobSettings: job, + } + } + + myBundle := bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: allJobs, + }, + }, + } + + for b.Loop() { + paths := []dyn.Path{} + bundle.ApplyFunc(context.Background(), &myBundle, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + dyn.Walk(b.Config.Value(), func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + paths = append(paths, p) + return v, nil + }) + return nil + }) + } +} + +// This benchmark took 1.43 seconds to run on 13th June 2025. 
+// This benchmark compared the additional cost of using the dyn.MapByPattern +// function compared to using dyn.Walk which walks the entire configuration tree +// once. +func BenchmarkBaseline(b *testing.B) { + benchmarkBaseline(b, 10000) +} + // This benchmark took 3.5 seconds to run on 13th June 2025. func BenchmarkValidateRequired10000(b *testing.B) { benchmarkRequired(b, 10000) From b95da1b2fa7ca00ca10204a18917c635b67a1f87 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 13 Jun 2025 16:38:26 +0200 Subject: [PATCH 19/30] refactor --- bundle/config/validate/required_test.go | 29 +++++++------------------ 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/bundle/config/validate/required_test.go b/bundle/config/validate/required_test.go index 477b215cb9..c12bb3736d 100644 --- a/bundle/config/validate/required_test.go +++ b/bundle/config/validate/required_test.go @@ -212,25 +212,29 @@ const jobExample = ` } }` -func benchmarkRequired(b *testing.B, numJobs int) { +func setupBundle(b *testing.B, numJobs int) bundle.Bundle { allJobs := map[string]*resources.Job{} for i := range numJobs { job := jobs.JobSettings{} err := json.Unmarshal([]byte(jobExample), &job) require.NoError(b, err) - allJobs[fmt.Sprintf("s%d", i)] = &resources.Job{ + allJobs[fmt.Sprintf("%d", i)] = &resources.Job{ JobSettings: job, } } - myBundle := bundle.Bundle{ + return bundle.Bundle{ Config: config.Root{ Resources: config.Resources{ Jobs: allJobs, }, }, } +} + +func benchmarkRequired(b *testing.B, numJobs int) { + myBundle := setupBundle(b, numJobs) for b.Loop() { diags := bundle.Apply(context.Background(), &myBundle, Required()) @@ -239,24 +243,7 @@ func benchmarkRequired(b *testing.B, numJobs int) { } func benchmarkBaseline(b *testing.B, numJobs int) { - allJobs := map[string]*resources.Job{} - for i := range numJobs { - job := jobs.JobSettings{} - err := json.Unmarshal([]byte(jobExample), &job) - require.NoError(b, err) - - allJobs[fmt.Sprintf("s%d", i)] = &resources.Job{ - JobSettings: job, - } - } - - myBundle := bundle.Bundle{ - Config: config.Root{ - Resources: config.Resources{ - Jobs: allJobs, - }, - }, - } + myBundle := setupBundle(b, numJobs) for b.Loop() { paths := []dyn.Path{} From b405af6d4d9323d14f7ddbbcf5c8c162ae1f64fc Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Tue, 17 Jun 2025 18:24:04 +0200 Subject: [PATCH 20/30] more efficient validation --- bundle/config/validate/required.go | 83 +++++++--- libs/dyn/pattern.go | 33 ++++ libs/dyn/pattern_test.go | 18 +++ libs/dyn/walk_read.go | 41 +++++ libs/dyn/walk_read_test.go | 251 +++++++++++++++++++++++++++++ 5 files changed, 403 insertions(+), 23 deletions(-) create mode 100644 libs/dyn/walk_read.go create mode 100644 libs/dyn/walk_read_test.go diff --git a/bundle/config/validate/required.go b/bundle/config/validate/required.go index de739bbd5b..7e379e95da 100644 --- a/bundle/config/validate/required.go +++ b/bundle/config/validate/required.go @@ -24,38 +24,75 @@ func (f *required) Name() string { func (f *required) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { diags := diag.Diagnostics{} - for k, requiredFields := range generated.RequiredFields { + // Generate prefix tree for all required fields. + trie := dyn.NewPatternTrie() + for k := range generated.RequiredFields { pattern, err := dyn.NewPatternFromString(k) if err != nil { return diag.FromErr(fmt.Errorf("invalid pattern %q for required field validation: %w", k, err)) } - // Note we only emit diagnostics for fields that are not set. 
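+			//
+			// For instance, the concrete path resources.quality_monitors.my_monitor.schedule
+			// matches the generated pattern resources.quality_monitors.*.schedule, so the
+			// required fields registered for that pattern ("quartz_cron_expression" and
+			// "timezone_id") are checked on the value at that path.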
If a field is set to a zero - // value we don't emit a diagnostic. This is so that we defer the interpretation of zero values - // to the server. - _, err = dyn.MapByPattern(b.Config.Value(), pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { - for _, requiredField := range requiredFields { - vv := v.Get(requiredField) - if vv.Kind() == dyn.KindInvalid || vv.Kind() == dyn.KindNil { - diags = diags.Append(diag.Diagnostic{ - Severity: diag.Warning, - Summary: fmt.Sprintf("required field %q is not set", requiredField), - Locations: v.Locations(), - Paths: []dyn.Path{p}, - }) - } - } - return v, nil - }) - if dyn.IsExpectedMapError(err) || dyn.IsExpectedSequenceError(err) || dyn.IsExpectedMapToIndexError(err) || dyn.IsExpectedSequenceToIndexError(err) { - // No map or sequence value is set at this pattern, so we ignore it. - continue - } + err = trie.Insert(pattern) if err != nil { - return diag.FromErr(err) + return diag.FromErr(fmt.Errorf("failed to insert pattern %q into trie: %w", k, err)) } } + dyn.WalkRead(b.Config.Value(), func(p dyn.Path, v dyn.Value) error { + // If the path is not preset in the prefix tree, we do not need to validate any required + // fields in it. + pattern, ok := trie.SearchPath(p) + if !ok { + return nil + } + + fields := generated.RequiredFields[pattern.String()] + for _, field := range fields { + vv := v.Get(field) + if vv.Kind() == dyn.KindInvalid || vv.Kind() == dyn.KindNil { + diags = diags.Append(diag.Diagnostic{ + Severity: diag.Warning, + Summary: fmt.Sprintf("required field %q is not set", field), + Locations: v.Locations(), + Paths: []dyn.Path{p}, + }) + } + } + return nil + }) + + // for k, requiredFields := range generated.RequiredFields { + // pattern, err := dyn.NewPatternFromString(k) + // if err != nil { + // return diag.FromErr(fmt.Errorf("invalid pattern %q for required field validation: %w", k, err)) + // } + + // // Note we only emit diagnostics for fields that are not set. If a field is set to a zero + // // value we don't emit a diagnostic. This is so that we defer the interpretation of zero values + // // to the server. + // _, err = dyn.MapByPattern(b.Config.Value(), pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + // for _, requiredField := range requiredFields { + // vv := v.Get(requiredField) + // if vv.Kind() == dyn.KindInvalid || vv.Kind() == dyn.KindNil { + // diags = diags.Append(diag.Diagnostic{ + // Severity: diag.Warning, + // Summary: fmt.Sprintf("required field %q is not set", requiredField), + // Locations: v.Locations(), + // Paths: []dyn.Path{p}, + // }) + // } + // } + // return v, nil + // }) + // if dyn.IsExpectedMapError(err) || dyn.IsExpectedSequenceError(err) || dyn.IsExpectedMapToIndexError(err) || dyn.IsExpectedSequenceToIndexError(err) { + // // No map or sequence value is set at this pattern, so we ignore it. + // continue + // } + // if err != nil { + // return diag.FromErr(err) + // } + // } + // Sort diagnostics to make them deterministic sort.Slice(diags, func(i, j int) bool { // First sort by summary diff --git a/libs/dyn/pattern.go b/libs/dyn/pattern.go index efbe499761..3b08004a9b 100644 --- a/libs/dyn/pattern.go +++ b/libs/dyn/pattern.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "slices" + "strings" ) // Pattern represents a matcher for paths in a [Value] configuration tree. @@ -12,6 +13,38 @@ import ( // The reverse is not true; not every [Pattern] is a valid [Path], as patterns may contain wildcards. 
type Pattern []patternComponent +func (p Pattern) String() string { + buf := strings.Builder{} + first := true + + for _, c := range p { + switch c.(type) { + case anyKeyComponent: + if !first { + buf.WriteString(".") + } + buf.WriteString("*") + case anyIndexComponent: + buf.WriteString("[*]") + case pathComponent: + v := c.(pathComponent) + if v.isKey() { + if !first { + buf.WriteString(".") + } + buf.WriteString(v.Key()) + } else { + buf.WriteString(fmt.Sprintf("[%d]", v.Index())) + } + default: + buf.WriteString("???") + } + + first = false + } + return buf.String() +} + // A pattern component can visit a [Value] and recursively call into [visit] for matching elements. // Fixed components can match a single key or index, while wildcards can match any key or index. type patternComponent interface { diff --git a/libs/dyn/pattern_test.go b/libs/dyn/pattern_test.go index edc0c32660..a4ac0d7a3a 100644 --- a/libs/dyn/pattern_test.go +++ b/libs/dyn/pattern_test.go @@ -8,6 +8,24 @@ import ( assert "github.com/databricks/cli/libs/dyn/dynassert" ) +func TestPatternString(t *testing.T) { + patterns := []string{ + "foo.bar", + "foo.bar.baz", + "foo[1]", + "foo[*]", + "*[1].bar", + "foo.*.bar", + "foo[*].bar", + "", + } + + for _, p := range patterns { + pp := dyn.MustPatternFromString(p) + assert.Equal(t, p, pp.String()) + } +} + func TestNewPattern(t *testing.T) { pat := dyn.NewPattern( dyn.Key("foo"), diff --git a/libs/dyn/walk_read.go b/libs/dyn/walk_read.go new file mode 100644 index 0000000000..46d9da9251 --- /dev/null +++ b/libs/dyn/walk_read.go @@ -0,0 +1,41 @@ +package dyn + +// WalkRead walks the configuration tree in readonly mode and calls the given function on each node. +// The callback may return ErrSkip to skip traversal of a subtree. +// If the callback returns another error, the walk is aborted, and the error is returned. +func WalkRead(v Value, fn func(p Path, v Value) error) error { + return walkRead(v, EmptyPath, fn) +} + +// Unexported counterpart to WalkRead. +// It carries the path leading up to the current node, +// such that it can be passed to the callback function. 
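+//
+// For reference, a minimal caller-side sketch of WalkRead (the exported entry
+// point), assuming a populated dyn.Value named root; returning ErrSkip prunes
+// traversal of a subtree:
+//
+//	err := WalkRead(root, func(p Path, v Value) error {
+//		if p.String() == "resources" {
+//			return ErrSkip // do not descend into the resources subtree
+//		}
+//		fmt.Println(p.String(), v.Kind())
+//		return nil
+//	})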
+func walkRead(v Value, p Path, fn func(p Path, v Value) error) error { + if err := fn(p, v); err != nil { + if err == ErrSkip { + return nil + } + return err + } + + switch v.Kind() { + case KindMap: + m := v.MustMap() + for _, pair := range m.Pairs() { + pk := pair.Key + pv := pair.Value + if err := walkRead(pv, append(p, Key(pk.MustString())), fn); err != nil { + return err + } + } + case KindSequence: + s := v.MustSequence() + for i := range s { + if err := walkRead(s[i], append(p, Index(i)), fn); err != nil { + return err + } + } + } + + return nil +} diff --git a/libs/dyn/walk_read_test.go b/libs/dyn/walk_read_test.go new file mode 100644 index 0000000000..319cf59c3a --- /dev/null +++ b/libs/dyn/walk_read_test.go @@ -0,0 +1,251 @@ +package dyn + +import ( + "errors" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestWalkRead(t *testing.T) { + tests := []struct { + name string + input Value + expectedPaths []Path + expectedValues []Value + }{ + { + name: "simple map", + input: V(map[string]Value{ + "a": V("1"), + "b": V("2"), + }), + expectedPaths: []Path{ + EmptyPath, + {Key("a")}, + {Key("b")}, + }, + expectedValues: []Value{ + V(map[string]Value{ + "a": V("1"), + "b": V("2"), + }), + V("1"), + V("2"), + }, + }, + { + name: "nested map", + input: V(map[string]Value{ + "a": V(map[string]Value{ + "b": V("1"), + }), + }), + expectedPaths: []Path{ + EmptyPath, + {Key("a")}, + {Key("a"), Key("b")}, + }, + expectedValues: []Value{ + V(map[string]Value{ + "a": V(map[string]Value{ + "b": V("1"), + }), + }), + V(map[string]Value{ + "b": V("1"), + }), + V("1"), + }, + }, + { + name: "sequence", + input: V([]Value{ + V("1"), + V("2"), + }), + expectedPaths: []Path{ + EmptyPath, + {Index(0)}, + {Index(1)}, + }, + expectedValues: []Value{ + V([]Value{ + V("1"), + V("2"), + }), + V("1"), + V("2"), + }, + }, + { + name: "nested sequence", + input: V([]Value{ + V([]Value{ + V("1"), + }), + }), + expectedPaths: []Path{ + EmptyPath, + {Index(0)}, + {Index(0), Index(0)}, + }, + expectedValues: []Value{ + V([]Value{ + V([]Value{ + V("1"), + }), + }), + V([]Value{ + V("1"), + }), + V("1"), + }, + }, + { + name: "complex structure", + input: V(map[string]Value{ + "a": V([]Value{ + V(map[string]Value{ + "b": V("1"), + }), + }), + }), + expectedPaths: []Path{ + EmptyPath, + {Key("a")}, + {Key("a"), Index(0)}, + {Key("a"), Index(0), Key("b")}, + }, + expectedValues: []Value{ + V(map[string]Value{ + "a": V([]Value{ + V(map[string]Value{ + "b": V("1"), + }), + }), + }), + V([]Value{ + V(map[string]Value{ + "b": V("1"), + }), + }), + V(map[string]Value{ + "b": V("1"), + }), + V("1"), + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var visited []struct { + path Path + value Value + } + + err := WalkRead(tt.input, func(p Path, v Value) error { + visited = append(visited, struct { + path Path + value Value + }{p, v}) + return nil + }) + if err != nil { + t.Errorf("WalkRead() error = %v", err) + } + + if len(visited) != len(tt.expectedPaths) { + t.Errorf("WalkRead() visited %d nodes, want %d", len(visited), len(tt.expectedPaths)) + } + + for i, v := range visited { + if !v.path.Equal(tt.expectedPaths[i]) { + t.Errorf("WalkRead() path[%d] = %v, want %v", i, v.path, tt.expectedPaths[i]) + } + if v.value.AsAny() != tt.expectedValues[i].AsAny() { + t.Errorf("WalkRead() value[%d] = %v, want %v", i, v.value.AsAny(), tt.expectedValues[i].AsAny()) + } + } + }) + } +} + +func TestWalkReadError(t *testing.T) { + testErr := 
errors.New("test error") + input := V(map[string]Value{ + "a": V("1"), + }) + + err := WalkRead(input, func(p Path, v Value) error { + if p.Equal(Path{Key("a")}) { + return testErr + } + return nil + }) + + assert.Equal(t, err, testErr) +} + +func TestWalkReadSkipPaths(t *testing.T) { + input := V(map[string]Value{ + "a": V(map[string]Value{ + "b": V("1"), + "c": V("2"), + }), + "d": V(map[string]Value{ + "e": V("3"), + }), + "f": V("4"), + }) + + skipPaths := map[string]bool{ + "a.b": true, + "d": true, + } + + var visitedPaths []Path + var visitedValues []Value + + err := WalkRead(input, func(p Path, v Value) error { + _, ok := skipPaths[p.String()] + if ok { + return ErrSkip + } + + visitedPaths = append(visitedPaths, p) + visitedValues = append(visitedValues, v) + return nil + }) + require.NoError(t, err) + + expectedPaths := []Path{ + EmptyPath, + {Key("a")}, + {Key("a"), Key("c")}, + {Key("f")}, + } + expectedValues := []Value{ + V(map[string]Value{ + "a": V(map[string]Value{ + "b": V("1"), + "c": V("2"), + }), + "d": V(map[string]Value{ + "e": V("3"), + }), + "f": V("4"), + }), + V(map[string]Value{ + "b": V("1"), + "c": V("2"), + }), + V("2"), + V("4"), + } + + assert.Equal(t, visitedPaths, expectedPaths) + assert.Equal(t, visitedValues, expectedValues) +} From 578112a952078f245adc5ffe23ef4f0d1c7e97fb Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 10 Jul 2025 23:17:37 +0200 Subject: [PATCH 21/30] update the PR with latest changes --- .../bundle/validate/required/out.test.toml | 5 + bundle/config/validate/required.go | 36 +-- bundle/config/validate/required_test.go | 286 ------------------ bundle/internal/bundletest/benchmark.go | 26 ++ .../bundletest/mutator_benchmark_test.go | 76 +++++ libs/dyn/walk_read.go | 41 --- libs/dyn/walk_read_test.go | 251 --------------- 7 files changed, 109 insertions(+), 612 deletions(-) create mode 100644 acceptance/bundle/validate/required/out.test.toml delete mode 100644 bundle/config/validate/required_test.go create mode 100644 bundle/internal/bundletest/mutator_benchmark_test.go delete mode 100644 libs/dyn/walk_read.go delete mode 100644 libs/dyn/walk_read_test.go diff --git a/acceptance/bundle/validate/required/out.test.toml b/acceptance/bundle/validate/required/out.test.toml new file mode 100644 index 0000000000..8f3575be7b --- /dev/null +++ b/acceptance/bundle/validate/required/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_CLI_DEPLOYMENT = ["terraform", "direct-exp"] diff --git a/bundle/config/validate/required.go b/bundle/config/validate/required.go index 7e379e95da..319367c85f 100644 --- a/bundle/config/validate/required.go +++ b/bundle/config/validate/required.go @@ -25,7 +25,7 @@ func (f *required) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics diags := diag.Diagnostics{} // Generate prefix tree for all required fields. - trie := dyn.NewPatternTrie() + trie := &dyn.TrieNode{} for k := range generated.RequiredFields { pattern, err := dyn.NewPatternFromString(k) if err != nil { @@ -38,7 +38,7 @@ func (f *required) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics } } - dyn.WalkRead(b.Config.Value(), func(p dyn.Path, v dyn.Value) error { + dyn.WalkReadOnly(b.Config.Value(), func(p dyn.Path, v dyn.Value) error { // If the path is not preset in the prefix tree, we do not need to validate any required // fields in it. 
pattern, ok := trie.SearchPath(p) @@ -61,38 +61,6 @@ func (f *required) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics return nil }) - // for k, requiredFields := range generated.RequiredFields { - // pattern, err := dyn.NewPatternFromString(k) - // if err != nil { - // return diag.FromErr(fmt.Errorf("invalid pattern %q for required field validation: %w", k, err)) - // } - - // // Note we only emit diagnostics for fields that are not set. If a field is set to a zero - // // value we don't emit a diagnostic. This is so that we defer the interpretation of zero values - // // to the server. - // _, err = dyn.MapByPattern(b.Config.Value(), pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { - // for _, requiredField := range requiredFields { - // vv := v.Get(requiredField) - // if vv.Kind() == dyn.KindInvalid || vv.Kind() == dyn.KindNil { - // diags = diags.Append(diag.Diagnostic{ - // Severity: diag.Warning, - // Summary: fmt.Sprintf("required field %q is not set", requiredField), - // Locations: v.Locations(), - // Paths: []dyn.Path{p}, - // }) - // } - // } - // return v, nil - // }) - // if dyn.IsExpectedMapError(err) || dyn.IsExpectedSequenceError(err) || dyn.IsExpectedMapToIndexError(err) || dyn.IsExpectedSequenceToIndexError(err) { - // // No map or sequence value is set at this pattern, so we ignore it. - // continue - // } - // if err != nil { - // return diag.FromErr(err) - // } - // } - // Sort diagnostics to make them deterministic sort.Slice(diags, func(i, j int) bool { // First sort by summary diff --git a/bundle/config/validate/required_test.go b/bundle/config/validate/required_test.go deleted file mode 100644 index c12bb3736d..0000000000 --- a/bundle/config/validate/required_test.go +++ /dev/null @@ -1,286 +0,0 @@ -package validate - -import ( - "context" - "encoding/json" - "fmt" - "testing" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/bundle/config/resources" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/dyn" - "github.com/databricks/databricks-sdk-go/service/jobs" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -const jobExample = ` -{ - "budget_policy_id": "550e8400-e29b-41d4-a716-446655440000", - "continuous": { - "pause_status": "UNPAUSED" - }, - "deployment": { - "kind": "BUNDLE", - "metadata_file_path": "string" - }, - "description": "This job contain multiple tasks that are required to produce the weekly shark sightings report.", - "edit_mode": "UI_LOCKED", - "email_notifications": { - "no_alert_for_skipped_runs": false, - "on_duration_warning_threshold_exceeded": [ - "user.name@databricks.com" - ], - "on_failure": [ - "user.name@databricks.com" - ], - "on_start": [ - "user.name@databricks.com" - ], - "on_streaming_backlog_exceeded": [ - "user.name@databricks.com" - ], - "on_success": [ - "user.name@databricks.com" - ] - }, - "environments": [ - { - "environment_key": "string", - "spec": { - "client": "1", - "dependencies": [ - "string" - ] - } - } - ], - "format": "SINGLE_TASK", - "git_source": { - "git_branch": "main", - "git_provider": "gitHub", - "git_url": "https://github.com/databricks/databricks-cli" - }, - "health": { - "rules": [ - { - "metric": "RUN_DURATION_SECONDS", - "op": "GREATER_THAN", - "value": 10 - } - ] - }, - "job_clusters": [ - { - "job_cluster_key": "auto_scaling_cluster", - "new_cluster": { - "autoscale": { - "max_workers": 16, - "min_workers": 2 - }, - "node_type_id": null, - "spark_conf": { - 
"spark.speculation": "true" - }, - "spark_version": "7.3.x-scala2.12" - } - } - ], - "max_concurrent_runs": 10, - "name": "A multitask job", - "notification_settings": { - "no_alert_for_canceled_runs": false, - "no_alert_for_skipped_runs": false - }, - "parameters": [ - { - "default": "users", - "name": "table" - } - ], - "performance_target": "PERFORMANCE_OPTIMIZED", - "queue": { - "enabled": true - }, - "run_as": { - "service_principal_name": "692bc6d0-ffa3-11ed-be56-0242ac120002", - "user_name": "user@databricks.com" - }, - "schedule": { - "pause_status": "UNPAUSED", - "quartz_cron_expression": "20 30 * * * ?", - "timezone_id": "Europe/London" - }, - "tags": { - "cost-center": "engineering", - "team": "jobs" - }, - "tasks": [ - { - "depends_on": [], - "description": "Extracts session data from events", - "existing_cluster_id": "0923-164208-meows279", - "libraries": [ - { - "jar": "dbfs:/mnt/databricks/Sessionize.jar" - } - ], - "max_retries": 3, - "min_retry_interval_millis": 2000, - "retry_on_timeout": false, - "spark_jar_task": { - "main_class_name": "com.databricks.Sessionize", - "parameters": [ - "--data", - "dbfs:/path/to/data.json" - ] - }, - "task_key": "Sessionize", - "timeout_seconds": 86400 - }, - { - "depends_on": [], - "description": "Ingests order data", - "job_cluster_key": "auto_scaling_cluster", - "libraries": [ - { - "jar": "dbfs:/mnt/databricks/OrderIngest.jar" - } - ], - "max_retries": 3, - "min_retry_interval_millis": 2000, - "retry_on_timeout": false, - "spark_jar_task": { - "main_class_name": "com.databricks.OrdersIngest", - "parameters": [ - "--data", - "dbfs:/path/to/order-data.json" - ] - }, - "task_key": "Orders_Ingest", - "timeout_seconds": 86400 - }, - { - "depends_on": [ - { - "task_key": "Orders_Ingest" - }, - { - "task_key": "Sessionize" - } - ], - "description": "Matches orders with user sessions", - "max_retries": 3, - "min_retry_interval_millis": 2000, - "new_cluster": { - "autoscale": { - "max_workers": 16, - "min_workers": 2 - }, - "node_type_id": null, - "spark_conf": { - "spark.speculation": "true" - }, - "spark_version": "7.3.x-scala2.12" - }, - "notebook_task": { - "base_parameters": { - "age": "35", - "name": "John Doe" - }, - "notebook_path": "/Users/user.name@databricks.com/Match" - }, - "retry_on_timeout": false, - "run_if": "ALL_SUCCESS", - "timeout_seconds": 86400 - } - ], - "timeout_seconds": 86400, - "trigger": { - "file_arrival": { - "min_time_between_triggers_seconds": 0, - "url": "string", - "wait_after_last_change_seconds": 0 - }, - "pause_status": "UNPAUSED", - "periodic": { - "interval": 0, - "unit": "HOURS" - } - } -}` - -func setupBundle(b *testing.B, numJobs int) bundle.Bundle { - allJobs := map[string]*resources.Job{} - for i := range numJobs { - job := jobs.JobSettings{} - err := json.Unmarshal([]byte(jobExample), &job) - require.NoError(b, err) - - allJobs[fmt.Sprintf("%d", i)] = &resources.Job{ - JobSettings: job, - } - } - - return bundle.Bundle{ - Config: config.Root{ - Resources: config.Resources{ - Jobs: allJobs, - }, - }, - } -} - -func benchmarkRequired(b *testing.B, numJobs int) { - myBundle := setupBundle(b, numJobs) - - for b.Loop() { - diags := bundle.Apply(context.Background(), &myBundle, Required()) - assert.Len(b, diags, 2*numJobs) - } -} - -func benchmarkBaseline(b *testing.B, numJobs int) { - myBundle := setupBundle(b, numJobs) - - for b.Loop() { - paths := []dyn.Path{} - bundle.ApplyFunc(context.Background(), &myBundle, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - dyn.Walk(b.Config.Value(), 
func(p dyn.Path, v dyn.Value) (dyn.Value, error) { - paths = append(paths, p) - return v, nil - }) - return nil - }) - } -} - -// This benchmark took 1.43 seconds to run on 13th June 2025. -// This benchmark compared the additional cost of using the dyn.MapByPattern -// function compared to using dyn.Walk which walks the entire configuration tree -// once. -func BenchmarkBaseline(b *testing.B) { - benchmarkBaseline(b, 10000) -} - -// This benchmark took 3.5 seconds to run on 13th June 2025. -func BenchmarkValidateRequired10000(b *testing.B) { - benchmarkRequired(b, 10000) -} - -// This benchmark took 0.28 seconds to run on 13th June 2025. -func BenchmarkValidateRequired1000(b *testing.B) { - benchmarkRequired(b, 1000) -} - -// This benchmark took 26 ms to run on 13th June 2025. -func BenchmarkValidateRequired100(b *testing.B) { - benchmarkRequired(b, 100) -} - -// This benchmark took 3 ms to run on 13th June 2025. -func BenchmarkValidateRequired10(b *testing.B) { - benchmarkRequired(b, 10) -} diff --git a/bundle/internal/bundletest/benchmark.go b/bundle/internal/bundletest/benchmark.go index 8b109f5dc4..6a6b40e4c7 100644 --- a/bundle/internal/bundletest/benchmark.go +++ b/bundle/internal/bundletest/benchmark.go @@ -235,3 +235,29 @@ func BundleV(b *testing.B, numJobs int) dyn.Value { return myBundle.Config.Value() } + +func Bundle(b *testing.B, numJobs int) *bundle.Bundle { + allJobs := map[string]*resources.Job{} + for i := range numJobs { + job := jobs.JobSettings{} + err := json.Unmarshal([]byte(jobExample), &job) + require.NoError(b, err) + + allJobs[strconv.Itoa(i)] = &resources.Job{ + JobSettings: job, + } + } + + myBundle := bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: allJobs, + }, + }, + } + + // Apply noop mutator to initialize the bundle value. + bundle.ApplyFuncContext(context.Background(), &myBundle, func(ctx context.Context, b *bundle.Bundle) {}) + + return &myBundle +} diff --git a/bundle/internal/bundletest/mutator_benchmark_test.go b/bundle/internal/bundletest/mutator_benchmark_test.go new file mode 100644 index 0000000000..a5dd74e043 --- /dev/null +++ b/bundle/internal/bundletest/mutator_benchmark_test.go @@ -0,0 +1,76 @@ +package bundletest + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/validate" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" + "github.com/stretchr/testify/assert" +) + +func benchmarkRequiredMutator(b *testing.B, numJobs int) { + myBundle := Bundle(b, numJobs) + + var diags diag.Diagnostics + for b.Loop() { + diags = bundle.Apply(context.Background(), myBundle, validate.Required()) + } + assert.NotEmpty(b, diags) +} + +func benchmarkWalkReadOnlyBaseline(b *testing.B, numJobs int) { + myBundle := Bundle(b, numJobs) + + for b.Loop() { + paths := []dyn.Path{} + bundle.ApplyFuncContext(context.Background(), myBundle, func(ctx context.Context, b *bundle.Bundle) { + dyn.WalkReadOnly(b.Config.Value(), func(p dyn.Path, v dyn.Value) error { + paths = append(paths, p) + return nil + }) + }) + } +} + +func benchmarkNoopBaseline(b *testing.B, numJobs int) { + myBundle := Bundle(b, numJobs) + + for b.Loop() { + bundle.ApplyFuncContext(context.Background(), myBundle, func(ctx context.Context, b *bundle.Bundle) { + return + }) + } +} + +// This benchmark took 823ms to run on 10th July 2025. 
+func BenchmarkWalkReadOnlyBaseline(b *testing.B) { + benchmarkWalkReadOnlyBaseline(b, 10000) +} + +// This benchmark took 774ms to run on 10th July 2025. +func BenchmarkNoopBaseline(b *testing.B) { + benchmarkNoopBaseline(b, 10000) +} + +// This benchmark took 996ms to run on 10th July 2025. +func BenchmarkValidateRequired10000(b *testing.B) { + benchmarkRequiredMutator(b, 10000) +} + +// This benchmark took 98ms to run on 10th July 2025.l +func BenchmarkValidateRequired1000(b *testing.B) { + benchmarkRequiredMutator(b, 1000) +} + +// This benchmark took 10ms to run on 10th July 2025. +func BenchmarkValidateRequired100(b *testing.B) { + benchmarkRequiredMutator(b, 100) +} + +// This benchmark took 1.1ms to run on 10th July 2025. +func BenchmarkValidateRequired10(b *testing.B) { + benchmarkRequiredMutator(b, 10) +} diff --git a/libs/dyn/walk_read.go b/libs/dyn/walk_read.go deleted file mode 100644 index 46d9da9251..0000000000 --- a/libs/dyn/walk_read.go +++ /dev/null @@ -1,41 +0,0 @@ -package dyn - -// WalkRead walks the configuration tree in readonly mode and calls the given function on each node. -// The callback may return ErrSkip to skip traversal of a subtree. -// If the callback returns another error, the walk is aborted, and the error is returned. -func WalkRead(v Value, fn func(p Path, v Value) error) error { - return walkRead(v, EmptyPath, fn) -} - -// Unexported counterpart to WalkRead. -// It carries the path leading up to the current node, -// such that it can be passed to the callback function. -func walkRead(v Value, p Path, fn func(p Path, v Value) error) error { - if err := fn(p, v); err != nil { - if err == ErrSkip { - return nil - } - return err - } - - switch v.Kind() { - case KindMap: - m := v.MustMap() - for _, pair := range m.Pairs() { - pk := pair.Key - pv := pair.Value - if err := walkRead(pv, append(p, Key(pk.MustString())), fn); err != nil { - return err - } - } - case KindSequence: - s := v.MustSequence() - for i := range s { - if err := walkRead(s[i], append(p, Index(i)), fn); err != nil { - return err - } - } - } - - return nil -} diff --git a/libs/dyn/walk_read_test.go b/libs/dyn/walk_read_test.go deleted file mode 100644 index 319cf59c3a..0000000000 --- a/libs/dyn/walk_read_test.go +++ /dev/null @@ -1,251 +0,0 @@ -package dyn - -import ( - "errors" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestWalkRead(t *testing.T) { - tests := []struct { - name string - input Value - expectedPaths []Path - expectedValues []Value - }{ - { - name: "simple map", - input: V(map[string]Value{ - "a": V("1"), - "b": V("2"), - }), - expectedPaths: []Path{ - EmptyPath, - {Key("a")}, - {Key("b")}, - }, - expectedValues: []Value{ - V(map[string]Value{ - "a": V("1"), - "b": V("2"), - }), - V("1"), - V("2"), - }, - }, - { - name: "nested map", - input: V(map[string]Value{ - "a": V(map[string]Value{ - "b": V("1"), - }), - }), - expectedPaths: []Path{ - EmptyPath, - {Key("a")}, - {Key("a"), Key("b")}, - }, - expectedValues: []Value{ - V(map[string]Value{ - "a": V(map[string]Value{ - "b": V("1"), - }), - }), - V(map[string]Value{ - "b": V("1"), - }), - V("1"), - }, - }, - { - name: "sequence", - input: V([]Value{ - V("1"), - V("2"), - }), - expectedPaths: []Path{ - EmptyPath, - {Index(0)}, - {Index(1)}, - }, - expectedValues: []Value{ - V([]Value{ - V("1"), - V("2"), - }), - V("1"), - V("2"), - }, - }, - { - name: "nested sequence", - input: V([]Value{ - V([]Value{ - V("1"), - }), - }), - expectedPaths: []Path{ - EmptyPath, - 
{Index(0)}, - {Index(0), Index(0)}, - }, - expectedValues: []Value{ - V([]Value{ - V([]Value{ - V("1"), - }), - }), - V([]Value{ - V("1"), - }), - V("1"), - }, - }, - { - name: "complex structure", - input: V(map[string]Value{ - "a": V([]Value{ - V(map[string]Value{ - "b": V("1"), - }), - }), - }), - expectedPaths: []Path{ - EmptyPath, - {Key("a")}, - {Key("a"), Index(0)}, - {Key("a"), Index(0), Key("b")}, - }, - expectedValues: []Value{ - V(map[string]Value{ - "a": V([]Value{ - V(map[string]Value{ - "b": V("1"), - }), - }), - }), - V([]Value{ - V(map[string]Value{ - "b": V("1"), - }), - }), - V(map[string]Value{ - "b": V("1"), - }), - V("1"), - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - var visited []struct { - path Path - value Value - } - - err := WalkRead(tt.input, func(p Path, v Value) error { - visited = append(visited, struct { - path Path - value Value - }{p, v}) - return nil - }) - if err != nil { - t.Errorf("WalkRead() error = %v", err) - } - - if len(visited) != len(tt.expectedPaths) { - t.Errorf("WalkRead() visited %d nodes, want %d", len(visited), len(tt.expectedPaths)) - } - - for i, v := range visited { - if !v.path.Equal(tt.expectedPaths[i]) { - t.Errorf("WalkRead() path[%d] = %v, want %v", i, v.path, tt.expectedPaths[i]) - } - if v.value.AsAny() != tt.expectedValues[i].AsAny() { - t.Errorf("WalkRead() value[%d] = %v, want %v", i, v.value.AsAny(), tt.expectedValues[i].AsAny()) - } - } - }) - } -} - -func TestWalkReadError(t *testing.T) { - testErr := errors.New("test error") - input := V(map[string]Value{ - "a": V("1"), - }) - - err := WalkRead(input, func(p Path, v Value) error { - if p.Equal(Path{Key("a")}) { - return testErr - } - return nil - }) - - assert.Equal(t, err, testErr) -} - -func TestWalkReadSkipPaths(t *testing.T) { - input := V(map[string]Value{ - "a": V(map[string]Value{ - "b": V("1"), - "c": V("2"), - }), - "d": V(map[string]Value{ - "e": V("3"), - }), - "f": V("4"), - }) - - skipPaths := map[string]bool{ - "a.b": true, - "d": true, - } - - var visitedPaths []Path - var visitedValues []Value - - err := WalkRead(input, func(p Path, v Value) error { - _, ok := skipPaths[p.String()] - if ok { - return ErrSkip - } - - visitedPaths = append(visitedPaths, p) - visitedValues = append(visitedValues, v) - return nil - }) - require.NoError(t, err) - - expectedPaths := []Path{ - EmptyPath, - {Key("a")}, - {Key("a"), Key("c")}, - {Key("f")}, - } - expectedValues := []Value{ - V(map[string]Value{ - "a": V(map[string]Value{ - "b": V("1"), - "c": V("2"), - }), - "d": V(map[string]Value{ - "e": V("3"), - }), - "f": V("4"), - }), - V(map[string]Value{ - "b": V("1"), - "c": V("2"), - }), - V("2"), - V("4"), - } - - assert.Equal(t, visitedPaths, expectedPaths) - assert.Equal(t, visitedValues, expectedValues) -} From 7612cf9c4f578dc08aa97e73db5f121247d34188 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 10 Jul 2025 23:25:45 +0200 Subject: [PATCH 22/30] lint --- libs/dyn/pattern.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/libs/dyn/pattern.go b/libs/dyn/pattern.go index 3b08004a9b..2a15d12cc3 100644 --- a/libs/dyn/pattern.go +++ b/libs/dyn/pattern.go @@ -18,7 +18,7 @@ func (p Pattern) String() string { first := true for _, c := range p { - switch c.(type) { + switch c := c.(type) { case anyKeyComponent: if !first { buf.WriteString(".") @@ -27,14 +27,13 @@ func (p Pattern) String() string { case anyIndexComponent: buf.WriteString("[*]") case pathComponent: - v := c.(pathComponent) - if v.isKey() 
{ + if c.isKey() { if !first { buf.WriteString(".") } - buf.WriteString(v.Key()) + buf.WriteString(c.Key()) } else { - buf.WriteString(fmt.Sprintf("[%d]", v.Index())) + buf.WriteString(fmt.Sprintf("[%d]", c.Index())) } default: buf.WriteString("???") From 159333e3f653883295726cc9c05ec75b2709ad0b Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 11 Jul 2025 14:38:11 +0200 Subject: [PATCH 23/30] update acc tests --- .../bundle/artifacts/issue_3109/output.txt | 4 ++++ acceptance/bundle/debug/direct/out.stderr.txt | 1 + acceptance/bundle/debug/direct/output.txt | 2 +- acceptance/bundle/paths/fallback/output.txt | 12 +++++------ acceptance/bundle/paths/nominal/output.txt | 20 +++++++++---------- acceptance/bundle/quality_monitor/output.txt | 4 ++-- .../prepend-workspace-var/output.txt | 4 ++-- .../workspace/apps/run-local-node/output.txt | 12 +---------- .../bundletest/mutator_benchmark_test.go | 2 +- 9 files changed, 28 insertions(+), 33 deletions(-) diff --git a/acceptance/bundle/artifacts/issue_3109/output.txt b/acceptance/bundle/artifacts/issue_3109/output.txt index 4daf472fd4..9c7806df89 100644 --- a/acceptance/bundle/artifacts/issue_3109/output.txt +++ b/acceptance/bundle/artifacts/issue_3109/output.txt @@ -1,3 +1,7 @@ +Warning: required field "entry_point" is not set + at resources.jobs.job1.tasks[0].job_cluster_key + in src/job1/databricks_job/resources/job1.yaml:16:17 + Building wheels... Uploading dist/my_default_python-0.0.1-py3-none-any.whl... Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle/dev/files... diff --git a/acceptance/bundle/debug/direct/out.stderr.txt b/acceptance/bundle/debug/direct/out.stderr.txt index 2f51e4ddd5..d3e3d47a6a 100644 --- a/acceptance/bundle/debug/direct/out.stderr.txt +++ b/acceptance/bundle/debug/direct/out.stderr.txt @@ -50,6 +50,7 @@ 10:07:59 Debug: Apply pid=12345 mutator=ProcessStaticResources mutator=TranslatePathsDashboards 10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(load_resources) 10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(apply_mutators) +10:07:59 Debug: Apply pid=12345 mutator=validate:required 10:07:59 Debug: Apply pid=12345 mutator=CheckPermissions 10:07:59 Debug: Apply pid=12345 mutator=TranslatePaths 10:07:59 Debug: Apply pid=12345 mutator=PythonWrapperWarning diff --git a/acceptance/bundle/debug/direct/output.txt b/acceptance/bundle/debug/direct/output.txt index 269cfc8203..85294af45b 100644 --- a/acceptance/bundle/debug/direct/output.txt +++ b/acceptance/bundle/debug/direct/output.txt @@ -14,7 +14,7 @@ Validation OK! 
+>>> [CLI] bundle validate --debug 10:07:59 Info: start pid=12345 version=[DEV_VERSION] args="[CLI], bundle, validate, --debug" 10:07:59 Debug: Found bundle root at [TEST_TMP_DIR] (file [TEST_TMP_DIR]/databricks.yml) pid=12345 -@@ -60,8 +62,4 @@ +@@ -61,8 +63,4 @@ 10:07:59 Debug: Apply pid=12345 mutator=metadata.AnnotateJobs 10:07:59 Debug: Apply pid=12345 mutator=metadata.AnnotatePipelines -10:07:59 Debug: Apply pid=12345 mutator=terraform.Initialize diff --git a/acceptance/bundle/paths/fallback/output.txt b/acceptance/bundle/paths/fallback/output.txt index 67f011a194..17b37e7fd9 100644 --- a/acceptance/bundle/paths/fallback/output.txt +++ b/acceptance/bundle/paths/fallback/output.txt @@ -1,5 +1,10 @@ >>> [CLI] bundle validate -t development -o json +Warning: required field "entry_point" is not set + at resources.jobs.my_job.tasks[2].python_wheel_task + in resources/my_job.yml:33:13 + override_job.yml:29:17 + Warn: path ../src/notebook.py is defined relative to the [TEST_TMP_DIR]/resources directory ([TEST_TMP_DIR]/override_job.yml:9:32). Please update the path to be relative to the file where it is defined. The current value will no longer be valid in the next release. Warn: path ../src/file.py is defined relative to the [TEST_TMP_DIR]/resources directory ([TEST_TMP_DIR]/override_job.yml:13:30). Please update the path to be relative to the file where it is defined. The current value will no longer be valid in the next release. Warn: path ../src/dbt_project is defined relative to the [TEST_TMP_DIR]/resources directory ([TEST_TMP_DIR]/override_job.yml:17:36). Please update the path to be relative to the file where it is defined. The current value will no longer be valid in the next release. @@ -8,15 +13,10 @@ Warn: path ../dist/wheel2.whl is defined relative to the [TEST_TMP_DIR]/resource Warn: path ../target/jar2.jar is defined relative to the [TEST_TMP_DIR]/resources directory ([TEST_TMP_DIR]/override_job.yml:41:24). Please update the path to be relative to the file where it is defined. The current value will no longer be valid in the next release. Warn: path ../src/notebook2.py is defined relative to the [TEST_TMP_DIR]/resources directory ([TEST_TMP_DIR]/override_pipeline.yml:13:23). Please update the path to be relative to the file where it is defined. The current value will no longer be valid in the next release. Warn: path ../src/file2.py is defined relative to the [TEST_TMP_DIR]/resources directory ([TEST_TMP_DIR]/override_pipeline.yml:10:23). Please update the path to be relative to the file where it is defined. The current value will no longer be valid in the next release. -Warning: required field "entry_point" is not set - at resources.jobs.my_job.tasks[4].python_wheel_task - in resources/my_job.yml:33:13 - override_job.yml:29:17 - >>> [CLI] bundle validate -t error Warning: required field "entry_point" is not set - at resources.jobs.my_job.tasks[4].python_wheel_task + at resources.jobs.my_job.tasks[2].task_key in resources/my_job.yml:33:13 Error: notebook "resources/this value is overridden" not found. 
Local notebook references are expected diff --git a/acceptance/bundle/paths/nominal/output.txt b/acceptance/bundle/paths/nominal/output.txt index 7c44604d25..427ea121dd 100644 --- a/acceptance/bundle/paths/nominal/output.txt +++ b/acceptance/bundle/paths/nominal/output.txt @@ -1,50 +1,50 @@ >>> [CLI] bundle validate -t development -o json Warning: required field "entry_point" is not set - at resources.jobs.my_job.tasks[4].python_wheel_task + at resources.jobs.my_job.tasks[4].task_key in resources/my_job.yml:33:13 override_job.yml:29:17 Warning: required field "inputs" is not set - at resources.jobs.my_job.tasks[6].for_each_task + at resources.jobs.my_job.tasks[1].for_each_task in resources/my_job.yml:47:13 override_job.yml:45:17 Warning: required field "inputs" is not set - at resources.jobs.my_job.tasks[7].for_each_task + at resources.jobs.my_job.tasks[2].for_each_task in resources/my_job.yml:53:13 override_job.yml:51:17 Warning: required field "task_key" is not set - at resources.jobs.my_job.tasks[6].for_each_task.task + at resources.jobs.my_job.tasks[1].for_each_task.task in resources/my_job.yml:48:15 override_job.yml:46:19 Warning: required field "task_key" is not set - at resources.jobs.my_job.tasks[7].for_each_task.task + at resources.jobs.my_job.tasks[2].for_each_task.task in resources/my_job.yml:54:15 override_job.yml:52:19 >>> [CLI] bundle validate -t error Warning: required field "entry_point" is not set - at resources.jobs.my_job.tasks[4].python_wheel_task + at resources.jobs.my_job.tasks[4].task_key in resources/my_job.yml:33:13 Warning: required field "inputs" is not set - at resources.jobs.my_job.tasks[6].for_each_task + at resources.jobs.my_job.tasks[1].task_key in resources/my_job.yml:47:13 Warning: required field "inputs" is not set - at resources.jobs.my_job.tasks[7].for_each_task + at resources.jobs.my_job.tasks[2].for_each_task in resources/my_job.yml:53:13 Warning: required field "task_key" is not set - at resources.jobs.my_job.tasks[6].for_each_task.task + at resources.jobs.my_job.tasks[1].task_key.task in resources/my_job.yml:48:15 Warning: required field "task_key" is not set - at resources.jobs.my_job.tasks[7].for_each_task.task + at resources.jobs.my_job.tasks[2].for_each_task.task in resources/my_job.yml:54:15 Error: notebook "resources/this value is overridden" not found. 
Local notebook references are expected diff --git a/acceptance/bundle/quality_monitor/output.txt b/acceptance/bundle/quality_monitor/output.txt index 278e116606..b78f09eb89 100644 --- a/acceptance/bundle/quality_monitor/output.txt +++ b/acceptance/bundle/quality_monitor/output.txt @@ -1,11 +1,11 @@ >>> [CLI] bundle validate -o json -t development Warning: required field "quartz_cron_expression" is not set - at resources.quality_monitors.my_monitor.schedule + at resources.quality_monitors.my_monitor.table_name in databricks.yml:17:9 Warning: required field "timezone_id" is not set - at resources.quality_monitors.my_monitor.schedule + at resources.quality_monitors.my_monitor.table_name in databricks.yml:17:9 { diff --git a/acceptance/bundle/variables/prepend-workspace-var/output.txt b/acceptance/bundle/variables/prepend-workspace-var/output.txt index 201dc769af..5e6bc2026f 100644 --- a/acceptance/bundle/variables/prepend-workspace-var/output.txt +++ b/acceptance/bundle/variables/prepend-workspace-var/output.txt @@ -1,13 +1,13 @@ /Workspace should be prepended on all paths, but it is not the case: Warning: required field "entry_point" is not set - at resources.jobs.my_job.tasks[0].python_wheel_task + at resources.jobs.my_job.tasks[0].task_key in databricks.yml:23:13 Warning: required field "name" is not set at bundle Warning: required field "package_name" is not set - at resources.jobs.my_job.tasks[0].python_wheel_task + at resources.jobs.my_job.tasks[0].task_key in databricks.yml:23:13 { diff --git a/acceptance/cmd/workspace/apps/run-local-node/output.txt b/acceptance/cmd/workspace/apps/run-local-node/output.txt index 0185dbe523..4de672232f 100644 --- a/acceptance/cmd/workspace/apps/run-local-node/output.txt +++ b/acceptance/cmd/workspace/apps/run-local-node/output.txt @@ -1,12 +1,2 @@ -Running command: node -e console.log('Hello, world') -Hello, world -=== Starting the app in background... -=== Waiting -=== Checking app is running... ->>> curl -s -o - http://127.0.0.1:$(port) -{"message":"Hello From App","timestamp":"[TIMESTAMP]","status":"running"} - -=== Sending shutdown request... ->>> curl -s -o /dev/null http://127.0.0.1:$(port)/shutdown -Process terminated +Exit code: 1 diff --git a/bundle/internal/bundletest/mutator_benchmark_test.go b/bundle/internal/bundletest/mutator_benchmark_test.go index a5dd74e043..5a14efc67c 100644 --- a/bundle/internal/bundletest/mutator_benchmark_test.go +++ b/bundle/internal/bundletest/mutator_benchmark_test.go @@ -60,7 +60,7 @@ func BenchmarkValidateRequired10000(b *testing.B) { benchmarkRequiredMutator(b, 10000) } -// This benchmark took 98ms to run on 10th July 2025.l +// This benchmark took 98ms to run on 10th July 2025. 
func BenchmarkValidateRequired1000(b *testing.B) { benchmarkRequiredMutator(b, 1000) } From 2c68220458fd5663d60a6d35d5103752556a383a Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 11 Jul 2025 14:42:20 +0200 Subject: [PATCH 24/30] revert node test --- .../bundle/artifacts/issue_3109/output.txt | 2 +- acceptance/bundle/paths/fallback/output.txt | 4 ++-- acceptance/bundle/paths/nominal/output.txt | 16 ++++++++-------- acceptance/bundle/quality_monitor/output.txt | 4 ++-- .../cmd/workspace/apps/run-local-node/output.txt | 12 +++++++++++- 5 files changed, 24 insertions(+), 14 deletions(-) diff --git a/acceptance/bundle/artifacts/issue_3109/output.txt b/acceptance/bundle/artifacts/issue_3109/output.txt index 9c7806df89..ff6b9968ce 100644 --- a/acceptance/bundle/artifacts/issue_3109/output.txt +++ b/acceptance/bundle/artifacts/issue_3109/output.txt @@ -1,5 +1,5 @@ Warning: required field "entry_point" is not set - at resources.jobs.job1.tasks[0].job_cluster_key + at resources.jobs.job1.tasks[0].python_wheel_task in src/job1/databricks_job/resources/job1.yaml:16:17 Building wheels... diff --git a/acceptance/bundle/paths/fallback/output.txt b/acceptance/bundle/paths/fallback/output.txt index 17b37e7fd9..699c01d629 100644 --- a/acceptance/bundle/paths/fallback/output.txt +++ b/acceptance/bundle/paths/fallback/output.txt @@ -1,7 +1,7 @@ >>> [CLI] bundle validate -t development -o json Warning: required field "entry_point" is not set - at resources.jobs.my_job.tasks[2].python_wheel_task + at resources.jobs.my_job.tasks[2].task_key in resources/my_job.yml:33:13 override_job.yml:29:17 @@ -16,7 +16,7 @@ Warn: path ../src/file2.py is defined relative to the [TEST_TMP_DIR]/resources d >>> [CLI] bundle validate -t error Warning: required field "entry_point" is not set - at resources.jobs.my_job.tasks[2].task_key + at resources.jobs.my_job.tasks[2].job_cluster_key in resources/my_job.yml:33:13 Error: notebook "resources/this value is overridden" not found. 
Local notebook references are expected diff --git a/acceptance/bundle/paths/nominal/output.txt b/acceptance/bundle/paths/nominal/output.txt index 427ea121dd..4230f1fe73 100644 --- a/acceptance/bundle/paths/nominal/output.txt +++ b/acceptance/bundle/paths/nominal/output.txt @@ -1,38 +1,38 @@ >>> [CLI] bundle validate -t development -o json Warning: required field "entry_point" is not set - at resources.jobs.my_job.tasks[4].task_key + at resources.jobs.my_job.tasks[4].python_wheel_task in resources/my_job.yml:33:13 override_job.yml:29:17 Warning: required field "inputs" is not set - at resources.jobs.my_job.tasks[1].for_each_task + at resources.jobs.my_job.tasks[1].job_cluster_key in resources/my_job.yml:47:13 override_job.yml:45:17 Warning: required field "inputs" is not set - at resources.jobs.my_job.tasks[2].for_each_task + at resources.jobs.my_job.tasks[2].task_key in resources/my_job.yml:53:13 override_job.yml:51:17 Warning: required field "task_key" is not set - at resources.jobs.my_job.tasks[1].for_each_task.task + at resources.jobs.my_job.tasks[1].job_cluster_key.task in resources/my_job.yml:48:15 override_job.yml:46:19 Warning: required field "task_key" is not set - at resources.jobs.my_job.tasks[2].for_each_task.task + at resources.jobs.my_job.tasks[2].task_key.task in resources/my_job.yml:54:15 override_job.yml:52:19 >>> [CLI] bundle validate -t error Warning: required field "entry_point" is not set - at resources.jobs.my_job.tasks[4].task_key + at resources.jobs.my_job.tasks[4].libraries in resources/my_job.yml:33:13 Warning: required field "inputs" is not set - at resources.jobs.my_job.tasks[1].task_key + at resources.jobs.my_job.tasks[1].job_cluster_key in resources/my_job.yml:47:13 Warning: required field "inputs" is not set @@ -40,7 +40,7 @@ Warning: required field "inputs" is not set in resources/my_job.yml:53:13 Warning: required field "task_key" is not set - at resources.jobs.my_job.tasks[1].task_key.task + at resources.jobs.my_job.tasks[1].job_cluster_key.task in resources/my_job.yml:48:15 Warning: required field "task_key" is not set diff --git a/acceptance/bundle/quality_monitor/output.txt b/acceptance/bundle/quality_monitor/output.txt index b78f09eb89..bf8f2ce429 100644 --- a/acceptance/bundle/quality_monitor/output.txt +++ b/acceptance/bundle/quality_monitor/output.txt @@ -1,11 +1,11 @@ >>> [CLI] bundle validate -o json -t development Warning: required field "quartz_cron_expression" is not set - at resources.quality_monitors.my_monitor.table_name + at resources.quality_monitors.my_monitor.output_schema_name in databricks.yml:17:9 Warning: required field "timezone_id" is not set - at resources.quality_monitors.my_monitor.table_name + at resources.quality_monitors.my_monitor.output_schema_name in databricks.yml:17:9 { diff --git a/acceptance/cmd/workspace/apps/run-local-node/output.txt b/acceptance/cmd/workspace/apps/run-local-node/output.txt index 4de672232f..0185dbe523 100644 --- a/acceptance/cmd/workspace/apps/run-local-node/output.txt +++ b/acceptance/cmd/workspace/apps/run-local-node/output.txt @@ -1,2 +1,12 @@ +Running command: node -e console.log('Hello, world') +Hello, world -Exit code: 1 +=== Starting the app in background... +=== Waiting +=== Checking app is running... +>>> curl -s -o - http://127.0.0.1:$(port) +{"message":"Hello From App","timestamp":"[TIMESTAMP]","status":"running"} + +=== Sending shutdown request... 
+>>> curl -s -o /dev/null http://127.0.0.1:$(port)/shutdown +Process terminated From e12d8f2fb13fe9251fee286a2f2b923338666a94 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 11 Jul 2025 15:26:16 +0200 Subject: [PATCH 25/30] fix tests --- acceptance/bundle/paths/fallback/output.txt | 4 ++-- acceptance/bundle/paths/nominal/output.txt | 14 +++++++------- acceptance/bundle/quality_monitor/output.txt | 4 ++-- .../variables/prepend-workspace-var/output.txt | 4 ++-- bundle/config/validate/required.go | 7 +++++-- 5 files changed, 18 insertions(+), 15 deletions(-) diff --git a/acceptance/bundle/paths/fallback/output.txt b/acceptance/bundle/paths/fallback/output.txt index 699c01d629..6b819fe9c0 100644 --- a/acceptance/bundle/paths/fallback/output.txt +++ b/acceptance/bundle/paths/fallback/output.txt @@ -1,7 +1,7 @@ >>> [CLI] bundle validate -t development -o json Warning: required field "entry_point" is not set - at resources.jobs.my_job.tasks[2].task_key + at resources.jobs.my_job.tasks[2].python_wheel_task in resources/my_job.yml:33:13 override_job.yml:29:17 @@ -16,7 +16,7 @@ Warn: path ../src/file2.py is defined relative to the [TEST_TMP_DIR]/resources d >>> [CLI] bundle validate -t error Warning: required field "entry_point" is not set - at resources.jobs.my_job.tasks[2].job_cluster_key + at resources.jobs.my_job.tasks[2].python_wheel_task in resources/my_job.yml:33:13 Error: notebook "resources/this value is overridden" not found. Local notebook references are expected diff --git a/acceptance/bundle/paths/nominal/output.txt b/acceptance/bundle/paths/nominal/output.txt index 4230f1fe73..39747336a7 100644 --- a/acceptance/bundle/paths/nominal/output.txt +++ b/acceptance/bundle/paths/nominal/output.txt @@ -6,33 +6,33 @@ Warning: required field "entry_point" is not set override_job.yml:29:17 Warning: required field "inputs" is not set - at resources.jobs.my_job.tasks[1].job_cluster_key + at resources.jobs.my_job.tasks[1].for_each_task in resources/my_job.yml:47:13 override_job.yml:45:17 Warning: required field "inputs" is not set - at resources.jobs.my_job.tasks[2].task_key + at resources.jobs.my_job.tasks[2].for_each_task in resources/my_job.yml:53:13 override_job.yml:51:17 Warning: required field "task_key" is not set - at resources.jobs.my_job.tasks[1].job_cluster_key.task + at resources.jobs.my_job.tasks[1].for_each_task.task in resources/my_job.yml:48:15 override_job.yml:46:19 Warning: required field "task_key" is not set - at resources.jobs.my_job.tasks[2].task_key.task + at resources.jobs.my_job.tasks[2].for_each_task.task in resources/my_job.yml:54:15 override_job.yml:52:19 >>> [CLI] bundle validate -t error Warning: required field "entry_point" is not set - at resources.jobs.my_job.tasks[4].libraries + at resources.jobs.my_job.tasks[4].python_wheel_task in resources/my_job.yml:33:13 Warning: required field "inputs" is not set - at resources.jobs.my_job.tasks[1].job_cluster_key + at resources.jobs.my_job.tasks[1].for_each_task in resources/my_job.yml:47:13 Warning: required field "inputs" is not set @@ -40,7 +40,7 @@ Warning: required field "inputs" is not set in resources/my_job.yml:53:13 Warning: required field "task_key" is not set - at resources.jobs.my_job.tasks[1].job_cluster_key.task + at resources.jobs.my_job.tasks[1].for_each_task.task in resources/my_job.yml:48:15 Warning: required field "task_key" is not set diff --git a/acceptance/bundle/quality_monitor/output.txt b/acceptance/bundle/quality_monitor/output.txt index bf8f2ce429..278e116606 100644 --- 
a/acceptance/bundle/quality_monitor/output.txt +++ b/acceptance/bundle/quality_monitor/output.txt @@ -1,11 +1,11 @@ >>> [CLI] bundle validate -o json -t development Warning: required field "quartz_cron_expression" is not set - at resources.quality_monitors.my_monitor.output_schema_name + at resources.quality_monitors.my_monitor.schedule in databricks.yml:17:9 Warning: required field "timezone_id" is not set - at resources.quality_monitors.my_monitor.output_schema_name + at resources.quality_monitors.my_monitor.schedule in databricks.yml:17:9 { diff --git a/acceptance/bundle/variables/prepend-workspace-var/output.txt b/acceptance/bundle/variables/prepend-workspace-var/output.txt index 5e6bc2026f..201dc769af 100644 --- a/acceptance/bundle/variables/prepend-workspace-var/output.txt +++ b/acceptance/bundle/variables/prepend-workspace-var/output.txt @@ -1,13 +1,13 @@ /Workspace should be prepended on all paths, but it is not the case: Warning: required field "entry_point" is not set - at resources.jobs.my_job.tasks[0].task_key + at resources.jobs.my_job.tasks[0].python_wheel_task in databricks.yml:23:13 Warning: required field "name" is not set at bundle Warning: required field "package_name" is not set - at resources.jobs.my_job.tasks[0].task_key + at resources.jobs.my_job.tasks[0].python_wheel_task in databricks.yml:23:13 { diff --git a/bundle/config/validate/required.go b/bundle/config/validate/required.go index 319367c85f..691a1cbee3 100644 --- a/bundle/config/validate/required.go +++ b/bundle/config/validate/required.go @@ -3,6 +3,7 @@ package validate import ( "context" "fmt" + "slices" "sort" "github.com/databricks/cli/bundle" @@ -46,6 +47,8 @@ func (f *required) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics return nil } + cloneP := slices.Clone(p) + fields := generated.RequiredFields[pattern.String()] for _, field := range fields { vv := v.Get(field) @@ -54,7 +57,7 @@ func (f *required) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics Severity: diag.Warning, Summary: fmt.Sprintf("required field %q is not set", field), Locations: v.Locations(), - Paths: []dyn.Path{p}, + Paths: []dyn.Path{cloneP}, }) } } @@ -68,7 +71,7 @@ func (f *required) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics return diags[i].Summary < diags[j].Summary } - // Then sort by locations as a tie breaker if summaries are the same. + // Finally sort by locations as a tie breaker if summaries are the same. iLocs := fmt.Sprintf("%v", diags[i].Locations) jLocs := fmt.Sprintf("%v", diags[j].Locations) return iLocs < jLocs From 93ca6578b0b4d1599c9cd5e7e516b76a46299743 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 11 Jul 2025 15:44:27 +0200 Subject: [PATCH 26/30] lint --- bundle/config/validate/required.go | 5 ++++- bundle/internal/bundletest/mutator_benchmark_test.go | 8 +++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/bundle/config/validate/required.go b/bundle/config/validate/required.go index 691a1cbee3..84732d6034 100644 --- a/bundle/config/validate/required.go +++ b/bundle/config/validate/required.go @@ -39,7 +39,7 @@ func (f *required) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics } } - dyn.WalkReadOnly(b.Config.Value(), func(p dyn.Path, v dyn.Value) error { + err := dyn.WalkReadOnly(b.Config.Value(), func(p dyn.Path, v dyn.Value) error { // If the path is not preset in the prefix tree, we do not need to validate any required // fields in it. 
pattern, ok := trie.SearchPath(p) @@ -63,6 +63,9 @@ func (f *required) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics } return nil }) + if err != nil { + return diag.FromErr(err) + } // Sort diagnostics to make them deterministic sort.Slice(diags, func(i, j int) bool { diff --git a/bundle/internal/bundletest/mutator_benchmark_test.go b/bundle/internal/bundletest/mutator_benchmark_test.go index 5a14efc67c..035231c7d6 100644 --- a/bundle/internal/bundletest/mutator_benchmark_test.go +++ b/bundle/internal/bundletest/mutator_benchmark_test.go @@ -25,9 +25,9 @@ func benchmarkWalkReadOnlyBaseline(b *testing.B, numJobs int) { myBundle := Bundle(b, numJobs) for b.Loop() { - paths := []dyn.Path{} + var paths []dyn.Path bundle.ApplyFuncContext(context.Background(), myBundle, func(ctx context.Context, b *bundle.Bundle) { - dyn.WalkReadOnly(b.Config.Value(), func(p dyn.Path, v dyn.Value) error { + _ = dyn.WalkReadOnly(b.Config.Value(), func(p dyn.Path, v dyn.Value) error { paths = append(paths, p) return nil }) @@ -39,9 +39,7 @@ func benchmarkNoopBaseline(b *testing.B, numJobs int) { myBundle := Bundle(b, numJobs) for b.Loop() { - bundle.ApplyFuncContext(context.Background(), myBundle, func(ctx context.Context, b *bundle.Bundle) { - return - }) + bundle.ApplyFuncContext(context.Background(), myBundle, func(ctx context.Context, b *bundle.Bundle) {}) } } From 222996381e94c6879dc19c6766bab6ed34f13cab Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 11 Jul 2025 15:47:15 +0200 Subject: [PATCH 27/30] typo --- bundle/config/validate/required.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bundle/config/validate/required.go b/bundle/config/validate/required.go index 84732d6034..87cea83532 100644 --- a/bundle/config/validate/required.go +++ b/bundle/config/validate/required.go @@ -40,7 +40,7 @@ func (f *required) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics } err := dyn.WalkReadOnly(b.Config.Value(), func(p dyn.Path, v dyn.Value) error { - // If the path is not preset in the prefix tree, we do not need to validate any required + // If the path is not found in the prefix tree, we do not need to validate any required // fields in it. pattern, ok := trie.SearchPath(p) if !ok { From b4e1596f51a89c0f024db0bf85c26c819b7147e8 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 11 Jul 2025 15:51:46 +0200 Subject: [PATCH 28/30] add empty case --- acceptance/bundle/validate/required/databricks.yml | 4 ++++ acceptance/bundle/validate/required/output.txt | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/acceptance/bundle/validate/required/databricks.yml b/acceptance/bundle/validate/required/databricks.yml index 18947a0e77..3ac6ab033e 100644 --- a/acceptance/bundle/validate/required/databricks.yml +++ b/acceptance/bundle/validate/required/databricks.yml @@ -8,6 +8,10 @@ resources: foo: description: "hello" + # Empty string should not trigger a warning. 
+ bar: + name: "" + volumes: baz: name: "baz" diff --git a/acceptance/bundle/validate/required/output.txt b/acceptance/bundle/validate/required/output.txt index 5d7d275998..88c70428b1 100644 --- a/acceptance/bundle/validate/required/output.txt +++ b/acceptance/bundle/validate/required/output.txt @@ -2,15 +2,15 @@ >>> [CLI] bundle validate Warning: required field "catalog_name" is not set at resources.volumes.baz - in databricks.yml:13:7 + in databricks.yml:15:7 Warning: required field "name" is not set at resources.models.foo - in databricks.yml:10:7 + in databricks.yml:12:7 Warning: required field "schema_name" is not set at resources.volumes.baz - in databricks.yml:13:7 + in databricks.yml:15:7 Warning: required field "source" is not set at artifacts.zab.files[0] From 6e9988f85446786ebb7e629f01897c84e20a6c93 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 11 Jul 2025 15:58:29 +0200 Subject: [PATCH 29/30] update test and make it better --- .../bundle/validate/required/databricks.yml | 23 +++++++++++++++---- .../bundle/validate/required/output.txt | 20 +++++++++------- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/acceptance/bundle/validate/required/databricks.yml b/acceptance/bundle/validate/required/databricks.yml index 3ac6ab033e..fbda26fa81 100644 --- a/acceptance/bundle/validate/required/databricks.yml +++ b/acceptance/bundle/validate/required/databricks.yml @@ -1,17 +1,32 @@ artifacts: - zab: + my_artifact: files: - {} resources: + # Required field name is missing. models: - foo: + my_model_1: description: "hello" # Empty string should not trigger a warning. - bar: + my_model_2: name: "" + jobs: + my_job_1: + tasks: + # job_id being 0 should not trigger a warning. + - task_key: "task_key1" + run_job_task: + job_id: 0 + + # job_id not being set should trigger a warning. + - task_key: "task_key2" + run_job_task: + + # Catalog name and schema name are required. + # but are not set. 
volumes: - baz: + my_volume: name: "baz" diff --git a/acceptance/bundle/validate/required/output.txt b/acceptance/bundle/validate/required/output.txt index 88c70428b1..6b6d3a144b 100644 --- a/acceptance/bundle/validate/required/output.txt +++ b/acceptance/bundle/validate/required/output.txt @@ -1,19 +1,23 @@ >>> [CLI] bundle validate Warning: required field "catalog_name" is not set - at resources.volumes.baz - in databricks.yml:15:7 + at resources.volumes.my_volume + in databricks.yml:23:7 + +Warning: required field "job_id" is not set + at resources.jobs.my_job_1.tasks[1].run_job_task + in databricks.yml:14:25 Warning: required field "name" is not set - at resources.models.foo - in databricks.yml:12:7 + at resources.models.my_model_1 + in databricks.yml:18:7 Warning: required field "schema_name" is not set - at resources.volumes.baz - in databricks.yml:15:7 + at resources.volumes.my_volume + in databricks.yml:23:7 Warning: required field "source" is not set - at artifacts.zab.files[0] + at artifacts.my_artifact.files[0] in databricks.yml:4:9 Name: test-bundle @@ -22,4 +26,4 @@ Workspace: User: [USERNAME] Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle/default -Found 4 warnings +Found 5 warnings From 8a7e42329cbf74db8cae1f490ccca0df29f49a9c Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 18 Jul 2025 14:53:26 +0200 Subject: [PATCH 30/30] update golden files --- .../bundle/validate/empty_resources/empty_dict/output.txt | 4 ++++ .../bundle/validate/empty_resources/with_grants/output.txt | 4 ++++ .../validate/empty_resources/with_permissions/output.txt | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/acceptance/bundle/validate/empty_resources/empty_dict/output.txt b/acceptance/bundle/validate/empty_resources/empty_dict/output.txt index c891f934dd..e134fa92e3 100644 --- a/acceptance/bundle/validate/empty_resources/empty_dict/output.txt +++ b/acceptance/bundle/validate/empty_resources/empty_dict/output.txt @@ -185,6 +185,10 @@ app resource 'rname' is missing required source_code_path field } === resources.secret_scopes.rname === +Warning: required field "name" is not set + at resources.secret_scopes.rname + in databricks.yml:6:12 + { "secret_scopes": { "rname": { diff --git a/acceptance/bundle/validate/empty_resources/with_grants/output.txt b/acceptance/bundle/validate/empty_resources/with_grants/output.txt index 54f5b550bb..70c438c113 100644 --- a/acceptance/bundle/validate/empty_resources/with_grants/output.txt +++ b/acceptance/bundle/validate/empty_resources/with_grants/output.txt @@ -230,6 +230,10 @@ Warning: unknown field: grants at resources.secret_scopes.rname in databricks.yml:7:7 +Warning: required field "name" is not set + at resources.secret_scopes.rname + in databricks.yml:7:7 + { "secret_scopes": { "rname": { diff --git a/acceptance/bundle/validate/empty_resources/with_permissions/output.txt b/acceptance/bundle/validate/empty_resources/with_permissions/output.txt index e775ce25c5..c7af0f9133 100644 --- a/acceptance/bundle/validate/empty_resources/with_permissions/output.txt +++ b/acceptance/bundle/validate/empty_resources/with_permissions/output.txt @@ -203,6 +203,10 @@ app resource 'rname' is missing required source_code_path field } === resources.secret_scopes.rname === +Warning: required field "name" is not set + at resources.secret_scopes.rname + in databricks.yml:7:7 + { "secret_scopes": { "rname": {