From ea73de5d66c0147d7ab64f82c6618078116cb3fb Mon Sep 17 00:00:00 2001
From: Lennart Kats
Date: Wed, 24 Dec 2025 15:12:29 +0100
Subject: [PATCH 01/18] Add init-template subcommands for jobs, pipelines, and empty bundles
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Refactors init-template into dedicated subcommands with comprehensive L2
guidance for each resource type:

- Add job subcommand using default-python template
- Add pipeline subcommand using lakeflow-pipelines template
- Add empty subcommand using default-minimal template
- Create L2 templates: target_jobs, target_pipelines, target_mixed
- Add AGENTS.tmpl for scaffolded projects
- Parameterize default catalog in discover flow
- Update detector to show "mixed" guidance for non-app-only projects
- Move implementation to cmd/init_template/ subpackage
- Make databricks_discover description more forceful (MANDATORY)

šŸ¤– Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 experimental/apps-mcp/cmd/apps_mcp.go         |   2 +
 .../app.go}                                   | 136 +++--------------
 .../apps-mcp/cmd/init_template/common.go      | 132 ++++++++++++++++
 .../apps-mcp/cmd/init_template/empty.go       | 143 +++++++++++++++++
 .../cmd/init_template/init_template.go        |  25 +++
 .../apps-mcp/cmd/init_template/job.go         | 133 ++++++++++++++++
 .../apps-mcp/cmd/init_template/pipeline.go    | 144 ++++++++++++++++++
 experimental/apps-mcp/cmd/tools.go            |   3 +-
 experimental/apps-mcp/lib/common/output.go    |  15 ++
 .../apps-mcp/lib/detector/bundle_detector.go  |  28 +++-
 experimental/apps-mcp/lib/prompts/AGENTS.tmpl |  35 +++++
 experimental/apps-mcp/lib/prompts/flow.tmpl   |  23 ++-
 .../apps-mcp/lib/prompts/target_jobs.tmpl     |  57 +++++++
 .../apps-mcp/lib/prompts/target_mixed.tmpl    |  58 +++++++
 .../lib/prompts/target_pipelines.tmpl         |  61 ++++++++
 .../lib/providers/clitools/discover.go        |  38 +++--
 .../lib/providers/clitools/provider.go        |   2 +-
 17 files changed, 900 insertions(+), 135 deletions(-)
 rename experimental/apps-mcp/cmd/{init_template.go => init_template/app.go} (69%)
 create mode 100644 experimental/apps-mcp/cmd/init_template/common.go
 create mode 100644 experimental/apps-mcp/cmd/init_template/empty.go
 create mode 100644 experimental/apps-mcp/cmd/init_template/init_template.go
 create mode 100644 experimental/apps-mcp/cmd/init_template/job.go
 create mode 100644 experimental/apps-mcp/cmd/init_template/pipeline.go
 create mode 100644 experimental/apps-mcp/lib/prompts/AGENTS.tmpl
 create mode 100644 experimental/apps-mcp/lib/prompts/target_jobs.tmpl
 create mode 100644 experimental/apps-mcp/lib/prompts/target_mixed.tmpl
 create mode 100644 experimental/apps-mcp/lib/prompts/target_pipelines.tmpl

diff --git a/experimental/apps-mcp/cmd/apps_mcp.go b/experimental/apps-mcp/cmd/apps_mcp.go
index 83da91447c..67bc8213be 100644
--- a/experimental/apps-mcp/cmd/apps_mcp.go
+++ b/experimental/apps-mcp/cmd/apps_mcp.go
@@ -2,6 +2,7 @@ package mcp

 import (
 	mcplib "github.com/databricks/cli/experimental/apps-mcp/lib"
+	"github.com/databricks/cli/experimental/apps-mcp/cmd/init_template"
 	"github.com/databricks/cli/experimental/apps-mcp/lib/server"
 	"github.com/databricks/cli/libs/log"
 	"github.com/spf13/cobra"
@@ -51,6 +52,7 @@ The server communicates via stdio using the Model Context Protocol.`,

 	cmd.AddCommand(newInstallCmd())
 	cmd.AddCommand(newToolsCmd())
+	cmd.AddCommand(init_template.NewInitTemplateCommand())

 	return cmd
 }
diff --git a/experimental/apps-mcp/cmd/init_template.go b/experimental/apps-mcp/cmd/init_template/app.go
similarity index 69%
rename from
experimental/apps-mcp/cmd/init_template.go rename to experimental/apps-mcp/cmd/init_template/app.go index a003b14d37..a27da91871 100644 --- a/experimental/apps-mcp/cmd/init_template.go +++ b/experimental/apps-mcp/cmd/init_template/app.go @@ -1,4 +1,4 @@ -package mcp +package init_template import ( "context" @@ -7,8 +7,6 @@ import ( "fmt" "os" "path/filepath" - "sort" - "strings" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/experimental/apps-mcp/lib/common" @@ -19,6 +17,13 @@ import ( "github.com/spf13/cobra" ) +const ( + defaultTemplateRepo = "https://github.com/databricks/cli" + defaultTemplateDir = "experimental/apps-mcp/templates/appkit" + defaultBranch = "main" + templatePathEnvVar = "DATABRICKS_APPKIT_TEMPLATE_PATH" +) + func validateAppNameLength(projectName string) error { const maxAppNameLength = 30 const devTargetPrefix = "dev-" @@ -73,103 +78,19 @@ func readClaudeMd(ctx context.Context, configFile string) { cmdio.LogString(ctx, "=================\n") } -// generateFileTree creates a tree-style visualization of the file structure. -// Collapses directories with more than 10 files to avoid clutter. -func generateFileTree(outputDir string) (string, error) { - const maxFilesToShow = 10 - - // collect all files in the output directory - var allFiles []string - err := filepath.Walk(outputDir, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - if !info.IsDir() { - relPath, err := filepath.Rel(outputDir, path) - if err != nil { - return err - } - allFiles = append(allFiles, filepath.ToSlash(relPath)) - } - return nil - }) - if err != nil { - return "", err - } - - // build a tree structure - tree := make(map[string][]string) - - for _, relPath := range allFiles { - parts := strings.Split(relPath, "/") - - if len(parts) == 1 { - // root level file - tree[""] = append(tree[""], parts[0]) - } else { - // file in subdirectory - dir := strings.Join(parts[:len(parts)-1], "/") - fileName := parts[len(parts)-1] - tree[dir] = append(tree[dir], fileName) - } - } - - // format as tree - var output strings.Builder - var sortedDirs []string - for dir := range tree { - sortedDirs = append(sortedDirs, dir) - } - sort.Strings(sortedDirs) - - for _, dir := range sortedDirs { - filesInDir := tree[dir] - if dir == "" { - // root files - always show all - for _, file := range filesInDir { - output.WriteString(file) - output.WriteString("\n") - } - } else { - // directory - output.WriteString(dir) - output.WriteString("/\n") - if len(filesInDir) <= maxFilesToShow { - // show all files - for _, file := range filesInDir { - output.WriteString(" ") - output.WriteString(file) - output.WriteString("\n") - } - } else { - // collapse large directories - output.WriteString(fmt.Sprintf(" (%d files)\n", len(filesInDir))) - } - } - } - - return output.String(), nil -} - -const ( - defaultTemplateRepo = "https://github.com/databricks/cli" - defaultTemplateDir = "experimental/apps-mcp/templates/appkit" - defaultBranch = "main" - templatePathEnvVar = "DATABRICKS_APPKIT_TEMPLATE_PATH" -) - -func newInitTemplateCmd() *cobra.Command { +// newAppCmd creates the app subcommand for init-template. +func newAppCmd() *cobra.Command { cmd := &cobra.Command{ - Use: "init-template", + Use: "app", Short: "Initialize a Databricks App using the appkit template", Args: cobra.NoArgs, Long: `Initialize a Databricks App using the appkit template. 
Examples: - experimental apps-mcp tools init-template --name my-app - experimental apps-mcp tools init-template --name my-app --warehouse abc123 - experimental apps-mcp tools init-template --name my-app --description "My cool app" - experimental apps-mcp tools init-template --name my-app --output-dir ./projects + experimental apps-mcp tools init-template app --name my-app + experimental apps-mcp tools init-template app --name my-app --warehouse abc123 + experimental apps-mcp tools init-template app --name my-app --description "My cool app" + experimental apps-mcp tools init-template app --name my-app --output-dir ./projects Environment variables: DATABRICKS_APPKIT_TEMPLATE_PATH Override template source with local path (for development) @@ -264,24 +185,11 @@ After initialization: } // Write config to temp file - tmpFile, err := os.CreateTemp("", "mcp-template-config-*.json") - if err != nil { - return fmt.Errorf("create temp config file: %w", err) - } - defer os.Remove(tmpFile.Name()) - - configBytes, err := json.Marshal(configMap) + configFile, err := writeConfigToTempFile(configMap) if err != nil { - return fmt.Errorf("marshal config: %w", err) - } - if _, err := tmpFile.Write(configBytes); err != nil { - return fmt.Errorf("write config file: %w", err) - } - if err := tmpFile.Close(); err != nil { - return fmt.Errorf("close config file: %w", err) + return err } - - configFile := tmpFile.Name() + defer os.Remove(configFile) // Create output directory if specified and doesn't exist if outputDir != "" { @@ -317,17 +225,11 @@ After initialization: } // Count files and get absolute path - fileCount := 0 absOutputDir, err := filepath.Abs(actualOutputDir) if err != nil { absOutputDir = actualOutputDir } - _ = filepath.Walk(absOutputDir, func(path string, info os.FileInfo, err error) error { - if err == nil && !info.IsDir() { - fileCount++ - } - return nil - }) + fileCount := countFiles(absOutputDir) cmdio.LogString(ctx, common.FormatScaffoldSuccess("appkit", absOutputDir, fileCount)) // Generate and print file tree structure diff --git a/experimental/apps-mcp/cmd/init_template/common.go b/experimental/apps-mcp/cmd/init_template/common.go new file mode 100644 index 0000000000..87d720d7af --- /dev/null +++ b/experimental/apps-mcp/cmd/init_template/common.go @@ -0,0 +1,132 @@ +package init_template + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "sort" + "strings" + + "github.com/databricks/cli/experimental/apps-mcp/lib/prompts" +) + +// countFiles counts the number of files in a directory. +func countFiles(dir string) int { + count := 0 + _ = filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { + if err == nil && !info.IsDir() { + count++ + } + return nil + }) + return count +} + +// writeConfigToTempFile writes a config map to a temporary JSON file. 
+func writeConfigToTempFile(configMap map[string]any) (string, error) { + tmpFile, err := os.CreateTemp("", "mcp-template-config-*.json") + if err != nil { + return "", fmt.Errorf("create temp config file: %w", err) + } + + configBytes, err := json.Marshal(configMap) + if err != nil { + os.Remove(tmpFile.Name()) + return "", fmt.Errorf("marshal config: %w", err) + } + if _, err := tmpFile.Write(configBytes); err != nil { + os.Remove(tmpFile.Name()) + return "", fmt.Errorf("write config file: %w", err) + } + if err := tmpFile.Close(); err != nil { + os.Remove(tmpFile.Name()) + return "", fmt.Errorf("close config file: %w", err) + } + + return tmpFile.Name(), nil +} + +// generateFileTree creates a tree-style visualization of the file structure. +// Collapses directories with more than 10 files to avoid clutter. +func generateFileTree(outputDir string) (string, error) { + const maxFilesToShow = 10 + + var allFiles []string + err := filepath.Walk(outputDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if !info.IsDir() { + relPath, err := filepath.Rel(outputDir, path) + if err != nil { + return err + } + allFiles = append(allFiles, filepath.ToSlash(relPath)) + } + return nil + }) + if err != nil { + return "", err + } + + tree := make(map[string][]string) + + for _, relPath := range allFiles { + parts := strings.Split(relPath, "/") + + if len(parts) == 1 { + tree[""] = append(tree[""], parts[0]) + } else { + dir := strings.Join(parts[:len(parts)-1], "/") + fileName := parts[len(parts)-1] + tree[dir] = append(tree[dir], fileName) + } + } + + var output strings.Builder + var sortedDirs []string + for dir := range tree { + sortedDirs = append(sortedDirs, dir) + } + sort.Strings(sortedDirs) + + for _, dir := range sortedDirs { + filesInDir := tree[dir] + if dir == "" { + for _, file := range filesInDir { + output.WriteString(file) + output.WriteString("\n") + } + } else { + output.WriteString(dir) + output.WriteString("/\n") + if len(filesInDir) <= maxFilesToShow { + for _, file := range filesInDir { + output.WriteString(" ") + output.WriteString(file) + output.WriteString("\n") + } + } else { + output.WriteString(fmt.Sprintf(" (%d files)\n", len(filesInDir))) + } + } + } + + return output.String(), nil +} + +// writeAgentFiles writes CLAUDE.md and AGENTS.md files to the output directory. 
+func writeAgentFiles(outputDir string, data map[string]any) error { + content := prompts.MustExecuteTemplate("AGENTS.tmpl", data) + + // Write both CLAUDE.md and AGENTS.md + if err := os.WriteFile(filepath.Join(outputDir, "CLAUDE.md"), []byte(content), 0o644); err != nil { + return fmt.Errorf("failed to write CLAUDE.md: %w", err) + } + if err := os.WriteFile(filepath.Join(outputDir, "AGENTS.md"), []byte(content), 0o644); err != nil { + return fmt.Errorf("failed to write AGENTS.md: %w", err) + } + + return nil +} diff --git a/experimental/apps-mcp/cmd/init_template/empty.go b/experimental/apps-mcp/cmd/init_template/empty.go new file mode 100644 index 0000000000..5c0aba6f5f --- /dev/null +++ b/experimental/apps-mcp/cmd/init_template/empty.go @@ -0,0 +1,143 @@ +package init_template + +import ( + "errors" + "fmt" + "os" + "path/filepath" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/experimental/apps-mcp/lib/common" + "github.com/databricks/cli/experimental/apps-mcp/lib/prompts" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/template" + "github.com/spf13/cobra" +) + +// newEmptyCmd creates the empty subcommand for init-template. +func newEmptyCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "empty", + Short: "Initialize an empty project for custom resources", + Args: cobra.NoArgs, + Long: `Initialize an empty Databricks Asset Bundle project. + +Use this for deploying resource types OTHER than apps, jobs, or pipelines, such as: +- Dashboards (Lakeview dashboards) +- Alerts (SQL alerts) +- Model serving endpoints +- Clusters +- Schemas and tables +- Any other Databricks resources + +This creates a minimal project structure without sample code. For apps, jobs, or pipelines, +use the dedicated 'app', 'job', or 'pipeline' commands instead. + +Examples: + experimental apps-mcp tools init-template empty --name my_dashboard_project + experimental apps-mcp tools init-template empty --name my_alerts --language sql --catalog my_catalog + experimental apps-mcp tools init-template empty --name my_project --output-dir ./projects + +After initialization: + Add resource definitions in resources/ (e.g., resources/my_dashboard.dashboard.yml) + Then deploy: databricks bundle deploy --target dev +`, + } + + var name string + var catalog string + var language string + var outputDir string + + cmd.Flags().StringVar(&name, "name", "", "Project name (required)") + cmd.Flags().StringVar(&catalog, "catalog", "", "Default catalog for tables (defaults to workspace default)") + cmd.Flags().StringVar(&language, "language", "python", "Initial language: 'python', 'sql', or 'other'") + cmd.Flags().StringVar(&outputDir, "output-dir", "", "Directory to write the initialized template to") + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + + if name == "" { + return errors.New("--name is required. 
Example: init-template empty --name my_project") + } + + if language != "python" && language != "sql" && language != "other" { + return fmt.Errorf("--language must be 'python', 'sql', or 'other', got '%s'", language) + } + + configMap := map[string]any{ + "project_name": name, + "include_job": "no", + "include_pipeline": "no", + "include_python": "no", + "serverless": "yes", + "personal_schemas": "yes", + "language_choice": language, + "lakeflow_only": "no", + "enable_pydabs": "no", + } + if catalog != "" { + configMap["default_catalog"] = catalog + } + + configFile, err := writeConfigToTempFile(configMap) + if err != nil { + return err + } + defer os.Remove(configFile) + + if outputDir != "" { + if err := os.MkdirAll(outputDir, 0o755); err != nil { + return fmt.Errorf("create output directory: %w", err) + } + } + + r := template.Resolver{ + TemplatePathOrUrl: string(template.DefaultMinimal), + ConfigFile: configFile, + OutputDir: outputDir, + } + + tmpl, err := r.Resolve(ctx) + if err != nil { + return err + } + defer tmpl.Reader.Cleanup(ctx) + + err = tmpl.Writer.Materialize(ctx, tmpl.Reader) + if err != nil { + return err + } + tmpl.Writer.LogTelemetry(ctx) + + actualOutputDir := name + if outputDir != "" { + actualOutputDir = filepath.Join(outputDir, name) + } + + absOutputDir, err := filepath.Abs(actualOutputDir) + if err != nil { + absOutputDir = actualOutputDir + } + fileCount := countFiles(absOutputDir) + cmdio.LogString(ctx, common.FormatProjectScaffoldSuccess("empty", "šŸ“¦", "default-minimal", absOutputDir, fileCount, "")) + + fileTree, err := generateFileTree(absOutputDir) + if err == nil && fileTree != "" { + cmdio.LogString(ctx, "\nFile structure:") + cmdio.LogString(ctx, fileTree) + } + + // Write CLAUDE.md and AGENTS.md files + if err := writeAgentFiles(absOutputDir, map[string]any{}); err != nil { + return fmt.Errorf("failed to write agent files: %w", err) + } + + targetMixed := prompts.MustExecuteTemplate("target_mixed.tmpl", map[string]any{}) + cmdio.LogString(ctx, targetMixed) + + return nil + } + return cmd +} diff --git a/experimental/apps-mcp/cmd/init_template/init_template.go b/experimental/apps-mcp/cmd/init_template/init_template.go new file mode 100644 index 0000000000..3e6adc2228 --- /dev/null +++ b/experimental/apps-mcp/cmd/init_template/init_template.go @@ -0,0 +1,25 @@ +package init_template + +import ( + "github.com/spf13/cobra" +) + +// NewInitTemplateCommand creates a command group for initializing project templates. +func NewInitTemplateCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "init-template", + Short: "Initialize project templates", + Long: `Initialize project templates for Databricks resources. 
+ +Subcommands: + app Initialize a Databricks App using the appkit template + job Initialize a job project using the default-python template + pipeline Initialize a Lakeflow pipeline project + empty Initialize an empty bundle for custom resources (dashboards, alerts, etc.)`, + } + cmd.AddCommand(newAppCmd()) + cmd.AddCommand(newJobCmd()) + cmd.AddCommand(newPipelineCmd()) + cmd.AddCommand(newEmptyCmd()) + return cmd +} diff --git a/experimental/apps-mcp/cmd/init_template/job.go b/experimental/apps-mcp/cmd/init_template/job.go new file mode 100644 index 0000000000..2107544ba8 --- /dev/null +++ b/experimental/apps-mcp/cmd/init_template/job.go @@ -0,0 +1,133 @@ +package init_template + +import ( + "errors" + "fmt" + "os" + "path/filepath" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/experimental/apps-mcp/lib/common" + "github.com/databricks/cli/experimental/apps-mcp/lib/prompts" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/template" + "github.com/spf13/cobra" +) + +// newJobCmd creates the job subcommand for init-template. +func newJobCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "job", + Short: "Initialize a job project using the default-python template", + Args: cobra.NoArgs, + Long: `Initialize a job project using the default-python template. + +This creates a project with: +- Python notebooks in src/ directory +- A wheel package defined in pyproject.toml +- Job definitions in resources/ using databricks.yml +- Serverless compute enabled by default +- Personal schemas for development + +Examples: + experimental apps-mcp tools init-template job --name my_job + experimental apps-mcp tools init-template job --name my_job --catalog my_catalog + experimental apps-mcp tools init-template job --name my_job --output-dir ./projects + +After initialization: + databricks bundle deploy --target dev +`, + } + + var name string + var catalog string + var outputDir string + + cmd.Flags().StringVar(&name, "name", "", "Project name (required)") + cmd.Flags().StringVar(&catalog, "catalog", "", "Default catalog for tables (defaults to workspace default)") + cmd.Flags().StringVar(&outputDir, "output-dir", "", "Directory to write the initialized template to") + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + + if name == "" { + return errors.New("--name is required. 
Example: init-template job --name my_job") + } + + configMap := map[string]any{ + "project_name": name, + "include_job": "yes", + "include_pipeline": "no", + "include_python": "yes", + "serverless": "yes", + "personal_schemas": "yes", + } + if catalog != "" { + configMap["default_catalog"] = catalog + } + + configFile, err := writeConfigToTempFile(configMap) + if err != nil { + return err + } + defer os.Remove(configFile) + + if outputDir != "" { + if err := os.MkdirAll(outputDir, 0o755); err != nil { + return fmt.Errorf("create output directory: %w", err) + } + } + + r := template.Resolver{ + TemplatePathOrUrl: string(template.DefaultPython), + ConfigFile: configFile, + OutputDir: outputDir, + } + + tmpl, err := r.Resolve(ctx) + if err != nil { + return err + } + defer tmpl.Reader.Cleanup(ctx) + + err = tmpl.Writer.Materialize(ctx, tmpl.Reader) + if err != nil { + return err + } + tmpl.Writer.LogTelemetry(ctx) + + actualOutputDir := name + if outputDir != "" { + actualOutputDir = filepath.Join(outputDir, name) + } + + absOutputDir, err := filepath.Abs(actualOutputDir) + if err != nil { + absOutputDir = actualOutputDir + } + fileCount := countFiles(absOutputDir) + cmdio.LogString(ctx, common.FormatProjectScaffoldSuccess("job", "āš™ļø", "default-python", absOutputDir, fileCount, "")) + + fileTree, err := generateFileTree(absOutputDir) + if err == nil && fileTree != "" { + cmdio.LogString(ctx, "\nFile structure:") + cmdio.LogString(ctx, fileTree) + } + + // Write CLAUDE.md and AGENTS.md files + if err := writeAgentFiles(absOutputDir, map[string]any{}); err != nil { + return fmt.Errorf("failed to write agent files: %w", err) + } + + // Show L2 guidance: mixed (for adding any resource) + jobs (for developing jobs) + targetMixed := prompts.MustExecuteTemplate("target_mixed.tmpl", map[string]any{}) + cmdio.LogString(ctx, targetMixed) + + targetJobs := prompts.MustExecuteTemplate("target_jobs.tmpl", map[string]any{}) + cmdio.LogString(ctx, targetJobs) + + return nil + } + return cmd +} diff --git a/experimental/apps-mcp/cmd/init_template/pipeline.go b/experimental/apps-mcp/cmd/init_template/pipeline.go new file mode 100644 index 0000000000..563b6b14ec --- /dev/null +++ b/experimental/apps-mcp/cmd/init_template/pipeline.go @@ -0,0 +1,144 @@ +package init_template + +import ( + "errors" + "fmt" + "os" + "path/filepath" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/experimental/apps-mcp/lib/common" + "github.com/databricks/cli/experimental/apps-mcp/lib/prompts" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/template" + "github.com/spf13/cobra" +) + +// newPipelineCmd creates the pipeline subcommand for init-template. +func newPipelineCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "pipeline", + Short: "Initialize a Lakeflow pipeline project", + Args: cobra.NoArgs, + Long: `Initialize a Lakeflow Declarative Pipeline project. 
+ +This creates a project with: +- Pipeline definitions in src/ directory (Python or SQL) +- Pipeline configuration in resources/ using databricks.yml +- Serverless compute enabled by default +- Personal schemas for development + +Examples: + experimental apps-mcp tools init-template pipeline --name my_pipeline --language python + experimental apps-mcp tools init-template pipeline --name my_pipeline --language sql + experimental apps-mcp tools init-template pipeline --name my_pipeline --language python --catalog my_catalog + experimental apps-mcp tools init-template pipeline --name my_pipeline --language sql --output-dir ./projects + +After initialization: + databricks bundle deploy --target dev +`, + } + + var name string + var language string + var catalog string + var outputDir string + + cmd.Flags().StringVar(&name, "name", "", "Project name (required)") + cmd.Flags().StringVar(&language, "language", "", "Pipeline language: 'python' or 'sql' (required)") + cmd.Flags().StringVar(&catalog, "catalog", "", "Default catalog for tables (defaults to workspace default)") + cmd.Flags().StringVar(&outputDir, "output-dir", "", "Directory to write the initialized template to") + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + + if name == "" { + return errors.New("--name is required. Example: init-template pipeline --name my_pipeline --language python") + } + if language == "" { + return errors.New("--language is required. Choose 'python' or 'sql'. Example: init-template pipeline --name my_pipeline --language python") + } + if language != "python" && language != "sql" { + return fmt.Errorf("--language must be 'python' or 'sql', got '%s'", language) + } + + configMap := map[string]any{ + "project_name": name, + "lakeflow_only": "yes", + "include_job": "no", + "include_pipeline": "yes", + "include_python": "no", + "serverless": "yes", + "personal_schemas": "yes", + "language": language, + } + if catalog != "" { + configMap["default_catalog"] = catalog + } + + configFile, err := writeConfigToTempFile(configMap) + if err != nil { + return err + } + defer os.Remove(configFile) + + if outputDir != "" { + if err := os.MkdirAll(outputDir, 0o755); err != nil { + return fmt.Errorf("create output directory: %w", err) + } + } + + r := template.Resolver{ + TemplatePathOrUrl: string(template.LakeflowPipelines), + ConfigFile: configFile, + OutputDir: outputDir, + } + + tmpl, err := r.Resolve(ctx) + if err != nil { + return err + } + defer tmpl.Reader.Cleanup(ctx) + + err = tmpl.Writer.Materialize(ctx, tmpl.Reader) + if err != nil { + return err + } + tmpl.Writer.LogTelemetry(ctx) + + actualOutputDir := name + if outputDir != "" { + actualOutputDir = filepath.Join(outputDir, name) + } + + absOutputDir, err := filepath.Abs(actualOutputDir) + if err != nil { + absOutputDir = actualOutputDir + } + fileCount := countFiles(absOutputDir) + extraDetails := "Language: " + language + cmdio.LogString(ctx, common.FormatProjectScaffoldSuccess("pipeline", "šŸ”„", "lakeflow-pipelines", absOutputDir, fileCount, extraDetails)) + + fileTree, err := generateFileTree(absOutputDir) + if err == nil && fileTree != "" { + cmdio.LogString(ctx, "\nFile structure:") + cmdio.LogString(ctx, fileTree) + } + + // Write CLAUDE.md and AGENTS.md files + if err := writeAgentFiles(absOutputDir, map[string]any{}); err != nil { + return fmt.Errorf("failed to write agent files: %w", err) + } + + // Show L2 guidance: mixed (for adding any resource) + pipelines (for 
developing pipelines) + targetMixed := prompts.MustExecuteTemplate("target_mixed.tmpl", map[string]any{}) + cmdio.LogString(ctx, targetMixed) + + targetPipelines := prompts.MustExecuteTemplate("target_pipelines.tmpl", map[string]any{}) + cmdio.LogString(ctx, targetPipelines) + + return nil + } + return cmd +} diff --git a/experimental/apps-mcp/cmd/tools.go b/experimental/apps-mcp/cmd/tools.go index 6d88451147..473bacddf3 100644 --- a/experimental/apps-mcp/cmd/tools.go +++ b/experimental/apps-mcp/cmd/tools.go @@ -1,6 +1,7 @@ package mcp import ( + "github.com/databricks/cli/experimental/apps-mcp/cmd/init_template" "github.com/spf13/cobra" ) @@ -13,7 +14,7 @@ func newToolsCmd() *cobra.Command { cmd.AddCommand(newQueryCmd()) cmd.AddCommand(newDiscoverSchemaCmd()) - cmd.AddCommand(newInitTemplateCmd()) + cmd.AddCommand(init_template.NewInitTemplateCommand()) cmd.AddCommand(newValidateCmd()) cmd.AddCommand(newDeployCmd()) diff --git a/experimental/apps-mcp/lib/common/output.go b/experimental/apps-mcp/lib/common/output.go index 7454950870..92e3f1d1a4 100644 --- a/experimental/apps-mcp/lib/common/output.go +++ b/experimental/apps-mcp/lib/common/output.go @@ -45,3 +45,18 @@ func FormatDeploymentFailure(appName, message string) string { return fmt.Sprintf("%sāŒ Deployment failed for '%s'\n\n%s\n", header, appName, message) } + +// FormatProjectScaffoldSuccess formats a success message for project scaffolding. +// projectType examples: "job", "pipeline", "empty" +// emoji examples: "āš™ļø" (job), "šŸ”„" (pipeline), "šŸ“¦" (empty) +// extraDetails can include additional info like "Language: python" +func FormatProjectScaffoldSuccess(projectType, emoji, templateName, workDir string, filesCopied int, extraDetails string) string { + header := FormatBrandedHeader(emoji, projectType+" project scaffolded successfully") + result := fmt.Sprintf("%sāœ… Created %s %s project at %s\n\n", + header, templateName, projectType, workDir) + if extraDetails != "" { + result += extraDetails + "\n" + } + result += fmt.Sprintf("Files copied: %d\n\nTemplate: %s\n", filesCopied, templateName) + return result +} diff --git a/experimental/apps-mcp/lib/detector/bundle_detector.go b/experimental/apps-mcp/lib/detector/bundle_detector.go index c88c5d19b1..859ff7ed18 100644 --- a/experimental/apps-mcp/lib/detector/bundle_detector.go +++ b/experimental/apps-mcp/lib/detector/bundle_detector.go @@ -40,15 +40,37 @@ func (d *BundleDetector) Detect(ctx context.Context, workDir string, detected *D } // extract target types from fully loaded resources - if len(b.Config.Resources.Apps) > 0 { + hasApps := len(b.Config.Resources.Apps) > 0 + hasJobs := len(b.Config.Resources.Jobs) > 0 + hasPipelines := len(b.Config.Resources.Pipelines) > 0 + + if hasApps { detected.TargetTypes = append(detected.TargetTypes, "apps") } - if len(b.Config.Resources.Jobs) > 0 { + if hasJobs { detected.TargetTypes = append(detected.TargetTypes, "jobs") } - if len(b.Config.Resources.Pipelines) > 0 { + if hasPipelines { detected.TargetTypes = append(detected.TargetTypes, "pipelines") } + // Include "mixed" guidance for all projects EXCEPT app-only projects. + // This provides general resource addition guidance (target_mixed.tmpl). + // We exclude app-only projects to provide a dedicated app development experience + // focused on app-specific patterns (target_apps.tmpl has comprehensive app guidance). 
+ isAppOnly := hasApps && !hasJobs && !hasPipelines && + len(b.Config.Resources.Clusters) == 0 && + len(b.Config.Resources.Dashboards) == 0 && + len(b.Config.Resources.Experiments) == 0 && + len(b.Config.Resources.ModelServingEndpoints) == 0 && + len(b.Config.Resources.RegisteredModels) == 0 && + len(b.Config.Resources.Schemas) == 0 && + len(b.Config.Resources.QualityMonitors) == 0 && + len(b.Config.Resources.Volumes) == 0 + + if !isAppOnly { + detected.TargetTypes = append(detected.TargetTypes, "mixed") + } + return nil } diff --git a/experimental/apps-mcp/lib/prompts/AGENTS.tmpl b/experimental/apps-mcp/lib/prompts/AGENTS.tmpl new file mode 100644 index 0000000000..df7d451d75 --- /dev/null +++ b/experimental/apps-mcp/lib/prompts/AGENTS.tmpl @@ -0,0 +1,35 @@ +{{- /* + * Template for the AGENTS.md / CLAUDE.md file created in new projects. + * + * Goal for this file is to reinforce with agents that they need to call the databricks_discover tool. + * A secondary goal is to offer a placeholder to users to extend with their own instructions. + * It is a non-goal to inline all instructions. + * + * Note the app template currently has its own AGENTS.md / CLAUDE.md file. + * + */ -}} + +# Databricks Asset Bundles Project + +This project uses Databricks Asset Bundles for deployment. Add project-specific instructions below. + +## For AI Agents: MANDATORY First Action + +**BEFORE any other action, call `mcp__databricks-mcp__databricks_discover` with this project's path.** + +This is required because the discover tool: +- Detects project state (new, deployed, running) and provides relevant guidance +- Shows available CLI commands and recommended next steps +- Configures workspace authentication + +Without this context, you cannot effectively help with this project. + +If the tool is unavailable, the user needs to set up the Databricks CLI MCP server: +1. Install CLI: https://docs.databricks.com/dev-tools/cli/install +2. Run: `databricks experimental apps-mcp install` + +--- + +## Project Instructions + + diff --git a/experimental/apps-mcp/lib/prompts/flow.tmpl b/experimental/apps-mcp/lib/prompts/flow.tmpl index 40da4cf0f7..12a1eafbdd 100644 --- a/experimental/apps-mcp/lib/prompts/flow.tmpl +++ b/experimental/apps-mcp/lib/prompts/flow.tmpl @@ -25,14 +25,29 @@ Use `invoke_databricks_cli ''` to run any Databricks CLI command. ### Project scaffolding +IMPORTANT: Always use 'experimental apps-mcp tools init-template' commands below instead of 'databricks bundle init'. +The init-template commands create agent-friendly projects with AGENTS.md/CLAUDE.md guidance files and proper MCP integration. 
+ For apps: +invoke_databricks_cli 'experimental apps-mcp tools init-template app --name my-app --description "My app description"' -invoke_databricks_cli 'experimental apps-mcp tools init-template --name my-app --description "My app description"' +For jobs (Python notebooks with wheel package): +invoke_databricks_cli 'experimental apps-mcp tools init-template job --name my_job' +invoke_databricks_cli 'experimental apps-mcp tools init-template job --name my_job --catalog my_catalog' -- App name must be ≤26 characters (dev- prefix adds 4 chars, max total 30) -- Use lowercase letters, numbers, and hyphens only +For pipelines (Lakeflow Declarative Pipelines): +invoke_databricks_cli 'experimental apps-mcp tools init-template pipeline --name my_pipeline --language python' +invoke_databricks_cli 'experimental apps-mcp tools init-template pipeline --name my_pipeline --language sql --catalog my_catalog' +Note: --language is required (python or sql) -Other types of projects are not yet supported. +For custom resources (dashboards, alerts, model serving, etc.): +invoke_databricks_cli 'experimental apps-mcp tools init-template empty --name my_project' +Note: Use this for resources OTHER than apps, jobs, or pipelines + +Notes: +- App name must be ≤26 characters (dev- prefix adds 4 chars, max total 30) +- Job/pipeline/project names: letters, numbers, underscores only +- --catalog defaults to workspace default catalog{{if .DefaultCatalog}} (currently '{{.DefaultCatalog}}'){{end}} ### Custom SQL Queries diff --git a/experimental/apps-mcp/lib/prompts/target_jobs.tmpl b/experimental/apps-mcp/lib/prompts/target_jobs.tmpl new file mode 100644 index 0000000000..53762fc23e --- /dev/null +++ b/experimental/apps-mcp/lib/prompts/target_jobs.tmpl @@ -0,0 +1,57 @@ +{{- /* + * L2: Target-specific guidance for Lakeflow Jobs. + * + * Injected when: target type "jobs" is detected or after init-template job. + * Contains: job-specific development patterns, task configuration, code examples. + * Note: For adding NEW resources (dashboards, alerts, etc.), see target_mixed.tmpl guidance. + */ -}} + +## Lakeflow Jobs Development + +This guidance is for DEVELOPING jobs in this project. To ADD other resource types (dashboards, alerts, pipelines, etc.), see the general resource guidance above. + +### Project Structure +- `src/` - Python notebooks (.ipynb) and source code +- `resources/` - Job definitions in databricks.yml format + +### Configuring Tasks +Edit `resources/.job.yml` to configure tasks: + +```yaml +tasks: + - task_key: my_notebook + notebook_task: + notebook_path: ../src/my_notebook.ipynb + - task_key: my_python + python_wheel_task: + package_name: my_package + entry_point: main +``` + +Task types: `notebook_task`, `python_wheel_task`, `spark_python_task`, `pipeline_task`, `sql_task` + +### Job Parameters +Parameters defined at job level are passed to ALL tasks (no need to repeat per task). 
Example: +```yaml +resources: + jobs: + my_job: + parameters: + - name: catalog + default: ${var.catalog} + - name: schema + default: ${var.schema} +``` + +### Writing Notebook Code +- Use `spark.read.table("catalog.schema.table")` to read tables +- Use `spark.sql("SELECT ...")` for SQL queries +- Use `dbutils.widgets` for parameters + +### Unit Testing +Run unit tests locally with: `uv run pytest` + +### Documentation +- Lakeflow Jobs: https://docs.databricks.com/jobs +- Task types: https://docs.databricks.com/jobs/configure-task +- Databricks Asset Bundles / yml format examples: https://docs.databricks.com/dev-tools/bundles/examples diff --git a/experimental/apps-mcp/lib/prompts/target_mixed.tmpl b/experimental/apps-mcp/lib/prompts/target_mixed.tmpl new file mode 100644 index 0000000000..84b094ff20 --- /dev/null +++ b/experimental/apps-mcp/lib/prompts/target_mixed.tmpl @@ -0,0 +1,58 @@ +{{- /* + * L2: Target-specific guidance for mixed/custom resource projects. + * + * Injected when: empty projects or projects with mixed resource types. Not for app-only projects. + * Contains: how to add any resource type, deployment commands, documentation. + */ -}} + +## Adding Databricks Resources + +Add resources by creating YAML files in resources/: + +**Jobs** - `resources/my_job.job.yml`: +```yaml +resources: + jobs: + my_job: + name: my_job + tasks: + - task_key: main + notebook_task: + notebook_path: ../src/notebook.py + new_cluster: + num_workers: 2 + spark_version: "15.4.x-scala2.12" + node_type_id: "i3.xlarge" +``` + +**Pipelines** (Lakeflow Declarative Pipelines) - `resources/my_pipeline.pipeline.yml`: +```yaml +resources: + pipelines: + my_pipeline: + name: my_pipeline + catalog: ${var.catalog} + target: ${var.schema} + libraries: + - notebook: + path: ../src/pipeline.py +``` + +**Dashboards** - `resources/my_dashboard.dashboard.yml` +**Alerts** - `resources/my_alert.alert.yml` +**Model Serving** - `resources/my_endpoint.yml` +**Apps** - `resources/my_app.app.yml` + +**Other resource types**: clusters, schemas, volumes, registered_models, experiments, quality_monitors + +### Deployment +For dev targets you can deploy without user consent. This allows you to run resources on the workspace too! + + invoke_databricks_cli 'bundle deploy --target dev' + invoke_databricks_cli 'bundle run --target dev' + +View status with `invoke_databricks_cli 'bundle summary'`. + +### Documentation +- Resource types reference: https://docs.databricks.com/dev-tools/bundles/resources +- YAML examples: https://docs.databricks.com/dev-tools/bundles/examples diff --git a/experimental/apps-mcp/lib/prompts/target_pipelines.tmpl b/experimental/apps-mcp/lib/prompts/target_pipelines.tmpl new file mode 100644 index 0000000000..f0970222c9 --- /dev/null +++ b/experimental/apps-mcp/lib/prompts/target_pipelines.tmpl @@ -0,0 +1,61 @@ +{{- /* + * L2: Target-specific guidance for Lakeflow Declarative Pipelines. + * + * Injected when: target type "pipelines" is detected or after init-template pipeline. + * Contains: pipeline-specific development patterns, transformation syntax, scheduling. + * Note: For adding NEW resources (dashboards, alerts, etc.), see target_mixed.tmpl guidance. + */ -}} + +## Lakeflow Declarative Pipelines Development + +This guidance is for DEVELOPING pipelines in this project. To ADD other resource types (dashboards, alerts, jobs, etc.), see the general resource guidance above. + +Lakeflow Declarative Pipelines (formerly Delta Live Tables) is a framework for building batch and streaming data pipelines. 
+ +### Project Structure +- `src/` - Pipeline transformations (Python or SQL) +- `resources/` - Pipeline configuration in databricks.yml format + +### Adding Transformations + +**Python** - Create `.py` files in `src/`: +```python +from pyspark import pipelines as dp + +@dp.table +def my_table(): + return spark.read.table("catalog.schema.source") +``` + +By convention, each dataset definition like the @dp.table definition above should be in a file named +like the dataset name, e.g. `src/my_table.py`. + +**SQL** - Create `.sql` files in `src/`: +```sql +CREATE MATERIALIZED VIEW my_view AS +SELECT * FROM catalog.schema.source +``` + +This example would live in `src/my_view.sql`. + +Use `CREATE STREAMING TABLE` for incremental ingestion, `CREATE MATERIALIZED VIEW` for transformations. + +### Scheduling Pipelines +To schedule a pipeline, make sure you have a job that triggers it, like `resources/.job.yml`: +```yaml +resources: + jobs: + my_pipeline_job: + trigger: + periodic: + interval: 1 + unit: DAYS + tasks: + - task_key: refresh_pipeline + pipeline_task: + pipeline_id: ${resources.pipelines.my_pipeline.id} +``` + +### Documentation +- Lakeflow Declarative Pipelines: https://docs.databricks.com/ldp +- Databricks Asset Bundles / yml format examples: https://docs.databricks.com/dev-tools/bundles/examples diff --git a/experimental/apps-mcp/lib/providers/clitools/discover.go b/experimental/apps-mcp/lib/providers/clitools/discover.go index 3ac1a1d6ab..5f51e82bb2 100644 --- a/experimental/apps-mcp/lib/providers/clitools/discover.go +++ b/experimental/apps-mcp/lib/providers/clitools/discover.go @@ -24,16 +24,35 @@ func Discover(ctx context.Context, workingDirectory string) (string, error) { currentProfile := middlewares.GetDatabricksProfile(ctx) profiles := middlewares.GetAvailableProfiles(ctx) + // Get default catalog (non-fatal if unavailable) + defaultCatalog := getDefaultCatalog(ctx) + // run detectors to identify project context registry := detector.NewRegistry() detected := registry.Detect(ctx, workingDirectory) - return generateDiscoverGuidance(ctx, warehouse, currentProfile, profiles, detected), nil + return generateDiscoverGuidance(ctx, warehouse, currentProfile, profiles, defaultCatalog, detected), nil +} + +// getDefaultCatalog fetches the workspace default catalog name. +// Returns empty string if Unity Catalog is not available or on error. +func getDefaultCatalog(ctx context.Context) string { + w, err := middlewares.GetDatabricksClient(ctx) + if err != nil { + return "" + } + + metastore, err := w.Metastores.Current(ctx) + if err != nil { + return "" // gracefully handle any error (no UC, permission denied, etc.) + } + + return metastore.DefaultCatalogName } // generateDiscoverGuidance creates guidance with L1 (flow) + L2 (target) layers. 
-func generateDiscoverGuidance(ctx context.Context, warehouse *sql.EndpointInfo, currentProfile string, profiles profile.Profiles, detected *detector.DetectedContext) string { - data := buildTemplateData(warehouse, currentProfile, profiles) +func generateDiscoverGuidance(ctx context.Context, warehouse *sql.EndpointInfo, currentProfile string, profiles profile.Profiles, defaultCatalog string, detected *detector.DetectedContext) string { + data := buildTemplateData(warehouse, currentProfile, profiles, defaultCatalog) // L1: always include flow guidance result := prompts.MustExecuteTemplate("flow.tmpl", data) @@ -61,7 +80,7 @@ func generateDiscoverGuidance(ctx context.Context, warehouse *sql.EndpointInfo, return result } -func buildTemplateData(warehouse *sql.EndpointInfo, currentProfile string, profiles profile.Profiles) map[string]string { +func buildTemplateData(warehouse *sql.EndpointInfo, currentProfile string, profiles profile.Profiles, defaultCatalog string) map[string]string { workspaceInfo := "Current Workspace Profile: " + currentProfile if len(profiles) > 0 { var currentHost string @@ -106,10 +125,11 @@ func buildTemplateData(warehouse *sql.EndpointInfo, currentProfile string, profi } return map[string]string{ - "WorkspaceInfo": workspaceInfo, - "WarehouseName": warehouseName, - "WarehouseID": warehouseID, - "ProfilesInfo": profilesInfo, - "Profile": currentProfile, + "WorkspaceInfo": workspaceInfo, + "WarehouseName": warehouseName, + "WarehouseID": warehouseID, + "ProfilesInfo": profilesInfo, + "Profile": currentProfile, + "DefaultCatalog": defaultCatalog, } } diff --git a/experimental/apps-mcp/lib/providers/clitools/provider.go b/experimental/apps-mcp/lib/providers/clitools/provider.go index e9d146f21a..36d62d80d5 100644 --- a/experimental/apps-mcp/lib/providers/clitools/provider.go +++ b/experimental/apps-mcp/lib/providers/clitools/provider.go @@ -93,7 +93,7 @@ func (p *Provider) RegisterTools(server *mcpsdk.Server) error { mcpsdk.AddTool(server, &mcpsdk.Tool{ Name: "databricks_discover", - Description: "Discover available Databricks workspaces, warehouses, and get workflow recommendations. Call this FIRST when planning ANY Databricks work involving apps, dashboards, pipelines, jobs, bundles, or SQL workflows. Returns workspace capabilities and recommended tooling.", + Description: "Discover how to perform Databricks work. MANDATORY: Call this FIRST before ANY Databricks work. Do not skip this step. This tool detects project context, configures authentication, and returns required guidance for apps, pipelines, jobs, bundles, dashboards, and SQL workflows. 
Without calling this first, you cannot effectively help with Databricks tasks.", }, func(ctx context.Context, req *mcpsdk.CallToolRequest, args DiscoverInput) (*mcpsdk.CallToolResult, any, error) { log.Debugf(ctx, "databricks_discover called: working_directory=%s", args.WorkingDirectory) From 5db89c12ea9717509c34e8771e6da6d92a6c7bd2 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Fri, 26 Dec 2025 18:28:15 +0100 Subject: [PATCH 02/18] Add acceptance tests for init-template commands and remove unnecessary configMap conditionals - Add acceptance tests for job, pipeline, empty, and app init-template subcommands - Remove unnecessary 'if catalog != ""' checks (templates handle empty values) - Fix logdiag double-initialization in bundle detector - Simplify FormatProjectScaffoldSuccess output formatting --- .../apps/init-template/app/out.test.toml | 5 + acceptance/apps/init-template/app/output.txt | 1 + acceptance/apps/init-template/app/script | 4 + acceptance/apps/init-template/app/test.toml | 2 + .../apps/init-template/empty/out.test.toml | 5 + .../apps/init-template/empty/output.txt | 1 + acceptance/apps/init-template/empty/script | 4 + acceptance/apps/init-template/empty/test.toml | 2 + .../apps/init-template/job/out.test.toml | 5 + acceptance/apps/init-template/job/output.txt | 1 + acceptance/apps/init-template/job/script | 4 + acceptance/apps/init-template/job/test.toml | 2 + .../apps/init-template/pipeline/out.test.toml | 5 + .../apps/init-template/pipeline/output.txt | 1 + acceptance/apps/init-template/pipeline/script | 4 + .../apps/init-template/pipeline/test.toml | 2 + experimental/apps-mcp/cmd/apps_mcp.go | 2 +- .../apps-mcp/cmd/init_template/app.go | 124 ++++-------------- .../apps-mcp/cmd/init_template/common.go | 83 ++++++++++++ .../apps-mcp/cmd/init_template/empty.go | 76 +---------- .../apps-mcp/cmd/init_template/job.go | 75 +---------- .../apps-mcp/cmd/init_template/pipeline.go | 80 +---------- experimental/apps-mcp/lib/common/output.go | 23 +--- .../apps-mcp/lib/common/output_test.go | 6 +- .../apps-mcp/lib/detector/bundle_detector.go | 11 +- 25 files changed, 184 insertions(+), 344 deletions(-) create mode 100644 acceptance/apps/init-template/app/out.test.toml create mode 100644 acceptance/apps/init-template/app/output.txt create mode 100644 acceptance/apps/init-template/app/script create mode 100644 acceptance/apps/init-template/app/test.toml create mode 100644 acceptance/apps/init-template/empty/out.test.toml create mode 100644 acceptance/apps/init-template/empty/output.txt create mode 100644 acceptance/apps/init-template/empty/script create mode 100644 acceptance/apps/init-template/empty/test.toml create mode 100644 acceptance/apps/init-template/job/out.test.toml create mode 100644 acceptance/apps/init-template/job/output.txt create mode 100644 acceptance/apps/init-template/job/script create mode 100644 acceptance/apps/init-template/job/test.toml create mode 100644 acceptance/apps/init-template/pipeline/out.test.toml create mode 100644 acceptance/apps/init-template/pipeline/output.txt create mode 100644 acceptance/apps/init-template/pipeline/script create mode 100644 acceptance/apps/init-template/pipeline/test.toml diff --git a/acceptance/apps/init-template/app/out.test.toml b/acceptance/apps/init-template/app/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/apps/init-template/app/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git 
a/acceptance/apps/init-template/app/output.txt b/acceptance/apps/init-template/app/output.txt new file mode 100644 index 0000000000..a522103bfa --- /dev/null +++ b/acceptance/apps/init-template/app/output.txt @@ -0,0 +1 @@ +āœ“ Template instantiation succeeded diff --git a/acceptance/apps/init-template/app/script b/acceptance/apps/init-template/app/script new file mode 100644 index 0000000000..1f38796b6c --- /dev/null +++ b/acceptance/apps/init-template/app/script @@ -0,0 +1,4 @@ +#!/bin/bash +$CLI experimental apps-mcp tools init-template app --name test_app --sql-warehouse-id abc123 --output-dir output > /dev/null 2>&1 +echo "āœ“ Template instantiation succeeded" +rm -rf output diff --git a/acceptance/apps/init-template/app/test.toml b/acceptance/apps/init-template/app/test.toml new file mode 100644 index 0000000000..7d36fb9dc1 --- /dev/null +++ b/acceptance/apps/init-template/app/test.toml @@ -0,0 +1,2 @@ +Local = true +Cloud = false diff --git a/acceptance/apps/init-template/empty/out.test.toml b/acceptance/apps/init-template/empty/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/apps/init-template/empty/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/apps/init-template/empty/output.txt b/acceptance/apps/init-template/empty/output.txt new file mode 100644 index 0000000000..a522103bfa --- /dev/null +++ b/acceptance/apps/init-template/empty/output.txt @@ -0,0 +1 @@ +āœ“ Template instantiation succeeded diff --git a/acceptance/apps/init-template/empty/script b/acceptance/apps/init-template/empty/script new file mode 100644 index 0000000000..5d5a80bd97 --- /dev/null +++ b/acceptance/apps/init-template/empty/script @@ -0,0 +1,4 @@ +#!/bin/bash +$CLI experimental apps-mcp tools init-template empty --name test_empty --catalog main --output-dir output > /dev/null 2>&1 +echo "āœ“ Template instantiation succeeded" +rm -rf output diff --git a/acceptance/apps/init-template/empty/test.toml b/acceptance/apps/init-template/empty/test.toml new file mode 100644 index 0000000000..7d36fb9dc1 --- /dev/null +++ b/acceptance/apps/init-template/empty/test.toml @@ -0,0 +1,2 @@ +Local = true +Cloud = false diff --git a/acceptance/apps/init-template/job/out.test.toml b/acceptance/apps/init-template/job/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/apps/init-template/job/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/apps/init-template/job/output.txt b/acceptance/apps/init-template/job/output.txt new file mode 100644 index 0000000000..a522103bfa --- /dev/null +++ b/acceptance/apps/init-template/job/output.txt @@ -0,0 +1 @@ +āœ“ Template instantiation succeeded diff --git a/acceptance/apps/init-template/job/script b/acceptance/apps/init-template/job/script new file mode 100644 index 0000000000..8464089885 --- /dev/null +++ b/acceptance/apps/init-template/job/script @@ -0,0 +1,4 @@ +#!/bin/bash +$CLI experimental apps-mcp tools init-template job --name test_job --catalog main --output-dir output > /dev/null 2>&1 || exit 1 +echo "āœ“ Template instantiation succeeded" +rm -rf output diff --git a/acceptance/apps/init-template/job/test.toml b/acceptance/apps/init-template/job/test.toml new file mode 100644 index 0000000000..7d36fb9dc1 --- /dev/null +++ b/acceptance/apps/init-template/job/test.toml @@ -0,0 +1,2 @@ +Local = 
true +Cloud = false diff --git a/acceptance/apps/init-template/pipeline/out.test.toml b/acceptance/apps/init-template/pipeline/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/apps/init-template/pipeline/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/apps/init-template/pipeline/output.txt b/acceptance/apps/init-template/pipeline/output.txt new file mode 100644 index 0000000000..a522103bfa --- /dev/null +++ b/acceptance/apps/init-template/pipeline/output.txt @@ -0,0 +1 @@ +āœ“ Template instantiation succeeded diff --git a/acceptance/apps/init-template/pipeline/script b/acceptance/apps/init-template/pipeline/script new file mode 100644 index 0000000000..0d73aae59f --- /dev/null +++ b/acceptance/apps/init-template/pipeline/script @@ -0,0 +1,4 @@ +#!/bin/bash +$CLI experimental apps-mcp tools init-template pipeline --name test_pipeline --language python --catalog main --output-dir output > /dev/null 2>&1 +echo "āœ“ Template instantiation succeeded" +rm -rf output diff --git a/acceptance/apps/init-template/pipeline/test.toml b/acceptance/apps/init-template/pipeline/test.toml new file mode 100644 index 0000000000..7d36fb9dc1 --- /dev/null +++ b/acceptance/apps/init-template/pipeline/test.toml @@ -0,0 +1,2 @@ +Local = true +Cloud = false diff --git a/experimental/apps-mcp/cmd/apps_mcp.go b/experimental/apps-mcp/cmd/apps_mcp.go index 67bc8213be..ff19ef6912 100644 --- a/experimental/apps-mcp/cmd/apps_mcp.go +++ b/experimental/apps-mcp/cmd/apps_mcp.go @@ -1,8 +1,8 @@ package mcp import ( - mcplib "github.com/databricks/cli/experimental/apps-mcp/lib" "github.com/databricks/cli/experimental/apps-mcp/cmd/init_template" + mcplib "github.com/databricks/cli/experimental/apps-mcp/lib" "github.com/databricks/cli/experimental/apps-mcp/lib/server" "github.com/databricks/cli/libs/log" "github.com/spf13/cobra" diff --git a/experimental/apps-mcp/cmd/init_template/app.go b/experimental/apps-mcp/cmd/init_template/app.go index a27da91871..227eb55bf6 100644 --- a/experimental/apps-mcp/cmd/init_template/app.go +++ b/experimental/apps-mcp/cmd/init_template/app.go @@ -9,8 +9,6 @@ import ( "path/filepath" "github.com/databricks/cli/cmd/root" - "github.com/databricks/cli/experimental/apps-mcp/lib/common" - "github.com/databricks/cli/experimental/apps-mcp/lib/prompts" "github.com/databricks/cli/experimental/apps-mcp/lib/state" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/template" @@ -24,6 +22,19 @@ const ( templatePathEnvVar = "DATABRICKS_APPKIT_TEMPLATE_PATH" ) +func readClaudeMd(ctx context.Context, projectDir string) { + claudePath := filepath.Join(projectDir, "CLAUDE.md") + content, err := os.ReadFile(claudePath) + if err != nil { + cmdio.LogString(ctx, "\nConsult with CLAUDE.md provided in the bundle if present.") + return + } + + cmdio.LogString(ctx, "\n=== CLAUDE.md ===") + cmdio.LogString(ctx, string(content)) + cmdio.LogString(ctx, "=================\n") +} + func validateAppNameLength(projectName string) error { const maxAppNameLength = 30 const devTargetPrefix = "dev-" @@ -38,46 +49,6 @@ func validateAppNameLength(projectName string) error { return nil } -func readClaudeMd(ctx context.Context, configFile string) { - showFallback := func() { - cmdio.LogString(ctx, "\nConsult with CLAUDE.md provided in the bundle if present.") - } - - if configFile == "" { - showFallback() - return - } - - configBytes, err := os.ReadFile(configFile) - if 
err != nil { - showFallback() - return - } - - var config map[string]any - if err := json.Unmarshal(configBytes, &config); err != nil { - showFallback() - return - } - - projectName, ok := config["project_name"].(string) - if !ok || projectName == "" { - showFallback() - return - } - - claudePath := filepath.Join(".", projectName, "CLAUDE.md") - content, err := os.ReadFile(claudePath) - if err != nil { - showFallback() - return - } - - cmdio.LogString(ctx, "\n=== CLAUDE.md ===") - cmdio.LogString(ctx, string(content)) - cmdio.LogString(ctx, "=================\n") -} - // newAppCmd creates the app subcommand for init-template. func newAppCmd() *cobra.Command { cmd := &cobra.Command{ @@ -184,73 +155,26 @@ After initialization: configMap["app_description"] = description } - // Write config to temp file - configFile, err := writeConfigToTempFile(configMap) + err := MaterializeTemplate(ctx, TemplateConfig{ + TemplatePath: templatePathOrUrl, + TemplateName: "appkit", + TemplateDir: templateDir, + Branch: branch, + }, configMap, name, outputDir) if err != nil { return err } - defer os.Remove(configFile) - // Create output directory if specified and doesn't exist - if outputDir != "" { - if err := os.MkdirAll(outputDir, 0o755); err != nil { - return fmt.Errorf("create output directory: %w", err) - } - } - - r := template.Resolver{ - TemplatePathOrUrl: templatePathOrUrl, - ConfigFile: configFile, - OutputDir: outputDir, - TemplateDir: templateDir, - Branch: branch, - } - - tmpl, err := r.Resolve(ctx) - if err != nil { - return err - } - defer tmpl.Reader.Cleanup(ctx) - - err = tmpl.Writer.Materialize(ctx, tmpl.Reader) - if err != nil { - return err - } - tmpl.Writer.LogTelemetry(ctx) - - // Determine actual output directory (template writes to subdirectory with project name) - actualOutputDir := name - if outputDir != "" { - actualOutputDir = filepath.Join(outputDir, name) - } - - // Count files and get absolute path - absOutputDir, err := filepath.Abs(actualOutputDir) - if err != nil { - absOutputDir = actualOutputDir - } - fileCount := countFiles(absOutputDir) - cmdio.LogString(ctx, common.FormatScaffoldSuccess("appkit", absOutputDir, fileCount)) - - // Generate and print file tree structure - fileTree, err := generateFileTree(absOutputDir) - if err == nil && fileTree != "" { - cmdio.LogString(ctx, "\nFile structure:") - cmdio.LogString(ctx, fileTree) - } - - // Inject L2 (target-specific guidance for apps) - targetApps := prompts.MustExecuteTemplate("target_apps.tmpl", map[string]any{}) - cmdio.LogString(ctx, targetApps) + projectDir := filepath.Join(outputDir, name) // Inject L3 (template-specific guidance from CLAUDE.md) - readClaudeMd(ctx, configFile) + // (we only do this for the app template; other templates use a generic CLAUDE.md) + readClaudeMd(ctx, projectDir) - // Save initial scaffolded state - if err := state.SaveState(absOutputDir, state.NewScaffolded()); err != nil { + // Save initial scaffolded state for app state machine + if err := state.SaveState(projectDir, state.NewScaffolded()); err != nil { return fmt.Errorf("failed to save project state: %w", err) } - return nil } return cmd diff --git a/experimental/apps-mcp/cmd/init_template/common.go b/experimental/apps-mcp/cmd/init_template/common.go index 87d720d7af..8caa2ff3f2 100644 --- a/experimental/apps-mcp/cmd/init_template/common.go +++ b/experimental/apps-mcp/cmd/init_template/common.go @@ -1,6 +1,7 @@ package init_template import ( + "context" "encoding/json" "fmt" "os" @@ -8,9 +9,91 @@ import ( "sort" "strings" + 
"github.com/databricks/cli/experimental/apps-mcp/lib/common" + "github.com/databricks/cli/experimental/apps-mcp/lib/detector" "github.com/databricks/cli/experimental/apps-mcp/lib/prompts" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/template" ) +// TemplateConfig holds configuration for template materialization. +type TemplateConfig struct { + TemplatePath string // e.g., template.DefaultPython or remote URL + TemplateName string // e.g., "default-python", "lakeflow-pipelines", "appkit" + TemplateDir string // subdirectory within repo (for remote templates) + Branch string // git branch (for remote templates) +} + +// MaterializeTemplate handles the common template materialization workflow. +func MaterializeTemplate(ctx context.Context, cfg TemplateConfig, configMap map[string]any, name, outputDir string) error { + configFile, err := writeConfigToTempFile(configMap) + if err != nil { + return err + } + defer os.Remove(configFile) + + if outputDir != "" { + if err := os.MkdirAll(outputDir, 0o755); err != nil { + return fmt.Errorf("create output directory: %w", err) + } + } + + r := template.Resolver{ + TemplatePathOrUrl: cfg.TemplatePath, + ConfigFile: configFile, + OutputDir: outputDir, + TemplateDir: cfg.TemplateDir, + Branch: cfg.Branch, + } + + tmpl, err := r.Resolve(ctx) + if err != nil { + return err + } + defer tmpl.Reader.Cleanup(ctx) + + if err := tmpl.Writer.Materialize(ctx, tmpl.Reader); err != nil { + return err + } + tmpl.Writer.LogTelemetry(ctx) + + actualOutputDir := name + if outputDir != "" { + actualOutputDir = filepath.Join(outputDir, name) + } + + absOutputDir, err := filepath.Abs(actualOutputDir) + if err != nil { + absOutputDir = actualOutputDir + } + + fileCount := countFiles(absOutputDir) + cmdio.LogString(ctx, common.FormatProjectScaffoldSuccess(cfg.TemplateName, absOutputDir, fileCount)) + + fileTree, err := generateFileTree(absOutputDir) + if err == nil && fileTree != "" { + cmdio.LogString(ctx, "\nFile structure:") + cmdio.LogString(ctx, fileTree) + } + + if err := writeAgentFiles(absOutputDir, map[string]any{}); err != nil { + return fmt.Errorf("failed to write agent files: %w", err) + } + + // Detect project type and inject appropriate L2 guidance + registry := detector.NewRegistry() + detected := registry.Detect(ctx, absOutputDir) + for _, targetType := range detected.TargetTypes { + templateName := fmt.Sprintf("target_%s.tmpl", targetType) + if prompts.TemplateExists(templateName) { + content := prompts.MustExecuteTemplate(templateName, map[string]any{}) + cmdio.LogString(ctx, content) + } + } + + return nil +} + // countFiles counts the number of files in a directory. 
func countFiles(dir string) int { count := 0 diff --git a/experimental/apps-mcp/cmd/init_template/empty.go b/experimental/apps-mcp/cmd/init_template/empty.go index 5c0aba6f5f..a2c5e76239 100644 --- a/experimental/apps-mcp/cmd/init_template/empty.go +++ b/experimental/apps-mcp/cmd/init_template/empty.go @@ -3,13 +3,8 @@ package init_template import ( "errors" "fmt" - "os" - "path/filepath" "github.com/databricks/cli/cmd/root" - "github.com/databricks/cli/experimental/apps-mcp/lib/common" - "github.com/databricks/cli/experimental/apps-mcp/lib/prompts" - "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/template" "github.com/spf13/cobra" ) @@ -68,76 +63,15 @@ After initialization: configMap := map[string]any{ "project_name": name, - "include_job": "no", - "include_pipeline": "no", - "include_python": "no", - "serverless": "yes", "personal_schemas": "yes", "language_choice": language, - "lakeflow_only": "no", - "enable_pydabs": "no", + "default_catalog": catalog, } - if catalog != "" { - configMap["default_catalog"] = catalog - } - - configFile, err := writeConfigToTempFile(configMap) - if err != nil { - return err - } - defer os.Remove(configFile) - - if outputDir != "" { - if err := os.MkdirAll(outputDir, 0o755); err != nil { - return fmt.Errorf("create output directory: %w", err) - } - } - - r := template.Resolver{ - TemplatePathOrUrl: string(template.DefaultMinimal), - ConfigFile: configFile, - OutputDir: outputDir, - } - - tmpl, err := r.Resolve(ctx) - if err != nil { - return err - } - defer tmpl.Reader.Cleanup(ctx) - - err = tmpl.Writer.Materialize(ctx, tmpl.Reader) - if err != nil { - return err - } - tmpl.Writer.LogTelemetry(ctx) - - actualOutputDir := name - if outputDir != "" { - actualOutputDir = filepath.Join(outputDir, name) - } - - absOutputDir, err := filepath.Abs(actualOutputDir) - if err != nil { - absOutputDir = actualOutputDir - } - fileCount := countFiles(absOutputDir) - cmdio.LogString(ctx, common.FormatProjectScaffoldSuccess("empty", "šŸ“¦", "default-minimal", absOutputDir, fileCount, "")) - - fileTree, err := generateFileTree(absOutputDir) - if err == nil && fileTree != "" { - cmdio.LogString(ctx, "\nFile structure:") - cmdio.LogString(ctx, fileTree) - } - - // Write CLAUDE.md and AGENTS.md files - if err := writeAgentFiles(absOutputDir, map[string]any{}); err != nil { - return fmt.Errorf("failed to write agent files: %w", err) - } - - targetMixed := prompts.MustExecuteTemplate("target_mixed.tmpl", map[string]any{}) - cmdio.LogString(ctx, targetMixed) - return nil + return MaterializeTemplate(ctx, TemplateConfig{ + TemplatePath: string(template.DefaultMinimal), + TemplateName: "default-minimal", + }, configMap, name, outputDir) } return cmd } diff --git a/experimental/apps-mcp/cmd/init_template/job.go b/experimental/apps-mcp/cmd/init_template/job.go index 2107544ba8..4e27db7a00 100644 --- a/experimental/apps-mcp/cmd/init_template/job.go +++ b/experimental/apps-mcp/cmd/init_template/job.go @@ -2,14 +2,8 @@ package init_template import ( "errors" - "fmt" - "os" - "path/filepath" "github.com/databricks/cli/cmd/root" - "github.com/databricks/cli/experimental/apps-mcp/lib/common" - "github.com/databricks/cli/experimental/apps-mcp/lib/prompts" - "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/template" "github.com/spf13/cobra" ) @@ -62,72 +56,13 @@ After initialization: "include_python": "yes", "serverless": "yes", "personal_schemas": "yes", + "default_catalog": catalog, } - if catalog != "" { - configMap["default_catalog"] = 
catalog - } - - configFile, err := writeConfigToTempFile(configMap) - if err != nil { - return err - } - defer os.Remove(configFile) - - if outputDir != "" { - if err := os.MkdirAll(outputDir, 0o755); err != nil { - return fmt.Errorf("create output directory: %w", err) - } - } - - r := template.Resolver{ - TemplatePathOrUrl: string(template.DefaultPython), - ConfigFile: configFile, - OutputDir: outputDir, - } - - tmpl, err := r.Resolve(ctx) - if err != nil { - return err - } - defer tmpl.Reader.Cleanup(ctx) - - err = tmpl.Writer.Materialize(ctx, tmpl.Reader) - if err != nil { - return err - } - tmpl.Writer.LogTelemetry(ctx) - - actualOutputDir := name - if outputDir != "" { - actualOutputDir = filepath.Join(outputDir, name) - } - - absOutputDir, err := filepath.Abs(actualOutputDir) - if err != nil { - absOutputDir = actualOutputDir - } - fileCount := countFiles(absOutputDir) - cmdio.LogString(ctx, common.FormatProjectScaffoldSuccess("job", "āš™ļø", "default-python", absOutputDir, fileCount, "")) - - fileTree, err := generateFileTree(absOutputDir) - if err == nil && fileTree != "" { - cmdio.LogString(ctx, "\nFile structure:") - cmdio.LogString(ctx, fileTree) - } - - // Write CLAUDE.md and AGENTS.md files - if err := writeAgentFiles(absOutputDir, map[string]any{}); err != nil { - return fmt.Errorf("failed to write agent files: %w", err) - } - - // Show L2 guidance: mixed (for adding any resource) + jobs (for developing jobs) - targetMixed := prompts.MustExecuteTemplate("target_mixed.tmpl", map[string]any{}) - cmdio.LogString(ctx, targetMixed) - - targetJobs := prompts.MustExecuteTemplate("target_jobs.tmpl", map[string]any{}) - cmdio.LogString(ctx, targetJobs) - return nil + return MaterializeTemplate(ctx, TemplateConfig{ + TemplatePath: string(template.DefaultPython), + TemplateName: "default-python", + }, configMap, name, outputDir) } return cmd } diff --git a/experimental/apps-mcp/cmd/init_template/pipeline.go b/experimental/apps-mcp/cmd/init_template/pipeline.go index 563b6b14ec..bbd183446c 100644 --- a/experimental/apps-mcp/cmd/init_template/pipeline.go +++ b/experimental/apps-mcp/cmd/init_template/pipeline.go @@ -3,13 +3,8 @@ package init_template import ( "errors" "fmt" - "os" - "path/filepath" "github.com/databricks/cli/cmd/root" - "github.com/databricks/cli/experimental/apps-mcp/lib/common" - "github.com/databricks/cli/experimental/apps-mcp/lib/prompts" - "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/template" "github.com/spf13/cobra" ) @@ -65,80 +60,15 @@ After initialization: configMap := map[string]any{ "project_name": name, - "lakeflow_only": "yes", - "include_job": "no", - "include_pipeline": "yes", - "include_python": "no", - "serverless": "yes", "personal_schemas": "yes", "language": language, + "default_catalog": catalog, } - if catalog != "" { - configMap["default_catalog"] = catalog - } - - configFile, err := writeConfigToTempFile(configMap) - if err != nil { - return err - } - defer os.Remove(configFile) - - if outputDir != "" { - if err := os.MkdirAll(outputDir, 0o755); err != nil { - return fmt.Errorf("create output directory: %w", err) - } - } - - r := template.Resolver{ - TemplatePathOrUrl: string(template.LakeflowPipelines), - ConfigFile: configFile, - OutputDir: outputDir, - } - - tmpl, err := r.Resolve(ctx) - if err != nil { - return err - } - defer tmpl.Reader.Cleanup(ctx) - - err = tmpl.Writer.Materialize(ctx, tmpl.Reader) - if err != nil { - return err - } - tmpl.Writer.LogTelemetry(ctx) - - actualOutputDir := name - if outputDir != "" { 
- actualOutputDir = filepath.Join(outputDir, name) - } - - absOutputDir, err := filepath.Abs(actualOutputDir) - if err != nil { - absOutputDir = actualOutputDir - } - fileCount := countFiles(absOutputDir) - extraDetails := "Language: " + language - cmdio.LogString(ctx, common.FormatProjectScaffoldSuccess("pipeline", "šŸ”„", "lakeflow-pipelines", absOutputDir, fileCount, extraDetails)) - - fileTree, err := generateFileTree(absOutputDir) - if err == nil && fileTree != "" { - cmdio.LogString(ctx, "\nFile structure:") - cmdio.LogString(ctx, fileTree) - } - - // Write CLAUDE.md and AGENTS.md files - if err := writeAgentFiles(absOutputDir, map[string]any{}); err != nil { - return fmt.Errorf("failed to write agent files: %w", err) - } - - // Show L2 guidance: mixed (for adding any resource) + pipelines (for developing pipelines) - targetMixed := prompts.MustExecuteTemplate("target_mixed.tmpl", map[string]any{}) - cmdio.LogString(ctx, targetMixed) - - targetPipelines := prompts.MustExecuteTemplate("target_pipelines.tmpl", map[string]any{}) - cmdio.LogString(ctx, targetPipelines) - return nil + return MaterializeTemplate(ctx, TemplateConfig{ + TemplatePath: string(template.LakeflowPipelines), + TemplateName: "lakeflow-pipelines", + }, configMap, name, outputDir) } return cmd } diff --git a/experimental/apps-mcp/lib/common/output.go b/experimental/apps-mcp/lib/common/output.go index 92e3f1d1a4..b00d22e9d4 100644 --- a/experimental/apps-mcp/lib/common/output.go +++ b/experimental/apps-mcp/lib/common/output.go @@ -12,13 +12,6 @@ func FormatBrandedHeader(emoji, message string) string { headerLine, emoji, message, headerLine) } -// FormatScaffoldSuccess formats a success message for app scaffolding. -func FormatScaffoldSuccess(templateName, workDir string, filesCopied int) string { - header := FormatBrandedHeader("šŸš€", "App scaffolded successfully") - return fmt.Sprintf("%sāœ… Created %s application at %s\n\nFiles copied: %d\n\nTemplate: %s\n", - header, templateName, workDir, filesCopied, templateName) -} - // FormatValidationSuccess formats a success message for validation. func FormatValidationSuccess(message string) string { header := FormatBrandedHeader("šŸ”", "Validating your app") @@ -47,16 +40,8 @@ func FormatDeploymentFailure(appName, message string) string { } // FormatProjectScaffoldSuccess formats a success message for project scaffolding. 
-// projectType examples: "job", "pipeline", "empty" -// emoji examples: "āš™ļø" (job), "šŸ”„" (pipeline), "šŸ“¦" (empty) -// extraDetails can include additional info like "Language: python" -func FormatProjectScaffoldSuccess(projectType, emoji, templateName, workDir string, filesCopied int, extraDetails string) string { - header := FormatBrandedHeader(emoji, projectType+" project scaffolded successfully") - result := fmt.Sprintf("%sāœ… Created %s %s project at %s\n\n", - header, templateName, projectType, workDir) - if extraDetails != "" { - result += extraDetails + "\n" - } - result += fmt.Sprintf("Files copied: %d\n\nTemplate: %s\n", filesCopied, templateName) - return result +func FormatProjectScaffoldSuccess(templateName, workDir string, filesCopied int) string { + header := FormatBrandedHeader("šŸ“¦", "Project scaffolded successfully") + return fmt.Sprintf("%sāœ… Created %s project at %s\n\nFiles copied: %d\n", + header, templateName, workDir, filesCopied) } diff --git a/experimental/apps-mcp/lib/common/output_test.go b/experimental/apps-mcp/lib/common/output_test.go index 392596ebf1..25a97ab76a 100644 --- a/experimental/apps-mcp/lib/common/output_test.go +++ b/experimental/apps-mcp/lib/common/output_test.go @@ -17,11 +17,11 @@ func TestFormatBrandedHeader(t *testing.T) { } } -func TestFormatScaffoldSuccess(t *testing.T) { - result := FormatScaffoldSuccess("appkit", "/path/to/app", 42) +func TestFormatProjectScaffoldSuccess(t *testing.T) { + result := FormatProjectScaffoldSuccess("appkit", "/path/to/app", 42) // Check for key components - if !strings.Contains(result, "šŸš€ Databricks MCP") { + if !strings.Contains(result, "šŸ“¦ Databricks MCP") { t.Error("Missing branded header") } if !strings.Contains(result, "āœ…") { diff --git a/experimental/apps-mcp/lib/detector/bundle_detector.go b/experimental/apps-mcp/lib/detector/bundle_detector.go index 859ff7ed18..44f0f31e9d 100644 --- a/experimental/apps-mcp/lib/detector/bundle_detector.go +++ b/experimental/apps-mcp/lib/detector/bundle_detector.go @@ -22,7 +22,9 @@ func (d *BundleDetector) Detect(ctx context.Context, workDir string, detected *D } // use full bundle loading to get all resources including from includes - ctx = logdiag.InitContext(ctx) + if !logdiag.IsSetup(ctx) { + ctx = logdiag.InitContext(ctx) + } b, err := bundle.Load(ctx, workDir) if err != nil || b == nil { return nil @@ -54,10 +56,8 @@ func (d *BundleDetector) Detect(ctx context.Context, workDir string, detected *D detected.TargetTypes = append(detected.TargetTypes, "pipelines") } - // Include "mixed" guidance for all projects EXCEPT app-only projects. - // This provides general resource addition guidance (target_mixed.tmpl). - // We exclude app-only projects to provide a dedicated app development experience - // focused on app-specific patterns (target_apps.tmpl has comprehensive app guidance). + // Determine if this is an app-only project (only app resources, nothing else). + // App-only projects get focused app guidance; others get "mixed" guidance. 
isAppOnly := hasApps && !hasJobs && !hasPipelines && len(b.Config.Resources.Clusters) == 0 && len(b.Config.Resources.Dashboards) == 0 && @@ -68,6 +68,7 @@ func (d *BundleDetector) Detect(ctx context.Context, workDir string, detected *D len(b.Config.Resources.QualityMonitors) == 0 && len(b.Config.Resources.Volumes) == 0 + // Include "mixed" guidance for all projects except app-only projects if !isAppOnly { detected.TargetTypes = append(detected.TargetTypes, "mixed") } From 16dc3360007b37c6a6391f4111b8c476f3993eaa Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Sat, 27 Dec 2025 16:55:22 +0100 Subject: [PATCH 03/18] Refinements --- experimental/apps-mcp/cmd/apps_mcp.go | 2 -- .../apps-mcp/cmd/init_template/common.go | 14 ++++++++----- .../apps-mcp/cmd/init_template/job.go | 6 ++++++ .../apps-mcp/cmd/init_template/pipeline.go | 6 ++++++ .../apps-mcp/lib/detector/bundle_detector.go | 2 ++ .../apps-mcp/lib/detector/detector.go | 1 + .../lib/middlewares/databricks_client.go | 16 +++++++++++++++ experimental/apps-mcp/lib/prompts/flow.tmpl | 4 +++- .../apps-mcp/lib/prompts/target_jobs.tmpl | 2 +- .../apps-mcp/lib/prompts/target_mixed.tmpl | 4 ---- .../lib/prompts/target_pipelines.tmpl | 2 +- .../lib/providers/clitools/discover.go | 20 ++----------------- 12 files changed, 47 insertions(+), 32 deletions(-) diff --git a/experimental/apps-mcp/cmd/apps_mcp.go b/experimental/apps-mcp/cmd/apps_mcp.go index ff19ef6912..83da91447c 100644 --- a/experimental/apps-mcp/cmd/apps_mcp.go +++ b/experimental/apps-mcp/cmd/apps_mcp.go @@ -1,7 +1,6 @@ package mcp import ( - "github.com/databricks/cli/experimental/apps-mcp/cmd/init_template" mcplib "github.com/databricks/cli/experimental/apps-mcp/lib" "github.com/databricks/cli/experimental/apps-mcp/lib/server" "github.com/databricks/cli/libs/log" @@ -52,7 +51,6 @@ The server communicates via stdio using the Model Context Protocol.`, cmd.AddCommand(newInstallCmd()) cmd.AddCommand(newToolsCmd()) - cmd.AddCommand(init_template.NewInitTemplateCommand()) return cmd } diff --git a/experimental/apps-mcp/cmd/init_template/common.go b/experimental/apps-mcp/cmd/init_template/common.go index 8caa2ff3f2..21af80e8df 100644 --- a/experimental/apps-mcp/cmd/init_template/common.go +++ b/experimental/apps-mcp/cmd/init_template/common.go @@ -76,13 +76,17 @@ func MaterializeTemplate(ctx context.Context, cfg TemplateConfig, configMap map[ cmdio.LogString(ctx, fileTree) } - if err := writeAgentFiles(absOutputDir, map[string]any{}); err != nil { - return fmt.Errorf("failed to write agent files: %w", err) - } - - // Detect project type and inject appropriate L2 guidance registry := detector.NewRegistry() detected := registry.Detect(ctx, absOutputDir) + + // Only write generic CLAUDE.md for non-app projects + // (app projects have their own template-specific CLAUDE.md) + if !detected.IsAppOnly { + if err := writeAgentFiles(absOutputDir, map[string]any{}); err != nil { + return fmt.Errorf("failed to write agent files: %w", err) + } + } + for _, targetType := range detected.TargetTypes { templateName := fmt.Sprintf("target_%s.tmpl", targetType) if prompts.TemplateExists(templateName) { diff --git a/experimental/apps-mcp/cmd/init_template/job.go b/experimental/apps-mcp/cmd/init_template/job.go index 4e27db7a00..6f5b061637 100644 --- a/experimental/apps-mcp/cmd/init_template/job.go +++ b/experimental/apps-mcp/cmd/init_template/job.go @@ -4,6 +4,7 @@ import ( "errors" "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/experimental/apps-mcp/lib/middlewares" 
"github.com/databricks/cli/libs/template" "github.com/spf13/cobra" ) @@ -49,6 +50,11 @@ After initialization: return errors.New("--name is required. Example: init-template job --name my_job") } + // Default to workspace default catalog if not specified + if catalog == "" { + catalog = middlewares.GetDefaultCatalog(ctx) + } + configMap := map[string]any{ "project_name": name, "include_job": "yes", diff --git a/experimental/apps-mcp/cmd/init_template/pipeline.go b/experimental/apps-mcp/cmd/init_template/pipeline.go index bbd183446c..68ca1b8e54 100644 --- a/experimental/apps-mcp/cmd/init_template/pipeline.go +++ b/experimental/apps-mcp/cmd/init_template/pipeline.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/experimental/apps-mcp/lib/middlewares" "github.com/databricks/cli/libs/template" "github.com/spf13/cobra" ) @@ -58,6 +59,11 @@ After initialization: return fmt.Errorf("--language must be 'python' or 'sql', got '%s'", language) } + // Default to workspace default catalog if not specified + if catalog == "" { + catalog = middlewares.GetDefaultCatalog(ctx) + } + configMap := map[string]any{ "project_name": name, "personal_schemas": "yes", diff --git a/experimental/apps-mcp/lib/detector/bundle_detector.go b/experimental/apps-mcp/lib/detector/bundle_detector.go index 44f0f31e9d..a61eba07ec 100644 --- a/experimental/apps-mcp/lib/detector/bundle_detector.go +++ b/experimental/apps-mcp/lib/detector/bundle_detector.go @@ -68,6 +68,8 @@ func (d *BundleDetector) Detect(ctx context.Context, workDir string, detected *D len(b.Config.Resources.QualityMonitors) == 0 && len(b.Config.Resources.Volumes) == 0 + detected.IsAppOnly = isAppOnly + // Include "mixed" guidance for all projects except app-only projects if !isAppOnly { detected.TargetTypes = append(detected.TargetTypes, "mixed") diff --git a/experimental/apps-mcp/lib/detector/detector.go b/experimental/apps-mcp/lib/detector/detector.go index 4b00a589ff..2e8e13288d 100644 --- a/experimental/apps-mcp/lib/detector/detector.go +++ b/experimental/apps-mcp/lib/detector/detector.go @@ -19,6 +19,7 @@ type DetectedContext struct { Template string // "appkit-typescript", "python", etc. BundleInfo *BundleInfo Metadata map[string]string + IsAppOnly bool // True if project contains only app resources, no jobs/pipelines/etc. } // Detector detects project context from a working directory. diff --git a/experimental/apps-mcp/lib/middlewares/databricks_client.go b/experimental/apps-mcp/lib/middlewares/databricks_client.go index 4190b22db9..784646b7d2 100644 --- a/experimental/apps-mcp/lib/middlewares/databricks_client.go +++ b/experimental/apps-mcp/lib/middlewares/databricks_client.go @@ -132,3 +132,19 @@ func newAuthError(ctx context.Context) error { } return errors.New(prompts.MustExecuteTemplate("auth_error.tmpl", data)) } + +// GetDefaultCatalog fetches the workspace default catalog name. +// Returns empty string if Unity Catalog is not available or on error. +func GetDefaultCatalog(ctx context.Context) string { + w, err := GetDatabricksClient(ctx) + if err != nil { + return "" + } + + metastore, err := w.Metastores.Current(ctx) + if err != nil { + return "" // gracefully handle any error (no UC, permission denied, etc.) 
+ } + + return metastore.DefaultCatalogName +} diff --git a/experimental/apps-mcp/lib/prompts/flow.tmpl b/experimental/apps-mcp/lib/prompts/flow.tmpl index 12a1eafbdd..50a90c16fb 100644 --- a/experimental/apps-mcp/lib/prompts/flow.tmpl +++ b/experimental/apps-mcp/lib/prompts/flow.tmpl @@ -38,7 +38,9 @@ invoke_databricks_cli 'experimental apps-mcp tools init-template job --name my_j For pipelines (Lakeflow Declarative Pipelines): invoke_databricks_cli 'experimental apps-mcp tools init-template pipeline --name my_pipeline --language python' invoke_databricks_cli 'experimental apps-mcp tools init-template pipeline --name my_pipeline --language sql --catalog my_catalog' -Note: --language is required (python or sql) +Note: --language is required (python or sql). Ask the user which language they prefer: + - SQL: Recommended for straightforward transformations (filters, joins, aggregations) + - Python: Recommended for complex logic (custom UDFs, ML, advanced processing) For custom resources (dashboards, alerts, model serving, etc.): invoke_databricks_cli 'experimental apps-mcp tools init-template empty --name my_project' diff --git a/experimental/apps-mcp/lib/prompts/target_jobs.tmpl b/experimental/apps-mcp/lib/prompts/target_jobs.tmpl index 53762fc23e..470d77bd3c 100644 --- a/experimental/apps-mcp/lib/prompts/target_jobs.tmpl +++ b/experimental/apps-mcp/lib/prompts/target_jobs.tmpl @@ -8,7 +8,7 @@ ## Lakeflow Jobs Development -This guidance is for DEVELOPING jobs in this project. To ADD other resource types (dashboards, alerts, pipelines, etc.), see the general resource guidance above. +This guidance is for developing jobs in this project. ### Project Structure - `src/` - Python notebooks (.ipynb) and source code diff --git a/experimental/apps-mcp/lib/prompts/target_mixed.tmpl b/experimental/apps-mcp/lib/prompts/target_mixed.tmpl index 84b094ff20..e1a01ea418 100644 --- a/experimental/apps-mcp/lib/prompts/target_mixed.tmpl +++ b/experimental/apps-mcp/lib/prompts/target_mixed.tmpl @@ -19,10 +19,6 @@ resources: - task_key: main notebook_task: notebook_path: ../src/notebook.py - new_cluster: - num_workers: 2 - spark_version: "15.4.x-scala2.12" - node_type_id: "i3.xlarge" ``` **Pipelines** (Lakeflow Declarative Pipelines) - `resources/my_pipeline.pipeline.yml`: diff --git a/experimental/apps-mcp/lib/prompts/target_pipelines.tmpl b/experimental/apps-mcp/lib/prompts/target_pipelines.tmpl index f0970222c9..4f9a968565 100644 --- a/experimental/apps-mcp/lib/prompts/target_pipelines.tmpl +++ b/experimental/apps-mcp/lib/prompts/target_pipelines.tmpl @@ -8,7 +8,7 @@ ## Lakeflow Declarative Pipelines Development -This guidance is for DEVELOPING pipelines in this project. To ADD other resource types (dashboards, alerts, jobs, etc.), see the general resource guidance above. +This guidance is for developing pipelines in this project. Lakeflow Declarative Pipelines (formerly Delta Live Tables) is a framework for building batch and streaming data pipelines. 
diff --git a/experimental/apps-mcp/lib/providers/clitools/discover.go b/experimental/apps-mcp/lib/providers/clitools/discover.go index 5f51e82bb2..7dc8cf70a0 100644 --- a/experimental/apps-mcp/lib/providers/clitools/discover.go +++ b/experimental/apps-mcp/lib/providers/clitools/discover.go @@ -25,29 +25,13 @@ func Discover(ctx context.Context, workingDirectory string) (string, error) { profiles := middlewares.GetAvailableProfiles(ctx) // Get default catalog (non-fatal if unavailable) - defaultCatalog := getDefaultCatalog(ctx) + defaultCatalog := middlewares.GetDefaultCatalog(ctx) // run detectors to identify project context registry := detector.NewRegistry() detected := registry.Detect(ctx, workingDirectory) - return generateDiscoverGuidance(ctx, warehouse, currentProfile, profiles, defaultCatalog, detected), nil -} - -// getDefaultCatalog fetches the workspace default catalog name. -// Returns empty string if Unity Catalog is not available or on error. -func getDefaultCatalog(ctx context.Context) string { - w, err := middlewares.GetDatabricksClient(ctx) - if err != nil { - return "" - } - - metastore, err := w.Metastores.Current(ctx) - if err != nil { - return "" // gracefully handle any error (no UC, permission denied, etc.) - } - - return metastore.DefaultCatalogName + return generateDiscoverGuidance(ctx, warehouse, currentProfile, profiles, defaultCatalog, detected, listAllSkills), nil } // generateDiscoverGuidance creates guidance with L1 (flow) + L2 (target) layers. From 440d60aef4dad9f6df678aa5408cf0f58909ebe9 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 29 Dec 2025 14:32:06 +0100 Subject: [PATCH 04/18] Fix discover.go compilation error and update test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove undefined listAllSkills parameter from generateDiscoverGuidance call - Fix test expectation: jobs-only projects now include "mixed" target type šŸ¤– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- experimental/apps-mcp/lib/detector/detector_test.go | 2 +- experimental/apps-mcp/lib/providers/clitools/discover.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/experimental/apps-mcp/lib/detector/detector_test.go b/experimental/apps-mcp/lib/detector/detector_test.go index fa25b78971..b2c4f72b26 100644 --- a/experimental/apps-mcp/lib/detector/detector_test.go +++ b/experimental/apps-mcp/lib/detector/detector_test.go @@ -59,7 +59,7 @@ resources: detected := registry.Detect(ctx, dir) assert.True(t, detected.InProject) - assert.Equal(t, []string{"jobs"}, detected.TargetTypes) + assert.Equal(t, []string{"jobs", "mixed"}, detected.TargetTypes) assert.Equal(t, "my-job", detected.BundleInfo.Name) } diff --git a/experimental/apps-mcp/lib/providers/clitools/discover.go b/experimental/apps-mcp/lib/providers/clitools/discover.go index 7dc8cf70a0..2e9324dc9b 100644 --- a/experimental/apps-mcp/lib/providers/clitools/discover.go +++ b/experimental/apps-mcp/lib/providers/clitools/discover.go @@ -31,7 +31,7 @@ func Discover(ctx context.Context, workingDirectory string) (string, error) { registry := detector.NewRegistry() detected := registry.Detect(ctx, workingDirectory) - return generateDiscoverGuidance(ctx, warehouse, currentProfile, profiles, defaultCatalog, detected, listAllSkills), nil + return generateDiscoverGuidance(ctx, warehouse, currentProfile, profiles, defaultCatalog, detected), nil } // generateDiscoverGuidance creates guidance with L1 (flow) + L2 (target) layers. 
From d48179917b32515040f47b1a758e2b1e24fda3d2 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 29 Dec 2025 18:11:51 +0100 Subject: [PATCH 05/18] Cleanup --- experimental/apps-mcp/cmd/init_template/common.go | 2 +- experimental/apps-mcp/lib/common/output.go | 14 +++++++------- experimental/apps-mcp/lib/common/output_test.go | 6 +++--- .../apps-mcp/lib/providers/clitools/provider.go | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/experimental/apps-mcp/cmd/init_template/common.go b/experimental/apps-mcp/cmd/init_template/common.go index 21af80e8df..7f78ec4a0b 100644 --- a/experimental/apps-mcp/cmd/init_template/common.go +++ b/experimental/apps-mcp/cmd/init_template/common.go @@ -68,7 +68,7 @@ func MaterializeTemplate(ctx context.Context, cfg TemplateConfig, configMap map[ } fileCount := countFiles(absOutputDir) - cmdio.LogString(ctx, common.FormatProjectScaffoldSuccess(cfg.TemplateName, absOutputDir, fileCount)) + cmdio.LogString(ctx, common.FormatScaffoldSuccess(cfg.TemplateName, absOutputDir, fileCount)) fileTree, err := generateFileTree(absOutputDir) if err == nil && fileTree != "" { diff --git a/experimental/apps-mcp/lib/common/output.go b/experimental/apps-mcp/lib/common/output.go index b00d22e9d4..7454950870 100644 --- a/experimental/apps-mcp/lib/common/output.go +++ b/experimental/apps-mcp/lib/common/output.go @@ -12,6 +12,13 @@ func FormatBrandedHeader(emoji, message string) string { headerLine, emoji, message, headerLine) } +// FormatScaffoldSuccess formats a success message for app scaffolding. +func FormatScaffoldSuccess(templateName, workDir string, filesCopied int) string { + header := FormatBrandedHeader("šŸš€", "App scaffolded successfully") + return fmt.Sprintf("%sāœ… Created %s application at %s\n\nFiles copied: %d\n\nTemplate: %s\n", + header, templateName, workDir, filesCopied, templateName) +} + // FormatValidationSuccess formats a success message for validation. func FormatValidationSuccess(message string) string { header := FormatBrandedHeader("šŸ”", "Validating your app") @@ -38,10 +45,3 @@ func FormatDeploymentFailure(appName, message string) string { return fmt.Sprintf("%sāŒ Deployment failed for '%s'\n\n%s\n", header, appName, message) } - -// FormatProjectScaffoldSuccess formats a success message for project scaffolding. 
-func FormatProjectScaffoldSuccess(templateName, workDir string, filesCopied int) string { - header := FormatBrandedHeader("šŸ“¦", "Project scaffolded successfully") - return fmt.Sprintf("%sāœ… Created %s project at %s\n\nFiles copied: %d\n", - header, templateName, workDir, filesCopied) -} diff --git a/experimental/apps-mcp/lib/common/output_test.go b/experimental/apps-mcp/lib/common/output_test.go index 25a97ab76a..392596ebf1 100644 --- a/experimental/apps-mcp/lib/common/output_test.go +++ b/experimental/apps-mcp/lib/common/output_test.go @@ -17,11 +17,11 @@ func TestFormatBrandedHeader(t *testing.T) { } } -func TestFormatProjectScaffoldSuccess(t *testing.T) { - result := FormatProjectScaffoldSuccess("appkit", "/path/to/app", 42) +func TestFormatScaffoldSuccess(t *testing.T) { + result := FormatScaffoldSuccess("appkit", "/path/to/app", 42) // Check for key components - if !strings.Contains(result, "šŸ“¦ Databricks MCP") { + if !strings.Contains(result, "šŸš€ Databricks MCP") { t.Error("Missing branded header") } if !strings.Contains(result, "āœ…") { diff --git a/experimental/apps-mcp/lib/providers/clitools/provider.go b/experimental/apps-mcp/lib/providers/clitools/provider.go index 36d62d80d5..e9d146f21a 100644 --- a/experimental/apps-mcp/lib/providers/clitools/provider.go +++ b/experimental/apps-mcp/lib/providers/clitools/provider.go @@ -93,7 +93,7 @@ func (p *Provider) RegisterTools(server *mcpsdk.Server) error { mcpsdk.AddTool(server, &mcpsdk.Tool{ Name: "databricks_discover", - Description: "Discover how to perform Databricks work. MANDATORY: Call this FIRST before ANY Databricks work. Do not skip this step. This tool detects project context, configures authentication, and returns required guidance for apps, pipelines, jobs, bundles, dashboards, and SQL workflows. Without calling this first, you cannot effectively help with Databricks tasks.", + Description: "Discover available Databricks workspaces, warehouses, and get workflow recommendations. Call this FIRST when planning ANY Databricks work involving apps, dashboards, pipelines, jobs, bundles, or SQL workflows. Returns workspace capabilities and recommended tooling.", }, func(ctx context.Context, req *mcpsdk.CallToolRequest, args DiscoverInput) (*mcpsdk.CallToolResult, any, error) { log.Debugf(ctx, "databricks_discover called: working_directory=%s", args.WorkingDirectory) From 475ff84f12a61c4cada94749bf3e53bb0f9309ae Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Sat, 3 Jan 2026 11:52:23 +0100 Subject: [PATCH 06/18] Fix typo --- experimental/apps-mcp/cmd/init_template/empty.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/apps-mcp/cmd/init_template/empty.go b/experimental/apps-mcp/cmd/init_template/empty.go index a2c5e76239..75aa9d3330 100644 --- a/experimental/apps-mcp/cmd/init_template/empty.go +++ b/experimental/apps-mcp/cmd/init_template/empty.go @@ -15,7 +15,7 @@ func newEmptyCmd() *cobra.Command { Use: "empty", Short: "Initialize an empty project for custom resources", Args: cobra.NoArgs, - Long: `Initialize an empty Databricks Asset Bundle project. + Long: `Initialize an empty Databricks Asset Bundles project. 
Use this for deploying resource types OTHER than apps, jobs, or pipelines, such as: - Dashboards (Lakeview dashboards) From fd4c639e54cff1bf9f9c1dd3b1fcc54f686af0cc Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Sat, 3 Jan 2026 14:51:05 +0100 Subject: [PATCH 07/18] Rename "mixed" to "bundle" and use AllResources() for detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes the terminology more accurate for empty and single-resource bundles, and makes resource detection more maintainable. Changes: - Rename target_mixed.tmpl to target_bundle.tmpl - Use AllResources() iterator instead of manually checking each type - Simplify isAppOnly logic to: hasApps && len(TargetTypes) == 1 - Add default catalog logic to empty.go for consistency - Add test coverage for empty bundles šŸ¤– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- .../apps-mcp/cmd/init_template/empty.go | 6 +++ .../apps-mcp/lib/detector/bundle_detector.go | 38 +++++++------------ .../apps-mcp/lib/detector/detector_test.go | 19 +++++++++- .../{target_mixed.tmpl => target_bundle.tmpl} | 4 +- 4 files changed, 39 insertions(+), 28 deletions(-) rename experimental/apps-mcp/lib/prompts/{target_mixed.tmpl => target_bundle.tmpl} (89%) diff --git a/experimental/apps-mcp/cmd/init_template/empty.go b/experimental/apps-mcp/cmd/init_template/empty.go index 75aa9d3330..61d4d2c8f4 100644 --- a/experimental/apps-mcp/cmd/init_template/empty.go +++ b/experimental/apps-mcp/cmd/init_template/empty.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/experimental/apps-mcp/lib/middlewares" "github.com/databricks/cli/libs/template" "github.com/spf13/cobra" ) @@ -61,6 +62,11 @@ After initialization: return fmt.Errorf("--language must be 'python', 'sql', or 'other', got '%s'", language) } + // Default to workspace default catalog if not specified + if catalog == "" { + catalog = middlewares.GetDefaultCatalog(ctx) + } + configMap := map[string]any{ "project_name": name, "personal_schemas": "yes", diff --git a/experimental/apps-mcp/lib/detector/bundle_detector.go b/experimental/apps-mcp/lib/detector/bundle_detector.go index a61eba07ec..1b05600446 100644 --- a/experimental/apps-mcp/lib/detector/bundle_detector.go +++ b/experimental/apps-mcp/lib/detector/bundle_detector.go @@ -41,38 +41,26 @@ func (d *BundleDetector) Detect(ctx context.Context, workDir string, detected *D RootDir: workDir, } - // extract target types from fully loaded resources - hasApps := len(b.Config.Resources.Apps) > 0 - hasJobs := len(b.Config.Resources.Jobs) > 0 - hasPipelines := len(b.Config.Resources.Pipelines) > 0 - - if hasApps { - detected.TargetTypes = append(detected.TargetTypes, "apps") - } - if hasJobs { - detected.TargetTypes = append(detected.TargetTypes, "jobs") - } - if hasPipelines { - detected.TargetTypes = append(detected.TargetTypes, "pipelines") + // Detect all resource types present in the bundle + hasApps := false + for _, group := range b.Config.Resources.AllResources() { + if len(group.Resources) > 0 { + detected.TargetTypes = append(detected.TargetTypes, group.Description.PluralName) + if group.Description.PluralName == "apps" { + hasApps = true + } + } } // Determine if this is an app-only project (only app resources, nothing else). - // App-only projects get focused app guidance; others get "mixed" guidance. 
- isAppOnly := hasApps && !hasJobs && !hasPipelines && - len(b.Config.Resources.Clusters) == 0 && - len(b.Config.Resources.Dashboards) == 0 && - len(b.Config.Resources.Experiments) == 0 && - len(b.Config.Resources.ModelServingEndpoints) == 0 && - len(b.Config.Resources.RegisteredModels) == 0 && - len(b.Config.Resources.Schemas) == 0 && - len(b.Config.Resources.QualityMonitors) == 0 && - len(b.Config.Resources.Volumes) == 0 + // App-only projects get focused app guidance; others get general bundle guidance. + isAppOnly := hasApps && len(detected.TargetTypes) == 1 detected.IsAppOnly = isAppOnly - // Include "mixed" guidance for all projects except app-only projects + // Include general "bundle" guidance for all projects except app-only projects if !isAppOnly { - detected.TargetTypes = append(detected.TargetTypes, "mixed") + detected.TargetTypes = append(detected.TargetTypes, "bundle") } return nil diff --git a/experimental/apps-mcp/lib/detector/detector_test.go b/experimental/apps-mcp/lib/detector/detector_test.go index b2c4f72b26..fd5adffbd2 100644 --- a/experimental/apps-mcp/lib/detector/detector_test.go +++ b/experimental/apps-mcp/lib/detector/detector_test.go @@ -23,6 +23,23 @@ func TestDetectorRegistry_EmptyDir(t *testing.T) { assert.Empty(t, detected.Template) } +func TestDetectorRegistry_EmptyBundle(t *testing.T) { + dir := t.TempDir() + ctx := context.Background() + + bundleYml := `bundle: + name: empty-project +` + require.NoError(t, os.WriteFile(filepath.Join(dir, "databricks.yml"), []byte(bundleYml), 0o644)) + + registry := detector.NewRegistry() + detected := registry.Detect(ctx, dir) + + assert.True(t, detected.InProject) + assert.Equal(t, []string{"bundle"}, detected.TargetTypes) + assert.Equal(t, "empty-project", detected.BundleInfo.Name) +} + func TestDetectorRegistry_BundleWithApps(t *testing.T) { dir := t.TempDir() ctx := context.Background() @@ -59,7 +76,7 @@ resources: detected := registry.Detect(ctx, dir) assert.True(t, detected.InProject) - assert.Equal(t, []string{"jobs", "mixed"}, detected.TargetTypes) + assert.Equal(t, []string{"jobs", "bundle"}, detected.TargetTypes) assert.Equal(t, "my-job", detected.BundleInfo.Name) } diff --git a/experimental/apps-mcp/lib/prompts/target_mixed.tmpl b/experimental/apps-mcp/lib/prompts/target_bundle.tmpl similarity index 89% rename from experimental/apps-mcp/lib/prompts/target_mixed.tmpl rename to experimental/apps-mcp/lib/prompts/target_bundle.tmpl index e1a01ea418..35121f5f7c 100644 --- a/experimental/apps-mcp/lib/prompts/target_mixed.tmpl +++ b/experimental/apps-mcp/lib/prompts/target_bundle.tmpl @@ -1,7 +1,7 @@ {{- /* - * L2: Target-specific guidance for mixed/custom resource projects. + * L2: Target-specific guidance for general bundle projects. * - * Injected when: empty projects or projects with mixed resource types. Not for app-only projects. + * Injected when: any non-app-only bundle project (empty, jobs-only, pipelines-only, multi-resource, etc.) * Contains: how to add any resource type, deployment commands, documentation. 
*/ -}} From 5cf32cf6580259dc7c9437f0f26643e038b913df Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 29 Dec 2025 12:10:24 +0100 Subject: [PATCH 08/18] Add skills system to apps-mcp --- .../apps-mcp/cmd/init_template/app.go | 2 +- .../apps-mcp/cmd/init_template/common.go | 8 + .../apps-mcp/docs/context-management.md | 33 ++- experimental/apps-mcp/lib/prompts/skills.tmpl | 25 +++ .../lib/providers/clitools/discover.go | 17 +- .../lib/providers/clitools/provider.go | 26 ++- .../apps-mcp/lib/skills/apps/.gitkeep | 0 .../apps-mcp/lib/skills/jobs/.gitkeep | 0 .../lib/skills/pipelines/auto-cdc/SKILL.md | 26 +++ .../pipelines/auto-cdc/auto-cdc-python.md | 211 ++++++++++++++++++ .../skills/pipelines/auto-cdc/auto-cdc-sql.md | 170 ++++++++++++++ experimental/apps-mcp/lib/skills/skills.go | 174 +++++++++++++++ .../apps-mcp/lib/skills/skills_test.go | 69 ++++++ 13 files changed, 746 insertions(+), 15 deletions(-) create mode 100644 experimental/apps-mcp/lib/prompts/skills.tmpl create mode 100644 experimental/apps-mcp/lib/skills/apps/.gitkeep create mode 100644 experimental/apps-mcp/lib/skills/jobs/.gitkeep create mode 100644 experimental/apps-mcp/lib/skills/pipelines/auto-cdc/SKILL.md create mode 100644 experimental/apps-mcp/lib/skills/pipelines/auto-cdc/auto-cdc-python.md create mode 100644 experimental/apps-mcp/lib/skills/pipelines/auto-cdc/auto-cdc-sql.md create mode 100644 experimental/apps-mcp/lib/skills/skills.go create mode 100644 experimental/apps-mcp/lib/skills/skills_test.go diff --git a/experimental/apps-mcp/cmd/init_template/app.go b/experimental/apps-mcp/cmd/init_template/app.go index 227eb55bf6..a6cd5f994b 100644 --- a/experimental/apps-mcp/cmd/init_template/app.go +++ b/experimental/apps-mcp/cmd/init_template/app.go @@ -167,7 +167,7 @@ After initialization: projectDir := filepath.Join(outputDir, name) - // Inject L3 (template-specific guidance from CLAUDE.md) + // Inject L4 (template-specific guidance from CLAUDE.md) // (we only do this for the app template; other templates use a generic CLAUDE.md) readClaudeMd(ctx, projectDir) diff --git a/experimental/apps-mcp/cmd/init_template/common.go b/experimental/apps-mcp/cmd/init_template/common.go index 7f78ec4a0b..7ee0a70aae 100644 --- a/experimental/apps-mcp/cmd/init_template/common.go +++ b/experimental/apps-mcp/cmd/init_template/common.go @@ -12,6 +12,7 @@ import ( "github.com/databricks/cli/experimental/apps-mcp/lib/common" "github.com/databricks/cli/experimental/apps-mcp/lib/detector" "github.com/databricks/cli/experimental/apps-mcp/lib/prompts" + "github.com/databricks/cli/experimental/apps-mcp/lib/skills" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/template" ) @@ -87,6 +88,8 @@ func MaterializeTemplate(ctx context.Context, cfg TemplateConfig, configMap map[ } } + // L2: resource-specific guidance (e.g., from target_jobs.tmpl) + cmdio.LogString(ctx, "--") // separator for prompt readability & tests for _, targetType := range detected.TargetTypes { templateName := fmt.Sprintf("target_%s.tmpl", targetType) if prompts.TemplateExists(templateName) { @@ -95,6 +98,11 @@ func MaterializeTemplate(ctx context.Context, cfg TemplateConfig, configMap map[ } } + // L3: list available skills + if skillsSection := skills.FormatSkillsSection(detected.IsAppOnly, false); skillsSection != "" { + cmdio.LogString(ctx, "\n"+skillsSection) + } + return nil } diff --git a/experimental/apps-mcp/docs/context-management.md b/experimental/apps-mcp/docs/context-management.md index e504c1a3df..591c38e83b 100644 ---
a/experimental/apps-mcp/docs/context-management.md +++ b/experimental/apps-mcp/docs/context-management.md @@ -16,7 +16,8 @@ | **L0: Tools** | Databricks MCP tool names and descriptions | Always (MCP protocol) | | **L1: Flow** | Universal workflow, available tools, CLI patterns | Always (via `databricks_discover`) | | **L2: Target** | Target-specific: validation, deployment, constraints | When target type detected or after `init-template` | -| **L3: Template** | SDK/language-specific: file structure, commands, patterns | After `init-template`. For existing projects, agent reads CLAUDE.md. | +| **L3: Skills** | Task-specific domain expertise (on-demand) | Skill listings shown via `databricks_discover` and `init-template`; full content loaded via `read_skill_file` | +| **L4: Template** | SDK/language-specific: file structure, commands, patterns | After `init-template`. For existing projects, agent reads CLAUDE.md. | L0 is implicit - tool descriptions guide agent behavior before any tool is called (e.g., `databricks_discover` description tells agent to call it first during planning). @@ -26,7 +27,9 @@ L0 is implicit - tool descriptions guide agent behavior before any tool is calle **L2 (apps):** app naming constraints, deployment consent requirement, app-specific validation -**L3 (appkit-typescript):** npm scripts, tRPC patterns, useAnalyticsQuery usage, TypeScript import rules +**L3 (skills):** Task-specific domain expertise (e.g., CDC processing, materialized views, specific design patterns) + +**L4 (appkit-typescript):** npm scripts, tRPC patterns, useAnalyticsQuery usage, TypeScript import rules ## Flows @@ -38,16 +41,21 @@ Agent MCP ā”œā”€ā–ŗ databricks_discover │ │ {working_directory: "."} │ │ ā”œā”€ā–ŗ Run detectors (nothing found) - │ ā”œā”€ā–ŗ Return L1 only + │ ā”œā”€ā–ŗ Return L1 + L3 listing │◄─────────────────────────────┤ │ │ ā”œā”€ā–ŗ invoke_databricks_cli │ │ ["...", "init-template", ...] │ ā”œā”€ā–ŗ Scaffold project - │ ā”œā”€ā–ŗ Return L2[apps] + L3 + │ ā”œā”€ā–ŗ Return L2[apps] + L3 listing + L4 │◄─────────────────────────────┤ │ │ - ā”œā”€ā–ŗ (agent now has L1 + L2 + L3) + ā”œā”€ā–ŗ (agent now has L1 + L2 + L3 listing + L4) + │ │ + ā”œā”€ā–ŗ read_skill_file │ + │ (when specific task needs domain expertise) + │ ā”œā”€ā–ŗ Return L3[skill content] + │◄─────────────────────────────┤ ``` ### Existing Project @@ -59,10 +67,16 @@ Agent MCP │ {working_directory: "./my-app"} │ ā”œā”€ā–ŗ BundleDetector: found apps + jobs │ ā”œā”€ā–ŗ Return L1 + L2[apps] + L2[jobs] + │ ā”œā”€ā–ŗ List available L3 skills │◄─────────────────────────────┤ │ │ ā”œā”€ā–ŗ Read CLAUDE.md naturally │ - │ (agent learns L3 itself) │ + │ (agent learns L4 itself) │ + │ │ + ā”œā”€ā–ŗ read_skill_file │ + │ (on-demand for specific tasks) + │ ā”œā”€ā–ŗ Return L3[skill content] + │◄─────────────────────────────┤ ``` ### Combined Bundles @@ -76,5 +90,10 @@ New target types can be added by: 2. Adding detection logic to recognize the target type from `databricks.yml` New templates can be added by: -1. Creating template directory with CLAUDE.md (L3 guidance) +1. Creating template directory with CLAUDE.md (L4 guidance) 2. Adding detection logic to recognize the template from project files + +New skills can be added by: +1. Creating skill directory under `lib/skills/{apps,jobs,pipelines,...}/` with SKILL.md +2. SKILL.md must have YAML frontmatter with `name` (matching directory) and `description` +3. 
Skills are auto-discovered at build time (no code changes needed) diff --git a/experimental/apps-mcp/lib/prompts/skills.tmpl b/experimental/apps-mcp/lib/prompts/skills.tmpl new file mode 100644 index 0000000000..6bd897ba66 --- /dev/null +++ b/experimental/apps-mcp/lib/prompts/skills.tmpl @@ -0,0 +1,25 @@ +{{- /* + * L3: Skills listing template. + * + * Injected when: existing project is detected or after init-template. + * Contains: skill selection & loading guidance, skill registry. + */ -}} + +## Skills + +You have access to modular Skills for domain-specific expertise. + +### Skill Selection & Loading +* When a user request matches a skill's scope description, select that Skill +* Load skills using the MCP tool: `read_skill_file(file_path: "category/skill-name/SKILL.md")` +* Example: `read_skill_file(file_path: "pipelines/materialized-view/SKILL.md")` +* Skills may contain links to sub-sections (e.g., "category/skill-name/file.md") +* If no Skill is suitable, continue with your base capabilities +* Never mention or reference skills to the user, only use them internally +{{if .ShowNoSkillsForApps}} +There are currently no skills available for apps. For skills related to other Databricks resources (jobs, pipelines, etc.), use `databricks_discover` with `list_all_skills=true`. +{{else}} +### Skill Registry (names + brief descriptors) +{{range .Skills}}* **{{.Path}}/SKILL.md**: {{.Description}} +{{end}} +{{end}} diff --git a/experimental/apps-mcp/lib/providers/clitools/discover.go b/experimental/apps-mcp/lib/providers/clitools/discover.go index 2e9324dc9b..352e8d031b 100644 --- a/experimental/apps-mcp/lib/providers/clitools/discover.go +++ b/experimental/apps-mcp/lib/providers/clitools/discover.go @@ -7,14 +7,16 @@ import ( "github.com/databricks/cli/experimental/apps-mcp/lib/detector" "github.com/databricks/cli/experimental/apps-mcp/lib/middlewares" "github.com/databricks/cli/experimental/apps-mcp/lib/prompts" + "github.com/databricks/cli/experimental/apps-mcp/lib/skills" "github.com/databricks/cli/libs/databrickscfg/profile" "github.com/databricks/cli/libs/log" "github.com/databricks/databricks-sdk-go/service/sql" ) // Discover provides workspace context and workflow guidance. -// Returns L1 (flow) always + L2 (target) for detected target types. -func Discover(ctx context.Context, workingDirectory string) (string, error) { +// Returns L1 (flow) always + L2 (target) for detected target types + L3 (skills) listing. +// If listAllSkills is true, shows all available skills without filtering by project type. +func Discover(ctx context.Context, workingDirectory string, listAllSkills bool) (string, error) { warehouse, err := middlewares.GetWarehouseEndpoint(ctx) if err != nil { log.Debugf(ctx, "Failed to get default warehouse (non-fatal): %v", err) @@ -31,11 +33,11 @@ func Discover(ctx context.Context, workingDirectory string) (string, error) { registry := detector.NewRegistry() detected := registry.Detect(ctx, workingDirectory) - return generateDiscoverGuidance(ctx, warehouse, currentProfile, profiles, defaultCatalog, detected), nil + return generateDiscoverGuidance(ctx, warehouse, currentProfile, profiles, defaultCatalog, detected, listAllSkills), nil } // generateDiscoverGuidance creates guidance with L1 (flow) + L2 (target) layers.
-func generateDiscoverGuidance(ctx context.Context, warehouse *sql.EndpointInfo, currentProfile string, profiles profile.Profiles, defaultCatalog string, detected *detector.DetectedContext) string { +// generateDiscoverGuidance creates guidance with L1 (flow) + L2 (target) + L3 (skills) layers. +func generateDiscoverGuidance(ctx context.Context, warehouse *sql.EndpointInfo, currentProfile string, profiles profile.Profiles, defaultCatalog string, detected *detector.DetectedContext, listAllSkills bool) string { data := buildTemplateData(warehouse, currentProfile, profiles, defaultCatalog) // L1: always include flow guidance @@ -61,6 +63,11 @@ func generateDiscoverGuidance(ctx context.Context, warehouse *sql.EndpointInfo, } } + // L3: list available skills + if skillsSection := skills.FormatSkillsSection(detected.IsAppOnly, listAllSkills); skillsSection != "" { + result += "\n\n" + skillsSection + } + return result } diff --git a/experimental/apps-mcp/lib/providers/clitools/provider.go b/experimental/apps-mcp/lib/providers/clitools/provider.go index e9d146f21a..fed0919e2c 100644 --- a/experimental/apps-mcp/lib/providers/clitools/provider.go +++ b/experimental/apps-mcp/lib/providers/clitools/provider.go @@ -7,6 +7,7 @@ import ( mcpsdk "github.com/databricks/cli/experimental/apps-mcp/lib/mcp" "github.com/databricks/cli/experimental/apps-mcp/lib/providers" "github.com/databricks/cli/experimental/apps-mcp/lib/session" + "github.com/databricks/cli/experimental/apps-mcp/lib/skills" "github.com/databricks/cli/libs/log" ) @@ -88,6 +89,7 @@ func (p *Provider) RegisterTools(server *mcpsdk.Server) error { // Register databricks_discover tool type DiscoverInput struct { WorkingDirectory string `json:"working_directory" jsonschema:"required" jsonschema_description:"The directory to detect project context from."` + ListAllSkills bool `json:"list_all_skills,omitempty" jsonschema_description:"If true, list all available skills without filtering by project type."` } mcpsdk.AddTool(server, @@ -97,7 +99,7 @@ func (p *Provider) RegisterTools(server *mcpsdk.Server) error { }, func(ctx context.Context, req *mcpsdk.CallToolRequest, args DiscoverInput) (*mcpsdk.CallToolResult, any, error) { log.Debugf(ctx, "databricks_discover called: working_directory=%s", args.WorkingDirectory) - result, err := Discover(ctx, args.WorkingDirectory) + result, err := Discover(ctx, args.WorkingDirectory, args.ListAllSkills) if err != nil { return nil, nil, err } @@ -126,6 +128,26 @@ func (p *Provider) RegisterTools(server *mcpsdk.Server) error { }, ) - log.Infof(p.ctx, "Registered CLI tools: count=%d", 3) + // Register read_skill_file tool + type ReadSkillFileInput struct { + FilePath string `json:"file_path" jsonschema:"required" jsonschema_description:"Path to skill file, format: category/skill-name/file.md (e.g., pipelines/auto-cdc/SKILL.md)"` + } + + mcpsdk.AddTool(server, + &mcpsdk.Tool{ + Name: "read_skill_file", + Description: "Read a skill file from the skills registry (skills are listed by databricks_discover). Provides domain-specific expertise for Databricks tasks (pipelines, jobs, apps, ...). 
Load when user requests match a skill's scope.", + }, + func(ctx context.Context, req *mcpsdk.CallToolRequest, args ReadSkillFileInput) (*mcpsdk.CallToolResult, any, error) { + log.Debugf(ctx, "read_skill_file called: file_path=%s", args.FilePath) + result, err := skills.GetSkillFile(args.FilePath) + if err != nil { + return nil, nil, err + } + return mcpsdk.CreateNewTextContentResult(result), nil, nil + }, + ) + + log.Infof(p.ctx, "Registered CLI tools: count=%d", 4) return nil } diff --git a/experimental/apps-mcp/lib/skills/apps/.gitkeep b/experimental/apps-mcp/lib/skills/apps/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/experimental/apps-mcp/lib/skills/jobs/.gitkeep b/experimental/apps-mcp/lib/skills/jobs/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/experimental/apps-mcp/lib/skills/pipelines/auto-cdc/SKILL.md b/experimental/apps-mcp/lib/skills/pipelines/auto-cdc/SKILL.md new file mode 100644 index 0000000000..e402cd17f0 --- /dev/null +++ b/experimental/apps-mcp/lib/skills/pipelines/auto-cdc/SKILL.md @@ -0,0 +1,26 @@ +--- +name: auto-cdc +description: Apply Change Data Capture (CDC) with apply_changes API in Spark Declarative Pipelines. Use when user needs to process CDC feeds from databases, handle upserts/deletes, maintain slowly changing dimensions (SCD Type 1 and Type 2), synchronize data from operational databases, or process merge operations. +--- + +# Auto CDC (apply_changes) in Spark Declarative Pipelines + +The `apply_changes` API enables processing Change Data Capture (CDC) feeds to automatically handle inserts, updates, and deletes in target tables. + +## Key Concepts + +Auto CDC in Spark Declarative Pipelines: + +- Automatically processes CDC operations (INSERT, UPDATE, DELETE) +- Supports SCD Type 1 (update in place) and Type 2 (historical tracking) +- Handles ordering of changes via sequence columns +- Deduplicates CDC records + +## Language-Specific Implementations + +For detailed implementation guides: + +- **Python**: [auto-cdc-python.md](auto-cdc-python.md) +- **SQL**: [auto-cdc-sql.md](auto-cdc-sql.md) + +**Note**: The API is also known as `applyChanges` in some contexts. diff --git a/experimental/apps-mcp/lib/skills/pipelines/auto-cdc/auto-cdc-python.md b/experimental/apps-mcp/lib/skills/pipelines/auto-cdc/auto-cdc-python.md new file mode 100644 index 0000000000..f665d17a6c --- /dev/null +++ b/experimental/apps-mcp/lib/skills/pipelines/auto-cdc/auto-cdc-python.md @@ -0,0 +1,211 @@ +Auto CDC in Spark Declarative Pipelines processes change data capture (CDC) events from streaming sources or snapshots. + +**API Reference:** + +**dp.create_auto_cdc_flow() / dp.apply_changes() / dlt.create_auto_cdc_flow() / dlt.apply_changes()** +Applies CDC operations (inserts, updates, deletes) from a streaming source to a target table. Supports SCD Type 1 (latest) and Type 2 (history). Does NOT return a value - call at top level without assignment. + +```python +dp.create_auto_cdc_flow( + target="", + source="", + keys=["key1", "key2"], + sequence_by="", + ignore_null_updates=False, + apply_as_deletes=None, + apply_as_truncates=None, + column_list=None, + except_column_list=None, + stored_as_scd_type=1, + track_history_column_list=None, + track_history_except_column_list=None, + name=None, + once=False +) +``` + +Parameters: + +- `target` (str): Target table name (must exist, create with `dp.create_streaming_table()`). **Required.** +- `source` (str): Source table name with CDC events. 
**Required.**
+- `keys` (list): Primary key columns for row identification. **Required.**
+- `sequence_by` (str): Column for ordering events (timestamp, version). **Required.**
+- `ignore_null_updates` (bool): If True, NULL values won't overwrite existing non-NULL values
+- `apply_as_deletes` (str): SQL expression identifying delete operations (e.g., `"op = 'D'"`)
+- `apply_as_truncates` (str): SQL expression identifying truncate operations
+- `column_list` (list): Columns to include (mutually exclusive with `except_column_list`)
+- `except_column_list` (list): Columns to exclude
+- `stored_as_scd_type` (int): `1` for latest values (default), `2` for full history with `__START_AT`/`__END_AT` columns
+- `track_history_column_list` (list): For SCD Type 2, columns to track history for (others use Type 1)
+- `track_history_except_column_list` (list): For SCD Type 2, columns to exclude from history tracking
+- `name` (str): Flow name (for multiple flows to same target)
+- `once` (bool): Process once and stop (default: False)
+
+**dp.create_auto_cdc_from_snapshot_flow() / dp.apply_changes_from_snapshot() / dlt.create_auto_cdc_from_snapshot_flow() / dlt.apply_changes_from_snapshot()**
+Applies CDC from full snapshots by comparing to previous state. Automatically infers inserts, updates, deletes.
+
+```python
+dp.create_auto_cdc_from_snapshot_flow(
+ target="<target_table_name>",
+ source=<table_name_or_callable>,
+ keys=["key1", "key2"],
+ stored_as_scd_type=1,
+ track_history_column_list=None,
+ track_history_except_column_list=None
+)
+```
+
+Parameters:
+
+- `target` (str): Target table name (must exist). **Required.**
+- `source` (str or callable): **Required.** Can be one of:
+ - **String**: Source table name containing the full snapshot (most common)
+ - **Callable**: Function for processing historical snapshots with type `SnapshotAndVersionFunction = Callable[[SnapshotVersion], SnapshotAndVersion]`
+ - `SnapshotVersion = Union[int, str, float, bytes, datetime.datetime, datetime.date, decimal.Decimal]`
+ - `SnapshotAndVersion = Optional[Tuple[DataFrame, SnapshotVersion]]`
+ - Function receives the latest processed snapshot version (or None for first run)
+ - Must return `None` when no more snapshots to process
+ - Must return tuple of `(DataFrame, SnapshotVersion)` for next snapshot to process
+ - Snapshot version is used to track progress and must be comparable/orderable
+- `keys` (list): Primary key columns.
**Required.** +- `stored_as_scd_type` (int): `1` for latest (default), `2` for history +- `track_history_column_list` (list): Columns to track history for (SCD Type 2) +- `track_history_except_column_list` (list): Columns to exclude from history tracking + +**Use create_auto_cdc_flow when:** Processing streaming CDC events from transaction logs, Kafka, Delta change feeds +**Use create_auto_cdc_from_snapshot_flow when:** Processing periodic full snapshots (daily dumps, batch extracts) + +**Common Patterns:** + +**Pattern 1: Basic CDC flow from streaming source** + +```python +# Step 1: Create target table +dp.create_streaming_table(name="users") + +# Step 2: Define CDC flow (source must be a table name) +dp.create_auto_cdc_flow( + target="users", + source="user_changes", + keys=["user_id"], + sequence_by="updated_at" +) +``` + +**Pattern 2: CDC flow with upstream transformation** + +```python +# Step 1: Define view with transformation (source preprocessing) +@dp.view() +def filtered_user_changes(): + return ( + spark.readStream.table("raw_user_changes") + .filter("user_id IS NOT NULL") + ) + +# Step 2: Create target table +dp.create_streaming_table(name="users") + +# Step 3: Define CDC flow using the view as source +dp.create_auto_cdc_flow( + target="users", + source="filtered_user_changes", # References the view name + keys=["user_id"], + sequence_by="updated_at" +) +# Note: Use distinct names for view and target for clarity +# Note: If "raw_user_changes" is defined in the pipeline and no additional transformations or expectations are needed, +# source="raw_user_changes" can be used directly +``` + +**Pattern 3: CDC with explicit deletes** + +```python +dp.create_streaming_table(name="orders") + +dp.create_auto_cdc_flow( + target="orders", + source="order_events", + keys=["order_id"], + sequence_by="event_timestamp", + apply_as_deletes="operation = 'DELETE'", + ignore_null_updates=True +) +``` + +**Pattern 4: SCD Type 2 (Historical tracking)** + +```python +dp.create_streaming_table(name="customer_history") + +dp.create_auto_cdc_flow( + target="customer_history", + source="source.customer_changes", + keys=["customer_id"], + sequence_by="changed_at", + stored_as_scd_type=2 # Track full history +) +# Target will include __START_AT and __END_AT columns +``` + +**Pattern 5: Snapshot-based CDC (Simple - table source)** + +```python +dp.create_streaming_table(name="products") + +@dp.table(name="product_snapshot") +def product_snapshot(): + return spark.read.table("source.daily_product_dump") + +dp.create_auto_cdc_from_snapshot_flow( + target="products", + source="product_snapshot", # String table name - most common + keys=["product_id"], + stored_as_scd_type=1 +) +``` + +**Pattern 6: Snapshot-based CDC (Advanced - callable for historical snapshots)** + +```python +dp.create_streaming_table(name="products") + +# Define a callable to process historical snapshots sequentially +def next_snapshot_and_version(latest_snapshot_version: Optional[int]) -> Tuple[DataFrame, Optional[int]]: + if latest_snapshot_version is None: + return (spark.read.load("products.csv"), 1) + else: + return None + +dp.create_auto_cdc_from_snapshot_flow( + target="products", + source=next_snapshot_and_version, # Callable function for historical processing + keys=["product_id"], + stored_as_scd_type=1 +) +``` + +**Pattern 7: Selective column tracking** + +```python +dp.create_streaming_table(name="accounts") + +dp.create_auto_cdc_flow( + target="accounts", + source="account_changes", + keys=["account_id"], + 
sequence_by="modified_at", + stored_as_scd_type=2, + track_history_column_list=["balance", "status"], # Only track history for these columns + ignore_null_updates=True +) +``` + +**KEY RULES:** + +- Create target with `dp.create_streaming_table()` before defining CDC flow +- `dp.create_auto_cdc_flow()` does NOT return a value - call it at top level without assigning to a variable +- `source` must be a table name (string) - use `@dp.view()` to transform data before CDC processing +- SCD Type 2 adds `__START_AT` and `__END_AT` columns for validity tracking +- When specifying the schema of the target table for SCD Type 2, you must also include the `__START_AT` and `__END_AT` columns with the same data type as the `sequence_by` field +- Legacy names (`apply_changes`, `apply_changes_from_snapshot`) are equivalent but deprecated - prefer `create_auto_cdc_*` variants diff --git a/experimental/apps-mcp/lib/skills/pipelines/auto-cdc/auto-cdc-sql.md b/experimental/apps-mcp/lib/skills/pipelines/auto-cdc/auto-cdc-sql.md new file mode 100644 index 0000000000..9487d23b83 --- /dev/null +++ b/experimental/apps-mcp/lib/skills/pipelines/auto-cdc/auto-cdc-sql.md @@ -0,0 +1,170 @@ +Auto CDC in Declarative Pipelines processes change data capture (CDC) events from streaming sources. + +**API Reference:** + +**CREATE FLOW ... AS AUTO CDC INTO** +Applies CDC operations (inserts, updates, deletes) from a streaming source to a target table. Supports SCD Type 1 (latest) and Type 2 (history). Must be used with a pre-created streaming table. + +```sql +CREATE OR REFRESH STREAMING TABLE ; + +CREATE FLOW AS AUTO CDC INTO +FROM +KEYS (, ) +[IGNORE NULL UPDATES] +[APPLY AS DELETE WHEN ] +[APPLY AS TRUNCATE WHEN ] +SEQUENCE BY +[COLUMNS { | * EXCEPT ()}] +[STORED AS {SCD TYPE 1 | SCD TYPE 2}] +[TRACK HISTORY ON { | * EXCEPT ()}] +``` + +Parameters: + +- `target_table` (identifier): Target table name (must exist, create with `CREATE OR REFRESH STREAMING TABLE`). **Required.** +- `flow_name` (identifier): Identifier for the created flow. **Required.** +- `source` (identifier or expression): Streaming source with CDC events. Use `STREAM()` to read with streaming semantics. **Required.** +- `KEYS` (column list): Primary key columns for row identification. **Required.** +- `IGNORE NULL UPDATES` (optional): If specified, NULL values won't overwrite existing non-NULL values +- `APPLY AS DELETE WHEN` (optional): Condition identifying delete operations (e.g., `operation = 'DELETE'`) +- `APPLY AS TRUNCATE WHEN` (optional): Condition identifying truncate operations (supported only for SCD Type 1) +- `SEQUENCE BY` (column): Column for ordering events (timestamp, version). 
**Required.** +- `COLUMNS` (optional): Columns to include or exclude (use `column1, column2` or `* EXCEPT (column1, column2)`) +- `STORED AS` (optional): `SCD TYPE 1` for latest values (default), `SCD TYPE 2` for full history with `__START_AT`/`__END_AT` columns +- `TRACK HISTORY ON` (optional): For SCD Type 2, columns to track history for (others use Type 1) + +**Common Patterns:** + +**Pattern 1: Basic CDC flow from streaming source** + +```sql +-- Step 1: Create target table +CREATE OR REFRESH STREAMING TABLE users; + +-- Step 2: Define CDC flow using STREAM() for streaming semantics +CREATE FLOW user_flow AS AUTO CDC INTO users +FROM STREAM(user_changes) +KEYS (user_id) +SEQUENCE BY updated_at; +``` + +**Pattern 2: CDC with source filtering via temporary view** + +```sql +-- Step 1: Create temporary view to filter/transform source data +CREATE OR REFRESH TEMPORARY VIEW filtered_changes AS +SELECT * FROM source_table WHERE status = 'active'; + +-- Step 2: Create target table +CREATE OR REFRESH STREAMING TABLE active_records; + +-- Step 3: Define CDC flow reading from the temporary view +CREATE FLOW active_flow AS AUTO CDC INTO active_records +FROM STREAM(filtered_changes) +KEYS (record_id) +SEQUENCE BY updated_at; +``` + +**Pattern 3: CDC with explicit deletes** + +```sql +CREATE OR REFRESH STREAMING TABLE orders; + +CREATE FLOW order_flow AS AUTO CDC INTO orders +FROM STREAM(order_events) +KEYS (order_id) +IGNORE NULL UPDATES +APPLY AS DELETE WHEN operation = 'DELETE' +SEQUENCE BY event_timestamp; +``` + +**Pattern 4: SCD Type 2 (Historical tracking)** + +```sql +CREATE OR REFRESH STREAMING TABLE customer_history; + +CREATE FLOW customer_flow AS AUTO CDC INTO customer_history +FROM STREAM(customer_changes) +KEYS (customer_id) +SEQUENCE BY changed_at +STORED AS SCD TYPE 2; +-- Target will include __START_AT and __END_AT columns +``` + +**Pattern 5: Selective column inclusion** + +```sql +CREATE OR REFRESH STREAMING TABLE accounts; + +CREATE FLOW account_flow AS AUTO CDC INTO accounts +FROM STREAM(account_changes) +KEYS (account_id) +SEQUENCE BY modified_at +COLUMNS account_id, balance, status +STORED AS SCD TYPE 1; +``` + +**Pattern 6: Selective column exclusion** + +```sql +CREATE OR REFRESH STREAMING TABLE products; + +CREATE FLOW product_flow AS AUTO CDC INTO products +FROM STREAM(product_changes) +KEYS (product_id) +SEQUENCE BY updated_at +COLUMNS * EXCEPT (internal_notes, temp_field); +``` + +**Pattern 7: SCD Type 2 with selective history tracking** + +```sql +CREATE OR REFRESH STREAMING TABLE accounts; + +CREATE FLOW account_flow AS AUTO CDC INTO accounts +FROM STREAM(account_changes) +KEYS (account_id) +IGNORE NULL UPDATES +SEQUENCE BY modified_at +STORED AS SCD TYPE 2 +TRACK HISTORY ON balance, status; +-- Only balance and status changes create new history records +``` + +**Pattern 8: SCD Type 2 with history tracking exclusion** + +```sql +CREATE OR REFRESH STREAMING TABLE accounts; + +CREATE FLOW account_flow AS AUTO CDC INTO accounts +FROM STREAM(account_changes) +KEYS (account_id) +SEQUENCE BY modified_at +STORED AS SCD TYPE 2 +TRACK HISTORY ON * EXCEPT (last_login, view_count); +-- Track history on all columns except last_login and view_count +``` + +**Pattern 9: Truncate support (SCD Type 1 only)** + +```sql +CREATE OR REFRESH STREAMING TABLE inventory; + +CREATE FLOW inventory_flow AS AUTO CDC INTO inventory +FROM STREAM(inventory_events) +KEYS (product_id) +APPLY AS TRUNCATE WHEN operation = 'TRUNCATE' +SEQUENCE BY event_timestamp +STORED AS SCD TYPE 1; +``` + +**KEY 
RULES:** + +- Create target with `CREATE OR REFRESH STREAMING TABLE` before defining CDC flow +- `source` must be a streaming source for safe CDC change processing. Use `STREAM()` to read an existing table/view with streaming semantics +- The `STREAM()` function accepts ONLY a table/view identifier - NOT a subquery. Define source data as a separate streaming table or temporary view first, then reference it in the flow +- SCD Type 2 adds `__START_AT` and `__END_AT` columns for validity tracking +- When specifying the schema of the target table for SCD Type 2, you must also include the `__START_AT` and `__END_AT` columns with the same data type as the `SEQUENCE BY` field +- Legacy `APPLY CHANGES INTO` API is equivalent but deprecated - prefer `AUTO CDC INTO` +- `AUTO CDC FROM SNAPSHOT` is only available in Python, not in SQL. SQL only supports `AUTO CDC INTO` for processing CDC events from streaming sources. diff --git a/experimental/apps-mcp/lib/skills/skills.go b/experimental/apps-mcp/lib/skills/skills.go new file mode 100644 index 0000000000..f702e21962 --- /dev/null +++ b/experimental/apps-mcp/lib/skills/skills.go @@ -0,0 +1,174 @@ +package skills + +import ( + "embed" + "errors" + "fmt" + "io/fs" + "path" + "regexp" + "sort" + "strings" + + "github.com/databricks/cli/experimental/apps-mcp/lib/prompts" +) + +// skillsFS embeds the skills filesystem. +// +//go:embed apps/* jobs/* pipelines/* +var skillsFS embed.FS + +// SkillMetadata contains the path and description for progressive disclosure. +type SkillMetadata struct { + Path string + Description string +} + +type skillEntry struct { + Metadata SkillMetadata + Files map[string]string +} + +var registry = mustLoadRegistry() + +// mustLoadRegistry discovers skill categories and skills from the embedded filesystem. 
+func mustLoadRegistry() map[string]map[string]*skillEntry { + result := make(map[string]map[string]*skillEntry) + categories, _ := fs.ReadDir(skillsFS, ".") + for _, cat := range categories { + if !cat.IsDir() { + continue + } + category := cat.Name() + result[category] = make(map[string]*skillEntry) + entries, _ := fs.ReadDir(skillsFS, category) + for _, entry := range entries { + if !entry.IsDir() { + continue + } + skillPath := path.Join(category, entry.Name()) + if skill, err := loadSkill(skillPath); err == nil { + result[category][entry.Name()] = skill + } + } + } + return result +} + +func loadSkill(skillPath string) (*skillEntry, error) { + content, err := fs.ReadFile(skillsFS, path.Join(skillPath, "SKILL.md")) + if err != nil { + return nil, err + } + + metadata, err := parseMetadata(string(content)) + if err != nil { + return nil, err + } + metadata.Path = skillPath + + files := make(map[string]string) + entries, _ := fs.ReadDir(skillsFS, skillPath) + for _, e := range entries { + if !e.IsDir() { + if data, err := fs.ReadFile(skillsFS, path.Join(skillPath, e.Name())); err == nil { + files[e.Name()] = string(data) + } + } + } + + return &skillEntry{Metadata: *metadata, Files: files}, nil +} + +var frontmatterRe = regexp.MustCompile(`(?s)^---\n(.+?)\n---\n`) + +func parseMetadata(content string) (*SkillMetadata, error) { + match := frontmatterRe.FindStringSubmatch(content) + if match == nil { + return nil, errors.New("missing YAML frontmatter") + } + + var description string + for _, line := range strings.Split(match[1], "\n") { + if k, v, ok := strings.Cut(line, ":"); ok && strings.TrimSpace(k) == "description" { + description = strings.TrimSpace(v) + } + } + + if description == "" { + return nil, errors.New("missing description in skill frontmatter") + } + + return &SkillMetadata{Description: description}, nil +} + +// ListAllSkills returns metadata for all registered skills. +func ListAllSkills() []SkillMetadata { + var skills []SkillMetadata + for _, categorySkills := range registry { + for _, entry := range categorySkills { + skills = append(skills, entry.Metadata) + } + } + + sort.Slice(skills, func(i, j int) bool { + return skills[i].Path < skills[j].Path + }) + + return skills +} + +// GetSkillFile reads a specific file from a skill. +// path format: "category/skill-name/file.md" +func GetSkillFile(path string) (string, error) { + parts := strings.SplitN(path, "/", 3) + if len(parts) != 3 { + return "", fmt.Errorf("invalid skill path: %q (expected format category/skill-name/file.md, use databricks_discover for available skills)", path) + } + + category, skillName, fileName := parts[0], parts[1], parts[2] + + entry := registry[category][skillName] + if entry == nil { + return "", fmt.Errorf("skill not found: %s (use databricks_discover for available skills)", skillName) + } + + content, ok := entry.Files[fileName] + if !ok { + return "", fmt.Errorf("skill file not found: %s (use databricks_discover for available skills)", fileName) + } + + // Strip frontmatter from SKILL.md + if fileName == "SKILL.md" { + if loc := frontmatterRe.FindStringIndex(content); loc != nil { + content = strings.TrimLeft(content[loc[1]:], "\n") + } + } + + return content, nil +} + +// FormatSkillsSection returns the L3 skills listing for prompts. 
+func FormatSkillsSection(isAppOnly, listAllSkills bool) string { + allSkills := ListAllSkills() + + var skillsToShow []SkillMetadata + if listAllSkills || !isAppOnly { + skillsToShow = allSkills + } else { + for _, skill := range allSkills { + if strings.HasPrefix(skill.Path, "apps/") { + skillsToShow = append(skillsToShow, skill) + } + } + } + + if len(skillsToShow) == 0 && !isAppOnly { + return "" + } + + return prompts.MustExecuteTemplate("skills.tmpl", map[string]any{ + "ShowNoSkillsForApps": len(skillsToShow) == 0 && isAppOnly, + "Skills": skillsToShow, + }) +} diff --git a/experimental/apps-mcp/lib/skills/skills_test.go b/experimental/apps-mcp/lib/skills/skills_test.go new file mode 100644 index 0000000000..c1ffa2f819 --- /dev/null +++ b/experimental/apps-mcp/lib/skills/skills_test.go @@ -0,0 +1,69 @@ +package skills + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestListAllSkills(t *testing.T) { + skills := ListAllSkills() + require.NotEmpty(t, skills) + + var autoCdc *SkillMetadata + for i := range skills { + if skills[i].Path == "pipelines/auto-cdc" { + autoCdc = &skills[i] + break + } + } + require.NotNil(t, autoCdc) + assert.NotEmpty(t, autoCdc.Description) + assert.Less(t, len(autoCdc.Description), 500, "progressive disclosure: description should be brief") + assert.NotContains(t, autoCdc.Description, "```", "progressive disclosure: no code blocks") +} + +func TestGetSkillFile(t *testing.T) { + content, err := GetSkillFile("pipelines/auto-cdc/SKILL.md") + require.NoError(t, err) + assert.NotContains(t, content, "---\n", "frontmatter should be stripped") + assert.Contains(t, content, "Change Data Capture") +} + +func TestGetSkillFileErrors(t *testing.T) { + _, err := GetSkillFile("nonexistent") + assert.ErrorContains(t, err, "invalid skill path") + + _, err = GetSkillFile("pipelines/nonexistent/SKILL.md") + assert.ErrorContains(t, err, "skill not found") + + _, err = GetSkillFile("pipelines/auto-cdc/nonexistent.md") + assert.ErrorContains(t, err, "skill file not found") +} + +func TestFormatSkillsSection(t *testing.T) { + // Non-app project shows all skills + section := FormatSkillsSection(false, false) + assert.Contains(t, section, "## Skills") + assert.Contains(t, section, "pipelines/auto-cdc") + assert.NotContains(t, section, "no skills available for apps") + + // App-only project shows hint (no app skills currently exist) + section = FormatSkillsSection(true, false) + assert.Contains(t, section, "no skills available for apps") + + // listAllSkills=true shows all skills for app-only project too + section = FormatSkillsSection(true, true) + assert.Contains(t, section, "pipelines/auto-cdc") + assert.NotContains(t, section, "no skills available for apps") +} + +func TestAllSkillsHaveValidFrontmatter(t *testing.T) { + for category, categorySkills := range registry { + for name, entry := range categorySkills { + assert.NotEmpty(t, entry.Metadata.Description, "skill %s/%s missing description", category, name) + assert.Contains(t, entry.Files, "SKILL.md", "skill %s/%s missing SKILL.md", category, name) + } + } +} From 55b6da71d4a1b8746a4131f623e51de5de32c617 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Sat, 3 Jan 2026 11:47:30 +0100 Subject: [PATCH 09/18] Show all skills with caveats for non-matching resource types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit šŸ¤– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- 
acceptance/apps/init-template/app/output.txt | 140 +++++++++++++- acceptance/apps/init-template/app/script | 3 +- .../apps/init-template/empty/output.txt | 67 ++++++- acceptance/apps/init-template/empty/script | 3 +- acceptance/apps/init-template/job/output.txt | 121 ++++++++++++- acceptance/apps/init-template/job/script | 3 +- .../apps/init-template/pipeline/output.txt | 171 +++++++++++++++++- acceptance/apps/init-template/pipeline/script | 3 +- .../apps-mcp/cmd/init_template/common.go | 6 +- .../apps-mcp/docs/context-management.md | 2 +- .../apps-mcp/lib/detector/detector.go | 3 +- .../apps-mcp/lib/detector/detector_test.go | 72 ++++++++ experimental/apps-mcp/lib/prompts/skills.tmpl | 12 +- .../lib/providers/clitools/discover.go | 9 +- .../lib/providers/clitools/provider.go | 3 +- .../apps-mcp/lib/skills/bundle/.gitkeep | 0 experimental/apps-mcp/lib/skills/skills.go | 36 ++-- .../apps-mcp/lib/skills/skills_test.go | 28 +-- 18 files changed, 628 insertions(+), 54 deletions(-) create mode 100644 experimental/apps-mcp/lib/skills/bundle/.gitkeep diff --git a/acceptance/apps/init-template/app/output.txt b/acceptance/apps/init-template/app/output.txt index a522103bfa..2463c35507 100644 --- a/acceptance/apps/init-template/app/output.txt +++ b/acceptance/apps/init-template/app/output.txt @@ -1 +1,139 @@ -āœ“ Template instantiation succeeded +-- +## Databricks Apps Development + +### Validation +āš ļø Always validate before deploying: + invoke_databricks_cli 'experimental apps-mcp tools validate ./' + +This is battle-tested to catch common issues before deployment. Prefer using this over manual checks (e.g. `npm run lint`), as it covers more ground specific to Databricks Apps. + +### Deployment +āš ļø USER CONSENT REQUIRED: Only deploy with explicit user permission. + invoke_databricks_cli 'experimental apps-mcp tools deploy' + +### View and Manage + invoke_databricks_cli 'bundle summary' + +### View App Logs +To troubleshoot deployed apps, view their logs: + invoke_databricks_cli 'apps logs --tail-lines 100' + +### Local Development vs Deployed Apps + +During development: +- Start template-specific dev server (see project's CLAUDE.md for command and port) +- Use localhost URL shown when dev server starts + +After deployment: +- Get URL from: invoke_databricks_cli 'bundle summary' + +Decision tree: +- "open the app" + not deployed → localhost +- "open the app" + deployed → ask which environment +- "localhost"/"local" → always localhost + + +## Skills + +You have access to modular Skills for domain-specific expertise knowledge. + +### Skill Selection & Loading +* When a user request matches a skill's scope description, select that Skill +* Load skills using the MCP tool: `read_skill_file(file_path: "category/skill-name/SKILL.md")` +* Example: `read_skill_file(file_path: "pipelines/materialized-view/SKILL.md")` +* Skills may contain links to sub-sections (e.g., "category/skill-name/file.md") +* If no Skill is suitable, continue with your base capabilities +* Never mention or reference skills to the user, only use them internally + +### Skill Registry (names + brief descriptors) + + +**Note**: The following skills are for other resource types and may not be directly relevant to this project. + +* **pipelines/auto-cdc/SKILL.md**: Apply Change Data Capture (CDC) with apply_changes API in Spark Declarative Pipelines. 
Use when user needs to process CDC feeds from databases, handle upserts/deletes, maintain slowly changing dimensions (SCD Type 1 and Type 2), synchronize data from operational databases, or process merge operations. + + + +=== CLAUDE.md === +TypeScript full-stack template powered by **Databricks AppKit** with tRPC for additional custom API endpoints. + +- server/: Node.js backend with App Kit and tRPC +- client/: React frontend with App Kit hooks and tRPC client +- config/queries/: SQL query files for analytics +- shared/: Shared TypeScript types +- docs/: Detailed documentation on using App Kit features + +## Quick Start: Your First Query & Chart + +Follow these 3 steps to add data visualization to your app: + +**Step 1: Create a SQL query file** + +```sql +-- config/queries/my_data.sql +SELECT category, COUNT(*) as count, AVG(value) as avg_value +FROM my_table +GROUP BY category +``` + +**Step 2: Define the schema** + +```typescript +// config/queries/schema.ts +export const querySchemas = { + my_data: z.array( + z.object({ + category: z.string(), + count: z.number(), + avg_value: z.number(), + }) + ), +}; +``` + +**Step 3: Add visualization to your app** + +```typescript +// client/src/App.tsx +import { BarChart } from '@databricks/appkit-ui/react'; + + +``` + +**That's it!** The component handles data fetching, loading states, and rendering automatically. + +**To refresh TypeScript types after adding queries:** +- Run `npm run typegen` OR run `npm run dev` - both auto-generate type definitions in `client/src/appKitTypes.d.ts` +- DO NOT manually edit `appKitTypes.d.ts` + +## Installation + +**IMPORTANT**: When running `npm install`, always use `required_permissions: ['all']` to avoid sandbox permission errors. + +## NPM Scripts + +### Development +- `npm run dev` - Start dev server with hot reload (**ALWAYS use during development**) + +### Testing and Code Quality +See the databricks experimental apps-mcp tools validate instead of running these individually. + +### Utility +- `npm run clean` - Remove all build artifacts and node_modules + +**Common workflows:** +- Development: `npm run dev` → make changes → `npm run typecheck` → `npm run lint:fix` +- Pre-deploy: Validate with `databricks experimental apps-mcp tools validate .` + +## Documentation + +**IMPORTANT**: Read the relevant docs below before implementing features. They contain critical information about common pitfalls (e.g., SQL numeric type handling, schema definitions, Radix UI constraints). 
+ +- [SQL Queries](docs/sql-queries.md) - query files, schemas, type handling, parameterization +- [App Kit SDK](docs/appkit-sdk.md) - TypeScript imports, server setup, useAnalyticsQuery hook +- [Frontend](docs/frontend.md) - visualization components, styling, layout, Radix constraints +- [tRPC](docs/trpc.md) - custom endpoints for non-SQL operations (mutations, Databricks APIs) +- [Testing](docs/testing.md) - vitest unit tests, Playwright smoke/E2E tests + +================= + diff --git a/acceptance/apps/init-template/app/script b/acceptance/apps/init-template/app/script index 1f38796b6c..fc5d646e9a 100644 --- a/acceptance/apps/init-template/app/script +++ b/acceptance/apps/init-template/app/script @@ -1,4 +1,3 @@ #!/bin/bash -$CLI experimental apps-mcp tools init-template app --name test_app --sql-warehouse-id abc123 --output-dir output > /dev/null 2>&1 -echo "āœ“ Template instantiation succeeded" +$CLI experimental apps-mcp tools init-template app --name test_app --sql-warehouse-id abc123 --output-dir output 2>&1 | grep -A 9999 "^--$" rm -rf output diff --git a/acceptance/apps/init-template/empty/output.txt b/acceptance/apps/init-template/empty/output.txt index a522103bfa..544ad9fbb0 100644 --- a/acceptance/apps/init-template/empty/output.txt +++ b/acceptance/apps/init-template/empty/output.txt @@ -1 +1,66 @@ -āœ“ Template instantiation succeeded +-- +## Adding Databricks Resources + +Add resources by creating YAML files in resources/: + +**Jobs** - `resources/my_job.job.yml`: +```yaml +resources: + jobs: + my_job: + name: my_job + tasks: + - task_key: main + notebook_task: + notebook_path: ../src/notebook.py +``` + +**Pipelines** (Lakeflow Declarative Pipelines) - `resources/my_pipeline.pipeline.yml`: +```yaml +resources: + pipelines: + my_pipeline: + name: my_pipeline + catalog: ${var.catalog} + target: ${var.schema} + libraries: + - notebook: + path: ../src/pipeline.py +``` + +**Dashboards** - `resources/my_dashboard.dashboard.yml` +**Alerts** - `resources/my_alert.alert.yml` +**Model Serving** - `resources/my_endpoint.yml` +**Apps** - `resources/my_app.app.yml` + +**Other resource types**: clusters, schemas, volumes, registered_models, experiments, quality_monitors + +### Deployment +For dev targets you can deploy without user consent. This allows you to run resources on the workspace too! + + invoke_databricks_cli 'bundle deploy --target dev' + invoke_databricks_cli 'bundle run --target dev' + +View status with `invoke_databricks_cli 'bundle summary'`. + +### Documentation +- Resource types reference: https://docs.databricks.com/dev-tools/bundles/resources +- YAML examples: https://docs.databricks.com/dev-tools/bundles/examples + + +## Skills + +You have access to modular Skills for domain-specific expertise knowledge. + +### Skill Selection & Loading +* When a user request matches a skill's scope description, select that Skill +* Load skills using the MCP tool: `read_skill_file(file_path: "category/skill-name/SKILL.md")` +* Example: `read_skill_file(file_path: "pipelines/materialized-view/SKILL.md")` +* Skills may contain links to sub-sections (e.g., "category/skill-name/file.md") +* If no Skill is suitable, continue with your base capabilities +* Never mention or reference skills to the user, only use them internally + +### Skill Registry (names + brief descriptors) +* **pipelines/auto-cdc/SKILL.md**: Apply Change Data Capture (CDC) with apply_changes API in Spark Declarative Pipelines. 
Use when user needs to process CDC feeds from databases, handle upserts/deletes, maintain slowly changing dimensions (SCD Type 1 and Type 2), synchronize data from operational databases, or process merge operations. + + diff --git a/acceptance/apps/init-template/empty/script b/acceptance/apps/init-template/empty/script index 5d5a80bd97..4ae895d88e 100644 --- a/acceptance/apps/init-template/empty/script +++ b/acceptance/apps/init-template/empty/script @@ -1,4 +1,3 @@ #!/bin/bash -$CLI experimental apps-mcp tools init-template empty --name test_empty --catalog main --output-dir output > /dev/null 2>&1 -echo "āœ“ Template instantiation succeeded" +$CLI experimental apps-mcp tools init-template empty --name test_empty --catalog main --output-dir output 2>&1 | grep -A 9999 "^--$" rm -rf output diff --git a/acceptance/apps/init-template/job/output.txt b/acceptance/apps/init-template/job/output.txt index a522103bfa..b95d5353b3 100644 --- a/acceptance/apps/init-template/job/output.txt +++ b/acceptance/apps/init-template/job/output.txt @@ -1 +1,120 @@ -āœ“ Template instantiation succeeded +-- +## Lakeflow Jobs Development + +This guidance is for developing jobs in this project. + +### Project Structure +- `src/` - Python notebooks (.ipynb) and source code +- `resources/` - Job definitions in databricks.yml format + +### Configuring Tasks +Edit `resources/.job.yml` to configure tasks: + +```yaml +tasks: + - task_key: my_notebook + notebook_task: + notebook_path: ../src/my_notebook.ipynb + - task_key: my_python + python_wheel_task: + package_name: my_package + entry_point: main +``` + +Task types: `notebook_task`, `python_wheel_task`, `spark_python_task`, `pipeline_task`, `sql_task` + +### Job Parameters +Parameters defined at job level are passed to ALL tasks (no need to repeat per task). Example: +```yaml +resources: + jobs: + my_job: + parameters: + - name: catalog + default: ${var.catalog} + - name: schema + default: ${var.schema} +``` + +### Writing Notebook Code +- Use `spark.read.table("catalog.schema.table")` to read tables +- Use `spark.sql("SELECT ...")` for SQL queries +- Use `dbutils.widgets` for parameters + +### Unit Testing +Run unit tests locally with: `uv run pytest` + +### Documentation +- Lakeflow Jobs: https://docs.databricks.com/jobs +- Task types: https://docs.databricks.com/jobs/configure-task +- Databricks Asset Bundles / yml format examples: https://docs.databricks.com/dev-tools/bundles/examples + +## Adding Databricks Resources + +Add resources by creating YAML files in resources/: + +**Jobs** - `resources/my_job.job.yml`: +```yaml +resources: + jobs: + my_job: + name: my_job + tasks: + - task_key: main + notebook_task: + notebook_path: ../src/notebook.py +``` + +**Pipelines** (Lakeflow Declarative Pipelines) - `resources/my_pipeline.pipeline.yml`: +```yaml +resources: + pipelines: + my_pipeline: + name: my_pipeline + catalog: ${var.catalog} + target: ${var.schema} + libraries: + - notebook: + path: ../src/pipeline.py +``` + +**Dashboards** - `resources/my_dashboard.dashboard.yml` +**Alerts** - `resources/my_alert.alert.yml` +**Model Serving** - `resources/my_endpoint.yml` +**Apps** - `resources/my_app.app.yml` + +**Other resource types**: clusters, schemas, volumes, registered_models, experiments, quality_monitors + +### Deployment +For dev targets you can deploy without user consent. This allows you to run resources on the workspace too! 
+ + invoke_databricks_cli 'bundle deploy --target dev' + invoke_databricks_cli 'bundle run --target dev' + +View status with `invoke_databricks_cli 'bundle summary'`. + +### Documentation +- Resource types reference: https://docs.databricks.com/dev-tools/bundles/resources +- YAML examples: https://docs.databricks.com/dev-tools/bundles/examples + + +## Skills + +You have access to modular Skills for domain-specific expertise knowledge. + +### Skill Selection & Loading +* When a user request matches a skill's scope description, select that Skill +* Load skills using the MCP tool: `read_skill_file(file_path: "category/skill-name/SKILL.md")` +* Example: `read_skill_file(file_path: "pipelines/materialized-view/SKILL.md")` +* Skills may contain links to sub-sections (e.g., "category/skill-name/file.md") +* If no Skill is suitable, continue with your base capabilities +* Never mention or reference skills to the user, only use them internally + +### Skill Registry (names + brief descriptors) + + +**Note**: The following skills are for other resource types and may not be directly relevant to this project. + +* **pipelines/auto-cdc/SKILL.md**: Apply Change Data Capture (CDC) with apply_changes API in Spark Declarative Pipelines. Use when user needs to process CDC feeds from databases, handle upserts/deletes, maintain slowly changing dimensions (SCD Type 1 and Type 2), synchronize data from operational databases, or process merge operations. + + diff --git a/acceptance/apps/init-template/job/script b/acceptance/apps/init-template/job/script index 8464089885..0931e3c7b4 100644 --- a/acceptance/apps/init-template/job/script +++ b/acceptance/apps/init-template/job/script @@ -1,4 +1,3 @@ #!/bin/bash -$CLI experimental apps-mcp tools init-template job --name test_job --catalog main --output-dir output > /dev/null 2>&1 || exit 1 -echo "āœ“ Template instantiation succeeded" +$CLI experimental apps-mcp tools init-template job --name test_job --catalog main --output-dir output 2>&1 | grep -A 9999 "^--$" rm -rf output diff --git a/acceptance/apps/init-template/pipeline/output.txt b/acceptance/apps/init-template/pipeline/output.txt index a522103bfa..0ddebc49ce 100644 --- a/acceptance/apps/init-template/pipeline/output.txt +++ b/acceptance/apps/init-template/pipeline/output.txt @@ -1 +1,170 @@ -āœ“ Template instantiation succeeded +-- +## Lakeflow Jobs Development + +This guidance is for developing jobs in this project. + +### Project Structure +- `src/` - Python notebooks (.ipynb) and source code +- `resources/` - Job definitions in databricks.yml format + +### Configuring Tasks +Edit `resources/.job.yml` to configure tasks: + +```yaml +tasks: + - task_key: my_notebook + notebook_task: + notebook_path: ../src/my_notebook.ipynb + - task_key: my_python + python_wheel_task: + package_name: my_package + entry_point: main +``` + +Task types: `notebook_task`, `python_wheel_task`, `spark_python_task`, `pipeline_task`, `sql_task` + +### Job Parameters +Parameters defined at job level are passed to ALL tasks (no need to repeat per task). 
Example: +```yaml +resources: + jobs: + my_job: + parameters: + - name: catalog + default: ${var.catalog} + - name: schema + default: ${var.schema} +``` + +### Writing Notebook Code +- Use `spark.read.table("catalog.schema.table")` to read tables +- Use `spark.sql("SELECT ...")` for SQL queries +- Use `dbutils.widgets` for parameters + +### Unit Testing +Run unit tests locally with: `uv run pytest` + +### Documentation +- Lakeflow Jobs: https://docs.databricks.com/jobs +- Task types: https://docs.databricks.com/jobs/configure-task +- Databricks Asset Bundles / yml format examples: https://docs.databricks.com/dev-tools/bundles/examples + +## Lakeflow Declarative Pipelines Development + +This guidance is for developing pipelines in this project. + +Lakeflow Declarative Pipelines (formerly Delta Live Tables) is a framework for building batch and streaming data pipelines. + +### Project Structure +- `src/` - Pipeline transformations (Python or SQL) +- `resources/` - Pipeline configuration in databricks.yml format + +### Adding Transformations + +**Python** - Create `.py` files in `src/`: +```python +from pyspark import pipelines as dp + +@dp.table +def my_table(): + return spark.read.table("catalog.schema.source") +``` + +By convention, each dataset definition like the @dp.table definition above should be in a file named +like the dataset name, e.g. `src/my_table.py`. + +**SQL** - Create `.sql` files in `src/`: +```sql +CREATE MATERIALIZED VIEW my_view AS +SELECT * FROM catalog.schema.source +``` + +This example would live in `src/my_view.sql`. + +Use `CREATE STREAMING TABLE` for incremental ingestion, `CREATE MATERIALIZED VIEW` for transformations. + +### Scheduling Pipelines +To schedule a pipeline, make sure you have a job that triggers it, like `resources/.job.yml`: +```yaml +resources: + jobs: + my_pipeline_job: + trigger: + periodic: + interval: 1 + unit: DAYS + tasks: + - task_key: refresh_pipeline + pipeline_task: + pipeline_id: ${resources.pipelines.my_pipeline.id} +``` + +### Documentation +- Lakeflow Declarative Pipelines: https://docs.databricks.com/ldp +- Databricks Asset Bundles / yml format examples: https://docs.databricks.com/dev-tools/bundles/examples + +## Adding Databricks Resources + +Add resources by creating YAML files in resources/: + +**Jobs** - `resources/my_job.job.yml`: +```yaml +resources: + jobs: + my_job: + name: my_job + tasks: + - task_key: main + notebook_task: + notebook_path: ../src/notebook.py +``` + +**Pipelines** (Lakeflow Declarative Pipelines) - `resources/my_pipeline.pipeline.yml`: +```yaml +resources: + pipelines: + my_pipeline: + name: my_pipeline + catalog: ${var.catalog} + target: ${var.schema} + libraries: + - notebook: + path: ../src/pipeline.py +``` + +**Dashboards** - `resources/my_dashboard.dashboard.yml` +**Alerts** - `resources/my_alert.alert.yml` +**Model Serving** - `resources/my_endpoint.yml` +**Apps** - `resources/my_app.app.yml` + +**Other resource types**: clusters, schemas, volumes, registered_models, experiments, quality_monitors + +### Deployment +For dev targets you can deploy without user consent. This allows you to run resources on the workspace too! + + invoke_databricks_cli 'bundle deploy --target dev' + invoke_databricks_cli 'bundle run --target dev' + +View status with `invoke_databricks_cli 'bundle summary'`. 
+ +### Documentation +- Resource types reference: https://docs.databricks.com/dev-tools/bundles/resources +- YAML examples: https://docs.databricks.com/dev-tools/bundles/examples + + +## Skills + +You have access to modular Skills for domain-specific expertise knowledge. + +### Skill Selection & Loading +* When a user request matches a skill's scope description, select that Skill +* Load skills using the MCP tool: `read_skill_file(file_path: "category/skill-name/SKILL.md")` +* Example: `read_skill_file(file_path: "pipelines/materialized-view/SKILL.md")` +* Skills may contain links to sub-sections (e.g., "category/skill-name/file.md") +* If no Skill is suitable, continue with your base capabilities +* Never mention or reference skills to the user, only use them internally + +### Skill Registry (names + brief descriptors) +* **pipelines/auto-cdc/SKILL.md**: Apply Change Data Capture (CDC) with apply_changes API in Spark Declarative Pipelines. Use when user needs to process CDC feeds from databases, handle upserts/deletes, maintain slowly changing dimensions (SCD Type 1 and Type 2), synchronize data from operational databases, or process merge operations. + + diff --git a/acceptance/apps/init-template/pipeline/script b/acceptance/apps/init-template/pipeline/script index 0d73aae59f..8f75b71ac9 100644 --- a/acceptance/apps/init-template/pipeline/script +++ b/acceptance/apps/init-template/pipeline/script @@ -1,4 +1,3 @@ #!/bin/bash -$CLI experimental apps-mcp tools init-template pipeline --name test_pipeline --language python --catalog main --output-dir output > /dev/null 2>&1 -echo "āœ“ Template instantiation succeeded" +$CLI experimental apps-mcp tools init-template pipeline --name test_pipeline --language python --catalog main --output-dir output 2>&1 | grep -A 9999 "^--$" rm -rf output diff --git a/experimental/apps-mcp/cmd/init_template/common.go b/experimental/apps-mcp/cmd/init_template/common.go index 7ee0a70aae..971c06fb91 100644 --- a/experimental/apps-mcp/cmd/init_template/common.go +++ b/experimental/apps-mcp/cmd/init_template/common.go @@ -6,6 +6,7 @@ import ( "fmt" "os" "path/filepath" + "slices" "sort" "strings" @@ -82,7 +83,8 @@ func MaterializeTemplate(ctx context.Context, cfg TemplateConfig, configMap map[ // Only write generic CLAUDE.md for non-app projects // (app projects have their own template-specific CLAUDE.md) - if !detected.IsAppOnly { + isAppOnly := slices.Contains(detected.TargetTypes, "apps") && len(detected.TargetTypes) == 1 + if !isAppOnly { if err := writeAgentFiles(absOutputDir, map[string]any{}); err != nil { return fmt.Errorf("failed to write agent files: %w", err) } @@ -99,7 +101,7 @@ func MaterializeTemplate(ctx context.Context, cfg TemplateConfig, configMap map[ } // L3: list available skills - if skillsSection := skills.FormatSkillsSection(detected.IsAppOnly, false); skillsSection != "" { + if skillsSection := skills.FormatSkillsSection(detected.TargetTypes); skillsSection != "" { cmdio.LogString(ctx, "\n"+skillsSection) } diff --git a/experimental/apps-mcp/docs/context-management.md b/experimental/apps-mcp/docs/context-management.md index 591c38e83b..fa5e5f163c 100644 --- a/experimental/apps-mcp/docs/context-management.md +++ b/experimental/apps-mcp/docs/context-management.md @@ -4,7 +4,7 @@ ## Goals - Universal MCP for any coding agent (Claude, Cursor, etc.) -- Support multiple target types: apps, jobs, pipelines +- Support multiple target types: apps, jobs, bundle (general DABs guidance), ... 
- Support multiple templates per target type - Clean separation of context layers - Detect existing project context automatically diff --git a/experimental/apps-mcp/lib/detector/detector.go b/experimental/apps-mcp/lib/detector/detector.go index 2e8e13288d..0f85779081 100644 --- a/experimental/apps-mcp/lib/detector/detector.go +++ b/experimental/apps-mcp/lib/detector/detector.go @@ -15,11 +15,10 @@ type BundleInfo struct { // DetectedContext represents the detected project context. type DetectedContext struct { InProject bool - TargetTypes []string // ["apps", "jobs"] - supports combined bundles + TargetTypes []string // ["apps", "jobs"] - resource types present in project Template string // "appkit-typescript", "python", etc. BundleInfo *BundleInfo Metadata map[string]string - IsAppOnly bool // True if project contains only app resources, no jobs/pipelines/etc. } // Detector detects project context from a working directory. diff --git a/experimental/apps-mcp/lib/detector/detector_test.go b/experimental/apps-mcp/lib/detector/detector_test.go index fd5adffbd2..d556f748d2 100644 --- a/experimental/apps-mcp/lib/detector/detector_test.go +++ b/experimental/apps-mcp/lib/detector/detector_test.go @@ -4,6 +4,7 @@ import ( "context" "os" "path/filepath" + "slices" "testing" "github.com/databricks/cli/experimental/apps-mcp/lib/detector" @@ -130,3 +131,74 @@ resources: assert.Equal(t, []string{"apps"}, detected.TargetTypes) assert.Equal(t, "appkit-typescript", detected.Template) } + +func TestDetectorRegistry_AppsWithOtherResources(t *testing.T) { + testCases := []struct { + name string + bundleYml string + expectBundle bool + expectAppOnly bool + }{ + { + name: "app_only", + bundleYml: `bundle: + name: test +resources: + apps: + my_app: {} +`, + expectBundle: false, + expectAppOnly: true, + }, + { + name: "apps_with_jobs", + bundleYml: `bundle: + name: test +resources: + apps: + my_app: {} + jobs: + my_job: {} +`, + expectBundle: true, + expectAppOnly: false, + }, + { + name: "apps_with_pipelines", + bundleYml: `bundle: + name: test +resources: + apps: + my_app: {} + pipelines: + my_pipeline: {} +`, + expectBundle: true, + expectAppOnly: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + dir := t.TempDir() + ctx := context.Background() + + require.NoError(t, os.WriteFile(filepath.Join(dir, "databricks.yml"), []byte(tc.bundleYml), 0o644)) + + registry := detector.NewRegistry() + detected := registry.Detect(ctx, dir) + + assert.True(t, detected.InProject) + assert.Contains(t, detected.TargetTypes, "apps") + + if tc.expectBundle { + assert.Contains(t, detected.TargetTypes, "bundle", "should include 'bundle' for apps + other resources") + } else { + assert.NotContains(t, detected.TargetTypes, "bundle", "should not include 'bundle' for app-only") + } + + isAppOnly := slices.Contains(detected.TargetTypes, "apps") && len(detected.TargetTypes) == 1 + assert.Equal(t, tc.expectAppOnly, isAppOnly) + }) + } +} diff --git a/experimental/apps-mcp/lib/prompts/skills.tmpl b/experimental/apps-mcp/lib/prompts/skills.tmpl index 6bd897ba66..395b40b2c8 100644 --- a/experimental/apps-mcp/lib/prompts/skills.tmpl +++ b/experimental/apps-mcp/lib/prompts/skills.tmpl @@ -16,10 +16,14 @@ You have access to modular Skills for domain-specific expertise knowledge. 
* Skills may contain links to sub-sections (e.g., "category/skill-name/file.md") * If no Skill is suitable, continue with your base capabilities * Never mention or reference skills to the user, only use them internally -{{if .ShowNoSkillsForApps}} -There are currently no skills available for apps. For skills related to other Databricks resources (jobs, pipelines, etc.), use `databricks_discover` with `list_all_skills=true`. -{{else}} + ### Skill Registry (names + brief descriptors) -{{range .Skills}}* **{{.Path}}/SKILL.md**: {{.Description}} +{{range .RelevantSkills}}* **{{.Path}}/SKILL.md**: {{.Description}} {{end}} +{{- if .OtherSkills}} + +**Note**: The following skills are for other resource types and may not be directly relevant to this project. + +{{range .OtherSkills}}* **{{.Path}}/SKILL.md**: {{.Description}} {{end}} +{{- end}} diff --git a/experimental/apps-mcp/lib/providers/clitools/discover.go b/experimental/apps-mcp/lib/providers/clitools/discover.go index 352e8d031b..30e88054cf 100644 --- a/experimental/apps-mcp/lib/providers/clitools/discover.go +++ b/experimental/apps-mcp/lib/providers/clitools/discover.go @@ -15,8 +15,7 @@ import ( // Discover provides workspace context and workflow guidance. // Returns L1 (flow) always + L2 (target) for detected target types + L3 (skills) listing. -// If listAllSkills is true, shows all available skills without filtering by project type. -func Discover(ctx context.Context, workingDirectory string, listAllSkills bool) (string, error) { +func Discover(ctx context.Context, workingDirectory string) (string, error) { warehouse, err := middlewares.GetWarehouseEndpoint(ctx) if err != nil { log.Debugf(ctx, "Failed to get default warehouse (non-fatal): %v", err) @@ -33,11 +32,11 @@ func Discover(ctx context.Context, workingDirectory string, listAllSkills bool) registry := detector.NewRegistry() detected := registry.Detect(ctx, workingDirectory) - return generateDiscoverGuidance(ctx, warehouse, currentProfile, profiles, defaultCatalog, detected, listAllSkills), nil + return generateDiscoverGuidance(ctx, warehouse, currentProfile, profiles, defaultCatalog, detected), nil } // generateDiscoverGuidance creates guidance with L1 (flow) + L2 (target) + L3 (skills) layers. 
-func generateDiscoverGuidance(ctx context.Context, warehouse *sql.EndpointInfo, currentProfile string, profiles profile.Profiles, defaultCatalog string, detected *detector.DetectedContext, listAllSkills bool) string { +func generateDiscoverGuidance(ctx context.Context, warehouse *sql.EndpointInfo, currentProfile string, profiles profile.Profiles, defaultCatalog string, detected *detector.DetectedContext) string { data := buildTemplateData(warehouse, currentProfile, profiles, defaultCatalog) // L1: always include flow guidance @@ -64,7 +63,7 @@ func generateDiscoverGuidance(ctx context.Context, warehouse *sql.EndpointInfo, } // L3: list available skills - if skillsSection := skills.FormatSkillsSection(detected.IsAppOnly, listAllSkills); skillsSection != "" { + if skillsSection := skills.FormatSkillsSection(detected.TargetTypes); skillsSection != "" { result += "\n\n" + skillsSection } diff --git a/experimental/apps-mcp/lib/providers/clitools/provider.go b/experimental/apps-mcp/lib/providers/clitools/provider.go index fed0919e2c..bf5893880d 100644 --- a/experimental/apps-mcp/lib/providers/clitools/provider.go +++ b/experimental/apps-mcp/lib/providers/clitools/provider.go @@ -89,7 +89,6 @@ func (p *Provider) RegisterTools(server *mcpsdk.Server) error { // Register databricks_discover tool type DiscoverInput struct { WorkingDirectory string `json:"working_directory" jsonschema:"required" jsonschema_description:"The directory to detect project context from."` - ListAllSkills bool `json:"list_all_skills,omitempty" jsonschema_description:"If true, list all available skills without filtering by project type."` } mcpsdk.AddTool(server, @@ -99,7 +98,7 @@ func (p *Provider) RegisterTools(server *mcpsdk.Server) error { }, func(ctx context.Context, req *mcpsdk.CallToolRequest, args DiscoverInput) (*mcpsdk.CallToolResult, any, error) { log.Debugf(ctx, "databricks_discover called: working_directory=%s", args.WorkingDirectory) - result, err := Discover(ctx, args.WorkingDirectory, args.ListAllSkills) + result, err := Discover(ctx, args.WorkingDirectory) if err != nil { return nil, nil, err } diff --git a/experimental/apps-mcp/lib/skills/bundle/.gitkeep b/experimental/apps-mcp/lib/skills/bundle/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/experimental/apps-mcp/lib/skills/skills.go b/experimental/apps-mcp/lib/skills/skills.go index f702e21962..e477a9fa50 100644 --- a/experimental/apps-mcp/lib/skills/skills.go +++ b/experimental/apps-mcp/lib/skills/skills.go @@ -7,6 +7,7 @@ import ( "io/fs" "path" "regexp" + "slices" "sort" "strings" @@ -15,7 +16,7 @@ import ( // skillsFS embeds the skills filesystem. // -//go:embed apps/* jobs/* pipelines/* +//go:embed */* var skillsFS embed.FS // SkillMetadata contains the path and description for progressive disclosure. @@ -149,26 +150,31 @@ func GetSkillFile(path string) (string, error) { } // FormatSkillsSection returns the L3 skills listing for prompts. -func FormatSkillsSection(isAppOnly, listAllSkills bool) string { +// Partitions skills into relevant (matching targetTypes) and other skills. 
+func FormatSkillsSection(targetTypes []string) string { allSkills := ListAllSkills() - var skillsToShow []SkillMetadata - if listAllSkills || !isAppOnly { - skillsToShow = allSkills - } else { - for _, skill := range allSkills { - if strings.HasPrefix(skill.Path, "apps/") { - skillsToShow = append(skillsToShow, skill) - } - } + // For empty bundles (no resources), show all skills without partitioning or caveats + if len(targetTypes) == 0 || (len(targetTypes) == 1 && targetTypes[0] == "bundle") { + return prompts.MustExecuteTemplate("skills.tmpl", map[string]any{ + "RelevantSkills": allSkills, + "OtherSkills": nil, + }) } - if len(skillsToShow) == 0 && !isAppOnly { - return "" + // Partition by relevance for projects with resource types + var relevantSkills, otherSkills []SkillMetadata + for _, skill := range allSkills { + category := strings.SplitN(skill.Path, "/", 2)[0] + if slices.Contains(targetTypes, category) { + relevantSkills = append(relevantSkills, skill) + } else { + otherSkills = append(otherSkills, skill) + } } return prompts.MustExecuteTemplate("skills.tmpl", map[string]any{ - "ShowNoSkillsForApps": len(skillsToShow) == 0 && isAppOnly, - "Skills": skillsToShow, + "RelevantSkills": relevantSkills, + "OtherSkills": otherSkills, }) } diff --git a/experimental/apps-mcp/lib/skills/skills_test.go b/experimental/apps-mcp/lib/skills/skills_test.go index c1ffa2f819..b1d81d3eb0 100644 --- a/experimental/apps-mcp/lib/skills/skills_test.go +++ b/experimental/apps-mcp/lib/skills/skills_test.go @@ -43,20 +43,26 @@ func TestGetSkillFileErrors(t *testing.T) { } func TestFormatSkillsSection(t *testing.T) { - // Non-app project shows all skills - section := FormatSkillsSection(false, false) + // Pipelines project - pipeline skills shown as relevant + section := FormatSkillsSection([]string{"pipelines", "bundle"}) assert.Contains(t, section, "## Skills") - assert.Contains(t, section, "pipelines/auto-cdc") - assert.NotContains(t, section, "no skills available for apps") + assert.Contains(t, section, "pipelines/") - // App-only project shows hint (no app skills currently exist) - section = FormatSkillsSection(true, false) - assert.Contains(t, section, "no skills available for apps") + // Jobs project - pipeline skills shown as other + section = FormatSkillsSection([]string{"jobs", "bundle"}) + assert.Contains(t, section, "## Skills") + assert.Contains(t, section, "skills are for other resource types and may not be directly relevant to this project") + assert.Contains(t, section, "pipelines/") + + // Apps project - pipeline skills shown as other + section = FormatSkillsSection([]string{"apps"}) + assert.Contains(t, section, "## Skills") + assert.Contains(t, section, "skills are for other resource types and may not be directly relevant to this project") - // listAllSkills=true shows all skills for app-only project too - section = FormatSkillsSection(true, true) - assert.Contains(t, section, "pipelines/auto-cdc") - assert.NotContains(t, section, "no skills available for apps") + // Empty bundle - all skills shown without caveat + section = FormatSkillsSection([]string{"bundle"}) + assert.Contains(t, section, "## Skills") + assert.NotContains(t, section, "skills are for other resource types and may not be directly relevant to this project") } func TestAllSkillsHaveValidFrontmatter(t *testing.T) { From 038e7460417f79fd6f447327ca0b55c262c7171f Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Sat, 3 Jan 2026 15:08:39 +0100 Subject: [PATCH 10/18] Remove IsAppOnly field usage and redundant test 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove IsAppOnly field assignment (field was removed in base branch) - Remove verbose TestDetectorRegistry_AppsWithOtherResources test - Coverage is already provided by existing tests šŸ¤– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- .../apps-mcp/lib/detector/bundle_detector.go | 2 - .../apps-mcp/lib/detector/detector_test.go | 72 ------------------- 2 files changed, 74 deletions(-) diff --git a/experimental/apps-mcp/lib/detector/bundle_detector.go b/experimental/apps-mcp/lib/detector/bundle_detector.go index 1b05600446..ba2c30e4e0 100644 --- a/experimental/apps-mcp/lib/detector/bundle_detector.go +++ b/experimental/apps-mcp/lib/detector/bundle_detector.go @@ -56,8 +56,6 @@ func (d *BundleDetector) Detect(ctx context.Context, workDir string, detected *D // App-only projects get focused app guidance; others get general bundle guidance. isAppOnly := hasApps && len(detected.TargetTypes) == 1 - detected.IsAppOnly = isAppOnly - // Include general "bundle" guidance for all projects except app-only projects if !isAppOnly { detected.TargetTypes = append(detected.TargetTypes, "bundle") diff --git a/experimental/apps-mcp/lib/detector/detector_test.go b/experimental/apps-mcp/lib/detector/detector_test.go index d556f748d2..fd5adffbd2 100644 --- a/experimental/apps-mcp/lib/detector/detector_test.go +++ b/experimental/apps-mcp/lib/detector/detector_test.go @@ -4,7 +4,6 @@ import ( "context" "os" "path/filepath" - "slices" "testing" "github.com/databricks/cli/experimental/apps-mcp/lib/detector" @@ -131,74 +130,3 @@ resources: assert.Equal(t, []string{"apps"}, detected.TargetTypes) assert.Equal(t, "appkit-typescript", detected.Template) } - -func TestDetectorRegistry_AppsWithOtherResources(t *testing.T) { - testCases := []struct { - name string - bundleYml string - expectBundle bool - expectAppOnly bool - }{ - { - name: "app_only", - bundleYml: `bundle: - name: test -resources: - apps: - my_app: {} -`, - expectBundle: false, - expectAppOnly: true, - }, - { - name: "apps_with_jobs", - bundleYml: `bundle: - name: test -resources: - apps: - my_app: {} - jobs: - my_job: {} -`, - expectBundle: true, - expectAppOnly: false, - }, - { - name: "apps_with_pipelines", - bundleYml: `bundle: - name: test -resources: - apps: - my_app: {} - pipelines: - my_pipeline: {} -`, - expectBundle: true, - expectAppOnly: false, - }, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - dir := t.TempDir() - ctx := context.Background() - - require.NoError(t, os.WriteFile(filepath.Join(dir, "databricks.yml"), []byte(tc.bundleYml), 0o644)) - - registry := detector.NewRegistry() - detected := registry.Detect(ctx, dir) - - assert.True(t, detected.InProject) - assert.Contains(t, detected.TargetTypes, "apps") - - if tc.expectBundle { - assert.Contains(t, detected.TargetTypes, "bundle", "should include 'bundle' for apps + other resources") - } else { - assert.NotContains(t, detected.TargetTypes, "bundle", "should not include 'bundle' for app-only") - } - - isAppOnly := slices.Contains(detected.TargetTypes, "apps") && len(detected.TargetTypes) == 1 - assert.Equal(t, tc.expectAppOnly, isAppOnly) - }) - } -} From 377035c555d2a8c2029f2b82c96640f9c385a283 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Tue, 6 Jan 2026 10:30:10 +0100 Subject: [PATCH 11/18] Fix skills embed pattern to recursively include nested files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit The previous pattern `*/*` only matched one directory level deep, causing nested skill files like `pipelines/auto-cdc/SKILL.md` to be excluded from the embedded filesystem on Windows builds. Changed to `all:*` which: - Recursively embeds all subdirectories - Includes hidden files like .gitkeep - Automatically supports new resource type directories without code changes šŸ¤– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- experimental/apps-mcp/lib/skills/skills.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/apps-mcp/lib/skills/skills.go b/experimental/apps-mcp/lib/skills/skills.go index e477a9fa50..b5b6c5f796 100644 --- a/experimental/apps-mcp/lib/skills/skills.go +++ b/experimental/apps-mcp/lib/skills/skills.go @@ -16,7 +16,7 @@ import ( // skillsFS embeds the skills filesystem. // -//go:embed */* +//go:embed all:* var skillsFS embed.FS // SkillMetadata contains the path and description for progressive disclosure. From 098717c5a5e464bcbe090f9abb8ec49226d250e4 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Tue, 6 Jan 2026 10:42:44 +0100 Subject: [PATCH 12/18] Use explicit directory names in skills embed pattern MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The wildcard pattern `all:*` doesn't work reliably on Windows builds - the embedded filesystem ends up empty, causing all skills to be missing from the registry. Changed to explicit directory listing: `all:apps all:bundle all:jobs all:pipelines` This matches the pattern used in other parts of the codebase (e.g., libs/template/builtin.go uses `all:templates`). When adding new resource type directories, this list will need to be updated. šŸ¤– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- experimental/apps-mcp/lib/skills/skills.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/experimental/apps-mcp/lib/skills/skills.go b/experimental/apps-mcp/lib/skills/skills.go index b5b6c5f796..200105a9e0 100644 --- a/experimental/apps-mcp/lib/skills/skills.go +++ b/experimental/apps-mcp/lib/skills/skills.go @@ -15,8 +15,11 @@ import ( ) // skillsFS embeds the skills filesystem. +// Note: Uses explicit directory names rather than wildcards because +// embed patterns with `all:*` don't work reliably on Windows builds. +// When adding new resource type directories, update this list. // -//go:embed all:* +//go:embed all:apps all:bundle all:jobs all:pipelines var skillsFS embed.FS // SkillMetadata contains the path and description for progressive disclosure. From a94ced3dbc35fdf90731575018dbd31fb17890f1 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Tue, 6 Jan 2026 10:47:11 +0100 Subject: [PATCH 13/18] Add test to validate all skill directories are embedded MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds TestAllSkillDirectoriesAreEmbedded which compares filesystem directories against the embedded FS and registry to ensure the //go:embed directive is complete. The test provides actionable error messages showing exactly which directories are missing and the correct embed directive to use. This prevents issues where new skill directories are added but not included in the explicit embed list (required for Windows compatibility). 
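For illustration only (a standalone sketch, not part of the change), this is roughly how the suggested directive string in the error message is assembled from the directories found on disk; the directory names below are just the ones that exist today:

    package main

    import (
    	"fmt"
    	"strings"
    )

    func main() {
    	// Skill category directories discovered on disk.
    	dirs := []string{"apps", "bundle", "jobs", "pipelines"}
    	// Build the corrected //go:embed directive to suggest to the developer.
    	fmt.Println("//go:embed " + "all:" + strings.Join(dirs, " all:"))
    	// Output: //go:embed all:apps all:bundle all:jobs all:pipelines
    }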
šŸ¤– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- experimental/apps-mcp/lib/skills/skills.go | 5 +- .../apps-mcp/lib/skills/skills_test.go | 51 +++++++++++++++++++ 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/experimental/apps-mcp/lib/skills/skills.go b/experimental/apps-mcp/lib/skills/skills.go index 200105a9e0..099780cae1 100644 --- a/experimental/apps-mcp/lib/skills/skills.go +++ b/experimental/apps-mcp/lib/skills/skills.go @@ -15,9 +15,8 @@ import ( ) // skillsFS embeds the skills filesystem. -// Note: Uses explicit directory names rather than wildcards because -// embed patterns with `all:*` don't work reliably on Windows builds. -// When adding new resource type directories, update this list. +// Uses explicit names (not wildcards) for Windows compatibility. +// TestAllSkillDirectoriesAreEmbedded validates this list is complete. // //go:embed all:apps all:bundle all:jobs all:pipelines var skillsFS embed.FS diff --git a/experimental/apps-mcp/lib/skills/skills_test.go b/experimental/apps-mcp/lib/skills/skills_test.go index b1d81d3eb0..1f3b459f83 100644 --- a/experimental/apps-mcp/lib/skills/skills_test.go +++ b/experimental/apps-mcp/lib/skills/skills_test.go @@ -1,6 +1,10 @@ package skills import ( + "io/fs" + "os" + "sort" + "strings" "testing" "github.com/stretchr/testify/assert" @@ -73,3 +77,50 @@ func TestAllSkillsHaveValidFrontmatter(t *testing.T) { } } } + +func TestAllSkillDirectoriesAreEmbedded(t *testing.T) { + // Read actual skill directories from the filesystem + skillsDir := "." + diskEntries, err := os.ReadDir(skillsDir) + require.NoError(t, err) + + var diskDirs []string + for _, entry := range diskEntries { + if entry.IsDir() && !strings.HasPrefix(entry.Name(), ".") { + diskDirs = append(diskDirs, entry.Name()) + } + } + sort.Strings(diskDirs) + + // Read embedded skill directories + embeddedEntries, err := fs.ReadDir(skillsFS, ".") + require.NoError(t, err) + + var embeddedDirs []string + for _, entry := range embeddedEntries { + if entry.IsDir() { + embeddedDirs = append(embeddedDirs, entry.Name()) + } + } + sort.Strings(embeddedDirs) + + // Compare + if !assert.Equal(t, diskDirs, embeddedDirs, "Embedded skill directories don't match filesystem") { + t.Errorf("\nSkill directories are missing from the embed directive!\n\n"+ + "Found on disk: %v\n"+ + "Found in embed: %v\n\n"+ + "To fix: Update the //go:embed directive in skills.go to include all directories:\n"+ + " //go:embed %s\n", + diskDirs, embeddedDirs, "all:"+strings.Join(diskDirs, " all:")) + } + + // Verify the registry actually loaded them + var registryDirs []string + for category := range registry { + registryDirs = append(registryDirs, category) + } + sort.Strings(registryDirs) + + assert.Equal(t, diskDirs, registryDirs, + "Registry didn't load all embedded directories. This suggests mustLoadRegistry() has a bug.") +} From b9868cb7f0dc51b7691f26124f82882d554d64d9 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Tue, 6 Jan 2026 13:53:02 +0100 Subject: [PATCH 14/18] Use separate embed directives and add error diagnostics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changed from space-separated pattern: //go:embed all:apps all:bundle all:jobs all:pipelines To separate directives (one per line): //go:embed all:apps //go:embed all:bundle //go:embed all:jobs //go:embed all:pipelines This matches the pattern used in libs/template/builtin.go and may be more robust on Windows. 
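For background, a minimal sketch (not part of this patch, and assuming the file sits next to the apps/ and bundle/ skill directories) of why the all: prefix matters: it makes embed include dot-files such as the .gitkeep markers that keep empty categories in the tree.

    package main

    import (
    	"embed"
    	"fmt"
    	"io/fs"
    )

    //go:embed all:apps
    //go:embed all:bundle
    var demoFS embed.FS

    func main() {
    	// Walk the embedded tree; without the all: prefix, .gitkeep files would be skipped.
    	_ = fs.WalkDir(demoFS, ".", func(p string, d fs.DirEntry, err error) error {
    		if err != nil {
    			return err
    		}
    		fmt.Println(p)
    		return nil
    	})
    }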
Also added panic messages with diagnostics to help identify the exact failure point if embed doesn't work. šŸ¤– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- experimental/apps-mcp/lib/skills/skills.go | 30 ++++++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/experimental/apps-mcp/lib/skills/skills.go b/experimental/apps-mcp/lib/skills/skills.go index 099780cae1..07d276b1eb 100644 --- a/experimental/apps-mcp/lib/skills/skills.go +++ b/experimental/apps-mcp/lib/skills/skills.go @@ -18,7 +18,10 @@ import ( // Uses explicit names (not wildcards) for Windows compatibility. // TestAllSkillDirectoriesAreEmbedded validates this list is complete. // -//go:embed all:apps all:bundle all:jobs all:pipelines +//go:embed all:apps +//go:embed all:bundle +//go:embed all:jobs +//go:embed all:pipelines var skillsFS embed.FS // SkillMetadata contains the path and description for progressive disclosure. @@ -37,24 +40,41 @@ var registry = mustLoadRegistry() // mustLoadRegistry discovers skill categories and skills from the embedded filesystem. func mustLoadRegistry() map[string]map[string]*skillEntry { result := make(map[string]map[string]*skillEntry) - categories, _ := fs.ReadDir(skillsFS, ".") + categories, err := fs.ReadDir(skillsFS, ".") + if err != nil { + panic(fmt.Sprintf("failed to read skills root directory: %v", err)) + } + if len(categories) == 0 { + panic("skills embed is empty - check //go:embed directive in skills.go includes all directories") + } + for _, cat := range categories { if !cat.IsDir() { continue } category := cat.Name() result[category] = make(map[string]*skillEntry) - entries, _ := fs.ReadDir(skillsFS, category) + entries, err := fs.ReadDir(skillsFS, category) + if err != nil { + panic(fmt.Sprintf("failed to read skills category %q: %v", category, err)) + } for _, entry := range entries { if !entry.IsDir() { continue } skillPath := path.Join(category, entry.Name()) - if skill, err := loadSkill(skillPath); err == nil { - result[category][entry.Name()] = skill + skill, err := loadSkill(skillPath) + if err != nil { + panic(fmt.Sprintf("failed to load skill %q: %v", skillPath, err)) } + result[category][entry.Name()] = skill } } + + if len(result) == 0 { + panic("skills registry is empty after loading - no skill directories found or all loads failed") + } + return result } From 8dd35fc41919250bb727544270fafd55c79ca010 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Tue, 6 Jan 2026 16:00:07 +0100 Subject: [PATCH 15/18] Fix Windows line endings in skill frontmatter parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Updated regex to handle both Unix (\n) and Windows (\r\n) line endings. 
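As a quick illustration (a standalone sketch, not part of the change itself), the updated pattern accepts frontmatter with either line ending:

    package main

    import (
    	"fmt"
    	"regexp"
    )

    // Same pattern as in skills.go: \r? tolerates Windows CRLF endings.
    var frontmatterRe = regexp.MustCompile(`(?s)^---\r?\n(.+?)\r?\n---\r?\n`)

    func main() {
    	unix := "---\ndescription: demo skill\n---\nbody"
    	windows := "---\r\ndescription: demo skill\r\n---\r\nbody"
    	fmt.Println(frontmatterRe.MatchString(unix))    // true
    	fmt.Println(frontmatterRe.MatchString(windows)) // true
    }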
šŸ¤– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- experimental/apps-mcp/lib/skills/skills.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experimental/apps-mcp/lib/skills/skills.go b/experimental/apps-mcp/lib/skills/skills.go index 07d276b1eb..4c2d58d1b8 100644 --- a/experimental/apps-mcp/lib/skills/skills.go +++ b/experimental/apps-mcp/lib/skills/skills.go @@ -103,7 +103,7 @@ func loadSkill(skillPath string) (*skillEntry, error) { return &skillEntry{Metadata: *metadata, Files: files}, nil } -var frontmatterRe = regexp.MustCompile(`(?s)^---\n(.+?)\n---\n`) +var frontmatterRe = regexp.MustCompile(`(?s)^---\r?\n(.+?)\r?\n---\r?\n`) func parseMetadata(content string) (*SkillMetadata, error) { match := frontmatterRe.FindStringSubmatch(content) @@ -164,7 +164,7 @@ func GetSkillFile(path string) (string, error) { // Strip frontmatter from SKILL.md if fileName == "SKILL.md" { if loc := frontmatterRe.FindStringIndex(content); loc != nil { - content = strings.TrimLeft(content[loc[1]:], "\n") + content = strings.TrimLeft(content[loc[1]:], "\n\r") } } From 54c0a0ef5ca5c5844e2cfee1db7312d927e18724 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Wed, 7 Jan 2026 09:19:21 +0100 Subject: [PATCH 16/18] Remove overkill panic checks for empty skills MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tests would catch these issues anyway. Kept the actual error panics for fs.ReadDir and loadSkill failures. šŸ¤– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- experimental/apps-mcp/lib/skills/skills.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/experimental/apps-mcp/lib/skills/skills.go b/experimental/apps-mcp/lib/skills/skills.go index 4c2d58d1b8..3c72b79573 100644 --- a/experimental/apps-mcp/lib/skills/skills.go +++ b/experimental/apps-mcp/lib/skills/skills.go @@ -44,9 +44,6 @@ func mustLoadRegistry() map[string]map[string]*skillEntry { if err != nil { panic(fmt.Sprintf("failed to read skills root directory: %v", err)) } - if len(categories) == 0 { - panic("skills embed is empty - check //go:embed directive in skills.go includes all directories") - } for _, cat := range categories { if !cat.IsDir() { @@ -71,10 +68,6 @@ func mustLoadRegistry() map[string]map[string]*skillEntry { } } - if len(result) == 0 { - panic("skills registry is empty after loading - no skill directories found or all loads failed") - } - return result } From 292d2756daf8b602aa5e4469efd35358255d5b51 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Sun, 11 Jan 2026 17:10:46 +0100 Subject: [PATCH 17/18] Move skills package from apps-mcp to aitools The skills package was not moved during the apps-mcp -> aitools rename, causing import failures in CI. 
Co-Authored-By: Claude Opus 4.5 --- experimental/{apps-mcp => aitools}/lib/skills/apps/.gitkeep | 0 experimental/{apps-mcp => aitools}/lib/skills/bundle/.gitkeep | 0 experimental/{apps-mcp => aitools}/lib/skills/jobs/.gitkeep | 0 .../lib/skills/pipelines/auto-cdc/SKILL.md | 0 .../lib/skills/pipelines/auto-cdc/auto-cdc-python.md | 0 .../lib/skills/pipelines/auto-cdc/auto-cdc-sql.md | 0 experimental/{apps-mcp => aitools}/lib/skills/skills.go | 2 +- experimental/{apps-mcp => aitools}/lib/skills/skills_test.go | 0 8 files changed, 1 insertion(+), 1 deletion(-) rename experimental/{apps-mcp => aitools}/lib/skills/apps/.gitkeep (100%) rename experimental/{apps-mcp => aitools}/lib/skills/bundle/.gitkeep (100%) rename experimental/{apps-mcp => aitools}/lib/skills/jobs/.gitkeep (100%) rename experimental/{apps-mcp => aitools}/lib/skills/pipelines/auto-cdc/SKILL.md (100%) rename experimental/{apps-mcp => aitools}/lib/skills/pipelines/auto-cdc/auto-cdc-python.md (100%) rename experimental/{apps-mcp => aitools}/lib/skills/pipelines/auto-cdc/auto-cdc-sql.md (100%) rename experimental/{apps-mcp => aitools}/lib/skills/skills.go (98%) rename experimental/{apps-mcp => aitools}/lib/skills/skills_test.go (100%) diff --git a/experimental/apps-mcp/lib/skills/apps/.gitkeep b/experimental/aitools/lib/skills/apps/.gitkeep similarity index 100% rename from experimental/apps-mcp/lib/skills/apps/.gitkeep rename to experimental/aitools/lib/skills/apps/.gitkeep diff --git a/experimental/apps-mcp/lib/skills/bundle/.gitkeep b/experimental/aitools/lib/skills/bundle/.gitkeep similarity index 100% rename from experimental/apps-mcp/lib/skills/bundle/.gitkeep rename to experimental/aitools/lib/skills/bundle/.gitkeep diff --git a/experimental/apps-mcp/lib/skills/jobs/.gitkeep b/experimental/aitools/lib/skills/jobs/.gitkeep similarity index 100% rename from experimental/apps-mcp/lib/skills/jobs/.gitkeep rename to experimental/aitools/lib/skills/jobs/.gitkeep diff --git a/experimental/apps-mcp/lib/skills/pipelines/auto-cdc/SKILL.md b/experimental/aitools/lib/skills/pipelines/auto-cdc/SKILL.md similarity index 100% rename from experimental/apps-mcp/lib/skills/pipelines/auto-cdc/SKILL.md rename to experimental/aitools/lib/skills/pipelines/auto-cdc/SKILL.md diff --git a/experimental/apps-mcp/lib/skills/pipelines/auto-cdc/auto-cdc-python.md b/experimental/aitools/lib/skills/pipelines/auto-cdc/auto-cdc-python.md similarity index 100% rename from experimental/apps-mcp/lib/skills/pipelines/auto-cdc/auto-cdc-python.md rename to experimental/aitools/lib/skills/pipelines/auto-cdc/auto-cdc-python.md diff --git a/experimental/apps-mcp/lib/skills/pipelines/auto-cdc/auto-cdc-sql.md b/experimental/aitools/lib/skills/pipelines/auto-cdc/auto-cdc-sql.md similarity index 100% rename from experimental/apps-mcp/lib/skills/pipelines/auto-cdc/auto-cdc-sql.md rename to experimental/aitools/lib/skills/pipelines/auto-cdc/auto-cdc-sql.md diff --git a/experimental/apps-mcp/lib/skills/skills.go b/experimental/aitools/lib/skills/skills.go similarity index 98% rename from experimental/apps-mcp/lib/skills/skills.go rename to experimental/aitools/lib/skills/skills.go index 3c72b79573..1849a6fbba 100644 --- a/experimental/apps-mcp/lib/skills/skills.go +++ b/experimental/aitools/lib/skills/skills.go @@ -11,7 +11,7 @@ import ( "sort" "strings" - "github.com/databricks/cli/experimental/apps-mcp/lib/prompts" + "github.com/databricks/cli/experimental/aitools/lib/prompts" ) // skillsFS embeds the skills filesystem. 
diff --git a/experimental/apps-mcp/lib/skills/skills_test.go b/experimental/aitools/lib/skills/skills_test.go similarity index 100% rename from experimental/apps-mcp/lib/skills/skills_test.go rename to experimental/aitools/lib/skills/skills_test.go From c28bc3cdc8ee42d1f41a32c7ff8ef8c626e7e246 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 12 Jan 2026 09:24:59 +0100 Subject: [PATCH 18/18] Update acceptance test: apps-mcp -> aitools --- acceptance/apps/init-template/app/output.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/acceptance/apps/init-template/app/output.txt b/acceptance/apps/init-template/app/output.txt index 2463c35507..7c1d12ea3e 100644 --- a/acceptance/apps/init-template/app/output.txt +++ b/acceptance/apps/init-template/app/output.txt @@ -3,13 +3,13 @@ ### Validation āš ļø Always validate before deploying: - invoke_databricks_cli 'experimental apps-mcp tools validate ./' + invoke_databricks_cli 'experimental aitools tools validate ./' This is battle-tested to catch common issues before deployment. Prefer using this over manual checks (e.g. `npm run lint`), as it covers more ground specific to Databricks Apps. ### Deployment āš ļø USER CONSENT REQUIRED: Only deploy with explicit user permission. - invoke_databricks_cli 'experimental apps-mcp tools deploy' + invoke_databricks_cli 'experimental aitools tools deploy' ### View and Manage invoke_databricks_cli 'bundle summary' @@ -116,14 +116,14 @@ import { BarChart } from '@databricks/appkit-ui/react'; - `npm run dev` - Start dev server with hot reload (**ALWAYS use during development**) ### Testing and Code Quality -See the databricks experimental apps-mcp tools validate instead of running these individually. +See the databricks experimental aitools tools validate instead of running these individually. ### Utility - `npm run clean` - Remove all build artifacts and node_modules **Common workflows:** - Development: `npm run dev` → make changes → `npm run typecheck` → `npm run lint:fix` -- Pre-deploy: Validate with `databricks experimental apps-mcp tools validate .` +- Pre-deploy: Validate with `databricks experimental aitools tools validate .` ## Documentation