diff --git a/Makefile b/Makefile
index da5b7f9887..144116d504 100644
--- a/Makefile
+++ b/Makefile
@@ -141,5 +141,8 @@ generate:
 	@echo "Generating CLI code..."
 	$(GENKIT_BINARY) update-sdk
 
+# Create a scratch testing environment to run tests on DBR.
+dbr-scratch:
+	deco env run -i -n azure-prod-ucws -- go test -test.v -run TestSetupDbrRunner github.com/databricks/cli/acceptance -count 1
-.PHONY: lint lintfull tidy lintcheck fmt fmtfull test cover showcover build snapshot snapshot-release schema integration integration-short acc-cover acc-showcover docs ws links checks test-update test-update-aws test-update-all generate-validation
+.PHONY: lint lintfull tidy lintcheck fmt fmtfull test cover showcover build snapshot snapshot-release schema integration integration-short acc-cover acc-showcover docs ws links checks test-update test-update-aws test-update-all generate-validation dbr-scratch
 
diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go
index 4db5858657..b395cb9b4d 100644
--- a/acceptance/acceptance_test.go
+++ b/acceptance/acceptance_test.go
@@ -466,6 +466,13 @@ func getSkipReason(config *internal.TestConfig, configPath string) string {
 			return fmt.Sprintf("Disabled via RequiresCluster setting in %s (TEST_DEFAULT_CLUSTER_ID is empty)", configPath)
 		}
 
+		if isTruePtr(config.RequiresWorkspaceFilesystem) {
+			isDBR := os.Getenv("DATABRICKS_RUNTIME_VERSION") != ""
+			if !isDBR || !WorkspaceTmpDir {
+				return fmt.Sprintf("Disabled via RequiresWorkspaceFilesystem setting in %s (DATABRICKS_RUNTIME_VERSION=%s, WorkspaceTmpDir=%v)", configPath, os.Getenv("DATABRICKS_RUNTIME_VERSION"), WorkspaceTmpDir)
+			}
+		}
+
 	} else {
 		// Local run
 		if !isTruePtr(config.Local) {
@@ -516,7 +523,7 @@ func runTest(t *testing.T,
 		// If the test is being run on DBR, auth is already configured
 		// by the dbr_runner notebook by reading a token from the notebook context and
 		// setting DATABRICKS_TOKEN and DATABRICKS_HOST environment variables.
-		_, _, tmpDir = workspaceTmpDir(t.Context(), t)
+		tmpDir = workspaceTmpDir(context.Background(), t)
 
 		// Run DBR tests on the workspace file system to mimic usage from
 		// DABs in the workspace.
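Note on the skip logic above: it keys off two signals wired up elsewhere in the harness. A minimal sketch of the assumed wiring, for orientation only — the variable name `WorkspaceTmpDir` and the `-workspace-tmp-dir` flag both appear in this diff, but the exact registration below is an assumption, not part of this change:

```go
package acceptance_test

import (
	"flag"
	"os"
)

// Assumed wiring (sketch): the -workspace-tmp-dir flag passed by the notebook's
// gotestsum invocation (see scratch_dbr_runner.ipynb below) is presumed bound
// to the WorkspaceTmpDir bool consulted in getSkipReason.
var WorkspaceTmpDir = false

func init() {
	flag.BoolVar(&WorkspaceTmpDir, "workspace-tmp-dir", false,
		"run acceptance tests in a temporary directory on the workspace filesystem (FUSE)")
}

// DBR detection as used in the hunk above: Databricks Runtime exports
// DATABRICKS_RUNTIME_VERSION in every notebook/job process, so a non-empty
// value is treated as "running on DBR".
func isDBR() bool {
	return os.Getenv("DATABRICKS_RUNTIME_VERSION") != ""
}
```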
diff --git a/acceptance/dbr_test.go b/acceptance/dbr_test.go
index ab0aea32ba..6bdd0bd168 100644
--- a/acceptance/dbr_test.go
+++ b/acceptance/dbr_test.go
@@ -1,20 +1,29 @@
 package acceptance_test
 
 import (
+	"bytes"
 	"context"
+	"encoding/json"
+	"errors"
 	"fmt"
+	"os"
+	"path"
+	"path/filepath"
+	"strconv"
 	"testing"
 	"time"
 
+	"github.com/databricks/cli/internal/testarchive"
 	"github.com/databricks/cli/libs/filer"
 	"github.com/databricks/databricks-sdk-go"
+	"github.com/databricks/databricks-sdk-go/apierr"
 	"github.com/databricks/databricks-sdk-go/service/workspace"
 	"github.com/google/uuid"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
 
-func workspaceTmpDir(ctx context.Context, t *testing.T) (*databricks.WorkspaceClient, filer.Filer, string) {
+func workspaceTmpDir(ctx context.Context, t *testing.T) string {
 	w, err := databricks.NewWorkspaceClient()
 	require.NoError(t, err)
 
@@ -29,19 +38,128 @@ func workspaceTmpDir(ctx context.Context, t *testing.T) (*databricks.WorkspaceCl
 		uuid.New().String(),
 	)
 
+	// Create the directory using os.MkdirAll (via FUSE).
+	err = os.MkdirAll(tmpDir, 0o755)
+	require.NoError(t, err)
+
 	t.Cleanup(func() {
-		err := w.Workspace.Delete(ctx, workspace.Delete{
-			Path:      tmpDir,
-			Recursive: true,
-		})
+		// Remove the directory using os.RemoveAll (via FUSE).
+		err := os.RemoveAll(tmpDir)
 		assert.NoError(t, err)
 	})
 
-	err = w.Workspace.MkdirsByPath(ctx, tmpDir)
+	return tmpDir
+}
+
+// Stable scratch directory to run and iterate on DBR tests.
+func workspaceStableDir(ctx context.Context, t *testing.T) (w *databricks.WorkspaceClient, f filer.Filer, path string) {
+	w, err := databricks.NewWorkspaceClient()
+	require.NoError(t, err)
+
+	currentUser, err := w.CurrentUser.Me(ctx)
+	require.NoError(t, err)
+
+	path = fmt.Sprintf("/Workspace/Users/%s/dbr_scratch", currentUser.UserName)
+
+	// Delete the directory if it exists.
+	err = w.Workspace.Delete(ctx, workspace.Delete{
+		Path:      path,
+		Recursive: true,
+	})
+	var aerr *apierr.APIError
+	if err != nil && (!errors.As(err, &aerr) || aerr.ErrorCode != "RESOURCE_DOES_NOT_EXIST") {
+		t.Fatalf("Failed to delete directory %s: %v", path, err)
+	}
+
+	err = w.Workspace.MkdirsByPath(ctx, path)
 	require.NoError(t, err)
 
-	f, err := filer.NewWorkspaceFilesClient(w, tmpDir)
+	// Create a filer client for the workspace.
+	f, err = filer.NewWorkspaceFilesClient(w, path)
 	require.NoError(t, err)
 
-	return w, f, tmpDir
+	return w, f, path
+}
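`workspaceStableDir` treats a missing scratch directory as success when it cleans up before recreating. A minimal sketch of that idempotent-delete pattern, factored into a standalone helper — hypothetical; the diff inlines the same logic above:

```go
package acceptance_test

import (
	"context"
	"errors"

	"github.com/databricks/databricks-sdk-go"
	"github.com/databricks/databricks-sdk-go/apierr"
	"github.com/databricks/databricks-sdk-go/service/workspace"
)

// deleteIfExists is a hypothetical helper illustrating the pattern used by
// workspaceStableDir: recursively delete a workspace path, but treat
// RESOURCE_DOES_NOT_EXIST as success so the setup stays idempotent.
func deleteIfExists(ctx context.Context, w *databricks.WorkspaceClient, path string) error {
	err := w.Workspace.Delete(ctx, workspace.Delete{Path: path, Recursive: true})
	var aerr *apierr.APIError
	if errors.As(err, &aerr) && aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" {
		return nil // nothing to delete; fine on a fresh workspace
	}
	return err
}
```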
+
+func buildAndUploadArchive(ctx context.Context, t *testing.T, f filer.Filer) {
+	archiveDir := t.TempDir()
+	binDir := t.TempDir()
+	archiveName := "archive.tar.gz"
+
+	// Build the CLI archive and upload it to the workspace.
+	testarchive.CreateArchive(archiveDir, binDir, "..")
+
+	archiveReader, err := os.Open(filepath.Join(archiveDir, archiveName))
+	require.NoError(t, err)
+
+	err = f.Write(ctx, archiveName, archiveReader)
+	require.NoError(t, err)
+
+	err = archiveReader.Close()
+	require.NoError(t, err)
+}
+
+func uploadScratchRunner(ctx context.Context, t *testing.T, f filer.Filer, w *databricks.WorkspaceClient, dir string) string {
+	runnerReader, err := os.Open("scratch_dbr_runner.ipynb")
+	require.NoError(t, err)
+
+	err = f.Write(ctx, "scratch_dbr_runner.ipynb", runnerReader)
+	require.NoError(t, err)
+
+	err = runnerReader.Close()
+	require.NoError(t, err)
+
+	status, err := w.Workspace.GetStatusByPath(ctx, path.Join(dir, "scratch_dbr_runner"))
+	require.NoError(t, err)
+
+	url := w.Config.Host + "/editor/notebooks/" + strconv.FormatInt(status.ObjectId, 10)
+
+	return url
+}
+
+func uploadParams(ctx context.Context, t *testing.T, f filer.Filer) {
+	names := []string{
+		"CLOUD_ENV",
+		"TEST_DEFAULT_CLUSTER_ID",
+		"TEST_DEFAULT_WAREHOUSE_ID",
+		"TEST_INSTANCE_POOL_ID",
+		"TEST_METASTORE_ID",
+	}
+
+	env := make(map[string]string)
+	for _, name := range names {
+		env[name] = os.Getenv(name)
+	}
+
+	b, err := json.MarshalIndent(env, "", "  ")
+	require.NoError(t, err)
+
+	err = f.Write(ctx, "params.json", bytes.NewReader(b))
+	require.NoError(t, err)
+}
+
+// Running this test sets up a DBR test runner in the configured workspace.
+// You'll need to run the tests by actually running the notebook in the workspace.
+func TestSetupDbrRunner(t *testing.T) {
+	ctx := t.Context()
+	w, f, dir := workspaceStableDir(ctx, t)
+
+	t.Logf("Building and uploading archive...")
+	buildAndUploadArchive(ctx, t, f)
+
+	t.Logf("Uploading params...")
+	uploadParams(ctx, t, f)
+
+	t.Logf("Uploading runner...")
+	url := uploadScratchRunner(ctx, t, f, w, dir)
+
+	t.Logf("Created DBR testing notebook at: %s", url)
+}
+
+func TestArchive(t *testing.T) {
+	archiveDir := t.TempDir()
+	binDir := t.TempDir()
+	testarchive.CreateArchive(archiveDir, binDir, "..")
+
+	assert.FileExists(t, filepath.Join(archiveDir, "archive.tar.gz"))
+}
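`uploadParams` and the notebook's `Runner.__init__` share an implicit contract: `params.json` is a flat string-to-string map of environment variables. The real consumer is the Python notebook below; this Go sketch exists purely to illustrate the payload shape (the helper name `readParams` is hypothetical):

```go
package acceptance_test

import (
	"encoding/json"
	"io"
)

// readParams illustrates the params.json contract established by uploadParams:
// a flat map of environment variable names to values, e.g.
// {"CLOUD_ENV": "...", "TEST_DEFAULT_CLUSTER_ID": "..."}.
func readParams(r io.Reader) (map[string]string, error) {
	var env map[string]string
	if err := json.NewDecoder(r).Decode(&env); err != nil {
		return nil, err
	}
	return env, nil
}
```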
diff --git a/acceptance/internal/config.go b/acceptance/internal/config.go
index 5f3d030947..689fbc78de 100644
--- a/acceptance/internal/config.go
+++ b/acceptance/internal/config.go
@@ -55,6 +55,11 @@ type TestConfig struct {
 	// If true and Cloud=true, run this test only if a default warehouse is available in the cloud environment
 	RequiresWarehouse *bool
 
+	// If true, run this test only when running on DBR with the workspace filesystem.
+	// Note that this implies Cloud=true, since running on the workspace file
+	// system is only supported for integration tests.
+	RequiresWorkspaceFilesystem *bool
+
 	// If set, current user will be set to a service principal-like UUID instead of email (default is false)
 	IsServicePrincipal *bool
 
diff --git a/acceptance/internal/materialized_config.go b/acceptance/internal/materialized_config.go
index 3233d3907c..f58aae7d03 100644
--- a/acceptance/internal/materialized_config.go
+++ b/acceptance/internal/materialized_config.go
@@ -9,30 +9,32 @@ import (
 const MaterializedConfigFile = "out.test.toml"
 
 type MaterializedConfig struct {
-	GOOS                 map[string]bool     `toml:"GOOS,omitempty"`
-	CloudEnvs            map[string]bool     `toml:"CloudEnvs,omitempty"`
-	Local                *bool               `toml:"Local,omitempty"`
-	Cloud                *bool               `toml:"Cloud,omitempty"`
-	CloudSlow            *bool               `toml:"CloudSlow,omitempty"`
-	RequiresUnityCatalog *bool               `toml:"RequiresUnityCatalog,omitempty"`
-	RequiresCluster      *bool               `toml:"RequiresCluster,omitempty"`
-	RequiresWarehouse    *bool               `toml:"RequiresWarehouse,omitempty"`
-	EnvMatrix            map[string][]string `toml:"EnvMatrix,omitempty"`
+	GOOS                        map[string]bool     `toml:"GOOS,omitempty"`
+	CloudEnvs                   map[string]bool     `toml:"CloudEnvs,omitempty"`
+	Local                       *bool               `toml:"Local,omitempty"`
+	Cloud                       *bool               `toml:"Cloud,omitempty"`
+	CloudSlow                   *bool               `toml:"CloudSlow,omitempty"`
+	RequiresUnityCatalog        *bool               `toml:"RequiresUnityCatalog,omitempty"`
+	RequiresCluster             *bool               `toml:"RequiresCluster,omitempty"`
+	RequiresWarehouse           *bool               `toml:"RequiresWarehouse,omitempty"`
+	RequiresWorkspaceFilesystem *bool               `toml:"RequiresWorkspaceFilesystem,omitempty"`
+	EnvMatrix                   map[string][]string `toml:"EnvMatrix,omitempty"`
 }
 
 // GenerateMaterializedConfig creates a TOML representation of the configuration fields
 // that determine where and how a test is executed
 func GenerateMaterializedConfig(config TestConfig) (string, error) {
 	materialized := MaterializedConfig{
-		GOOS:                 config.GOOS,
-		CloudEnvs:            config.CloudEnvs,
-		Local:                config.Local,
-		Cloud:                config.Cloud,
-		CloudSlow:            config.CloudSlow,
-		RequiresUnityCatalog: config.RequiresUnityCatalog,
-		RequiresCluster:      config.RequiresCluster,
-		RequiresWarehouse:    config.RequiresWarehouse,
-		EnvMatrix:            config.EnvMatrix,
+		GOOS:                        config.GOOS,
+		CloudEnvs:                   config.CloudEnvs,
+		Local:                       config.Local,
+		Cloud:                       config.Cloud,
+		CloudSlow:                   config.CloudSlow,
+		RequiresUnityCatalog:        config.RequiresUnityCatalog,
+		RequiresCluster:             config.RequiresCluster,
+		RequiresWarehouse:           config.RequiresWarehouse,
+		RequiresWorkspaceFilesystem: config.RequiresWorkspaceFilesystem,
+		EnvMatrix:                   config.EnvMatrix,
 	}
 
 	var buf bytes.Buffer
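The new `RequiresWorkspaceFilesystem` knob follows the same tri-state `*bool` convention as the other requirement fields, interpreted by the `isTruePtr` helper that `getSkipReason` calls. The helper's actual implementation is not part of this diff; this is a sketch of the assumed semantics:

```go
package internal

// isTruePtr (sketch, assumed semantics): nil means "not set in test.toml",
// and only an explicit `true` opts the test into the requirement.
func isTruePtr(b *bool) bool {
	return b != nil && *b
}
```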
diff --git a/acceptance/scratch_dbr_runner.ipynb b/acceptance/scratch_dbr_runner.ipynb
new file mode 100644
index 0000000000..9bddab026d
--- /dev/null
+++ b/acceptance/scratch_dbr_runner.ipynb
@@ -0,0 +1,212 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d9fd5224",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tarfile\n",
+    "from pathlib import Path\n",
+    "\n",
+    "\n",
+    "def extract_cli_archive():\n",
+    "    src = \"archive.tar.gz\"\n",
+    "    dst = Path(\"/tmp/cli_archive\")\n",
+    "\n",
+    "    with tarfile.open(src, \"r:gz\") as tar:\n",
+    "        tar.extractall(path=dst)\n",
+    "\n",
+    "    print(f\"Extracted {src} to {dst}\")\n",
+    "\n",
+    "\n",
+    "extract_cli_archive()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "13fd9a7f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import json\n",
+    "from dbruntime.databricks_repl_context import get_context\n",
+    "import subprocess\n",
+    "import multiprocessing\n",
+    "\n",
+    "\n",
+    "class Runner:\n",
+    "    def __init__(self, archive_dir):\n",
+    "        # Load environment variables to set in the test runner.\n",
+    "        self.env = os.environ.copy()\n",
+    "        with open(\"params.json\", \"r\") as f:\n",
+    "            params = json.load(f)\n",
+    "        for k, v in params.items():\n",
+    "            self.env[k] = v\n",
+    "\n",
+    "        # Configure PATH to include go, uv and jq.\n",
+    "        self.env[\"PATH\"] = f\"{archive_dir}/bin/amd64:{archive_dir}/bin/amd64/go/bin:{self.env['PATH']}\"\n",
+    "\n",
+    "        ctx = get_context()\n",
+    "        workspace_url = spark.conf.get(\"spark.databricks.workspaceUrl\")\n",
+    "\n",
+    "        # Configure auth for the acceptance tests.\n",
+    "        self.env[\"DATABRICKS_TOKEN\"] = ctx.apiToken\n",
+    "        self.env[\"DATABRICKS_HOST\"] = workspace_url\n",
+    "\n",
+    "        # Configure working directory to the root of the CLI repo.\n",
+    "        self.exec_dir = Path(archive_dir) / \"cli\"\n",
+    "\n",
+    "        # Terraform needs to be installed on the workspace file system to be used by the acceptance tests.\n",
+    "        # Otherwise creating symlinks in .databricks fails.\n",
+    "        self.terraform_dir = os.getcwd() + \"/terraform\"\n",
+    "\n",
+    "    def run(self, prefix):\n",
+    "        cmd = [\n",
+    "            \"go\",\n",
+    "            \"tool\",\n",
+    "            \"gotestsum\",\n",
+    "            \"--format\",\n",
+    "            \"testname\",\n",
+    "            \"--no-summary=skipped\",\n",
+    "            \"--\",\n",
+    "            \"-timeout\",\n",
+    "            \"7200s\",\n",
+    "            \"-test.v\",\n",
+    "            \"-run\",\n",
+    "            prefix,\n",
+    "            \"github.com/databricks/cli/acceptance\",\n",
+    "            \"-workspace-tmp-dir\",\n",
+    "            \"-terraform-dir\",\n",
+    "            self.terraform_dir,\n",
+    "            \"-tail\",\n",
+    "        ]\n",
+    "\n",
+    "        subprocess.run(cmd, env=self.env, check=True, cwd=self.exec_dir)\n",
+    "\n",
+    "    # Debug helper to run a single test with a custom script and stream the runner logs.\n",
+    "    def run_script_with_logs(self, name, input_script):\n",
+    "        cmd = [\n",
+    "            \"go\",\n",
+    "            \"tool\",\n",
+    "            \"gotestsum\",\n",
+    "            \"--format\",\n",
+    "            \"testname\",\n",
+    "            \"--no-summary=skipped\",\n",
+    "            \"--\",\n",
+    "            \"-timeout\",\n",
+    "            \"7200s\",\n",
+    "            \"-test.v\",\n",
+    "            \"-run\",\n",
+    "            \"TestAccept/\" + name,\n",
+    "            \"github.com/databricks/cli/acceptance\",\n",
+    "            \"-workspace-tmp-dir\",\n",
+    "            \"-terraform-dir\",\n",
+    "            self.terraform_dir,\n",
+    "            \"-tail\",\n",
+    "        ]\n",
+    "\n",
+    "        with open(self.exec_dir / \"acceptance\" / name / \"script\", \"w\") as f:\n",
+    "            f.write(input_script)\n",
+    "\n",
+    "        subprocess.run(cmd, env=self.env, check=True, cwd=self.exec_dir)\n",
+    "\n",
+    "    # Debug helper to run a single test with a custom script in -update mode and print its recorded output.\n",
+    "    def run_with_script(self, name, input_script):\n",
+    "        cmd = [\n",
+    "            \"go\",\n",
+    "            \"test\",\n",
+    "            \"-timeout\",\n",
+    "            \"7200s\",\n",
+    "            \"-test.v\",\n",
+    "            \"-run\",\n",
+    "            \"TestAccept/\" + name,\n",
+    "            \"github.com/databricks/cli/acceptance\",\n",
+    "            \"-update\",\n",
+    "            \"-workspace-tmp-dir\",\n",
+    "            \"-terraform-dir\",\n",
+    "            self.terraform_dir,\n",
+    "        ]\n",
+    "\n",
+    "        with open(self.exec_dir / \"acceptance\" / name / \"script\", \"w\") as f:\n",
+    "            f.write(input_script)\n",
+    "\n",
+    "        try:\n",
+    "            subprocess.run(\n",
+    "                cmd, env=self.env, check=True, cwd=self.exec_dir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL\n",
+    "            )\n",
+    "        except subprocess.CalledProcessError:\n",
+    "            pass\n",
+    "\n",
+    "        with open(self.exec_dir / \"acceptance\" / name / \"output.txt\", \"r\") as f:\n",
+    "            print(f.read())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "271c44d8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "runner = Runner(\"/tmp/cli_archive\")\n",
+    "runner.run_with_script(\n",
+    "    \"selftest/dbr\",\n",
+    "    r\"\"\"\n",
+    "echo \"========================= START OF SCRIPT =====================\"\n",
+    "DASHBOARD_DISPLAY_NAME=\"test bundle-deploy-dashboard $(uuid)\"\n",
+    "export DASHBOARD_DISPLAY_NAME\n",
+    "\n",
+    "trace cat databricks.yml\n",
+    "\n",
+    "envsubst < databricks.yml.tmpl > databricks.yml\n",
+    "\n",
+    "trace cat databricks.yml\n",
+    "echo \"========================= END OF SCRIPT =====================\"\n",
+    "\"\"\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4d141de5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Call .run on the runner to run the tests. Please run the previous cells before running tests here.\n",
+    "\n",
+    "# Example:\n",
+    "runner = Runner(\"/tmp/cli_archive\")\n",
+    "runner.run(\"selftest/dbr\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.13.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
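The notebook's `Runner` exports `DATABRICKS_HOST` and `DATABRICKS_TOKEN` into the test process environment; on the Go side the tests then rely on the Databricks SDK's default environment-based config resolution, so no explicit configuration is needed. A minimal sketch (the helper name is hypothetical):

```go
package acceptance_test

import "github.com/databricks/databricks-sdk-go"

// newClient relies on the SDK's default config chain: with DATABRICKS_HOST and
// DATABRICKS_TOKEN set by the notebook's Runner, no explicit Config is needed.
func newClient() (*databricks.WorkspaceClient, error) {
	return databricks.NewWorkspaceClient()
}
```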
diff --git a/acceptance/selftest/dbr/script b/acceptance/selftest/dbr/script
new file mode 100644
index 0000000000..afcb033a50
--- /dev/null
+++ b/acceptance/selftest/dbr/script
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+printf "=== Verify running on DBR\n"
+if [ -z "$DATABRICKS_RUNTIME_VERSION" ]; then
+    echo "ERROR: DATABRICKS_RUNTIME_VERSION is not set"
+    exit 1
+fi
+echo "DATABRICKS_RUNTIME_VERSION is set: $DATABRICKS_RUNTIME_VERSION"
+
+printf "\n=== Verify current directory is on workspace filesystem\n"
+trace pwd
+
+current_dir=$(pwd)
+if [[ ! "$current_dir" =~ ^/Workspace/Users/ ]]; then
+    echo "ERROR: Current directory is not in /Workspace/Users/"
+    echo "Current directory: $current_dir"
+    exit 1
+fi
+
+echo "SUCCESS: Current directory is in workspace filesystem"
+
+printf "\n=== Verify current user matches directory\n"
+expected_path="/Workspace/Users/$CURRENT_USER_NAME"
+if [[ ! "$current_dir" =~ ^$expected_path ]]; then
+    echo "ERROR: Current directory does not start with $expected_path"
+    echo "Current directory: $current_dir"
+    exit 1
+fi
+
+echo "SUCCESS: Current directory is under $expected_path"
diff --git a/acceptance/selftest/dbr/test.toml b/acceptance/selftest/dbr/test.toml
new file mode 100644
index 0000000000..55b1193eb2
--- /dev/null
+++ b/acceptance/selftest/dbr/test.toml
@@ -0,0 +1,2 @@
+Cloud = true
+RequiresWorkspaceFilesystem = true