From 3a3b6f54daaf75343a2f35920786e2430ce3eca6 Mon Sep 17 00:00:00 2001 From: Varun Deep Saini Date: Mon, 15 Dec 2025 20:35:03 +0530 Subject: [PATCH 1/3] Add KillCaller feature to test server for CLI crash recovery testing --- acceptance/internal/config.go | 5 ++ acceptance/internal/prepare_server.go | 38 +++++++++ .../kill_caller/mid_request/out.test.toml | 5 ++ .../kill_caller/mid_request/output.txt | 5 ++ .../selftest/kill_caller/mid_request/script | 1 + .../kill_caller/mid_request/test.toml | 10 +++ acceptance/selftest/kill_caller/out.test.toml | 5 ++ acceptance/selftest/kill_caller/output.txt | 5 ++ acceptance/selftest/kill_caller/script | 1 + acceptance/selftest/kill_caller/test.toml | 11 +++ cmd/pipelines/root/root.go | 2 + cmd/root/root.go | 2 + libs/testserver/server.go | 82 ++++++++++++++++++- 13 files changed, 171 insertions(+), 1 deletion(-) create mode 100644 acceptance/selftest/kill_caller/mid_request/out.test.toml create mode 100644 acceptance/selftest/kill_caller/mid_request/output.txt create mode 100644 acceptance/selftest/kill_caller/mid_request/script create mode 100644 acceptance/selftest/kill_caller/mid_request/test.toml create mode 100644 acceptance/selftest/kill_caller/out.test.toml create mode 100644 acceptance/selftest/kill_caller/output.txt create mode 100644 acceptance/selftest/kill_caller/script create mode 100644 acceptance/selftest/kill_caller/test.toml diff --git a/acceptance/internal/config.go b/acceptance/internal/config.go index 2129fa5573..ed5d1cd03c 100644 --- a/acceptance/internal/config.go +++ b/acceptance/internal/config.go @@ -156,6 +156,11 @@ type ServerStub struct { // Configure as "1ms", "2s", "3m", etc. // See [time.ParseDuration] for details. Delay time.Duration + + // If set, send this signal to the caller process instead of returning a response. + // Use signal numbers: 9 for SIGKILL, 15 for SIGTERM, etc. + // Requires DATABRICKS_CLI_TEST_PID=1 to be set. + KillCaller int } // FindConfigs finds all the config relevant for this test, diff --git a/acceptance/internal/prepare_server.go b/acceptance/internal/prepare_server.go index a401a1cac8..17bc29e118 100644 --- a/acceptance/internal/prepare_server.go +++ b/acceptance/internal/prepare_server.go @@ -10,6 +10,7 @@ import ( "slices" "strings" "sync" + "syscall" "testing" "time" "unicode/utf8" @@ -209,6 +210,43 @@ func startLocalServer(t *testing.T, } } + if stub.KillCaller > 0 { + pid := testserver.ExtractPidFromHeaders(req.Headers) + if pid == 0 { + t.Errorf("KillCaller configured but test-pid not found in User-Agent") + return testserver.Response{ + StatusCode: http.StatusBadRequest, + Body: "test-pid not found in User-Agent (set DATABRICKS_CLI_TEST_PID=1)", + } + } + + signal := syscall.Signal(stub.KillCaller) + t.Logf("KillCaller: sending signal %d to PID %d (pattern: %s)", signal, pid, stub.Pattern) + + process, err := os.FindProcess(pid) + if err != nil { + t.Errorf("Failed to find process %d: %s", pid, err) + return testserver.Response{ + StatusCode: http.StatusInternalServerError, + Body: fmt.Sprintf("failed to find process: %s", err), + } + } + + if err := process.Signal(signal); err != nil { + t.Errorf("Failed to send signal %d to process %d: %s", signal, pid, err) + return testserver.Response{ + StatusCode: http.StatusInternalServerError, + Body: fmt.Sprintf("failed to send signal: %s", err), + } + } + + // Return a response (the CLI will likely be killed before it receives this) + return testserver.Response{ + StatusCode: http.StatusOK, + Body: fmt.Sprintf("sent signal %d to PID %d", signal, pid), + } + } + return stub.Response }) } diff --git a/acceptance/selftest/kill_caller/mid_request/out.test.toml b/acceptance/selftest/kill_caller/mid_request/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/selftest/kill_caller/mid_request/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/selftest/kill_caller/mid_request/output.txt b/acceptance/selftest/kill_caller/mid_request/output.txt new file mode 100644 index 0000000000..8ae1495a64 --- /dev/null +++ b/acceptance/selftest/kill_caller/mid_request/output.txt @@ -0,0 +1,5 @@ + +>>> errcode [CLI] workspace list / +script: line [N]: [PID] Killed "$@" + +Exit code: 137 diff --git a/acceptance/selftest/kill_caller/mid_request/script b/acceptance/selftest/kill_caller/mid_request/script new file mode 100644 index 0000000000..817a90cc76 --- /dev/null +++ b/acceptance/selftest/kill_caller/mid_request/script @@ -0,0 +1 @@ +trace errcode $CLI workspace list / diff --git a/acceptance/selftest/kill_caller/mid_request/test.toml b/acceptance/selftest/kill_caller/mid_request/test.toml new file mode 100644 index 0000000000..7a4894b38d --- /dev/null +++ b/acceptance/selftest/kill_caller/mid_request/test.toml @@ -0,0 +1,10 @@ +Local = true +Env.DATABRICKS_CLI_TEST_PID = "1" + +[[Server]] +Pattern = "GET /api/2.0/workspace/list" +KillCaller = 9 + +[[Repls]] +Old = 'script: line \d+: \d+ Killed' +New = 'script: line [N]: [PID] Killed' diff --git a/acceptance/selftest/kill_caller/out.test.toml b/acceptance/selftest/kill_caller/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/selftest/kill_caller/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/selftest/kill_caller/output.txt b/acceptance/selftest/kill_caller/output.txt new file mode 100644 index 0000000000..6bbd632062 --- /dev/null +++ b/acceptance/selftest/kill_caller/output.txt @@ -0,0 +1,5 @@ + +>>> errcode [CLI] current-user me +script: line [N]: [PID] Killed "$@" + +Exit code: 137 diff --git a/acceptance/selftest/kill_caller/script b/acceptance/selftest/kill_caller/script new file mode 100644 index 0000000000..b6e8c070f6 --- /dev/null +++ b/acceptance/selftest/kill_caller/script @@ -0,0 +1 @@ +trace errcode $CLI current-user me diff --git a/acceptance/selftest/kill_caller/test.toml b/acceptance/selftest/kill_caller/test.toml new file mode 100644 index 0000000000..f7d2b34354 --- /dev/null +++ b/acceptance/selftest/kill_caller/test.toml @@ -0,0 +1,11 @@ +Local = true +Env.DATABRICKS_CLI_TEST_PID = "1" + +# Kill the CLI when it calls /Me endpoint +[[Server]] +Pattern = "GET /api/2.0/preview/scim/v2/Me" +KillCaller = 9 + +[[Repls]] +Old = 'script: line \d+: \d+ Killed' +New = 'script: line [N]: [PID] Killed' diff --git a/cmd/pipelines/root/root.go b/cmd/pipelines/root/root.go index 72d0b52664..1eb874067c 100644 --- a/cmd/pipelines/root/root.go +++ b/cmd/pipelines/root/root.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/internal/build" "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/testserver" "github.com/spf13/cobra" ) @@ -72,6 +73,7 @@ func New(ctx context.Context) *cobra.Command { ctx = withCommandInUserAgent(ctx, cmd) ctx = withCommandExecIdInUserAgent(ctx) ctx = withUpstreamInUserAgent(ctx) + ctx = testserver.InjectPidToUserAgent(ctx) cmd.SetContext(ctx) return nil } diff --git a/cmd/root/root.go b/cmd/root/root.go index 96fde20846..20bf96691d 100644 --- a/cmd/root/root.go +++ b/cmd/root/root.go @@ -17,6 +17,7 @@ import ( "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/telemetry" "github.com/databricks/cli/libs/telemetry/protos" + "github.com/databricks/cli/libs/testserver" "github.com/spf13/cobra" ) @@ -79,6 +80,7 @@ func New(ctx context.Context) *cobra.Command { ctx = withCommandInUserAgent(ctx, cmd) ctx = withCommandExecIdInUserAgent(ctx) ctx = withUpstreamInUserAgent(ctx) + ctx = testserver.InjectPidToUserAgent(ctx) cmd.SetContext(ctx) return nil } diff --git a/libs/testserver/server.go b/libs/testserver/server.go index 8b8e346e99..99ec2b56e6 100644 --- a/libs/testserver/server.go +++ b/libs/testserver/server.go @@ -9,15 +9,47 @@ import ( "net/http" "net/http/httptest" "net/url" + "os" "reflect" + "regexp" + "strconv" "strings" "sync" + "syscall" + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/env" + "github.com/databricks/databricks-sdk-go/useragent" "github.com/gorilla/mux" +) - "github.com/databricks/cli/internal/testutil" +const ( + TestPidEnvVar = "DATABRICKS_CLI_TEST_PID" + testPidKey = "test-pid" ) +var testPidRegex = regexp.MustCompile(testPidKey + `/(\d+)`) + +func InjectPidToUserAgent(ctx context.Context) context.Context { + if env.Get(ctx, TestPidEnvVar) != "1" { + return ctx + } + return useragent.InContext(ctx, testPidKey, strconv.Itoa(os.Getpid())) +} + +func ExtractPidFromHeaders(headers http.Header) int { + ua := headers.Get("User-Agent") + matches := testPidRegex.FindStringSubmatch(ua) + if len(matches) < 2 { + return 0 + } + pid, err := strconv.Atoi(matches[1]) + if err != nil { + return 0 + } + return pid +} + type Server struct { *httptest.Server Router *mux.Router @@ -274,6 +306,9 @@ func (s *Server) Handle(method, path string, handler HandlerFunc) { StatusCode: 500, Body: []byte("INJECTED"), } + } else if bytes.Contains(request.Body, []byte("KILL_CALLER")) { + s.handleKillCaller(&request, w) + return } else { respAny := handler(request) if respAny == nil && request.Context.Err() != nil { @@ -322,3 +357,48 @@ func isNil(i any) bool { return false } } + +func (s *Server) handleKillCaller(request *Request, w http.ResponseWriter) { + pid := ExtractPidFromHeaders(request.Headers) + if pid == 0 { + s.t.Errorf("KILL_CALLER requested but test-pid not found in User-Agent") + w.WriteHeader(http.StatusBadRequest) + _, _ = fmt.Fprint(w, "test-pid not found in User-Agent (set DATABRICKS_CLI_TEST_PID=1)") + return + } + + signal := syscall.SIGKILL + bodyStr := string(request.Body) + if idx := strings.Index(bodyStr, "KILL_CALLER:"); idx != -1 { + signalPart := bodyStr[idx+len("KILL_CALLER:"):] + endIdx := 0 + for endIdx < len(signalPart) && signalPart[endIdx] >= '0' && signalPart[endIdx] <= '9' { + endIdx++ + } + if endIdx > 0 { + if sigNum, err := strconv.Atoi(signalPart[:endIdx]); err == nil { + signal = syscall.Signal(sigNum) + } + } + } + + s.t.Logf("KILL_CALLER: sending signal %d to PID %d", signal, pid) + + process, err := os.FindProcess(pid) + if err != nil { + s.t.Errorf("Failed to find process %d: %s", pid, err) + w.WriteHeader(http.StatusInternalServerError) + _, _ = fmt.Fprintf(w, "failed to find process: %s", err) + return + } + + if err := process.Signal(signal); err != nil { + s.t.Errorf("Failed to send signal %d to process %d: %s", signal, pid, err) + w.WriteHeader(http.StatusInternalServerError) + _, _ = fmt.Fprintf(w, "failed to send signal: %s", err) + return + } + + w.WriteHeader(http.StatusOK) + _, _ = fmt.Fprintf(w, "sent signal %d to PID %d", signal, pid) +} From 617d1d3dcfa1f2edac5c4af9040017450f19ae1e Mon Sep 17 00:00:00 2001 From: Varun Deep Saini Date: Tue, 16 Dec 2025 21:27:04 +0530 Subject: [PATCH 2/3] Removed other signals and now only killing is supported --- acceptance/internal/config.go | 7 ++--- acceptance/internal/prepare_server.go | 16 +++++------ .../kill_caller/mid_request/output.txt | 4 +-- .../kill_caller/mid_request/test.toml | 19 +++++++++++-- acceptance/selftest/kill_caller/output.txt | 4 +-- acceptance/selftest/kill_caller/test.toml | 19 +++++++++++-- libs/testserver/server.go | 28 +++++-------------- 7 files changed, 54 insertions(+), 43 deletions(-) diff --git a/acceptance/internal/config.go b/acceptance/internal/config.go index ed5d1cd03c..3b5a3aadc2 100644 --- a/acceptance/internal/config.go +++ b/acceptance/internal/config.go @@ -157,10 +157,9 @@ type ServerStub struct { // See [time.ParseDuration] for details. Delay time.Duration - // If set, send this signal to the caller process instead of returning a response. - // Use signal numbers: 9 for SIGKILL, 15 for SIGTERM, etc. - // Requires DATABRICKS_CLI_TEST_PID=1 to be set. - KillCaller int + // If true, kill the caller process instead of returning a response. + // Requires DATABRICKS_CLI_TEST_PID=1 to be set in the test environment. + KillCaller bool } // FindConfigs finds all the config relevant for this test, diff --git a/acceptance/internal/prepare_server.go b/acceptance/internal/prepare_server.go index 17bc29e118..200532e7e4 100644 --- a/acceptance/internal/prepare_server.go +++ b/acceptance/internal/prepare_server.go @@ -10,7 +10,6 @@ import ( "slices" "strings" "sync" - "syscall" "testing" "time" "unicode/utf8" @@ -210,7 +209,7 @@ func startLocalServer(t *testing.T, } } - if stub.KillCaller > 0 { + if stub.KillCaller { pid := testserver.ExtractPidFromHeaders(req.Headers) if pid == 0 { t.Errorf("KillCaller configured but test-pid not found in User-Agent") @@ -220,8 +219,7 @@ func startLocalServer(t *testing.T, } } - signal := syscall.Signal(stub.KillCaller) - t.Logf("KillCaller: sending signal %d to PID %d (pattern: %s)", signal, pid, stub.Pattern) + t.Logf("KillCaller: killing PID %d (pattern: %s)", pid, stub.Pattern) process, err := os.FindProcess(pid) if err != nil { @@ -232,18 +230,20 @@ func startLocalServer(t *testing.T, } } - if err := process.Signal(signal); err != nil { - t.Errorf("Failed to send signal %d to process %d: %s", signal, pid, err) + // Use process.Kill() for cross-platform compatibility. + // On Unix, this sends SIGKILL. On Windows, this calls TerminateProcess. + if err := process.Kill(); err != nil { + t.Errorf("Failed to kill process %d: %s", pid, err) return testserver.Response{ StatusCode: http.StatusInternalServerError, - Body: fmt.Sprintf("failed to send signal: %s", err), + Body: fmt.Sprintf("failed to kill process: %s", err), } } // Return a response (the CLI will likely be killed before it receives this) return testserver.Response{ StatusCode: http.StatusOK, - Body: fmt.Sprintf("sent signal %d to PID %d", signal, pid), + Body: fmt.Sprintf("killed PID %d", pid), } } diff --git a/acceptance/selftest/kill_caller/mid_request/output.txt b/acceptance/selftest/kill_caller/mid_request/output.txt index 8ae1495a64..fac04c326a 100644 --- a/acceptance/selftest/kill_caller/mid_request/output.txt +++ b/acceptance/selftest/kill_caller/mid_request/output.txt @@ -1,5 +1,5 @@ >>> errcode [CLI] workspace list / -script: line [N]: [PID] Killed "$@" +[PROCESS_KILLED] -Exit code: 137 +Exit code: [KILLED] diff --git a/acceptance/selftest/kill_caller/mid_request/test.toml b/acceptance/selftest/kill_caller/mid_request/test.toml index 7a4894b38d..c264c61861 100644 --- a/acceptance/selftest/kill_caller/mid_request/test.toml +++ b/acceptance/selftest/kill_caller/mid_request/test.toml @@ -3,8 +3,21 @@ Env.DATABRICKS_CLI_TEST_PID = "1" [[Server]] Pattern = "GET /api/2.0/workspace/list" -KillCaller = 9 +KillCaller = true [[Repls]] -Old = 'script: line \d+: \d+ Killed' -New = 'script: line [N]: [PID] Killed' +# macOS bash shows "Killed: 9" (with signal number), Linux shows "Killed" +# Normalize the whole killed line to a placeholder +Old = 'script: line \d+:\s+\d+ Killed(: 9)?\s+"\$@"' +New = '[PROCESS_KILLED]' + +[[Repls]] +# On Windows, there's no "Killed" message - just empty line before Exit code +# Insert [PROCESS_KILLED] placeholder for consistency +Old = '(\n>>> errcode [^\n]+\n)\nExit code:' +New = '${1}[PROCESS_KILLED]\n\nExit code:' + +[[Repls]] +# Normalize exit code: 137 on Unix (128 + SIGKILL), 1 on Windows +Old = 'Exit code: (137|1)' +New = 'Exit code: [KILLED]' diff --git a/acceptance/selftest/kill_caller/output.txt b/acceptance/selftest/kill_caller/output.txt index 6bbd632062..2e413420b8 100644 --- a/acceptance/selftest/kill_caller/output.txt +++ b/acceptance/selftest/kill_caller/output.txt @@ -1,5 +1,5 @@ >>> errcode [CLI] current-user me -script: line [N]: [PID] Killed "$@" +[PROCESS_KILLED] -Exit code: 137 +Exit code: [KILLED] diff --git a/acceptance/selftest/kill_caller/test.toml b/acceptance/selftest/kill_caller/test.toml index f7d2b34354..c7a106e144 100644 --- a/acceptance/selftest/kill_caller/test.toml +++ b/acceptance/selftest/kill_caller/test.toml @@ -4,8 +4,21 @@ Env.DATABRICKS_CLI_TEST_PID = "1" # Kill the CLI when it calls /Me endpoint [[Server]] Pattern = "GET /api/2.0/preview/scim/v2/Me" -KillCaller = 9 +KillCaller = true [[Repls]] -Old = 'script: line \d+: \d+ Killed' -New = 'script: line [N]: [PID] Killed' +# macOS bash shows "Killed: 9" (with signal number), Linux shows "Killed" +# Normalize the whole killed line to a placeholder +Old = 'script: line \d+:\s+\d+ Killed(: 9)?\s+"\$@"' +New = '[PROCESS_KILLED]' + +[[Repls]] +# On Windows, there's no "Killed" message - just empty line before Exit code +# Insert [PROCESS_KILLED] placeholder for consistency +Old = '(\n>>> errcode [^\n]+\n)\nExit code:' +New = '${1}[PROCESS_KILLED]\n\nExit code:' + +[[Repls]] +# Normalize exit code: 137 on Unix (128 + SIGKILL), 1 on Windows +Old = 'Exit code: (137|1)' +New = 'Exit code: [KILLED]' diff --git a/libs/testserver/server.go b/libs/testserver/server.go index 99ec2b56e6..d9f3407858 100644 --- a/libs/testserver/server.go +++ b/libs/testserver/server.go @@ -15,7 +15,6 @@ import ( "strconv" "strings" "sync" - "syscall" "github.com/databricks/cli/internal/testutil" "github.com/databricks/cli/libs/env" @@ -367,22 +366,7 @@ func (s *Server) handleKillCaller(request *Request, w http.ResponseWriter) { return } - signal := syscall.SIGKILL - bodyStr := string(request.Body) - if idx := strings.Index(bodyStr, "KILL_CALLER:"); idx != -1 { - signalPart := bodyStr[idx+len("KILL_CALLER:"):] - endIdx := 0 - for endIdx < len(signalPart) && signalPart[endIdx] >= '0' && signalPart[endIdx] <= '9' { - endIdx++ - } - if endIdx > 0 { - if sigNum, err := strconv.Atoi(signalPart[:endIdx]); err == nil { - signal = syscall.Signal(sigNum) - } - } - } - - s.t.Logf("KILL_CALLER: sending signal %d to PID %d", signal, pid) + s.t.Logf("KILL_CALLER: killing PID %d", pid) process, err := os.FindProcess(pid) if err != nil { @@ -392,13 +376,15 @@ func (s *Server) handleKillCaller(request *Request, w http.ResponseWriter) { return } - if err := process.Signal(signal); err != nil { - s.t.Errorf("Failed to send signal %d to process %d: %s", signal, pid, err) + // Use process.Kill() for cross-platform compatibility. + // On Unix, this sends SIGKILL. On Windows, this calls TerminateProcess. + if err := process.Kill(); err != nil { + s.t.Errorf("Failed to kill process %d: %s", pid, err) w.WriteHeader(http.StatusInternalServerError) - _, _ = fmt.Fprintf(w, "failed to send signal: %s", err) + _, _ = fmt.Fprintf(w, "failed to kill process: %s", err) return } w.WriteHeader(http.StatusOK) - _, _ = fmt.Fprintf(w, "sent signal %d to PID %d", signal, pid) + _, _ = fmt.Fprintf(w, "killed PID %d", pid) } From ba053c39b1c386f0dbc93a1590284a4ad775b97d Mon Sep 17 00:00:00 2001 From: Varun Deep Saini Date: Wed, 17 Dec 2025 13:27:36 +0530 Subject: [PATCH 3/3] Fixed windows tests --- acceptance/selftest/kill_caller/mid_request/test.toml | 9 ++++++++- acceptance/selftest/kill_caller/test.toml | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/acceptance/selftest/kill_caller/mid_request/test.toml b/acceptance/selftest/kill_caller/mid_request/test.toml index c264c61861..dedb9fd7a3 100644 --- a/acceptance/selftest/kill_caller/mid_request/test.toml +++ b/acceptance/selftest/kill_caller/mid_request/test.toml @@ -15,9 +15,16 @@ New = '[PROCESS_KILLED]' # On Windows, there's no "Killed" message - just empty line before Exit code # Insert [PROCESS_KILLED] placeholder for consistency Old = '(\n>>> errcode [^\n]+\n)\nExit code:' -New = '${1}[PROCESS_KILLED]\n\nExit code:' +New = """${1}[PROCESS_KILLED] + +Exit code:""" [[Repls]] # Normalize exit code: 137 on Unix (128 + SIGKILL), 1 on Windows Old = 'Exit code: (137|1)' New = 'Exit code: [KILLED]' + +[[Repls]] +# Normalize Windows line endings (CRLF -> LF) - must be LAST +Old = "\r" +New = '' diff --git a/acceptance/selftest/kill_caller/test.toml b/acceptance/selftest/kill_caller/test.toml index c7a106e144..0fd483f97a 100644 --- a/acceptance/selftest/kill_caller/test.toml +++ b/acceptance/selftest/kill_caller/test.toml @@ -16,9 +16,16 @@ New = '[PROCESS_KILLED]' # On Windows, there's no "Killed" message - just empty line before Exit code # Insert [PROCESS_KILLED] placeholder for consistency Old = '(\n>>> errcode [^\n]+\n)\nExit code:' -New = '${1}[PROCESS_KILLED]\n\nExit code:' +New = """${1}[PROCESS_KILLED] + +Exit code:""" [[Repls]] # Normalize exit code: 137 on Unix (128 + SIGKILL), 1 on Windows Old = 'Exit code: (137|1)' New = 'Exit code: [KILLED]' + +[[Repls]] +# Normalize Windows line endings (CRLF -> LF) - must be LAST +Old = "\r" +New = ''