From d2c27b09c5de6bbc43138cfa5759fbb2bc8b0295 Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Wed, 24 Dec 2025 16:13:12 -0800 Subject: [PATCH 1/4] run all scripts with controller Signed-off-by: Harper, Jason M --- cmd/config/config.go | 2 +- cmd/metrics/metadata.go | 6 +- internal/common/common.go | 76 +++++--- internal/script/script.go | 333 +++++++++++++++++---------------- internal/script/script_defs.go | 83 ++++---- internal/script/script_test.go | 16 +- 6 files changed, 273 insertions(+), 243 deletions(-) diff --git a/cmd/config/config.go b/cmd/config/config.go index aafefffd..95e7ea39 100644 --- a/cmd/config/config.go +++ b/cmd/config/config.go @@ -221,7 +221,7 @@ func runCmd(cmd *cobra.Command, args []string) error { // prepareTarget prepares the target for configuration changes // almost all set scripts require the msr kernel module to be loaded and // use wrmsr and rdmsr, so we do that here so that the goroutines for the -// set scripts can run in parallel without conflicts +// set scripts can run concurrently without conflicts func prepareTarget(myTarget target.Target, localTempDir string) (err error) { prepareScript := script.ScriptDefinition{ Name: "prepare-target", diff --git a/cmd/metrics/metadata.go b/cmd/metrics/metadata.go index 2249abb3..1efda68a 100644 --- a/cmd/metrics/metadata.go +++ b/cmd/metrics/metadata.go @@ -157,7 +157,7 @@ func (c *X86MetadataCollector) CollectMetadata(t target.Target, noRoot bool, noS if err != nil { return Metadata{}, fmt.Errorf("failed to get number of general purpose counters: %v", err) } - // the rest of the metadata is retrieved by running scripts in parallel + // the rest of the metadata is retrieved by running scripts concurrently metadataScripts, err := getMetadataScripts(noRoot, noSystemSummary, metadata.NumGeneralPurposeCounters) if err != nil { return Metadata{}, fmt.Errorf("failed to get metadata scripts: %v", err) @@ -336,7 +336,7 @@ func (c *ARMMetadataCollector) CollectMetadata(t target.Target, noRoot bool, noS if err != nil { return Metadata{}, fmt.Errorf("failed to get number of general purpose counters: %v", err) } - // the rest of the metadata is retrieved by running scripts in parallel and then parsing the output + // the rest of the metadata is retrieved by running scripts concurrently and then parsing the output metadataScripts, err := getMetadataScripts(noRoot, noSystemSummary, metadata.NumGeneralPurposeCounters) if err != nil { return Metadata{}, fmt.Errorf("failed to get metadata scripts: %v", err) @@ -393,7 +393,7 @@ func (c *ARMMetadataCollector) CollectMetadata(t target.Target, noRoot bool, noS } func getMetadataScripts(noRoot bool, noSystemSummary bool, numGPCounters int) (metadataScripts []script.ScriptDefinition, err error) { - // reduce startup time by running the metadata scripts in parallel + // reduce startup time by running the metadata scripts concurrently metadataScriptDefs := []script.ScriptDefinition{ { Name: "get architecture", diff --git a/internal/common/common.go b/internal/common/common.go index 4bd1c9e3..6dc5f336 100644 --- a/internal/common/common.go +++ b/internal/common/common.go @@ -288,16 +288,31 @@ func (rc *ReportingCommand) Run() error { return nil } +func signalProcessOnTarget(t target.Target, pidStr string, sigStr string) error { + var cmd *exec.Cmd + // prepend "-" to the signal string if not already present + if !strings.HasPrefix(sigStr, "-") { + sigStr = "-" + sigStr + } + if !t.IsSuperUser() && t.CanElevatePrivileges() { + cmd = exec.Command("sudo", "kill", sigStr, pidStr) + } else { + cmd = exec.Command("kill", sigStr, pidStr) + } + _, _, _, err := t.RunCommandEx(cmd, 5, false, true) // #nosec G204 + return err +} + // configureSignalHandler sets up a signal handler to catch SIGINT and SIGTERM // // When perfspect receives ctrl-c while in the shell, the shell propagates the // signal to all our children. But when perfspect is run in the background or disowned and // then receives SIGINT, e.g., from a script, we need to send the signal to our children // -// Also, when running scripts in parallel using the parallel_master.sh script, we need to -// send the signal to the parallel_master.sh script on each target so that it can clean up -// its child processes. This is because the parallel_master.sh script is run in its own process group -// and does not receive the signal when perfspect receives it. +// When running scripts using the controller.sh script, we need to send the signal to the +// controller.sh script on each target so that it can clean up its child processes. This is +// because the controller.sh script is run in its own process group and does not receive the +// signal when perfspect receives it. // // Parameters: // - myTargets: The list of targets to send the signal to. @@ -308,48 +323,38 @@ func configureSignalHandler(myTargets []target.Target, statusFunc progress.Multi go func() { sig := <-sigChannel slog.Debug("received signal", slog.String("signal", sig.String())) - // Scripts that are run in parallel using the parallel_master.sh script and a few other sequential scripts need to be handled specially - // because they are run in their own process group, we need to send the signal directly to the PID of the script. - // For every target, look for the primary_collection_script PID file and send SIGINT to it. + // The controller.sh script is run in its own process group, so we need to send the signal + // directly to the PID of the controller. For every target, look for the primary_collection_script + // PID file and send SIGINT to it. + // The controller script is run in its own process group, so we need to send the signal + // directly to the PID of the controller. For every target, look for the controller + // PID file and send SIGINT to it. for _, t := range myTargets { if statusFunc != nil { _ = statusFunc(t.GetName(), "Signal received, cleaning up...") } - pidFilePath := filepath.Join(t.GetTempDirectory(), "primary_collection_script.pid") + pidFilePath := filepath.Join(t.GetTempDirectory(), script.ControllerPIDFileName) stdout, _, exitcode, err := t.RunCommandEx(exec.Command("cat", pidFilePath), 5, false, true) // #nosec G204 if err != nil { - slog.Error("error retrieving target primary_collection_script PID", slog.String("target", t.GetName()), slog.String("error", err.Error())) + slog.Error("error retrieving target controller PID", slog.String("target", t.GetName()), slog.String("error", err.Error())) } if exitcode == 0 { pidStr := strings.TrimSpace(stdout) - _, _, _, err := t.RunCommandEx(exec.Command("sudo", "kill", "-SIGINT", pidStr), 5, false, true) // #nosec G204 + err = signalProcessOnTarget(t, pidStr, "SIGINT") if err != nil { - slog.Error("error sending signal to target primary_collection_script", slog.String("target", t.GetName()), slog.String("error", err.Error())) + slog.Error("error sending SIGINT signal to target controller", slog.String("target", t.GetName()), slog.String("error", err.Error())) } } } - // now wait until all primary collection scripts have exited - slog.Debug("waiting for primary_collection_script scripts to exit") + // now wait until all controller scripts have exited + slog.Debug("waiting for controller scripts to exit") for _, t := range myTargets { // create a per-target timeout context targetTimeout := 10 * time.Second ctx, cancel := context.WithTimeout(context.Background(), targetTimeout) timedOut := false - pidFilePath := filepath.Join(t.GetTempDirectory(), "primary_collection_script.pid") + pidFilePath := filepath.Join(t.GetTempDirectory(), script.ControllerPIDFileName) for { - // check for timeout - select { - case <-ctx.Done(): - if statusFunc != nil { - _ = statusFunc(t.GetName(), "cleanup timeout exceeded") - } - slog.Warn("signal handler cleanup timeout exceeded for target", slog.String("target", t.GetName())) - timedOut = true - default: - } - if timedOut { - break - } // read the pid file stdout, _, exitcode, err := t.RunCommandEx(exec.Command("cat", pidFilePath), 5, false, true) // #nosec G204 if err != nil || exitcode != 0 { @@ -362,6 +367,23 @@ func configureSignalHandler(myTargets []target.Target, statusFunc progress.Multi if err != nil || exitcode != 0 { break // process no longer exists, script has exited } + // check for timeout + select { + case <-ctx.Done(): + timedOut = true + default: + } + if timedOut { + if statusFunc != nil { + _ = statusFunc(t.GetName(), "cleanup timeout exceeded, sending kill signal") + } + slog.Warn("signal handler cleanup timeout exceeded for target, sending SIGKILL", slog.String("target", t.GetName())) + err = signalProcessOnTarget(t, pidStr, "SIGKILL") + if err != nil { + slog.Error("error sending SIGKILL signal to target controller", slog.String("target", t.GetName()), slog.String("error", err.Error())) + } + break + } // sleep for a short time before checking again time.Sleep(500 * time.Millisecond) } diff --git a/internal/script/script.go b/internal/script/script.go index e3b0faec..39767900 100644 --- a/internal/script/script.go +++ b/internal/script/script.go @@ -23,6 +23,8 @@ import ( //go:embed resources var Resources embed.FS +const ControllerPIDFileName = "controller.pid" + type ScriptOutput struct { ScriptDefinition Stdout string @@ -45,10 +47,10 @@ func RunScript(myTarget target.Target, script ScriptDefinition, localTempDir str // RunScripts runs a list of scripts on a target and returns the outputs of each script as a map with the script name as the key. func RunScripts(myTarget target.Target, scripts []ScriptDefinition, ignoreScriptErrors bool, localTempDir string, statusUpdate progress.MultiSpinnerUpdateFunc, collectingStatus string) (map[string]ScriptOutput, error) { - // drop scripts that should not be run and separate scripts that must run sequentially from those that can be run in parallel + // drop scripts that should not be run and separate scripts that must run sequentially from those that can be run concurrently canElevate := myTarget.CanElevatePrivileges() var sequentialScripts []ScriptDefinition - var parallelScripts []ScriptDefinition + var concurrentScripts []ScriptDefinition for _, script := range scripts { if script.Superuser && !canElevate { slog.Debug("skipping script because it requires superuser privileges and the user cannot elevate privileges on target", slog.String("script", script.Name)) @@ -57,14 +59,14 @@ func RunScripts(myTarget target.Target, scripts []ScriptDefinition, ignoreScript if script.Sequential { sequentialScripts = append(sequentialScripts, script) } else { - parallelScripts = append(parallelScripts, script) + concurrentScripts = append(concurrentScripts, script) } } // prepare target to run scripts by copying scripts and dependencies to target and installing LKMs if statusUpdate != nil { _ = statusUpdate(myTarget.GetName(), "preparing to collect data") } - installedLkms, err := prepareTargetToRunScripts(myTarget, append(sequentialScripts, parallelScripts...), localTempDir, false) + installedLkms, err := prepareTargetToRunScripts(myTarget, append(sequentialScripts, concurrentScripts...), localTempDir, false) if err != nil { err = fmt.Errorf("error while preparing target to run scripts: %v", err) return nil, err @@ -80,107 +82,66 @@ func RunScripts(myTarget target.Target, scripts []ScriptDefinition, ignoreScript if statusUpdate != nil { _ = statusUpdate(myTarget.GetName(), collectingStatus) } - // if there's only 1 parallel script, run it sequentially - if len(parallelScripts) == 1 { - slog.Debug("running single parallel script sequentially", slog.String("script", parallelScripts[0].Name)) - sequentialScripts = append(sequentialScripts, parallelScripts...) - parallelScripts = nil - } scriptOutputs := make(map[string]ScriptOutput) - // run parallel scripts - if len(parallelScripts) > 0 { - // form one master script that calls all the parallel scripts in the background - masterScriptName := "parallel_master.sh" - masterScript, needsElevatedPrivileges, err := formMasterScript(myTarget.GetTempDirectory(), parallelScripts) - if err != nil { - err = fmt.Errorf("error forming master script: %v", err) - return nil, err - } - // write master script to local file - masterScriptPath := path.Join(localTempDir, myTarget.GetName(), masterScriptName) - err = os.WriteFile(masterScriptPath, []byte(masterScript), 0600) - if err != nil { - err = fmt.Errorf("error writing master script to local file: %v", err) - return nil, err - } - // copy master script to target - err = myTarget.PushFile(masterScriptPath, myTarget.GetTempDirectory()) - if err != nil { - err = fmt.Errorf("error copying script to target: %v", err) - return nil, err - } - // run master script on target - // if the master script requires elevated privileges, we run it with sudo - // Note: adding 'sudo' to the individual scripts inside the master script - // instigates a known bug in the terminal that corrupts the tty settings: - // https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1043320 - var cmd *exec.Cmd - if needsElevatedPrivileges && !canElevate { - // this shouldn't happen because we already filtered out the scripts that require elevated privileges if the user cannot elevate privileges on the target - err = fmt.Errorf("master script requires elevated privileges but the user cannot elevate privileges on target") - return nil, err - } else if needsElevatedPrivileges && !myTarget.IsSuperUser() { - // run master script with sudo, "-S" to read password from stdin. Note: password won't be asked for if password-less sudo is configured. - cmd = exec.Command("sudo", "-S", "bash", path.Join(myTarget.GetTempDirectory(), masterScriptName)) // #nosec G204 - } else { - cmd = exec.Command("bash", path.Join(myTarget.GetTempDirectory(), masterScriptName)) // #nosec G204 - } - timeout := 0 // no timeout - // We run parallel_master in a new process group so that tty/terminal signals, e.g., Ctrl-C, are not sent to the command. This is - // necessary to allow the master script to handle signals itself and propagate them to the child scripts as needed. The - // signal handler in perfspect will send the signal to the parallel_master.sh script on each target so that it can clean up - // its child processes. - newProcessGroup := true - reuseSSHConnection := false // don't reuse ssh connection on long-running commands, makes it difficult to kill the command - stdout, stderr, exitcode, err := myTarget.RunCommandEx(cmd, timeout, newProcessGroup, reuseSSHConnection) - if err != nil { - slog.Error("error running master script on target", slog.String("stdout", stdout), slog.String("stderr", stderr), slog.Int("exitcode", exitcode), slog.String("error", err.Error())) - return nil, err - } - // parse output of master script - parallelScriptOutputs := parseMasterScriptOutput(stdout) - for _, scriptOutput := range parallelScriptOutputs { - // find associated parallel script - scriptIdx := -1 - for i, script := range parallelScripts { - if script.Name == scriptOutput.Name { - scriptIdx = i - break - } - } - scriptOutput.ScriptTemplate = parallelScripts[scriptIdx].ScriptTemplate - scriptOutputs[scriptOutput.Name] = scriptOutput - } + // form a unified controller script that runs all scripts (both concurrent and sequential phases) + controllerScriptName := "controller.sh" + controllerScript, needsElevatedPrivileges, err := formControllerScript(myTarget.GetTempDirectory(), concurrentScripts, sequentialScripts) + if err != nil { + err = fmt.Errorf("error forming controller script: %v", err) + return nil, err } - // run sequential scripts - for _, script := range sequentialScripts { - var cmd *exec.Cmd - scriptPath := path.Join(myTarget.GetTempDirectory(), scriptNameToFilename(script.Name)) - if script.Superuser && !canElevate { - // this shouldn't happen because we already filtered out the scripts that require elevated privileges if the user cannot elevate privileges on the target - err = fmt.Errorf("script requires elevated privileges but the user cannot elevate privileges on target") - return nil, err - } else if script.Superuser && !myTarget.IsSuperUser() { - // run script with sudo, "-S" to read password from stdin. Note: password won't be asked for if password-less sudo is configured. - cmd = exec.Command("sudo", "-S", "bash", scriptPath) // #nosec G204 - } else { - cmd = exec.Command("bash", scriptPath) // #nosec G204 - } - // if the script is tagged with NeedsKill, we run it in a new process group so that tty/terminal signals, e.g., Ctrl-C, are not sent to the command. This is - // necessary to allow the script to handle signals itself and clean up as needed. The - // signal handler in perfspect will send the signal to the script on each target so that it can clean up - // as needed. - newProcessGroup := script.NeedsKill - reuseSSHConnection := false // don't reuse ssh connection on long-running commands, makes it difficult to kill the command - stdout, stderr, exitcode, err := myTarget.RunCommandEx(cmd, 0, newProcessGroup, reuseSSHConnection) - if err != nil { - slog.Warn("error running script on target", slog.String("name", script.Name), slog.String("stdout", stdout), slog.String("stderr", stderr), slog.Int("exitcode", exitcode), slog.String("error", err.Error())) - } - scriptOutputs[script.Name] = ScriptOutput{ScriptDefinition: script, Stdout: stdout, Stderr: stderr, Exitcode: exitcode} - if !ignoreScriptErrors { - return scriptOutputs, err + // write controller script to local file + controllerScriptPath := path.Join(localTempDir, myTarget.GetName(), controllerScriptName) + err = os.WriteFile(controllerScriptPath, []byte(controllerScript), 0600) + if err != nil { + err = fmt.Errorf("error writing controller script to local file: %v", err) + return nil, err + } + // copy controller script to target + err = myTarget.PushFile(controllerScriptPath, myTarget.GetTempDirectory()) + if err != nil { + err = fmt.Errorf("error copying script to target: %v", err) + return nil, err + } + // run controller script on target + // if the controller script requires elevated privileges, we run it with sudo + // Note: adding 'sudo' to the individual scripts inside the controller script + // instigates a known bug in the terminal that corrupts the tty settings: + // https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1043320 + var cmd *exec.Cmd + if needsElevatedPrivileges && !canElevate { + // this shouldn't happen because we already filtered out the scripts that require elevated privileges if the user cannot elevate privileges on the target + err = fmt.Errorf("controller script requires elevated privileges but the user cannot elevate privileges on target") + return nil, err + } else if needsElevatedPrivileges && !myTarget.IsSuperUser() { + // run controller script with sudo, "-S" to read password from stdin. Note: password won't be asked for if password-less sudo is configured. + cmd = exec.Command("sudo", "-S", "bash", path.Join(myTarget.GetTempDirectory(), controllerScriptName)) // #nosec G204 + } else { + cmd = exec.Command("bash", path.Join(myTarget.GetTempDirectory(), controllerScriptName)) // #nosec G204 + } + timeout := 0 // no timeout + // We run controller in a new process group so that tty/terminal signals, e.g., Ctrl-C, are not sent to the command. This is + // necessary to allow the controller script to handle signals itself and propagate them to all child scripts as needed. The + // signal handler in perfspect will send the signal to the controller.sh script on each target so that it can clean up + // its child processes. + newProcessGroup := true + reuseSSHConnection := false // don't reuse ssh connection on long-running commands, makes it difficult to kill the command + stdout, stderr, exitcode, err := myTarget.RunCommandEx(cmd, timeout, newProcessGroup, reuseSSHConnection) + if err != nil { + slog.Error("error running controller script on target", slog.String("stdout", stdout), slog.String("stderr", stderr), slog.Int("exitcode", exitcode), slog.String("error", err.Error())) + return nil, err + } + // parse output of controller script + allScriptOutputs := parseControllerScriptOutput(stdout) + for _, scriptOutput := range allScriptOutputs { + // find associated script (concurrent or sequential) + for _, script := range append(concurrentScripts, sequentialScripts...) { + if script.Name == scriptOutput.Name { + scriptOutput.ScriptTemplate = script.ScriptTemplate + scriptOutputs[scriptOutput.Name] = scriptOutput + break + } } - err = nil } return scriptOutputs, nil } @@ -229,31 +190,49 @@ func scriptNameToFilename(name string) string { return sanitizeScriptName(name) + ".sh" } -// formMasterScript forms a master script that runs all parallel scripts in the background, waits for them to finish, then prints the output of each script. -// Return values are the master script and a boolean indicating whether the master script requires elevated privileges. -func formMasterScript(targetTempDirectory string, parallelScripts []ScriptDefinition) (string, bool, error) { - // data model for template +// formControllerScript forms a controller script that runs all scripts in two phases: +// first all concurrent scripts together in the background, then all sequential scripts one-by-one. +// It handles signals and cleanup for both phases uniformly. +// Return values are the controller script and a boolean indicating whether the controller script requires elevated privileges. +func formControllerScript(targetTempDirectory string, concurrentScripts []ScriptDefinition, sequentialScripts []ScriptDefinition) (string, bool, error) { + // tplScript holds the minimal per-script fields passed into the + // template that renders the shell controller script. + // Primarily carries the sanitized script name used for filenames and + // template keys (e.g., ${s}.sh, ${s}.stdout, pids[$s]), while the original + // Name is kept for readable summary output. type tplScript struct { Name string Sanitized string - NeedsKill bool - Superuser bool } - data := struct { - TargetTempDir string - Scripts []tplScript + // tplData holds all data passed into the controller script template. + tplData := struct { + TargetTempDir string + ControllerPIDFile string + ConcurrentScripts []tplScript + SequentialScripts []tplScript }{} - data.TargetTempDir = targetTempDirectory + // populate tplData + tplData.TargetTempDir = targetTempDirectory + tplData.ControllerPIDFile = ControllerPIDFileName needsElevated := false - for _, s := range parallelScripts { + for _, s := range concurrentScripts { if s.Superuser { needsElevated = true } - data.Scripts = append(data.Scripts, tplScript{ - Name: s.Name, Sanitized: sanitizeScriptName(s.Name), NeedsKill: s.NeedsKill, Superuser: s.Superuser, + tplData.ConcurrentScripts = append(tplData.ConcurrentScripts, tplScript{ + Name: s.Name, Sanitized: sanitizeScriptName(s.Name), }) } - const masterScriptTemplate = `#!/usr/bin/env bash + for _, s := range sequentialScripts { + if s.Superuser { + needsElevated = true + } + tplData.SequentialScripts = append(tplData.SequentialScripts, tplScript{ + Name: s.Name, Sanitized: sanitizeScriptName(s.Name), + }) + } + // define controller script template + const controllerScriptTemplate = `#!/usr/bin/env bash set -o errexit set -o pipefail @@ -261,54 +240,73 @@ script_dir={{.TargetTempDir}} cd "$script_dir" # write our pid to a file so that perfspect can send us a signal if needed -echo $$ > primary_collection_script.pid +echo $$ > {{.ControllerPIDFile}} -declare -a scripts=() -declare -A needs_kill=() +declare -a concurrent_scripts=() +declare -a sequential_scripts=() declare -A pids=() declare -A exitcodes=() declare -A orig_names=() +current_seq_pid="" -{{- range .Scripts}} -scripts+=({{ .Sanitized }}) -needs_kill[{{ .Sanitized }}]={{ if .NeedsKill }}1{{ else }}0{{ end }} +ensure_trailing_newline() { + local f="$1" + if [ ! -f "$f" ]; then return; fi + cat "$f" || true + if [ -s "$f" ]; then + if [ "$(tail -c 1 "$f" 2>/dev/null | wc -l)" -eq 0 ]; then echo; fi + fi +} + +{{- range .ConcurrentScripts}} +concurrent_scripts+=({{ .Sanitized }}) +orig_names[{{ .Sanitized }}]="{{ .Name }}" +{{ end }} +{{- range .SequentialScripts}} +sequential_scripts+=({{ .Sanitized }}) orig_names[{{ .Sanitized }}]="{{ .Name }}" {{ end }} -start_scripts() { - for s in "${scripts[@]}"; do - bash "$script_dir/${s}.sh" > "$script_dir/${s}.stdout" 2> "$script_dir/${s}.stderr" & +start_concurrent_scripts() { + for s in "${concurrent_scripts[@]}"; do + setsid bash "$script_dir/${s}.sh" > "$script_dir/${s}.stdout" 2> "$script_dir/${s}.stderr" & pids[$s]=$! done } +run_sequential_scripts() { + for s in "${sequential_scripts[@]}"; do + current_seq_pid="" + setsid bash "$script_dir/${s}.sh" > "$script_dir/${s}.stdout" 2> "$script_dir/${s}.stderr" & + current_seq_pid=$! + pids[$s]=$current_seq_pid + if wait "$current_seq_pid"; then + exitcodes[$s]=0 + else + ec=$? + exitcodes[$s]=$ec + fi + current_seq_pid="" + done +} + kill_script() { local s="$1" local pid="${pids[$s]:-}" [[ -z "$pid" ]] && return 0 if ! ps -p "$pid" > /dev/null 2>&1; then return 0; fi - if [[ "${needs_kill[$s]}" == "1" && -f "${s}_cmd.pid" ]]; then - local bgpid - bgpid="$(cat "${s}_cmd.pid" 2>/dev/null || true)" - if [[ -n "$bgpid" && $(ps -p "$bgpid" -o pid= 2>/dev/null) ]]; then - kill -SIGINT "$bgpid" 2>/dev/null || true - sleep 0.5 - if ps -p "$bgpid" > /dev/null 2>&1; then - kill -SIGKILL "$bgpid" 2>/dev/null || true - exitcodes[$s]=137 - else - exitcodes[$s]=0 - fi - fi - else - kill -SIGINT "$pid" 2>/dev/null || true - wait "$pid" 2>/dev/null || true - if [[ -z "${exitcodes[$s]:-}" ]]; then exitcodes[$s]=130; fi - fi + # Send signal to the process group (negative PID) + kill -SIGINT -"$pid" 2>/dev/null || true + # Give it a moment to clean up + sleep 0.1 + # Force kill the process group if still alive + kill -SIGKILL -"$pid" 2>/dev/null || true + wait "$pid" 2>/dev/null || true + if [[ -z "${exitcodes[$s]:-}" ]]; then exitcodes[$s]=130; fi } -wait_for_scripts() { - for s in "${scripts[@]}"; do +wait_for_concurrent_scripts() { + for s in "${concurrent_scripts[@]}"; do if wait "${pids[$s]}"; then exitcodes[$s]=0 else @@ -319,50 +317,63 @@ wait_for_scripts() { } print_summary() { - for s in "${scripts[@]}"; do + local all_scripts=("${concurrent_scripts[@]}" "${sequential_scripts[@]}") + for s in "${all_scripts[@]}"; do echo "<---------------------->" echo "SCRIPT NAME: ${orig_names[$s]}" - echo "STDOUT:"; cat "$script_dir/${s}.stdout" || true - echo "STDERR:"; cat "$script_dir/${s}.stderr" || true + echo "STDOUT:"; ensure_trailing_newline "$script_dir/${s}.stdout" + echo "STDERR:"; ensure_trailing_newline "$script_dir/${s}.stderr" echo "EXIT CODE: ${exitcodes[$s]:-1}" done } handle_sigint() { echo "Received SIGINT; attempting graceful shutdown" >&2 - for s in "${scripts[@]}"; do + # kill all running concurrent scripts + for s in "${concurrent_scripts[@]}"; do kill_script "$s" done + # kill current sequential script if running + if [[ -n "$current_seq_pid" ]]; then + kill -SIGINT -"$current_seq_pid" 2>/dev/null || true + sleep 0.1 + kill -SIGKILL -"$current_seq_pid" 2>/dev/null || true + wait "$current_seq_pid" 2>/dev/null || true + fi print_summary - rm -f primary_collection_script.pid + rm -f {{.ControllerPIDFile}} exit 0 } trap handle_sigint SIGINT -start_scripts -wait_for_scripts +# run concurrent scripts first +start_concurrent_scripts +wait_for_concurrent_scripts +# then run sequential scripts +run_sequential_scripts print_summary -rm -f primary_collection_script.pid +rm -f {{.ControllerPIDFile}} ` - tmpl, err := template.New("master").Parse(masterScriptTemplate) + // render controller script template + tmpl, err := template.New("controller").Parse(controllerScriptTemplate) if err != nil { - slog.Error("failed to parse master script template", slog.String("error", err.Error())) + slog.Error("failed to parse controller script template", slog.String("error", err.Error())) return "", needsElevated, err } var out strings.Builder - if err = tmpl.Execute(&out, data); err != nil { - slog.Error("failed to execute master script template", slog.String("error", err.Error())) + if err = tmpl.Execute(&out, tplData); err != nil { + slog.Error("failed to execute controller script template", slog.String("error", err.Error())) return "", needsElevated, err } return out.String(), needsElevated, nil } -// parseMasterScriptOutput parses the output of the master script that runs all parallel scripts in the background. +// parseControllerScriptOutput parses the output of the controller script that runs all scripts (concurrent and sequential). // It returns a list of ScriptOutput objects, one for each script that was run. -func parseMasterScriptOutput(masterScriptOutput string) (scriptOutputs []ScriptOutput) { - // split output of master script into individual script outputs - for output := range strings.SplitSeq(masterScriptOutput, "<---------------------->\n") { +func parseControllerScriptOutput(controllerScriptOutput string) (scriptOutputs []ScriptOutput) { + // split output of controller script into individual script outputs + for output := range strings.SplitSeq(controllerScriptOutput, "<---------------------->\n") { lines := strings.Split(output, "\n") if len(lines) < 4 { // minimum lines for a script output continue diff --git a/internal/script/script_defs.go b/internal/script/script_defs.go index 866c95b1..aa59a005 100644 --- a/internal/script/script_defs.go +++ b/internal/script/script_defs.go @@ -21,7 +21,6 @@ type ScriptDefinition struct { Depends []string // binary dependencies that must be available for the script to run Superuser bool // requires sudo or root Sequential bool // run script sequentially (not at the same time as others) - NeedsKill bool // process/script needs to be killed after run without a duration specified, i.e., it doesn't stop through SIGINT } // script names, these must be unique @@ -1274,14 +1273,11 @@ duration={{.Duration}} if [ $duration -ne 0 ] && [ $interval -ne 0 ]; then count=$((duration / interval)) fi -LC_TIME=C mpstat -u -T -I SCPU -P ALL $interval $count & -echo $! > {{.ScriptName}}_cmd.pid -wait +LC_TIME=C mpstat -u -T -I SCPU -P ALL $interval $count `, Superuser: true, Lkms: []string{}, Depends: []string{"mpstat"}, - NeedsKill: true, }, IostatTelemetryScriptName: { Name: IostatTelemetryScriptName, @@ -1290,14 +1286,11 @@ duration={{.Duration}} if [ $duration -ne 0 ] && [ $interval -ne 0 ]; then count=$((duration / interval)) fi -S_TIME_FORMAT=ISO iostat -d -t $interval $count | sed '/^loop/d' & -echo $! > {{.ScriptName}}_cmd.pid -wait +S_TIME_FORMAT=ISO iostat -d -t $interval $count | sed '/^loop/d' `, Superuser: true, Lkms: []string{}, Depends: []string{"iostat"}, - NeedsKill: true, }, MemoryTelemetryScriptName: { Name: MemoryTelemetryScriptName, @@ -1306,14 +1299,11 @@ duration={{.Duration}} if [ $duration -ne 0 ] && [ $interval -ne 0 ]; then count=$((duration / interval)) fi -LC_TIME=C sar -r $interval $count & -echo $! > {{.ScriptName}}_cmd.pid -wait +LC_TIME=C sar -r $interval $count `, Superuser: true, Lkms: []string{}, Depends: []string{"sar", "sadc"}, - NeedsKill: true, }, NetworkTelemetryScriptName: { Name: NetworkTelemetryScriptName, @@ -1322,14 +1312,11 @@ duration={{.Duration}} if [ $duration -ne 0 ] && [ $interval -ne 0 ]; then count=$((duration / interval)) fi -LC_TIME=C sar -n DEV $interval $count & -echo $! > {{.ScriptName}}_cmd.pid -wait +LC_TIME=C sar -n DEV $interval $count `, Superuser: true, Lkms: []string{}, Depends: []string{"sar", "sadc"}, - NeedsKill: true, }, TurbostatTelemetryScriptName: { Name: TurbostatTelemetryScriptName, @@ -1344,19 +1331,40 @@ else fi echo TIME: $(date +"%H:%M:%S") echo INTERVAL: $interval -turbostat -i $interval $count & -echo $! > {{.ScriptName}}_cmd.pid -wait +turbostat -i $interval $count `, Superuser: true, Lkms: []string{"msr"}, Depends: []string{"turbostat"}, - NeedsKill: true, }, InstructionTelemetryScriptName: { Name: InstructionTelemetryScriptName, ScriptTemplate: `interval={{.Interval}} duration={{.Duration}} +pid={{.InstrMixPID}} + +cleanup_done=0 + +finalize() { + if [ $cleanup_done -eq 1 ]; then + return + fi + cleanup_done=1 + + # Explicitly write to fd 2 to ensure it reaches stderr + echo "Finalizing instruction mix telemetry (pw_pid=$pw_pid)..." 1>&2 + + # kill the processwatch pipeline process group if it is still running + if [ -n "${pw_pid:-}" ] && [ "$pw_pid" -gt 0 ]; then + # Try SIGTERM first + kill -TERM -"$pw_pid" 2>/dev/null || true + sleep 0.1 + # Then SIGKILL if needed + kill -KILL -"$pw_pid" 2>/dev/null || true + fi +} +trap finalize INT TERM EXIT + if [ $duration -ne 0 ] && [ $interval -ne 0 ]; then count=$((duration / interval)) arg_count="-n $count" @@ -1364,13 +1372,13 @@ fi if [ $interval -ne 0 ]; then arg_interval="-i $interval" fi -echo TIME: $(date +"%H:%M:%S") +echo TIME: "$(date +"%H:%M:%S")" echo INTERVAL: $interval # if no PID specified, increase the sampling interval (defaults to 100,000) to reduce overhead -if [ {{.InstrMixPID}} -eq 0 ]; then +if [ $pid -eq 0 ]; then arg_sampling_rate="-s {{.InstrMixFrequency}}" else - arg_pid="-p {{.InstrMixPID}}" + arg_pid="-p $pid" fi # -c: CSV output, -a: all categories, -p: PID, -s: sampling rate, -i: interval, -n: count # example output: @@ -1379,34 +1387,28 @@ fi # 0,2038501,stress-ng-cpu,0.000000,0.000000,0.000000,0.000000 # We only need the header line and the subsequent "ALL" lines (for each interval) # Filter output: keep header (NR==1) and lines where 2nd and 3rd columns are ALL -processwatch -c -a $arg_pid $arg_sampling_rate $arg_interval $arg_count | awk -F',' 'NR==1 || ($2=="ALL" && $3=="ALL")' & -echo $! > {{.ScriptName}}_cmd.pid -wait +( processwatch -c -a "$arg_pid" "$arg_sampling_rate" "$arg_interval" "$arg_count" | awk -F',' 'NR==1 || ($2=="ALL" && $3=="ALL")' ) & +pw_pid=$! +# wait for processwatch subshell to finish. It will finish naturally if a duration is provided, +# otherwise it will be killed in the finalize() function upon receiving a signal. +wait $pw_pid 2>/dev/null || true +finalize `, Superuser: true, Lkms: []string{"msr"}, Depends: []string{"processwatch"}, - NeedsKill: true, }, GaudiTelemetryScriptName: { Name: GaudiTelemetryScriptName, ScriptTemplate: ` if command -v {{.GaudiHlsmiPath}} &> /dev/null; then - {{.GaudiHlsmiPath}} --query-aip=timestamp,name,temperature.aip,module_id,utilization.aip,memory.total,memory.free,memory.used,power.draw --format=csv,nounits -l {{.Interval}} & - echo $! > {{.ScriptName}}_cmd.pid - # if duration is set, sleep for the duration then kill the process - if [ {{.Duration}} -ne 0 ]; then - sleep {{.Duration}} - kill -SIGINT $(cat {{.ScriptName}}_cmd.pid) - fi - wait + {{.GaudiHlsmiPath}} --query-aip=timestamp,name,temperature.aip,module_id,utilization.aip,memory.total,memory.free,memory.used,power.draw --format=csv,nounits -l {{.Interval}} -d {{.Duration}} else echo "hl-smi not found at {{.GaudiHlsmiPath}}" >&2 exit 1 fi `, Superuser: true, - NeedsKill: true, }, PDUTelemetryScriptName: { Name: PDUTelemetryScriptName, @@ -1441,7 +1443,6 @@ for ((i=0; i primary_collection_script.pid - ap_interval=0 if [ "$frequency" -ne 0 ]; then ap_interval=$((1000000000 / frequency)) @@ -1525,7 +1523,7 @@ finalize() { stop_profiling collapse_perf_data print_results - rm -f primary_collection_script.pid + rm -f controller.pid exit 0 } @@ -1600,7 +1598,6 @@ wait Superuser: true, Sequential: true, Depends: []string{"async-profiler", "perf", "stackcollapse-perf"}, - NeedsKill: true, }, // lock analysis scripts ProfileKernelLockScriptName: { diff --git a/internal/script/script_test.go b/internal/script/script_test.go index e280359d..4e2477be 100644 --- a/internal/script/script_test.go +++ b/internal/script/script_test.go @@ -219,7 +219,7 @@ func TestFormMasterScriptTemplateStructure(t *testing.T) { {Name: "alpha script", Superuser: false}, {Name: "beta-script", Superuser: true}, } - master, elevated, err := formMasterScript("/tmp/targetdir", scripts) + master, elevated, err := formControllerScript("/tmp/targetdir", scripts, nil) if err != nil { t.Fatalf("error forming master script: %v", err) } @@ -231,7 +231,7 @@ func TestFormMasterScriptTemplateStructure(t *testing.T) { t.Errorf("master script missing shebang") } // Functions present - for _, fn := range []string{"start_scripts()", "kill_script()", "wait_for_scripts()", "print_summary()", "handle_sigint()"} { + for _, fn := range []string{"start_concurrent_scripts()", "run_sequential_scripts()", "kill_script()", "wait_for_concurrent_scripts()", "print_summary()", "handle_sigint()"} { if !strings.Contains(master, fn) { t.Errorf("expected function %s in master script", fn) } @@ -254,7 +254,7 @@ func TestFormMasterScriptTemplateStructure(t *testing.T) { func TestFormMasterScriptNeedsElevatedFlag(t *testing.T) { scripts := []ScriptDefinition{{Name: "user", Superuser: false}, {Name: "also user", Superuser: false}} - _, elevated, err := formMasterScript("/tmp/dir", scripts) + _, elevated, err := formControllerScript("/tmp/dir", scripts, nil) if err != nil { t.Fatalf("error forming master script: %v", err) } @@ -264,7 +264,7 @@ func TestFormMasterScriptNeedsElevatedFlag(t *testing.T) { } func TestFormMasterScriptEmptyScripts(t *testing.T) { - master, elevated, err := formMasterScript("/tmp/dir", nil) + master, elevated, err := formControllerScript("/tmp/dir", nil, nil) if err != nil { t.Fatalf("error forming master script: %v", err) } @@ -272,7 +272,7 @@ func TestFormMasterScriptEmptyScripts(t *testing.T) { t.Fatalf("expected elevated=false with empty slice") } // Should still contain core function definitions even if no scripts. - if !strings.Contains(master, "start_scripts()") || !strings.Contains(master, "print_summary()") { + if !strings.Contains(master, "start_concurrent_scripts()") || !strings.Contains(master, "print_summary()") { t.Errorf("template missing expected functions for empty slice") } t.Logf("MASTER SCRIPT EMPTY:\n%s", master) @@ -291,7 +291,7 @@ func TestFormMasterScriptExecutionIntegration(t *testing.T) { // Integration test: create temp directory, stub two child scripts, run master script, parse output. tmp := t.TempDir() scripts := []ScriptDefinition{{Name: "alpha script"}, {Name: "beta-script"}} - master, elevated, err := formMasterScript(tmp, scripts) + master, elevated, err := formControllerScript(tmp, scripts, nil) if err != nil { t.Fatalf("error forming master script: %v", err) } @@ -308,7 +308,7 @@ func TestFormMasterScriptExecutionIntegration(t *testing.T) { } } // Write master script. - masterPath := filepath.Join(tmp, "parallel_master.sh") + masterPath := filepath.Join(tmp, "concurrent_master.sh") if err := os.WriteFile(masterPath, []byte(master), 0o700); err != nil { t.Fatalf("failed writing master script: %v", err) } @@ -319,7 +319,7 @@ func TestFormMasterScriptExecutionIntegration(t *testing.T) { content, _ := os.ReadFile(masterPath) t.Fatalf("error executing master script: %v\nstdout+stderr: %s\nMASTER SCRIPT:\n%s", err, out, string(content)) } - parsed := parseMasterScriptOutput(out) + parsed := parseControllerScriptOutput(out) if len(parsed) != 2 { t.Fatalf("expected 2 parsed script outputs, got %d", len(parsed)) } From 41ac4666dfc058f3875a3f25dcd15b544a6d387c Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Wed, 24 Dec 2025 20:50:13 -0800 Subject: [PATCH 2/4] clean up errors and logs Signed-off-by: Harper, Jason M --- cmd/flamegraph/flamegraph_tables.go | 12 ++++++++++++ cmd/telemetry/telemetry_tables.go | 6 +++++- internal/common/turbostat.go | 28 +++++++++++++--------------- internal/script/script.go | 8 +++++--- 4 files changed, 35 insertions(+), 19 deletions(-) diff --git a/cmd/flamegraph/flamegraph_tables.go b/cmd/flamegraph/flamegraph_tables.go index e600bc40..65120108 100644 --- a/cmd/flamegraph/flamegraph_tables.go +++ b/cmd/flamegraph/flamegraph_tables.go @@ -41,6 +41,10 @@ func callStackFrequencyTableValues(outputs map[string]script.ScriptOutput) []tab } func javaFoldedFromOutput(outputs map[string]script.ScriptOutput) string { + if outputs[script.CollapsedCallStacksScriptName].Stdout == "" { + slog.Warn("collapsed call stack output is empty") + return "" + } sections := common.GetSectionsFromOutput(outputs[script.CollapsedCallStacksScriptName].Stdout) if len(sections) == 0 { slog.Warn("no sections in collapsed call stack output") @@ -79,6 +83,10 @@ func javaFoldedFromOutput(outputs map[string]script.ScriptOutput) string { } func nativeFoldedFromOutput(outputs map[string]script.ScriptOutput) string { + if outputs[script.CollapsedCallStacksScriptName].Stdout == "" { + slog.Warn("collapsed call stack output is empty") + return "" + } sections := common.GetSectionsFromOutput(outputs[script.CollapsedCallStacksScriptName].Stdout) if len(sections) == 0 { slog.Warn("no sections in collapsed call stack output") @@ -104,6 +112,10 @@ func nativeFoldedFromOutput(outputs map[string]script.ScriptOutput) string { } func maxRenderDepthFromOutput(outputs map[string]script.ScriptOutput) string { + if outputs[script.CollapsedCallStacksScriptName].Stdout == "" { + slog.Warn("collapsed call stack output is empty") + return "" + } sections := common.GetSectionsFromOutput(outputs[script.CollapsedCallStacksScriptName].Stdout) if len(sections) == 0 { slog.Warn("no sections in collapsed call stack output") diff --git a/cmd/telemetry/telemetry_tables.go b/cmd/telemetry/telemetry_tables.go index 3656a36c..9076bc78 100644 --- a/cmd/telemetry/telemetry_tables.go +++ b/cmd/telemetry/telemetry_tables.go @@ -609,9 +609,13 @@ func instructionTelemetryTableValues(outputs map[string]script.ScriptOutput) []t // first two lines are not part of the CSV output, they are the start time and interval var startTime time.Time var interval int + if len(outputs[script.InstructionTelemetryScriptName].Stdout) == 0 { + slog.Warn("instruction mix output is empty") + return []table.Field{} + } lines := strings.Split(outputs[script.InstructionTelemetryScriptName].Stdout, "\n") if len(lines) < 4 { - slog.Warn("no data found in instruction mix output") + slog.Warn("no data lines found in instruction mix output") return []table.Field{} } // TIME diff --git a/internal/common/turbostat.go b/internal/common/turbostat.go index 04b2be90..515a4fb0 100644 --- a/internal/common/turbostat.go +++ b/internal/common/turbostat.go @@ -91,19 +91,18 @@ func parseTurbostatOutput(output string) ([]map[string]string, error) { // The "platform" rows are those where Package, Die, Core, and CPU are all "-". // The first column is the sample time, and the rest are the values for the specified fields. func TurbostatPlatformRows(turboStatScriptOutput string, fieldNames []string) ([][]string, error) { + if turboStatScriptOutput == "" { + return nil, fmt.Errorf("turbostat output is empty") + } if len(fieldNames) == 0 { - err := fmt.Errorf("no field names provided") - slog.Error(err.Error()) - return nil, err + return nil, fmt.Errorf("no field names provided") } rows, err := parseTurbostatOutput(turboStatScriptOutput) if err != nil { - err := fmt.Errorf("unable to parse turbostat output: %w", err) - return nil, err + return nil, fmt.Errorf("unable to parse turbostat output: %w", err) } if len(rows) == 0 { - slog.Warn("no platform rows found in turbostat output") - return nil, nil + return nil, fmt.Errorf("no platform rows found in turbostat output") } // filter the rows to the summary rows only var fieldValues [][]string @@ -145,18 +144,18 @@ func isPlatformRow(row map[string]string) bool { // for each package, for the specified field names. // The first column is the sample time, and the rest are the values for the specified fields. func TurbostatPackageRows(turboStatScriptOutput string, fieldNames []string) ([][][]string, error) { + if turboStatScriptOutput == "" { + return nil, fmt.Errorf("turbostat output is empty") + } if len(fieldNames) == 0 { - err := fmt.Errorf("no field names provided") - return nil, err + return nil, fmt.Errorf("no field names provided") } rows, err := parseTurbostatOutput(turboStatScriptOutput) if err != nil { - err := fmt.Errorf("unable to parse turbostat output: %w", err) - return nil, err + return nil, fmt.Errorf("unable to parse turbostat output: %w", err) } if len(rows) == 0 { - slog.Warn("no package rows found in turbostat output") - return nil, nil + return nil, fmt.Errorf("no package rows found in turbostat output") } var packageRows [][][]string for _, row := range rows { @@ -195,8 +194,7 @@ func TurbostatPackageRows(turboStatScriptOutput string, fieldNames []string) ([] } } if len(packageRows) == 0 { - err := fmt.Errorf("no data found in turbostat output for fields: %s", fieldNames) - return nil, err + return nil, fmt.Errorf("no data found in turbostat output for fields: %s", fieldNames) } return packageRows, nil } diff --git a/internal/script/script.go b/internal/script/script.go index 39767900..237fe482 100644 --- a/internal/script/script.go +++ b/internal/script/script.go @@ -53,7 +53,7 @@ func RunScripts(myTarget target.Target, scripts []ScriptDefinition, ignoreScript var concurrentScripts []ScriptDefinition for _, script := range scripts { if script.Superuser && !canElevate { - slog.Debug("skipping script because it requires superuser privileges and the user cannot elevate privileges on target", slog.String("script", script.Name)) + slog.Warn("skipping script because it requires superuser privileges and the user cannot elevate privileges on target", slog.String("script", script.Name)) continue } if script.Sequential { @@ -62,14 +62,16 @@ func RunScripts(myTarget target.Target, scripts []ScriptDefinition, ignoreScript concurrentScripts = append(concurrentScripts, script) } } + if len(sequentialScripts) == 0 && len(concurrentScripts) == 0 { + return nil, fmt.Errorf("no scripts to run on target") + } // prepare target to run scripts by copying scripts and dependencies to target and installing LKMs if statusUpdate != nil { _ = statusUpdate(myTarget.GetName(), "preparing to collect data") } installedLkms, err := prepareTargetToRunScripts(myTarget, append(sequentialScripts, concurrentScripts...), localTempDir, false) if err != nil { - err = fmt.Errorf("error while preparing target to run scripts: %v", err) - return nil, err + return nil, fmt.Errorf("error while preparing target to run scripts: %v", err) } if len(installedLkms) > 0 { defer func() { From 99345ac20a911068ffa7795b69aeef9ad9f148e0 Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Thu, 25 Dec 2025 09:08:25 -0800 Subject: [PATCH 3/4] update turbostat tests to expect errors for empty output scenarios Signed-off-by: Harper, Jason M --- internal/common/turbostat_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/internal/common/turbostat_test.go b/internal/common/turbostat_test.go index eb12ebca..540097f1 100644 --- a/internal/common/turbostat_test.go +++ b/internal/common/turbostat_test.go @@ -98,7 +98,7 @@ func TestTurbostatPlatformRows(t *testing.T) { fieldNames: []string{"Avg_MHz", "Busy%"}, wantFirst: nil, wantLen: 0, - expectErr: false, + expectErr: true, }, { name: "No output", @@ -106,7 +106,7 @@ func TestTurbostatPlatformRows(t *testing.T) { fieldNames: []string{"Avg_MHz", "Busy%"}, wantFirst: nil, wantLen: 0, - expectErr: false, + expectErr: true, }, { name: "Only time and interval, no turbostat data", @@ -114,7 +114,7 @@ func TestTurbostatPlatformRows(t *testing.T) { fieldNames: []string{"Avg_MHz", "Busy%"}, wantFirst: nil, wantLen: 0, - expectErr: false, + expectErr: true, }, } @@ -547,7 +547,7 @@ X 0 0 1000 10 turbostatOutput: "", fieldNames: []string{"Avg_MHz"}, want: nil, - wantErr: false, + wantErr: true, }, { name: "Only headers, no data", @@ -558,7 +558,7 @@ Package Core CPU Avg_MHz Busy% `, fieldNames: []string{"Avg_MHz"}, want: nil, - wantErr: false, + wantErr: true, }, } From c8dac446ad068dc5ec4c31709933c92e5e75745d Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Thu, 25 Dec 2025 09:21:14 -0800 Subject: [PATCH 4/4] no sudo if user is root Signed-off-by: Harper, Jason M --- internal/script/script.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/internal/script/script.go b/internal/script/script.go index 237fe482..8965d317 100644 --- a/internal/script/script.go +++ b/internal/script/script.go @@ -164,14 +164,16 @@ func RunScriptStream(myTarget target.Target, script ScriptDefinition, localTempD } }() } - cmd := prepareCommand(script, myTarget.GetTempDirectory()) + cmd := prepareCommand(script, myTarget) err = myTarget.RunCommandStream(cmd, stdoutChannel, stderrChannel, exitcodeChannel, cmdChannel) errorChannel <- err } -func prepareCommand(script ScriptDefinition, targetTempDirectory string) (cmd *exec.Cmd) { - scriptPath := path.Join(targetTempDirectory, scriptNameToFilename(script.Name)) - if script.Superuser { +// prepareCommand prepares the command to run the specified script on the target. +// If the script requires superuser privileges and the target's user is not already superuser, run with sudo. +func prepareCommand(script ScriptDefinition, myTarget target.Target) (cmd *exec.Cmd) { + scriptPath := path.Join(myTarget.GetTempDirectory(), scriptNameToFilename(script.Name)) + if script.Superuser && !myTarget.IsSuperUser() { cmd = exec.Command("sudo", "bash", scriptPath) // #nosec G204 } else { cmd = exec.Command("bash", scriptPath) // #nosec G204