@@ -6,10 +6,12 @@ package common
66// SPDX-License-Identifier: BSD-3-Clause
77
88import (
9+ "context"
910 "errors"
1011 "fmt"
1112 "log/slog"
1213 "os"
14+ "os/exec"
1315 "os/signal"
1416 "path/filepath"
1517 "perfspect/internal/progress"
@@ -20,6 +22,7 @@ import (
2022 "perfspect/internal/util"
2123 "strings"
2224 "syscall"
25+ "time"
2326
2427 "slices"
2528
@@ -118,22 +121,6 @@ func (rc *ReportingCommand) Run() error {
118121 localTempDir := appContext .LocalTempDir
119122 outputDir := appContext .OutputDir
120123 logFilePath := appContext .LogFilePath
121- // handle signals
122- // child processes will exit when the signals are received which will
123- // allow this app to exit normally
124- sigChannel := make (chan os.Signal , 1 )
125- signal .Notify (sigChannel , syscall .SIGINT , syscall .SIGTERM )
126- go func () {
127- sig := <- sigChannel
128- slog .Info ("received signal" , slog .String ("signal" , sig .String ()))
129- // when perfspect receives ctrl-c while in the shell, the shell makes sure to propogate the
130- // signal to all our children. But when perfspect is run in the background or disowned and
131- // then receives SIGINT, e.g., from a script, we need to send the signal to our children
132- err := util .SignalChildren (syscall .SIGINT )
133- if err != nil {
134- slog .Error ("error sending signal to children" , slog .String ("error" , err .Error ()))
135- }
136- }()
137124 // create output directory
138125 err := util .CreateDirectoryIfNotExists (outputDir , 0755 ) // #nosec G301
139126 if err != nil {
@@ -144,8 +131,8 @@ func (rc *ReportingCommand) Run() error {
144131 return err
145132 }
146133
147- var orderedTargetScriptOutputs []TargetScriptOutputs
148134 var myTargets []target.Target
135+ var orderedTargetScriptOutputs []TargetScriptOutputs
149136 if FlagInput != "" {
150137 var err error
151138 orderedTargetScriptOutputs , err = outputsFromInput (rc .Tables , rc .SummaryTableName )
@@ -203,6 +190,8 @@ func (rc *ReportingCommand) Run() error {
203190 for i := len (indicesToRemove ) - 1 ; i >= 0 ; i -- {
204191 myTargets = slices .Delete (myTargets , indicesToRemove [i ], indicesToRemove [i ]+ 1 )
205192 }
193+ // set up signal handler to help with cleaning up child processes on ctrl-c/SIGINT or SIGTERM
194+ configureSignalHandler (myTargets , multiSpinner .Status )
206195 // collect data from targets
207196 orderedTargetScriptOutputs , err = outputsFromTargets (rc .Cmd , myTargets , rc .Tables , rc .ScriptParams , multiSpinner .Status , localTempDir )
208197 if err != nil {
@@ -299,6 +288,94 @@ func (rc *ReportingCommand) Run() error {
299288 return nil
300289}
301290
291+ // configureSignalHandler sets up a signal handler to catch SIGINT and SIGTERM
292+ //
293+ // When perfspect receives ctrl-c while in the shell, the shell propagates the
294+ // signal to all our children. But when perfspect is run in the background or disowned and
295+ // then receives SIGINT, e.g., from a script, we need to send the signal to our children
296+ //
297+ // Also, when running scripts in parallel using the parallel_master.sh script, we need to
298+ // send the signal to the parallel_master.sh script on each target so that it can clean up
299+ // its child processes. This is because the parallel_master.sh script is run in its own process group
300+ // and does not receive the signal when perfspect receives it.
301+ //
302+ // Parameters:
303+ // - myTargets: The list of targets to send the signal to.
304+ // - statusFunc: A function to update the status of the progress indicator.
305+ func configureSignalHandler (myTargets []target.Target , statusFunc progress.MultiSpinnerUpdateFunc ) {
306+ sigChannel := make (chan os.Signal , 1 )
307+ signal .Notify (sigChannel , syscall .SIGINT , syscall .SIGTERM )
308+ go func () {
309+ sig := <- sigChannel
310+ slog .Debug ("received signal" , slog .String ("signal" , sig .String ()))
311+ // Scripts that are run in parallel using the parallel_master.sh script and a few other sequential scripts need to be handled specially
312+ // because they are run in their own process group, we need to send the signal directly to the PID of the script.
313+ // For every target, look for the primary_collection_script PID file and send SIGINT to it.
314+ for _ , t := range myTargets {
315+ if statusFunc != nil {
316+ _ = statusFunc (t .GetName (), "Signal received, cleaning up..." )
317+ }
318+ pidFilePath := filepath .Join (t .GetTempDirectory (), "primary_collection_script.pid" )
319+ stdout , _ , exitcode , err := t .RunCommandEx (exec .Command ("cat" , pidFilePath ), 5 , false , true ) // #nosec G204
320+ if err != nil {
321+ slog .Error ("error retrieving target primary_collection_script PID" , slog .String ("target" , t .GetName ()), slog .String ("error" , err .Error ()))
322+ }
323+ if exitcode == 0 {
324+ pidStr := strings .TrimSpace (stdout )
325+ _ , _ , _ , err := t .RunCommandEx (exec .Command ("sudo" , "kill" , "-SIGINT" , pidStr ), 5 , false , true ) // #nosec G204
326+ if err != nil {
327+ slog .Error ("error sending signal to target primary_collection_script" , slog .String ("target" , t .GetName ()), slog .String ("error" , err .Error ()))
328+ }
329+ }
330+ }
331+ // now wait until all primary collection scripts have exited
332+ slog .Debug ("waiting for primary_collection_script scripts to exit" )
333+ for _ , t := range myTargets {
334+ // create a per-target timeout context
335+ targetTimeout := 10 * time .Second
336+ ctx , cancel := context .WithTimeout (context .Background (), targetTimeout )
337+ timedOut := false
338+ pidFilePath := filepath .Join (t .GetTempDirectory (), "primary_collection_script.pid" )
339+ for {
340+ // check for timeout
341+ select {
342+ case <- ctx .Done ():
343+ if statusFunc != nil {
344+ _ = statusFunc (t .GetName (), "cleanup timeout exceeded" )
345+ }
346+ slog .Warn ("signal handler cleanup timeout exceeded for target" , slog .String ("target" , t .GetName ()))
347+ timedOut = true
348+ default :
349+ }
350+ if timedOut {
351+ break
352+ }
353+ // read the pid file
354+ stdout , _ , exitcode , err := t .RunCommandEx (exec .Command ("cat" , pidFilePath ), 5 , false , true ) // #nosec G204
355+ if err != nil || exitcode != 0 {
356+ // pid file doesn't exist
357+ break
358+ }
359+ pidStr := strings .TrimSpace (stdout )
360+ // determine if the process still exists
361+ _ , _ , exitcode , err = t .RunCommandEx (exec .Command ("ps" , "-p" , pidStr ), 5 , false , true ) // #nosec G204
362+ if err != nil || exitcode != 0 {
363+ break // process no longer exists, script has exited
364+ }
365+ // sleep for a short time before checking again
366+ time .Sleep (500 * time .Millisecond )
367+ }
368+ cancel ()
369+ }
370+
371+ // send SIGINT to perfspect's children
372+ err := util .SignalChildren (syscall .SIGINT )
373+ if err != nil {
374+ slog .Error ("error sending signal to children" , slog .String ("error" , err .Error ()))
375+ }
376+ }()
377+ }
378+
302379// DefaultInsightsFunc returns the insights table values from the table values
303380func DefaultInsightsFunc (allTableValues []table.TableValues , scriptOutputs map [string ]script.ScriptOutput ) table.TableValues {
304381 insightsTableValues := table.TableValues {
@@ -554,7 +631,8 @@ func outputsFromTargets(cmd *cobra.Command, myTargets []target.Target, tables []
554631 scriptsToRunOnTarget = append (scriptsToRunOnTarget , script )
555632 }
556633 // run the selected scripts on the target
557- go collectOnTarget (target , scriptsToRunOnTarget , localTempDir , scriptParams ["Duration" ], cmd .Name () == "telemetry" , channelTargetScriptOutputs , channelError , statusUpdate )
634+ ctrlCToStop := cmd .Name () == "telemetry" || cmd .Name () == "flamegraph"
635+ go collectOnTarget (target , scriptsToRunOnTarget , localTempDir , scriptParams ["Duration" ], ctrlCToStop , channelTargetScriptOutputs , channelError , statusUpdate )
558636 }
559637 // wait for scripts to run on all targets
560638 var allTargetScriptOutputs []TargetScriptOutputs
@@ -631,10 +709,10 @@ func elevatedPrivilegesRequired(tables []table.TableDefinition) bool {
631709}
632710
633711// collectOnTarget runs the scripts on the target and sends the results to the appropriate channels
634- func collectOnTarget (myTarget target.Target , scriptsToRun []script.ScriptDefinition , localTempDir string , duration string , isTelemetry bool , channelTargetScriptOutputs chan TargetScriptOutputs , channelError chan error , statusUpdate progress.MultiSpinnerUpdateFunc ) {
712+ func collectOnTarget (myTarget target.Target , scriptsToRun []script.ScriptDefinition , localTempDir string , duration string , ctrlCToStop bool , channelTargetScriptOutputs chan TargetScriptOutputs , channelError chan error , statusUpdate progress.MultiSpinnerUpdateFunc ) {
635713 // run the scripts on the target
636714 status := "collecting data"
637- if isTelemetry && duration == "0" { // telemetry is the only command that uses this common code that can run indefinitely
715+ if ctrlCToStop && duration == "0" {
638716 status += ", press Ctrl+c to stop"
639717 } else if duration != "0" && duration != "" {
640718 status += fmt .Sprintf (" for %s seconds" , duration )
0 commit comments