Skip to content

Commit 0c52eb1

Browse files
committed
print process group extra info
Signed-off-by: reggie-k <regina.voloshin@codefresh.io>
1 parent 400bfbe commit 0c52eb1

File tree

4 files changed

+338
-3
lines changed

4 files changed

+338
-3
lines changed

util/exec/exec.go

Lines changed: 265 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@ import (
66
"fmt"
77
"os"
88
"os/exec"
9+
"path/filepath"
910
"strconv"
1011
"strings"
12+
"sync"
1113
"syscall"
1214
"time"
1315
"unicode"
@@ -183,17 +185,97 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) {
183185
args := strings.Join(cmd.Args, " ")
184186
logCtx.WithFields(logrus.Fields{"dir": cmd.Dir}).Info(redactor(args))
185187

188+
// Capture process group information while processes are running
189+
var capturedProcessInfo []string
190+
var capturedProcessMutex sync.Mutex
191+
192+
// Helper: debug whether HEAD.lock exists under the current working directory
193+
logHeadLockStatus := func(where string) {
194+
if cmd.Dir == "" {
195+
return
196+
}
197+
lockPath := filepath.Join(cmd.Dir, ".git", "HEAD.lock")
198+
fileInfo, statErr := os.Stat(lockPath)
199+
exists := statErr == nil
200+
fields := logrus.Fields{
201+
"headLockPath": lockPath,
202+
"headLockExists": exists,
203+
"where": where,
204+
}
205+
if exists {
206+
fields["headLockSize"] = fileInfo.Size()
207+
fields["headLockMode"] = fileInfo.Mode().String()
208+
fields["headLockModTime"] = fileInfo.ModTime()
209+
fields["headLockIsDir"] = fileInfo.IsDir()
210+
}
211+
212+
// Add process group information if the process has started
213+
if cmd.Process != nil {
214+
pgid := cmd.Process.Pid // Process group ID is the same as the main process PID when Setpgid=true
215+
fields["processGroupId"] = pgid
216+
217+
// Try to get current process group info
218+
currentProcesses := getProcessGroupInfo(pgid)
219+
if len(currentProcesses) > 0 && !strings.Contains(currentProcesses[0], "terminated or no processes found") {
220+
fields["processGroupProcesses"] = currentProcesses
221+
// Update captured info if we got fresh data
222+
capturedProcessMutex.Lock()
223+
capturedProcessInfo = currentProcesses
224+
capturedProcessMutex.Unlock()
225+
226+
// Check which processes might be related to the lock file
227+
if exists {
228+
lockDir := filepath.Dir(lockPath)
229+
suspiciousProcesses := findProcessesInDirectory(currentProcesses, lockDir)
230+
if len(suspiciousProcesses) > 0 {
231+
fields["processesInLockDirectory"] = suspiciousProcesses
232+
}
233+
}
234+
} else {
235+
capturedProcessMutex.Lock()
236+
if len(capturedProcessInfo) > 0 {
237+
// Use previously captured info if current query failed
238+
fields["processGroupProcesses"] = capturedProcessInfo
239+
fields["processGroupProcessesNote"] = "captured during execution (process group has since terminated)"
240+
241+
// Check captured processes for lock file relation
242+
if exists {
243+
lockDir := filepath.Dir(lockPath)
244+
suspiciousProcesses := findProcessesInDirectory(capturedProcessInfo, lockDir)
245+
if len(suspiciousProcesses) > 0 {
246+
fields["processesInLockDirectory"] = suspiciousProcesses
247+
fields["processesInLockDirectoryNote"] = "from captured process info"
248+
}
249+
}
250+
} else {
251+
fields["processGroupProcesses"] = currentProcesses
252+
}
253+
capturedProcessMutex.Unlock()
254+
}
255+
}
256+
257+
logCtx.WithFields(fields).Info("HEAD.lock status")
258+
}
259+
186260
var stdout bytes.Buffer
187261
var stderr bytes.Buffer
188262
cmd.Stdout = &stdout
189263
cmd.Stderr = &stderr
190264

265+
// Configure the child to run in its own process group so we can signal the whole group on timeout/cancel.
266+
// On Unix this sets Setpgid; on Windows this is a no-op.
267+
if cmd.SysProcAttr == nil {
268+
cmd.SysProcAttr = newSysProcAttr(true)
269+
}
270+
191271
start := time.Now()
192272
err = cmd.Start()
193273
if err != nil {
194274
return "", err
195275
}
196276

277+
logHeadLockStatus("start-exec")
278+
197279
done := make(chan error)
198280
go func() { done <- cmd.Wait() }()
199281

@@ -228,15 +310,47 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) {
228310
select {
229311
// noinspection ALL
230312
case <-timoutCh:
313+
// Capture process group info RIGHT BEFORE sending timeout signal
314+
if cmd.Process != nil {
315+
pgid := cmd.Process.Pid
316+
preTerminationProcesses := getProcessGroupInfo(pgid)
317+
if len(preTerminationProcesses) > 0 && !strings.Contains(preTerminationProcesses[0], "terminated or no processes found") {
318+
capturedProcessMutex.Lock()
319+
capturedProcessInfo = preTerminationProcesses
320+
capturedProcessMutex.Unlock()
321+
logCtx.WithFields(logrus.Fields{
322+
"processGroupId": pgid,
323+
"processGroupProcesses": preTerminationProcesses,
324+
"capturePoint": "pre-timeout-signal",
325+
}).Info("Process group info captured before timeout signal")
326+
}
327+
}
231328
// send timeout signal
232-
_ = cmd.Process.Signal(timeoutBehavior.Signal)
329+
// signal the process group (negative PID) so children are terminated as well
330+
if cmd.Process != nil {
331+
_ = sysCallSignal(-cmd.Process.Pid, timeoutBehavior.Signal)
332+
}
233333
// wait on timeout signal and fallback to fatal timeout signal
234334
if timeoutBehavior.ShouldWait {
235335
select {
236336
case <-done:
237337
case <-fatalTimeoutCh:
238-
// upgrades to SIGKILL if cmd does not respect SIGTERM
239-
_ = cmd.Process.Signal(fatalTimeoutBehaviour)
338+
// Capture process group info RIGHT BEFORE sending fatal signal
339+
if cmd.Process != nil {
340+
pgid := cmd.Process.Pid
341+
preFatalProcesses := getProcessGroupInfo(pgid)
342+
if len(preFatalProcesses) > 0 && !strings.Contains(preFatalProcesses[0], "terminated or no processes found") {
343+
logCtx.WithFields(logrus.Fields{
344+
"processGroupId": pgid,
345+
"processGroupProcesses": preFatalProcesses,
346+
"capturePoint": "pre-fatal-signal",
347+
}).Info("Process group info captured before fatal signal")
348+
}
349+
}
350+
// upgrades to fatal signal (default SIGKILL) if cmd does not respect the initial signal
351+
if cmd.Process != nil {
352+
_ = sysCallSignal(-cmd.Process.Pid, fatalTimeoutBehaviour)
353+
}
240354
// now original cmd should exit immediately after SIGKILL
241355
<-done
242356
// return error with a marker indicating that cmd exited only after fatal SIGKILL
@@ -245,6 +359,7 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) {
245359
output += stderr.String()
246360
}
247361
logCtx.WithFields(logrus.Fields{"duration": time.Since(start)}).Debug(redactor(output))
362+
logHeadLockStatus("fatal-timeout")
248363
err = newCmdError(redactor(args), fmt.Errorf("fatal timeout after %v", timeout+fatalTimeout), "")
249364
logCtx.Error(err.Error())
250365
return strings.TrimSuffix(output, "\n"), err
@@ -256,10 +371,23 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) {
256371
output += stderr.String()
257372
}
258373
logCtx.WithFields(logrus.Fields{"duration": time.Since(start)}).Debug(redactor(output))
374+
logHeadLockStatus("timeout")
259375
err = newCmdError(redactor(args), fmt.Errorf("timeout after %v", timeout), "")
260376
logCtx.Error(err.Error())
261377
return strings.TrimSuffix(output, "\n"), err
262378
case err := <-done:
379+
// Capture process group info right when command finishes (might catch lingering processes)
380+
if cmd.Process != nil {
381+
pgid := cmd.Process.Pid
382+
postExitProcesses := getProcessGroupInfo(pgid)
383+
if len(postExitProcesses) > 0 && !strings.Contains(postExitProcesses[0], "terminated or no processes found") {
384+
logCtx.WithFields(logrus.Fields{
385+
"processGroupId": pgid,
386+
"processGroupProcesses": postExitProcesses,
387+
"capturePoint": "post-command-exit",
388+
}).Info("Process group info captured right after command exit")
389+
}
390+
}
263391
if err != nil {
264392
output := stdout.String()
265393
if opts.CaptureStderr {
@@ -270,6 +398,7 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) {
270398
if !opts.SkipErrorLogging {
271399
logCtx.Error(err.Error())
272400
}
401+
logHeadLockStatus("done-error")
273402
return strings.TrimSuffix(output, "\n"), err
274403
}
275404
}
@@ -278,10 +407,143 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) {
278407
output += stderr.String()
279408
}
280409
logCtx.WithFields(logrus.Fields{"duration": time.Since(start)}).Debug(redactor(output))
410+
logHeadLockStatus("done-success")
281411

282412
return strings.TrimSuffix(output, "\n"), nil
283413
}
284414

285415
func RunCommand(name string, opts CmdOpts, arg ...string) (string, error) {
286416
return RunCommandExt(exec.Command(name, arg...), opts)
287417
}
418+
419+
// getProcessGroupInfo returns information about processes in the given process group
420+
func getProcessGroupInfo(pgid int) []string {
421+
if pgid <= 0 {
422+
return nil
423+
}
424+
425+
// Use ps to get process group information with more details
426+
psCmd := exec.Command("ps", "-o", "pid,ppid,pgid,etime,comm,args", "-g", strconv.Itoa(pgid))
427+
output, err := psCmd.Output()
428+
429+
// ps returns exit status 1 when no processes are found in the process group
430+
// This is normal behavior, not an error condition
431+
if err != nil {
432+
if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
433+
// Exit code 1 typically means no processes found, check if we got header output
434+
lines := strings.Split(strings.TrimSpace(string(output)), "\n")
435+
if len(lines) <= 1 {
436+
return []string{"Process group terminated or no processes found"}
437+
}
438+
// Continue processing the output even with exit code 1
439+
} else {
440+
// Other types of errors (command not found, permission denied, etc.)
441+
return []string{fmt.Sprintf("Failed to get process group info: %v", err)}
442+
}
443+
}
444+
445+
lines := strings.Split(strings.TrimSpace(string(output)), "\n")
446+
if len(lines) <= 1 {
447+
return []string{"Process group terminated or no processes found"}
448+
}
449+
450+
// Skip header line and format the output
451+
var processes []string
452+
for i, line := range lines {
453+
if i == 0 {
454+
continue // Skip header
455+
}
456+
line = strings.TrimSpace(line)
457+
if line == "" {
458+
continue
459+
}
460+
461+
pid, ppid, pgidStr, elapsed, comm, args, ok := parsePsLineFivePlus(line)
462+
if ok {
463+
processInfo := fmt.Sprintf("PID=%s PPID=%s PGID=%s ELAPSED=%s COMM=%s ARGS=%s",
464+
pid, ppid, pgidStr, elapsed, comm, args)
465+
466+
// Add working directory information if available
467+
if workDir := getProcessWorkingDir(pid); workDir != "" {
468+
processInfo += fmt.Sprintf(" CWD=%s", workDir)
469+
}
470+
471+
processes = append(processes, processInfo)
472+
} else {
473+
processes = append(processes, fmt.Sprintf("Raw: %s", line))
474+
}
475+
}
476+
477+
if len(processes) == 0 {
478+
return []string{"Process group terminated or no processes found"}
479+
}
480+
481+
return processes
482+
}
483+
484+
// getProcessWorkingDir returns the working directory of a process
485+
func getProcessWorkingDir(pid string) string {
486+
// Try to read the working directory from /proc/PID/cwd (Linux) or use lsof
487+
if cwd, err := os.Readlink(fmt.Sprintf("/proc/%s/cwd", pid)); err == nil {
488+
return cwd
489+
}
490+
491+
// Fallback to lsof on macOS/other systems
492+
lsofCmd := exec.Command("lsof", "-p", pid, "-d", "cwd", "-Fn")
493+
if output, err := lsofCmd.Output(); err == nil {
494+
lines := strings.Split(string(output), "\n")
495+
for _, line := range lines {
496+
if strings.HasPrefix(line, "n") {
497+
return strings.TrimPrefix(line, "n")
498+
}
499+
}
500+
}
501+
502+
return ""
503+
}
504+
505+
// findProcessesInDirectory finds processes that have their working directory in or under the specified directory
506+
func findProcessesInDirectory(processes []string, targetDir string) []string {
507+
var matches []string
508+
for _, process := range processes {
509+
if strings.Contains(process, fmt.Sprintf("CWD=%s", targetDir)) ||
510+
strings.Contains(process, fmt.Sprintf("CWD=%s/", targetDir)) {
511+
matches = append(matches, process)
512+
}
513+
}
514+
return matches
515+
}
516+
517+
// parsePsLineFivePlus splits a ps output line with at least 5 whitespace-separated fields,
518+
// returning PID, PPID, PGID, ELAPSED, COMM, and the remaining ARGS (which may contain spaces).
519+
func parsePsLineFivePlus(line string) (string, string, string, string, string, string, bool) {
520+
// Extract first five fields, rest is ARGS
521+
fields := make([]string, 0, 6)
522+
start := -1
523+
inSpace := true
524+
for i, r := range line {
525+
if r == ' ' || r == '\t' {
526+
if !inSpace {
527+
fields = append(fields, line[start:i])
528+
if len(fields) == 5 {
529+
rest := strings.TrimLeft(line[i+1:], " \t")
530+
fields = append(fields, rest)
531+
break
532+
}
533+
}
534+
inSpace = true
535+
} else {
536+
if inSpace {
537+
start = i
538+
}
539+
inSpace = false
540+
}
541+
}
542+
if !inSpace && len(fields) < 5 && start >= 0 {
543+
fields = append(fields, line[start:])
544+
}
545+
if len(fields) < 6 { // need at least PID, PPID, PGID, ELAPSED, COMM, ARGS
546+
return "", "", "", "", "", "", false
547+
}
548+
return fields[0], fields[1], fields[2], fields[3], fields[4], fields[5], true
549+
}

0 commit comments

Comments
 (0)