Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions src/pins/sss/clevis-decrypt-sss.c
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ main(int argc, char *argv[])
chldrn.next = pin;

pin->file = call(args, json_string_value(val),
json_string_length(val), &pin->pid);
json_string_length(val), &pin->pid, true);
if (!pin->file)
goto egress;

Expand Down Expand Up @@ -314,7 +314,13 @@ main(int argc, char *argv[])
fclose(pin->file);

if (pin->pid > 0) {
kill(pin->pid, SIGTERM);
/*
* Kill entire process group (negative PID). This ensures
* grandchildren (e.g., curl spawned by clevis-decrypt-tang)
* are terminated. Fall back to direct kill if group doesn't exist.
*/
if (kill(-pin->pid, SIGTERM) < 0)
(void) kill(pin->pid, SIGTERM);
waitpid(pin->pid, NULL, 0);
}

Expand Down
2 changes: 1 addition & 1 deletion src/pins/sss/clevis-encrypt-sss.c
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ encrypt_frag(json_t *sss, const char *pin, const json_t *cfg, int assume_yes)
if (!pnt)
return NULL;

pipe = call(args, pnt, pntl, &pid);
pipe = call(args, pnt, pntl, &pid, false);
OPENSSL_cleanse(pnt, pntl);
free(pnt);
if (!pipe)
Expand Down
3 changes: 3 additions & 0 deletions src/pins/sss/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ if jansson.found() and libcrypto.found()
env.prepend('PATH',
join_paths(meson.source_root(), 'src'),
join_paths(meson.source_root(), 'src', 'pins', 'tang'),
join_paths(meson.build_root(), 'src', 'pins', 'tang', 'tests'),
meson.current_build_dir(),
'/usr/libexec',
libexecdir,
Expand All @@ -33,6 +34,8 @@ if jansson.found() and libcrypto.found()

test('pin-sss', find_program(join_paths(src, 'pin-sss')), env: env)
test('pin-null', find_program(join_paths(src, 'pin-null')), env: env)

subdir('tests')
else
warning('Will not install sss pin due to missing dependencies!')
endif
32 changes: 30 additions & 2 deletions src/pins/sss/sss.c
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,8 @@ enum {
};

FILE *
call(char *const argv[], const void *buf, size_t len, pid_t *pid)
call(char *const argv[], const void *buf, size_t len, pid_t *pid,
bool use_pgrp)
{
int dump[2] = { -1, -1 };
int load[2] = { -1, -1 };
Expand All @@ -360,6 +361,15 @@ call(char *const argv[], const void *buf, size_t len, pid_t *pid)
goto error;

if (*pid == 0) {
/*
* Create a new process group so we can kill all descendants.
* Only do this when use_pgrp is set (i.e., for decryption where
* we may need to kill leftover children). For encryption, this
* can cause issues with terminal job control in child scripts.
*/
if (use_pgrp && setpgid(0, 0) < 0)
exit(EXIT_FAILURE);

if (dup2(dump[PIPE_RD], STDIN_FILENO) < 0 ||
dup2(load[PIPE_WR], STDOUT_FILENO) < 0)
exit(EXIT_FAILURE);
Expand All @@ -368,6 +378,13 @@ call(char *const argv[], const void *buf, size_t len, pid_t *pid)
exit(EXIT_FAILURE);
}

if (use_pgrp) {
/* Also set from parent to eliminate race with child's setpgid().
* This may fail if the child has already called setpgid(0,0) or
* exec'd, which is fine - the important thing is one succeeds. */
(void) setpgid(*pid, *pid);
}

for (const uint8_t *tmp = buf; len > 0; tmp += wr, len -= wr) {
wr = write(dump[PIPE_WR], tmp, len);
if (wr < 0)
Expand All @@ -390,7 +407,18 @@ call(char *const argv[], const void *buf, size_t len, pid_t *pid)
close(load[PIPE_WR]);

if (*pid > 0) {
kill(*pid, SIGTERM);
if (use_pgrp) {
/*
* Kill entire process group (negative PID). This ensures
* grandchildren (e.g., curl spawned by clevis-decrypt-tang)
* are terminated. Fall back to direct kill if group doesn't
* exist.
*/
if (kill(-*pid, SIGTERM) < 0)
(void) kill(*pid, SIGTERM);
} else {
(void) kill(*pid, SIGTERM);
}
waitpid(*pid, NULL, 0);
*pid = 0;
}
Expand Down
4 changes: 3 additions & 1 deletion src/pins/sss/sss.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#pragma once
#include <jansson.h>
#include <stdbool.h>
#include <stdint.h>
#include <sys/types.h>

Expand All @@ -32,4 +33,5 @@ json_t *
sss_recover(const json_t *p, size_t npnts, const uint8_t *pnts[]);

FILE *
call(char *const argv[], const void *buf, size_t len, pid_t *pid);
call(char *const argv[], const void *buf, size_t len, pid_t *pid,
bool use_pgrp);
8 changes: 8 additions & 0 deletions src/pins/sss/tests/meson.build
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
lsof = find_program('lsof', required: false)
pgrep = find_program('pgrep', required: false)

if lsof.found() and pgrep.found()
test('sss-child-process-cleanup', find_program('sss-child-process-cleanup'), env: env, timeout: 30)
else
warning('Will not run sss-child-process-cleanup test due to missing dependencies (lsof, pgrep)!')
endif
177 changes: 177 additions & 0 deletions src/pins/sss/tests/sss-child-process-cleanup
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
#!/bin/bash -e
# vim: set tabstop=8 shiftwidth=4 softtabstop=4 expandtab smarttab colorcolumn=80:
#
# Copyright (c) 2025 Red Hat, Inc.
# Author: Sergio Correia <scorreia@redhat.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Test to verify that clevis-decrypt-sss properly kills all child processes
# and their descendants (grandchildren) after successful decryption.
#
# This test addresses issue #460:
# https://github.com/latchset/clevis/issues/460
#
# The issue: When using SSS with multiple pins (e.g., tang, fido2) and t < n,
# after successful decryption with t pins, the remaining child processes
# (and their grandchildren like curl or fido2-assert) were not being killed.
#
# The fix uses process groups: each child becomes a process group leader
# via setpgid(0,0), and cleanup uses kill(-pid, SIGTERM) to kill the
# entire process group.
#
# Test strategy:
# 1. Start a real tang server that responds quickly
# 2. Start a "hanging" server that accepts connections but never responds
# 3. Encrypt with SSS t=1, two tang pins (real + hanging)
# 4. Decrypt - the real tang succeeds, the hanging one's child should be killed
# 5. Verify no orphaned curl processes remain after decryption

. tang-common-test-functions

TMP="$(mktemp -d)"

# Force exit - kills background jobs first, then exits
force_exit() {
local code="$1"
trap - EXIT # Disable trap to prevent recursion

# Kill all child processes to prevent bash from waiting
# Use both pkill and direct kill to be thorough
pkill -9 -P $$ 2>/dev/null || true
jobs -p 2>/dev/null | xargs -r kill -9 2>/dev/null || true

if [ "${code}" -eq 0 ]; then
# Success - normal exit after killing children
exit 0
else
# Failure - kill entire process group for immediate exit
kill -9 -$$ 2>/dev/null || kill -9 $$ 2>/dev/null || exit "${code}"
fi
}

# No cleanup trap - we'll force kill on exit

# Start a real tang server
tang_run "${TMP}" sig exc
port_real=$(tang_get_port "${TMP}")
url_real="http://localhost:${port_real}"

# Get the advertisement from the real tang server
adv="${TMP}/adv.jws"
tang_get_adv "${port_real}" "${adv}"

# Start a "hanging" server - accepts TCP connections but never sends HTTP response.
# This simulates a tang server that is unreachable or very slow.
# We use socat to listen and spawn a process that just sleeps forever.
# Use disown to prevent bash from waiting for this background job on exit.
"${SOCAT}" TCP4-LISTEN:0,fork EXEC:"sleep 3600" &
hang_pid=$!
disown "${hang_pid}"
echo "${hang_pid}" > "${TMP}/hang.pid"

# Wait for the hanging server to start listening
sleep 0.5
port_hang=$(lsof -nP -a -p "${hang_pid}" -iTCP -sTCP:LISTEN -Fn 2>/dev/null \
| grep '^n.*:' | head -1 | sed 's/.*://')

if [ -z "${port_hang}" ]; then
echo "Failed to start hanging server" >&2
force_exit 1
fi

url_hang="http://localhost:${port_hang}"

echo "Real tang server on port ${port_real}"
echo "Hanging server on port ${port_hang}"

# Encrypt with SSS: t=1, two tang pins
# Both pins use the same advertisement (from the real server) so encryption works.
# During decryption, one will contact the real server (succeeds) and one will
# contact the hanging server (curl hangs waiting for response).
cfg=$(cat <<EOF
{
"t": 1,
"pins": {
"tang": [
{"url": "${url_real}", "adv": "${adv}"},
{"url": "${url_hang}", "adv": "${adv}"}
]
}
}
EOF
)

echo "Encrypting test data..."
enc="$(echo -n "test-cleanup-data" | clevis encrypt sss "${cfg}" -y)"
echo "Encryption successful."

# Record curl processes before decryption
curl_before=$(pgrep -x curl 2>/dev/null | wc -l || echo 0)

echo "Starting decryption... ($(date +%T))"

# Decrypt with a timeout. The real tang should respond quickly, so decryption
# should complete in a few seconds. The hanging server's child process
# (and its curl grandchild) should be killed after decryption succeeds.
#
# Without the fix: curl connecting to the hanging server would remain running
# after clevis-decrypt-sss exits (orphaned to init).
#
# With the fix: the entire process group is killed, including curl.

# Decrypt the data. Without the fix, decryption succeeds but leaves orphaned
# grandchild processes (curl) which we detect below.
dec=""
if ! dec="$(echo -n "${enc}" | clevis decrypt)"; then
echo "FAIL: Decryption failed" >&2
force_exit 1
fi

if [ "${dec}" != "test-cleanup-data" ]; then
echo "FAIL: Decrypted data mismatch: got '${dec}'" >&2
force_exit 1
fi

echo "Decryption successful: '${dec}' ($(date +%T))"

# Give a moment for process cleanup
sleep 0.5

# Check for orphaned curl processes connecting to the hanging server.
# Use lsof to find any process with a connection to port_hang.
# -n: no hostname resolution (faster)
# -P: no port name resolution (faster)
# -t: terse output (just PIDs)
orphaned_connections=$(lsof -nP -i "TCP:${port_hang}" -t 2>/dev/null | grep -v "^${hang_pid}$" || true)

if [ -n "${orphaned_connections}" ]; then
echo "FAIL: Found orphaned processes connecting to hanging server:" >&2
echo "${orphaned_connections}" >&2
force_exit 1
fi

# Also check that we don't have more curl processes than before
# (accounting for possible legitimate curl usage)
curl_after=$(pgrep -x curl 2>/dev/null | wc -l || echo 0)

# We expect curl_after <= curl_before (the curl that was connecting to hang
# server should have been killed)
if [ "${curl_after}" -gt "${curl_before}" ]; then
echo "WARNING: More curl processes after decryption (${curl_after}) than before (${curl_before})" >&2
echo "This might indicate orphaned processes, but checking connections is more reliable." >&2
fi

echo "SUCCESS: No orphaned processes detected after SSS decryption cleanup"
force_exit 0