From e25a758cd1f543193383f3230548acfbb4c12ddc Mon Sep 17 00:00:00 2001 From: Zhenya Tikhonov Date: Thu, 29 Jan 2026 21:48:52 +0400 Subject: [PATCH 1/3] fix: start monitor in `init` cgroup to avoid race condition --- run.sh | 11 +++++++---- service.yaml | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/run.sh b/run.sh index 0596903..232925f 100755 --- a/run.sh +++ b/run.sh @@ -133,10 +133,6 @@ if [[ -n "${USE_DIND_IMAGES_LIB}" && "${USE_DIND_IMAGES_LIB}" != "false" ]]; the fi echo "DOCKERD_PARAMS = ${DOCKERD_PARAMS}" -# Starting monitor -${DIR}/monitor/start.sh <&- & -MONITOR_PID=$! - ### start docker with retry DOCKERD_PID_FILE=/var/run/docker.pid DOCKERD_PID_MAXWAIT=${DOCKERD_PID_MAXWAIT:-20} @@ -200,8 +196,11 @@ do # otherwise the current group will become of type "domain threaded", # and it will not be possible to enable required controllers for DinD group. # Ref: https://github.com/moby/moby/blob/38805f20f9bcc5e87869d6c79d432b166e1c88b4/hack/dind#L28-L38 + echo "Creating init cgroup ${CURRENT_CGROUP_PATH}/init" mkdir -p ${CURRENT_CGROUP_PATH}/init + echo "Moving existing processes from ${CURRENT_CGROUP_PATH} to ${CURRENT_CGROUP_PATH}/init" xargs -rn1 < ${CURRENT_CGROUP_PATH}/cgroup.procs > ${CURRENT_CGROUP_PATH}/init/cgroup.procs || : + echo "Done moving existing processes from ${CURRENT_CGROUP_PATH} to ${CURRENT_CGROUP_PATH}/init" # Set `memory.oom.group=0` to disable killing all processes in cgroup at once on OOM. # if all processes are killed at once, the system will not be able to detect this event; @@ -246,6 +245,10 @@ do break done +# Starting monitor +${DIR}/monitor/start.sh <&- & +MONITOR_PID=$! 
+ # Starting cleaner agent if [[ -z "${DISABLE_CLEANER_AGENT}" && -z "${SIGTERM}" ]]; then ${DIR}/cleaner/cleaner-agent.sh <&- & diff --git a/service.yaml b/service.yaml index 9d83b6b..99da8d1 100644 --- a/service.yaml +++ b/service.yaml @@ -1 +1 @@ -version: 3.0.10 +version: 3.0.11 From d875e2de14bac10c22fd4507542a7abaf4590bbc Mon Sep 17 00:00:00 2001 From: Zhenya Tikhonov Date: Fri, 30 Jan 2026 12:44:02 +0400 Subject: [PATCH 2/3] refactor: move cgroup manipulation to the top --- run.sh | 56 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/run.sh b/run.sh index 232925f..9bd377d 100755 --- a/run.sh +++ b/run.sh @@ -1,8 +1,38 @@ #!/bin/bash +echo "Entering $0 at $(date) " + +# It is required to keep this block at the top of the script! +# All processes started by this script must be moved to `./init` group on cgroup v2. +if [ ! -f /sys/fs/cgroup/cgroup.controllers ]; then + echo "Using cgroup v1" +else + echo "Using cgroup v2" + CURRENT_CGROUP=$(cat /proc/self/cgroup | sed 's/0:://') + CURRENT_CGROUP_PATH="/sys/fs/cgroup/${CURRENT_CGROUP}" + echo "Current cgroup: ${CURRENT_CGROUP}" + + # Move the processes from the current group to the `./init` group, + # otherwise the current group will become of type "domain threaded", + # and it will not be possible to enable required controllers for DinD group. + # Ref: https://github.com/moby/moby/blob/38805f20f9bcc5e87869d6c79d432b166e1c88b4/hack/dind#L28-L38 + echo "Creating init cgroup ${CURRENT_CGROUP_PATH}/init" + mkdir -p ${CURRENT_CGROUP_PATH}/init + echo "Moving existing processes from ${CURRENT_CGROUP_PATH} to ${CURRENT_CGROUP_PATH}/init" + xargs -rn1 < ${CURRENT_CGROUP_PATH}/cgroup.procs > ${CURRENT_CGROUP_PATH}/init/cgroup.procs || : + echo "Done moving existing processes from ${CURRENT_CGROUP_PATH} to ${CURRENT_CGROUP_PATH}/init" + + # Set `memory.oom.group=0` to disable killing all processes in cgroup at once on OOM. 
+ # if all processes are killed at once, the system will not be able to detect this event; + # instead, we expect separate pipeline steps to be killed if total consumption exceeds the limits. + MEMORY_OOM_GROUP="${CURRENT_CGROUP_PATH}/memory.oom.group" + echo "Ensuring memory.oom.group is set to 0 to disable killing all processes in cgroup at once on OOM" + echo "0" > "${MEMORY_OOM_GROUP}" + echo "Current memory.oom.group value: $(cat "${MEMORY_OOM_GROUP}")" +fi + DIR=$(dirname $0) -echo "Entering $0 at $(date) " DOCKERD_DATA_ROOT=${DOCKERD_DATA_ROOT:-/var/lib/docker} DIND_VOLUME_STAT_DIR=${DIND_VOLUME_STAT_DIR:-${DOCKERD_DATA_ROOT}/dind-volume} DIND_VOLUME_CREATED_TS_FILE=${DIND_VOLUME_STAT_DIR}/created @@ -184,32 +214,8 @@ do echo "Starting dockerd" if [ ! -f /sys/fs/cgroup/cgroup.controllers ]; then - echo "Using cgroup v1" dockerd ${DOCKERD_PARAMS} <&- & else - echo "Using cgroup v2" - CURRENT_CGROUP=$(cat /proc/self/cgroup | sed 's/0:://') - CURRENT_CGROUP_PATH="/sys/fs/cgroup/${CURRENT_CGROUP}" - echo "Current cgroup: ${CURRENT_CGROUP}" - - # Move the processes from the current group to the `./init` group, - # otherwise the current group will become of type "domain threaded", - # and it will not be possible to enable required controllers for DinD group. - # Ref: https://github.com/moby/moby/blob/38805f20f9bcc5e87869d6c79d432b166e1c88b4/hack/dind#L28-L38 - echo "Creating init cgroup ${CURRENT_CGROUP_PATH}/init" - mkdir -p ${CURRENT_CGROUP_PATH}/init - echo "Moving existing processes from ${CURRENT_CGROUP_PATH} to ${CURRENT_CGROUP_PATH}/init" - xargs -rn1 < ${CURRENT_CGROUP_PATH}/cgroup.procs > ${CURRENT_CGROUP_PATH}/init/cgroup.procs || : - echo "Done moving existing processes from ${CURRENT_CGROUP_PATH} to ${CURRENT_CGROUP_PATH}/init" - - # Set `memory.oom.group=0` to disable killing all processes in cgroup at once on OOM. 
- # if all processes are killed at once, the system will not be able to detect this event; - # instead, we expect separate pipeline steps to be killed if total consumptions exceed limits. - MEMORY_OOM_GROUP="${CURRENT_CGROUP_PATH}/memory.oom.group" - echo "Ensuring memory.oom.group is set to 0 to disable killing all processes in cgroup at once on OOM" - echo "0" > "${MEMORY_OOM_GROUP}" - echo "Current memory.oom.group value: $(cat "${MEMORY_OOM_GROUP}")" - # Explicitly set --cgroup-parent to prevent DinD containers escaping the pod cgroup on cgroup v2. dockerd --feature containerd-snapshotter=false --cgroup-parent "${CURRENT_CGROUP}/codefresh-dind" ${DOCKERD_PARAMS} <&- & fi From 4ea9b9ad2cbf03023a31a160e9ecaf9ea589607a Mon Sep 17 00:00:00 2001 From: Zhenya Tikhonov Date: Fri, 30 Jan 2026 12:44:30 +0400 Subject: [PATCH 3/3] fix: disable `containerd` image store on cgroup v1 --- run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run.sh b/run.sh index 9bd377d..151134c 100755 --- a/run.sh +++ b/run.sh @@ -214,7 +214,7 @@ do echo "Starting dockerd" if [ ! -f /sys/fs/cgroup/cgroup.controllers ]; then - dockerd ${DOCKERD_PARAMS} <&- & + dockerd --feature containerd-snapshotter=false ${DOCKERD_PARAMS} <&- & else # Explicitly set --cgroup-parent to prevent DinD containers escaping the pod cgroup on cgroup v2. dockerd --feature containerd-snapshotter=false --cgroup-parent "${CURRENT_CGROUP}/codefresh-dind" ${DOCKERD_PARAMS} <&- &