From 9c2c1008bd28be5e0ed4986b51105394ff0cab4b Mon Sep 17 00:00:00 2001
From: grokspawn
Date: Wed, 11 Feb 2026 16:32:04 -0600
Subject: [PATCH 1/2] abandon evaluation of any new catalogsource image which
 pathologically restarts

Signed-off-by: grokspawn
---
 pkg/controller/registry/reconciler/grpc.go | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/pkg/controller/registry/reconciler/grpc.go b/pkg/controller/registry/reconciler/grpc.go
index 57cedd0ebc..fe61a0cc3c 100644
--- a/pkg/controller/registry/reconciler/grpc.go
+++ b/pkg/controller/registry/reconciler/grpc.go
@@ -30,6 +30,8 @@ const (
 	CatalogSourceUpdateKey      = "catalogsource.operators.coreos.com/update"
 	ServiceHashLabelKey         = "olm.service-spec-hash"
 	CatalogPollingRequeuePeriod = 30 * time.Second
+	// pathologicalRestartCountThreshold is the number of container restarts above which a pod is considered failed for catalog polling.
+	pathologicalRestartCountThreshold = 10
 )
 
 // grpcCatalogSourceDecorator wraps CatalogSource to add additional methods
@@ -754,9 +756,25 @@ func swapLabels(pod *corev1.Pod, labelKey, updateKey string) *corev1.Pod {
 	return pod
 }
 
+// podPathologicallyRestarting returns true if any container in the pod has restarts exceeding pathologicalRestartCountThreshold.
+func podPathologicallyRestarting(pod *corev1.Pod) bool {
+	for _, s := range pod.Status.ContainerStatuses {
+		if s.RestartCount > pathologicalRestartCountThreshold {
+			return true
+		}
+	}
+	for _, s := range pod.Status.InitContainerStatuses {
+		if s.RestartCount > pathologicalRestartCountThreshold {
+			return true
+		}
+	}
+	// TODO: currently no ephemeral containers in a catalogsource, should we add checks anyway?
+	return false
+}
+
 // podFailed checks whether the pod status is in a failed or unknown state, and deletes the pod if so.
 func (c *GrpcRegistryReconciler) podFailed(pod *corev1.Pod) (bool, error) {
-	if pod.Status.Phase == corev1.PodFailed || pod.Status.Phase == corev1.PodUnknown {
+	if pod.Status.Phase == corev1.PodFailed || pod.Status.Phase == corev1.PodUnknown || podPathologicallyRestarting(pod) {
 		logrus.WithField("UpdatePod", pod.GetName()).Infof("catalog polling result: update pod %s failed to start", pod.GetName())
 		err := c.removePods([]*corev1.Pod{pod}, pod.GetNamespace())
 		if err != nil {

From 10985516833379af165f2c126aaabd93b3c62924 Mon Sep 17 00:00:00 2001
From: grokspawn
Date: Thu, 12 Feb 2026 08:43:09 -0600
Subject: [PATCH 2/2] add CLBO container considerations to detection

Signed-off-by: grokspawn
---
 pkg/controller/registry/reconciler/grpc.go | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/pkg/controller/registry/reconciler/grpc.go b/pkg/controller/registry/reconciler/grpc.go
index fe61a0cc3c..024e298691 100644
--- a/pkg/controller/registry/reconciler/grpc.go
+++ b/pkg/controller/registry/reconciler/grpc.go
@@ -30,8 +30,8 @@ const (
 	CatalogSourceUpdateKey      = "catalogsource.operators.coreos.com/update"
 	ServiceHashLabelKey         = "olm.service-spec-hash"
 	CatalogPollingRequeuePeriod = 30 * time.Second
-	// pathologicalRestartCountThreshold is the number of container restarts above which a pod is considered failed for catalog polling.
-	pathologicalRestartCountThreshold = 10
+	// containerReasonCrashLoopBackOff is the kubelet Waiting reason when a container is backing off after repeated crashes.
+	containerReasonCrashLoopBackOff = "CrashLoopBackOff"
 )
 
 // grpcCatalogSourceDecorator wraps CatalogSource to add additional methods
@@ -756,15 +756,15 @@ func swapLabels(pod *corev1.Pod, labelKey, updateKey string) *corev1.Pod {
 	return pod
 }
 
-// podPathologicallyRestarting returns true if any container in the pod has restarts exceeding pathologicalRestartCountThreshold.
-func podPathologicallyRestarting(pod *corev1.Pod) bool {
+// podContainersArePathological returns true if any container in the pod is in CrashLoopBackOff.
+func podContainersArePathological(pod *corev1.Pod) bool {
 	for _, s := range pod.Status.ContainerStatuses {
-		if s.RestartCount > pathologicalRestartCountThreshold {
+		if s.State.Waiting != nil && s.State.Waiting.Reason == containerReasonCrashLoopBackOff {
 			return true
 		}
 	}
 	for _, s := range pod.Status.InitContainerStatuses {
-		if s.RestartCount > pathologicalRestartCountThreshold {
+		if s.State.Waiting != nil && s.State.Waiting.Reason == containerReasonCrashLoopBackOff {
 			return true
 		}
 	}
@@ -774,7 +774,7 @@ func podPathologicallyRestarting(pod *corev1.Pod) bool {
 
 // podFailed checks whether the pod status is in a failed or unknown state, and deletes the pod if so.
 func (c *GrpcRegistryReconciler) podFailed(pod *corev1.Pod) (bool, error) {
-	if pod.Status.Phase == corev1.PodFailed || pod.Status.Phase == corev1.PodUnknown || podPathologicallyRestarting(pod) {
+	if pod.Status.Phase == corev1.PodFailed || pod.Status.Phase == corev1.PodUnknown || podContainersArePathological(pod) {
 		logrus.WithField("UpdatePod", pod.GetName()).Infof("catalog polling result: update pod %s failed to start", pod.GetName())
 		err := c.removePods([]*corev1.Pod{pod}, pod.GetNamespace())
 		if err != nil {
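Below is a minimal sketch of how the CrashLoopBackOff detection this series ends up with could be exercised, written as a table-driven Go test against podContainersArePathological. It is not part of either commit: the package clause (assumed to be reconciler, from the file path), the test name, and the fixture pods are hypothetical, while the corev1 types and the two identifiers taken from the diff are real.

// Illustrative sketch only; not included in the patches above.
// Package name is assumed from the directory pkg/controller/registry/reconciler.
package reconciler

import (
	"testing"

	corev1 "k8s.io/api/core/v1"
)

// TestPodContainersArePathological (hypothetical) checks that a pod is flagged
// exactly when one of its containers or init containers is waiting with
// reason CrashLoopBackOff.
func TestPodContainersArePathological(t *testing.T) {
	crashLooping := corev1.ContainerStatus{
		State: corev1.ContainerState{
			Waiting: &corev1.ContainerStateWaiting{Reason: containerReasonCrashLoopBackOff},
		},
	}
	running := corev1.ContainerStatus{
		State: corev1.ContainerState{Running: &corev1.ContainerStateRunning{}},
	}

	tests := []struct {
		name       string
		containers []corev1.ContainerStatus
		initCs     []corev1.ContainerStatus
		want       bool
	}{
		{name: "healthy pod", containers: []corev1.ContainerStatus{running}, want: false},
		{name: "crash-looping container", containers: []corev1.ContainerStatus{running, crashLooping}, want: true},
		{name: "crash-looping init container", initCs: []corev1.ContainerStatus{crashLooping}, want: true},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			pod := &corev1.Pod{
				Status: corev1.PodStatus{
					ContainerStatuses:     tt.containers,
					InitContainerStatuses: tt.initCs,
				},
			}
			if got := podContainersArePathological(pod); got != tt.want {
				t.Errorf("podContainersArePathological() = %v, want %v", got, tt.want)
			}
		})
	}
}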