Skip to content

Commit b5acabf

Browse files
csiaddonsnode: delete the object after max connection retries
This patch retries connecting to the sidecar up to `CSIAddonsNodeConnectionMaxRetries` times. If none of the connection attempts succeeds, the object is considered obsolete and is deleted. An in-memory retry counter is used to keep things simple and to avoid updating the object's status/annotations. Signed-off-by: Niraj Yadav <niryadav@redhat.com>
1 parent 40a8a6c commit b5acabf

File tree

1 file changed

+44
-13
lines changed

1 file changed

+44
-13
lines changed

internal/controller/csiaddons/csiaddonsnode_controller.go

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"net/url"
2525
"slices"
2626
"strings"
27+
"time"
2728

2829
csiaddonsv1alpha1 "github.com/csi-addons/kubernetes-csi-addons/api/csiaddons/v1alpha1"
2930
"github.com/csi-addons/kubernetes-csi-addons/internal/connection"
@@ -42,6 +43,16 @@ import (
4243
"sigs.k8s.io/controller-runtime/pkg/predicate"
4344
)
4445

46+
const (
47+
CSIAddonsNodeConnectionMaxRetries = 3
48+
CSIAddonsNodeConnectionSleepInterval = 2 * time.Second
49+
50+
// The duration after which a new reconcile should be triggered
51+
// to validate the cluster state. Used only when reconciliation
52+
// completes without any errors.
53+
CSIAddonsNodeRequeueAfter = time.Hour
54+
)
55+
4556
var (
4657
csiAddonsNodeFinalizer = csiaddonsv1alpha1.GroupVersion.Group + "/csiaddonsnode"
4758
)
@@ -126,22 +137,42 @@ func (r *CSIAddonsNodeReconciler) Reconcile(ctx context.Context, req ctrl.Reques
126137
return ctrl.Result{}, err
127138
}
128139

129-
logger.Info("Connecting to sidecar")
130-
newConn, err := connection.NewConnection(ctx, endPoint, nodeID, driverName, csiAddonsNode.Namespace, csiAddonsNode.Name, r.EnableAuth)
131-
if err != nil {
132-
logger.Error(err, "Failed to establish connection with sidecar")
140+
// The reconciler is stateless and if we delete an object that is still valid
141+
// it will be recreated (by the sidecar). Use an in-memory retry loop to keep
142+
// things simple. If we wanted to preserve state we would have had to rely
143+
// on the CRD's status/annotations.
144+
var newConn *connection.Connection
145+
var connErr error
146+
for i := range CSIAddonsNodeConnectionMaxRetries {
147+
logger.Info("Connecting to sidecar", "attempt", i)
148+
newConn, connErr = connection.NewConnection(ctx, endPoint, nodeID, driverName, csiAddonsNode.Namespace, csiAddonsNode.Name, r.EnableAuth)
149+
150+
// Success, exit early. Logged later after getting fence client status
151+
if connErr == nil {
152+
break
153+
}
133154

134-
errMessage := util.GetErrorMessage(err)
135-
csiAddonsNode.Status.State = csiaddonsv1alpha1.CSIAddonsNodeStateFailed
136-
csiAddonsNode.Status.Message = fmt.Sprintf("Failed to establish connection with sidecar: %v", errMessage)
137-
statusErr := r.Status().Update(ctx, csiAddonsNode)
138-
if statusErr != nil {
139-
logger.Error(statusErr, "Failed to update status")
155+
// Do not spam the socket
156+
if i < CSIAddonsNodeConnectionMaxRetries-1 {
157+
time.Sleep(CSIAddonsNodeConnectionSleepInterval)
158+
}
159+
}
140160

141-
return ctrl.Result{}, statusErr
161+
// If we were still unable to connect after max retries
162+
if connErr != nil {
163+
logger.Error(connErr, fmt.Sprintf("Failed to establish connection with sidecar after %d attempts, deleting the object", CSIAddonsNodeConnectionMaxRetries))
164+
165+
// We do not update the status anymore as we consider deletion
166+
// as the resolution after max attempts.
167+
if delErr := r.Delete(ctx, csiAddonsNode); client.IgnoreNotFound(delErr) != nil {
168+
logger.Error(delErr, "failed to delete CSIAddonsNode object after max retries")
169+
170+
return ctrl.Result{}, delErr
142171
}
143172

144-
return ctrl.Result{}, err
173+
// Object is deleted, stop the reconcile phase
174+
logger.Info("successfully deleted CSIAddonsNode object due to reaching max reconnection attempts")
175+
return ctrl.Result{}, nil
145176
}
146177

147178
nfsc, err := r.getNetworkFenceClientStatus(ctx, &logger, newConn, csiAddonsNode)
@@ -165,7 +196,7 @@ func (r *CSIAddonsNodeReconciler) Reconcile(ctx context.Context, req ctrl.Reques
165196
return ctrl.Result{}, err
166197
}
167198

168-
return ctrl.Result{}, nil
199+
return ctrl.Result{RequeueAfter: CSIAddonsNodeRequeueAfter}, nil
169200
}
170201

171202
// getNetworkFenceClassesForDriver gets the networkfenceclasses for the driver.

0 commit comments

Comments
 (0)