Skip to content

Commit a76379f

Browse files
committed
controllers: fix decommission stuck when pod is deleted
When a Cassandra pod is deleted during decommission (e.g., StatefulSet scaled down manually), casskop gets stuck in StatusOngoing state because it tries to get a non-existent pod and returns an error instead of transitioning to StatusFinalizing for PVC cleanup. This fix handles the IsNotFound error case during StatusOngoing by transitioning to StatusFinalizing and proceeding with PVC cleanup, similar to how StatusFinalizing already handles this case. The fix ensures casskop can properly recover when pods are deleted during decommission operations, allowing the cluster to scale correctly.
1 parent d88739c commit a76379f

File tree

1 file changed

+18
-2
lines changed

1 file changed

+18
-2
lines changed

controllers/cassandracluster/pod_operation.go

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,20 @@ func (rcc *CassandraClusterReconciler) ensureDecommission(ctx context.Context, c
363363

364364
lastPod, err := rcc.GetPod(ctx, cc.Namespace, podLastOperation.Pods[0])
365365
if err != nil {
366+
// If pod is not found, it means it was already deleted (e.g., StatefulSet scaled down)
367+
// Transition to StatusFinalizing to proceed with PVC cleanup
368+
if apierrors.IsNotFound(err) {
369+
logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": dcRackName,
370+
"pod": podLastOperation.Pods[0]}).Info("Pod not found during decommission, transitioning to Finalizing")
371+
// Create a minimal pod object for deletePodPVC (it only needs the name)
372+
lastPod = &v1.Pod{
373+
ObjectMeta: metav1.ObjectMeta{
374+
Name: podLastOperation.Pods[0],
375+
Namespace: cc.Namespace,
376+
},
377+
}
378+
return rcc.deletePodPVC(ctx, cc, dcName, rackName, status, lastPod, statefulsetIsReady)
379+
}
366380
return breakResyncLoop, fmt.Errorf(
367381
"failed to get last pod '%s': %v", podLastOperation.Pods[0], err)
368382
}
@@ -640,8 +654,10 @@ func (rcc *CassandraClusterReconciler) updatePodLastOperation(clusterName, dcRac
640654
podLastOperation.Pods = k8s.RemoveString(podLastOperation.Pods, podName)
641655
}
642656

643-
/* finalizeOperation sets the labels on the pod where ran an operation depending on the error status
644-
It also updates status.CassandraRackStatus[dcRackName].PodLastOperation
657+
/*
658+
finalizeOperation sets the labels on the pod on which an operation ran, depending on the error status
659+
660+
It also updates status.CassandraRackStatus[dcRackName].PodLastOperation
645661
*/
646662
func (rcc *CassandraClusterReconciler) finalizeOperation(ctx context.Context, err error, cc *api.CassandraCluster, dcRackName string,
647663
pod v1.Pod, status *api.CassandraClusterStatus, operationName string) {

0 commit comments

Comments
 (0)