diff --git a/internal/controller/drplacementcontrol.go b/internal/controller/drplacementcontrol.go
index 5c9c57715..e43a8f772 100644
--- a/internal/controller/drplacementcontrol.go
+++ b/internal/controller/drplacementcontrol.go
@@ -373,7 +373,7 @@ func (d *DRPCInstance) RunFailover() (bool, error) {
 			return !done, nil
 		}
 
-		return d.ensureActionCompleted(failoverCluster)
+		return d.ensureFailoverActionCompleted(failoverCluster)
 	} else if yes, err := d.mwExistsAndPlacementUpdated(failoverCluster); yes || err != nil {
 		// We have to wait for the VRG to appear on the failoverCluster or
 		// in case of an error, try again later
@@ -863,7 +863,7 @@ func (d *DRPCInstance) RunRelocate() (bool, error) {
 		addOrUpdateCondition(&d.instance.Status.Conditions, rmn.ConditionAvailable, d.instance.Generation,
 			metav1.ConditionTrue, string(d.instance.Status.Phase), "Completed")
 
-		return d.ensureActionCompleted(preferredCluster)
+		return d.ensureRelocateActionCompleted(preferredCluster)
 	}
 
 	d.setStatusInitiating()
@@ -896,6 +896,29 @@ func (d *DRPCInstance) RunRelocate() (bool, error) {
 	return d.relocate(preferredCluster, preferredClusterNamespace, rmn.Relocating)
 }
 
+func (d *DRPCInstance) ensureRelocateActionCompleted(srcCluster string) (bool, error) {
+	d.setProgression(rmn.ProgressionCleaningUp)
+
+	return d.ensureActionCompleted(srcCluster)
+}
+
+func (d *DRPCInstance) ensureFailoverActionCompleted(srcCluster string) (bool, error) {
+	// This is the time to clean up the workload from the preferredCluster.
+	// For managed apps, ACM does it automatically when we update the
+	// placement to the targetCluster. For discovered apps, we have to let
+	// the user know that they need to clean up the apps, so set the
+	// progression to wait on the user to clean up. Otherwise, we can set
+	// the progression to cleaning up.
+	if d.instance.Spec.ProtectedNamespaces != nil &&
+		len(*d.instance.Spec.ProtectedNamespaces) > 0 {
+		d.setProgression(rmn.ProgressionWaitOnUserToCleanUp)
+	} else {
+		d.setProgression(rmn.ProgressionCleaningUp)
+	}
+
+	return d.ensureActionCompleted(srcCluster)
+}
+
 func (d *DRPCInstance) ensureActionCompleted(srcCluster string) (bool, error) {
 	const done = true
 
@@ -909,8 +932,6 @@ func (d *DRPCInstance) ensureActionCompleted(srcCluster string) (bool, error) {
 		return !done, err
 	}
 
-	d.setProgression(rmn.ProgressionCleaningUp)
-
 	// Cleanup and setup VolSync if enabled
 	err = d.ensureCleanupAndVolSyncReplicationSetup(srcCluster)
 	if err != nil {
@@ -974,8 +995,19 @@ func (d *DRPCInstance) quiesceAndRunFinalSync(homeCluster string) (bool, error)
 	addOrUpdateCondition(&d.instance.Status.Conditions, rmn.ConditionAvailable, d.instance.Generation,
 		d.getConditionStatusForTypeAvailable(), string(d.instance.Status.Phase), "Starting quiescing for relocation")
 
-	// clear current user PlacementRule's decision
-	d.setProgression(rmn.ProgressionClearingPlacement)
+	// We are going to clear the placement; this is when ACM will start
+	// deleting the workloads from the current cluster. For discovered
+	// apps, we have to let the user know that they need to clean up the
+	// apps from the current cluster, so set the progression to wait on
+	// the user to clean up. For non-discovered apps, we can set the
+	// progression to clearing placement.
+	if d.instance.Spec.ProtectedNamespaces != nil &&
+		len(*d.instance.Spec.ProtectedNamespaces) > 0 {
+		d.setProgression(rmn.ProgressionWaitOnUserToCleanUp)
+	} else {
+		// clear current user PlacementRule's decision
+		d.setProgression(rmn.ProgressionClearingPlacement)
+	}
 
 	err := d.clearUserPlacementRuleStatus()
 	if err != nil {
@@ -2048,10 +2080,6 @@ func (d *DRPCInstance) ensureVRGManifestWorkOnClusterDeleted(clusterName string)
 
 	d.log.Info("Request not complete yet", "cluster", clusterName)
 
-	if d.instance.Spec.ProtectedNamespaces != nil && len(*d.instance.Spec.ProtectedNamespaces) > 0 {
-		d.setProgression(rmn.ProgressionWaitOnUserToCleanUp)
-	}
-
 	// IF we get here, either the VRG has not transitioned to secondary (yet) or delete didn't succeed. In either cases,
 	// we need to make sure that the VRG object is deleted. IOW, we still have to wait
 	return !done, nil
@@ -2067,10 +2095,6 @@ func (d *DRPCInstance) ensureVRGIsSecondaryEverywhere(clusterToSkip string) bool
 			continue
 		}
 
-		if d.instance.Spec.ProtectedNamespaces != nil && len(*d.instance.Spec.ProtectedNamespaces) > 0 {
-			d.setProgression(rmn.ProgressionWaitOnUserToCleanUp)
-		}
-
 		if !d.ensureVRGIsSecondaryOnCluster(clusterName) {
 			d.log.Info("Still waiting for VRG to transition to secondary",
 				"cluster", clusterName)
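// Aside (reviewer note, not part of the patch): a minimal, runnable Go
// sketch of the decision this file now makes in ensureFailoverActionCompleted
// and quiesceAndRunFinalSync. The Progression type, its two values, and the
// helper names below are illustrative stand-ins for the rmn constants and
// the inline ProtectedNamespaces check; only the branching mirrors the patch.
package main

import "fmt"

type Progression string

const (
	ProgressionCleaningUp          Progression = "CleaningUp"
	ProgressionWaitOnUserToCleanUp Progression = "WaitOnUserToCleanUp"
)

// isDiscoveredApp captures the check the patch repeats: the DRPC protects
// "discovered" (non-ACM-managed) apps when Spec.ProtectedNamespaces is set
// and non-empty.
func isDiscoveredApp(protectedNamespaces *[]string) bool {
	return protectedNamespaces != nil && len(*protectedNamespaces) > 0
}

// progressionForFailoverCleanup returns what ensureFailoverActionCompleted
// now sets: ACM cleans up managed apps on its own, while discovered apps
// must be cleaned up by the user.
func progressionForFailoverCleanup(protectedNamespaces *[]string) Progression {
	if isDiscoveredApp(protectedNamespaces) {
		return ProgressionWaitOnUserToCleanUp
	}

	return ProgressionCleaningUp
}

func main() {
	discovered := []string{"deployment-ns"} // hypothetical protected namespace

	fmt.Println(progressionForFailoverCleanup(nil))         // CleaningUp
	fmt.Println(progressionForFailoverCleanup(&discovered)) // WaitOnUserToCleanUp
}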
diff --git a/internal/controller/volsync/vshandler.go b/internal/controller/volsync/vshandler.go
index 6457c9355..bb6eff79b 100644
--- a/internal/controller/volsync/vshandler.go
+++ b/internal/controller/volsync/vshandler.go
@@ -73,6 +73,7 @@ type VSHandler struct {
 	destinationCopyMethod   volsyncv1alpha1.CopyMethodType
 	volumeSnapshotClassList *snapv1.VolumeSnapshotClassList
 	vrgInAdminNamespace     bool
+	workloadStatus          string
 }
 
 func NewVSHandler(ctx context.Context, client client.Client, log logr.Logger, owner metav1.Object,
@@ -98,6 +99,10 @@ func NewVSHandler(ctx context.Context, client client.Client, log logr.Logger, ow
 	return vsHandler
 }
 
+func (v *VSHandler) GetWorkloadStatus() string {
+	return v.workloadStatus
+}
+
 // returns replication destination only if create/update is successful and the RD is considered available.
 // Callers should assume getting a nil replication destination back means they should retry/requeue.
 //
@@ -364,6 +369,8 @@ func (v *VSHandler) validatePVCBeforeRS(rsSpec ramendrv1alpha1.VolSyncReplicatio
 		return false, nil
 	}
 
+	v.workloadStatus = "inactive"
+
 	return true, nil // Good to proceed - PVC is not in use, not mounted to node (or does not exist-should not happen)
 }
 
@@ -1634,6 +1641,8 @@ func (v *VSHandler) IsRDDataProtected(pvcName, pvcNamespace string) (bool, error
 func (v *VSHandler) PrecreateDestPVCIfEnabled(rdSpec ramendrv1alpha1.VolSyncReplicationDestinationSpec,
 ) (*string, error) {
 	if !v.IsCopyMethodDirect() {
+		// TODO:
+		// We need to check the workload status even in other cases.
 		v.log.Info("Using default copyMethod of Snapshot")
 
 		return nil, nil // use default copyMethod
@@ -1659,6 +1668,9 @@ func (v *VSHandler) PrecreateDestPVCIfEnabled(rdSpec ramendrv1alpha1.VolSyncRepl
 			util.ProtectedPVCNamespacedName(rdSpec.ProtectedPVC))
 	}
 
+	// At this point, we are sure that there is no active workload
+	v.workloadStatus = "inactive"
+
 	v.log.Info(fmt.Sprintf("Using App PVC %s for syncing directly to it",
 		util.ProtectedPVCNamespacedName(rdSpec.ProtectedPVC)))
 	// Using the application PVC for syncing from source to destination and save a snapshot
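// Aside (reviewer note, not part of the patch): a minimal, runnable Go
// sketch of the pattern vshandler.go adds: an unexported workloadStatus
// field that is only set once the handler has confirmed no pod is using the
// PVC, plus a read-only accessor. vsHandler, validatePVC, and pvcInUse are
// hypothetical stand-ins for VSHandler, validatePVCBeforeRS, and its
// in-use checks.
package main

import "fmt"

type vsHandler struct {
	workloadStatus string
}

func (v *vsHandler) GetWorkloadStatus() string {
	return v.workloadStatus
}

// validatePVC mimics the control flow of validatePVCBeforeRS: it reports
// whether replication may proceed, and records "inactive" only on the path
// where the PVC is confirmed unused.
func (v *vsHandler) validatePVC(pvcInUse bool) bool {
	if pvcInUse {
		// The workload may still be running; leave workloadStatus empty
		// so callers do not treat the cluster as quiesced.
		return false
	}

	v.workloadStatus = "inactive"

	return true
}

func main() {
	v := &vsHandler{}

	fmt.Println(v.validatePVC(true), v.GetWorkloadStatus())  // false ""
	fmt.Println(v.validatePVC(false), v.GetWorkloadStatus()) // true "inactive"
}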
v.log.Info("Using default copyMethod of Snapshot") return nil, nil // use default copyMethod @@ -1659,6 +1668,9 @@ func (v *VSHandler) PrecreateDestPVCIfEnabled(rdSpec ramendrv1alpha1.VolSyncRepl util.ProtectedPVCNamespacedName(rdSpec.ProtectedPVC)) } + // At this point, we are sure that there is no active workload + v.workloadStatus = "inactive" + v.log.Info(fmt.Sprintf("Using App PVC %s for syncing directly to it", util.ProtectedPVCNamespacedName(rdSpec.ProtectedPVC))) // Using the application PVC for syncing from source to destination and save a snapshot diff --git a/internal/controller/volumereplicationgroup_controller.go b/internal/controller/volumereplicationgroup_controller.go index fc48ce2a5..9288b01c1 100644 --- a/internal/controller/volumereplicationgroup_controller.go +++ b/internal/controller/volumereplicationgroup_controller.go @@ -1238,7 +1238,9 @@ func (v *VRGInstance) updateStatusState() { // VRG is exclusively using volsync if v.instance.Spec.ReplicationState == ramendrv1alpha1.Secondary && len(v.instance.Spec.VolSync.RDSpec) > 0 { - v.instance.Status.State = ramendrv1alpha1.SecondaryState + if v.volSyncHandler.GetWorkloadStatus() == "inactive" { + v.instance.Status.State = ramendrv1alpha1.SecondaryState + } return } diff --git a/internal/controller/vrg_volsync.go b/internal/controller/vrg_volsync.go index d2059262f..be2396e32 100644 --- a/internal/controller/vrg_volsync.go +++ b/internal/controller/vrg_volsync.go @@ -225,6 +225,8 @@ func (v *VRGInstance) reconcileVolSyncAsSecondary() bool { // If we are secondary, and RDSpec is not set, then we don't want to have any PVC // flagged as a VolSync PVC. if v.instance.Spec.VolSync.RDSpec == nil { + // This might be a case where we lose the RDSpec temporarily, + // so we don't know if workload status is truly inactive. idx := 0 for _, protectedPVC := range v.instance.Status.ProtectedPVCs { @@ -250,6 +252,7 @@ func (v *VRGInstance) reconcileRDSpecForDeletionOrReplication() bool { requeue := false rdinCGs := []ramendrv1alpha1.VolSyncReplicationDestinationSpec{} + // TODO: Set the workload status in CG code path later for _, rdSpec := range v.instance.Spec.VolSync.RDSpec { cg, ok := rdSpec.ProtectedPVC.Labels[ConsistencyGroupLabel] if ok && util.IsCGEnabled(v.instance.Annotations) {