Skip to content

Commit 4e0c4f6

Browse files
authored
Merge pull request rook#13511 from ushitora-anqou/set-pdb-even-if-pgs-remain-active-clean
core: set blocking PDB even if no unhealthy PGs appear
2 parents 39f4458 + 6def9c8 commit 4e0c4f6

File tree

1 file changed

+4
-35
lines changed
  • pkg/operator/ceph/disruption/clusterdisruption

1 file changed

+4
-35
lines changed

pkg/operator/ceph/disruption/clusterdisruption/osd.go

Lines changed: 4 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -338,24 +338,9 @@ func (r *ReconcileClusterDisruption) reconcilePDBsForOSDs(
338 338
}
339 339

340 340
switch {
341-
// osd is down but pgs are active+clean
342-
case osdDown && pgClean:
343-
lastDrainTimeStamp, err := getLastDrainTimeStamp(pdbStateMap, drainingFailureDomainDurationKey)
344-
if err != nil {
345-
return reconcile.Result{}, errors.Wrapf(err, "failed to get last drain timestamp from the configmap %q", pdbStateMap.Name)
346-
}
347-
timeSinceOSDDown := time.Since(lastDrainTimeStamp)
348-
if timeSinceOSDDown > 30*time.Second {
349-
logger.Infof("osd is down in failure domain %q is down for the last %.2f minutes, but pgs are active+clean", drainingFailureDomain, timeSinceOSDDown.Minutes())
350-
resetPDBConfig(pdbStateMap)
351-
} else {
352-
logger.Infof("osd is down in the failure domain %q, but pgs are active+clean. Requeuing in case pg status is not updated yet...", drainingFailureDomain)
353-
return reconcile.Result{Requeue: true, RequeueAfter: 15 * time.Second}, nil
354-
}
355-
356-
// osd is down and pgs are not healthy
357-
case osdDown && !pgClean:
358-
logger.Infof("osd is down in failure domain %q and pgs are not active+clean. pg health: %q", drainingFailureDomain, pgHealthMsg)
341+
// osd is down
342+
case osdDown:
343+
logger.Infof("osd is down in failure domain %q. pg health: %q", drainingFailureDomain, pgHealthMsg)
359 344
currentlyDrainingFD, ok := pdbStateMap.Data[drainingFailureDomainKey]
360 345
if !ok || drainingFailureDomain != currentlyDrainingFD {
361 346
pdbStateMap.Data[drainingFailureDomainKey] = drainingFailureDomain
@@ -383,7 +368,7 @@ func (r *ReconcileClusterDisruption) reconcilePDBsForOSDs(
383 368
}
384 369
}
385 370

386-
if pdbStateMap.Data[drainingFailureDomainKey] != "" && !pgClean {
371+
if pdbStateMap.Data[drainingFailureDomainKey] != "" {
387 372
// delete default OSD pdb and create blocking OSD pdbs
388 373
err := r.handleActiveDrains(allFailureDomains, pdbStateMap.Data[drainingFailureDomainKey], failureDomainType, clusterInfo.Namespace, pgClean)
389 374
if err != nil {
@@ -646,22 +631,6 @@ func getPDBName(failureDomainType, failureDomainName string) string {
646 631
return k8sutil.TruncateNodeName(fmt.Sprintf("%s-%s-%s", osdPDBAppName, failureDomainType, "%s"), failureDomainName)
647 632
}
648 633

649-
func getLastDrainTimeStamp(pdbStateMap *corev1.ConfigMap, key string) (time.Time, error) {
650-
var err error
651-
var lastDrainTimeStamp time.Time
652-
lastDrainTimeStampString, ok := pdbStateMap.Data[key]
653-
if !ok || len(lastDrainTimeStampString) == 0 {
654-
return time.Now(), nil
655-
} else {
656-
lastDrainTimeStamp, err = time.Parse(time.RFC3339, pdbStateMap.Data[key])
657-
if err != nil {
658-
return time.Time{}, errors.Wrapf(err, "failed to parse timestamp %q", pdbStateMap.Data[key])
659-
}
660-
}
661-
662-
return lastDrainTimeStamp, nil
663-
}
664-
665 634
func (r *ReconcileClusterDisruption) getAllowedDisruptions(pdbName, namespace string) (int32, error) {
666 635
usePDBV1Beta1, err := k8sutil.UsePDBV1Beta1Version(r.context.ClusterdContext.Clientset)
667 636
if err != nil {

0 commit comments

Comments
 (0)