From cefc2d5e33f9f3ae6f164eca2736e41ddd9d283c Mon Sep 17 00:00:00 2001
From: Blaine Gardner
Date: Thu, 3 Oct 2024 08:37:39 -0600
Subject: [PATCH 1/2] mds: fix liveness probe timeout

When the MDS liveness probe times out, the timeout should not fail the
probe. If the cluster has a network partition or another issue that
causes the Ceph mon cluster to become unstable, `ceph ...` commands can
hang and cause a timeout. In this case, the MDS should not be
restarted, so as not to cause cascading cluster disruption.

Signed-off-by: Blaine Gardner
(cherry picked from commit ad1bae9e1bcd9646013ecec79c3b67f40b2a16d0)
---
 pkg/operator/ceph/file/mds/livenessprobe.go | 5 ++---
 pkg/operator/ceph/file/mds/livenessprobe.sh | 3 ++-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pkg/operator/ceph/file/mds/livenessprobe.go b/pkg/operator/ceph/file/mds/livenessprobe.go
index beec3882ac5d..6f318d6bab89 100644
--- a/pkg/operator/ceph/file/mds/livenessprobe.go
+++ b/pkg/operator/ceph/file/mds/livenessprobe.go
@@ -3,7 +3,6 @@ package mds
 import (
 	"bytes"
 	_ "embed"
-	"fmt"
 	"html/template"
 
 	"github.com/pkg/errors"
@@ -38,6 +37,7 @@ type mdsLivenessProbeConfig struct {
 	MdsId          string
 	FilesystemName string
 	Keyring        string
+	CmdTimeout     int32
 }
 
 func renderProbe(mdsLivenessProbeConfigValue mdsLivenessProbeConfig) (string, error) {
@@ -64,6 +64,7 @@ func generateMDSLivenessProbeExecDaemon(daemonID, filesystemName, keyring string
 		MdsId:          daemonID,
 		FilesystemName: filesystemName,
 		Keyring:        keyring,
+		CmdTimeout:     mdsCmdTimeout,
 	}
 
 	mdsLivenessProbeCmd, err := renderProbe(mdsLivenessProbeConfigValue)
@@ -75,8 +76,6 @@ func generateMDSLivenessProbeExecDaemon(daemonID, filesystemName, keyring string
 		ProbeHandler: v1.ProbeHandler{
 			Exec: &v1.ExecAction{
 				Command: []string{
-					"timeout",
-					fmt.Sprintf("%d", mdsCmdTimeout),
 					"sh",
 					"-c",
 					mdsLivenessProbeCmd,
diff --git a/pkg/operator/ceph/file/mds/livenessprobe.sh b/pkg/operator/ceph/file/mds/livenessprobe.sh
index 97ad139bdbeb..5cddfac15f32 100644
--- a/pkg/operator/ceph/file/mds/livenessprobe.sh
+++ b/pkg/operator/ceph/file/mds/livenessprobe.sh
@@ -5,8 +5,9 @@
 MDS_ID="{{ .MdsId }}"
 FILESYSTEM_NAME="{{ .FilesystemName }}"
 KEYRING="{{ .Keyring }}"
+CMD_TIMEOUT="{{ .CmdTimeout }}"
 
-outp="$(ceph fs dump --mon-host="$ROOK_CEPH_MON_HOST" --mon-initial-members="$ROOK_CEPH_MON_INITIAL_MEMBERS" --keyring "$KEYRING" --format json)"
+outp="$(ceph fs dump --mon-host="$ROOK_CEPH_MON_HOST" --mon-initial-members="$ROOK_CEPH_MON_INITIAL_MEMBERS" --keyring "$KEYRING" --connect-timeout="$CMD_TIMEOUT" --format json)"
 rc=$?
 if [ $rc -ne 0 ]; then
     echo "ceph MDS dump check failed with the following output:"

From b655f4dcf5764d71ad7d4c0f8ae1073081e38c4c Mon Sep 17 00:00:00 2001
From: Travis Nielsen
Date: Fri, 4 Oct 2024 14:23:05 -0600
Subject: [PATCH 2/2] mon: do not remove extra mon in middle of failover

If a mon failover is in progress, ensure the removal of an extra mon
deployment is skipped, since that code path only has one mon in the
list: the mon that was just newly started. The extra mon cleanup was
erroneously removing a random mon in that case, followed immediately by
the mon failover completing and removing the expected failed mon,
potentially causing mon quorum loss.
Signed-off-by: Travis Nielsen
(cherry picked from commit e2cadab6fe10e7d2c66a9f2981737477cc9302db)
---
 pkg/operator/ceph/cluster/mon/mon.go      |  9 +++++++++
 pkg/operator/ceph/cluster/mon/mon_test.go | 18 ++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/pkg/operator/ceph/cluster/mon/mon.go b/pkg/operator/ceph/cluster/mon/mon.go
index 17258a5322d5..8375163e8d5b 100644
--- a/pkg/operator/ceph/cluster/mon/mon.go
+++ b/pkg/operator/ceph/cluster/mon/mon.go
@@ -1068,7 +1068,16 @@ func (c *Cluster) startDeployments(mons []*monConfig, requireAllInQuorum bool) e
 }
 
 func (c *Cluster) checkForExtraMonResources(mons []*monConfig, deployments []apps.Deployment) string {
+	// If there are fewer mon deployments than the desired count, no need to remove an extra.
 	if len(deployments) <= c.spec.Mon.Count || len(deployments) <= len(mons) {
+		logger.Debug("no extra mon deployments to remove")
+		return ""
+	}
+	// If there are fewer mons in the list than expected, either new mons are being created for
+	// a new cluster, or a mon failover is in progress and the list of mons only
+	// includes the single mon that was just started
+	if len(mons) < c.spec.Mon.Count {
+		logger.Debug("new cluster or mon failover in progress, not checking for extra mon deployments")
 		return ""
 	}
 
diff --git a/pkg/operator/ceph/cluster/mon/mon_test.go b/pkg/operator/ceph/cluster/mon/mon_test.go
index 9d27be4014c9..1a4d1f3d4a5a 100644
--- a/pkg/operator/ceph/cluster/mon/mon_test.go
+++ b/pkg/operator/ceph/cluster/mon/mon_test.go
@@ -718,6 +718,24 @@ func TestRemoveExtraMonDeployments(t *testing.T) {
 	c.spec.Mon.Count = 3
 	removed = c.checkForExtraMonResources(mons, deployments)
 	assert.Equal(t, "", removed)
+
+	// Do not remove a mon during failover when only a single mon is in the list, even if extra deployments exist
+	mons = []*monConfig{
+		{ResourceName: "rook-ceph-mon-d", DaemonName: "d"},
+	}
+	deployments = append(deployments, apps.Deployment{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:   "rook-ceph-mon-c",
+			Labels: map[string]string{"ceph_daemon_id": "c"},
+		}})
+	deployments = append(deployments, apps.Deployment{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:   "rook-ceph-mon-d",
+			Labels: map[string]string{"ceph_daemon_id": "d"},
+		}})
+	c.spec.Mon.Count = 3
+	removed = c.checkForExtraMonResources(mons, deployments)
+	assert.Equal(t, "", removed)
 }
 
 func TestStretchMonVolumeClaimTemplate(t *testing.T) {
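
Note for reviewers (not part of the patches above): a minimal sketch of the exec probe
shape that generateMDSLivenessProbeExecDaemon produces after the first patch. It assumes
k8s.io/api/core/v1; the rendered script body and the "15" timeout value are placeholders,
not the real output of renderProbe.

    package main

    import (
    	"fmt"

    	v1 "k8s.io/api/core/v1"
    )

    func main() {
    	// Hypothetical rendered script body; the real one comes from livenessprobe.sh
    	// via renderProbe, with CmdTimeout filled in from mdsCmdTimeout.
    	renderedScript := `CMD_TIMEOUT="15"
    ceph fs dump --connect-timeout="$CMD_TIMEOUT" --format json`

    	probe := &v1.Probe{
    		ProbeHandler: v1.ProbeHandler{
    			Exec: &v1.ExecAction{
    				// No external `timeout <N>` wrapper around the script: a hung
    				// `ceph fs dump` is now bounded by --connect-timeout inside the
    				// script rather than by a wrapper whose non-zero exit would
    				// fail the probe and restart the MDS.
    				Command: []string{"sh", "-c", renderedScript},
    			},
    		},
    	}
    	fmt.Printf("%+v\n", probe)
    }

The design point, per the first commit message, is that the mon connection timeout is
enforced by the ceph CLI itself, so the probe outcome is decided by the script's own
logic instead of by an external timeout process.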