valkey-io · enjoy-binbin · Nov 22, 2024 · Nov 8, 2024 · Nov 8, 2024 · Nov 11, 2024
diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c
@@ -4843,6 +4843,27 @@ void clusterHandleReplicaMigration(int max_replicas) {
  * data loss due to the asynchronous primary-replica replication.
  * -------------------------------------------------------------------------- */
 
+void manualFailoverCanStart(void) {
+    serverAssert(server.cluster->mf_can_start == 0);
+
+    if (server.cluster->failover_auth_time) {
+        /* There is another manual failover requested by the user.
+         * If we have an ongoing election, reset it because the user may initiate
+         * manual failover again when the previous manual failover timed out.
+         * Otherwise, if the previous election timed out (see auth_timeout) and
+         * before the next retry (see auth_retry_time), the new manual failover
+         * will pause the primary and replica can not do anything to advance the
+         * manual failover, and then the manual failover eventually times out. */
+        server.cluster->failover_auth_time = 0;
+        serverLog(LL_WARNING,
+                  "Failover election in progress for epoch %llu, but received a new manual failover. "
+                  "Resetting the election.",
+                  (unsigned long long)server.cluster->failover_auth_epoch);
+    }
+
+    server.cluster->mf_can_start = 1;
+}
+
 /* Reset the manual failover state. This works for both primaries and replicas
  * as all the state about manual failover is cleared.
  *
@@ -4883,7 +4904,7 @@ void clusterHandleManualFailover(void) {
     if (server.cluster->mf_primary_offset == replicationGetReplicaOffset()) {
         /* Our replication offset matches the primary replication offset
          * announced after clients were paused. We can start the failover. */
-        server.cluster->mf_can_start = 1;
+        manualFailoverCanStart();
         serverLog(LL_NOTICE, "All primary replication stream processed, "
                              "manual failover can start.");
         clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_FAILOVER);
@@ -6773,7 +6794,7 @@ int clusterCommandSpecial(client *c) {
              * primary to agree about the offset. We just failover taking over
              * it without coordination. */
             serverLog(LL_NOTICE, "Forced failover user request accepted (user request from '%s').", client);
-            server.cluster->mf_can_start = 1;
+            manualFailoverCanStart();
             /* We can start a manual failover as soon as possible, setting a flag
              * here so that we don't need to waiting for the cron to kick in. */
             clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_MANUALFAILOVER);

diff --git a/tests/unit/cluster/manual-failover.tcl b/tests/unit/cluster/manual-failover.tcl
@@ -183,3 +183,45 @@ test "Wait for instance #0 to return back alive" {
 }
 
 } ;# start_cluster
+
+start_cluster 3 1 {tags {external:skip cluster} overrides {cluster-ping-interval 1000 cluster-node-timeout 15000}} {
+    test "Manual failover will reset the on-going election" {
+        set CLUSTER_PACKET_AUTH_REQUEST 5
+        set CLUSTER_PACKET_TYPE_NONE -1
+
+        # Let other primaries drop AUTH_REQUEST so that the election won't
+        # get the enough votes and the election will time out.
+        R 1 debug drop-cluster-packet-filter $CLUSTER_PACKET_AUTH_REQUEST
+        R 2 debug drop-cluster-packet-filter $CLUSTER_PACKET_AUTH_REQUEST
+
+        # Replica doing the manual failover.
+        R 3 cluster failover
+
+        # Waiting for primary and replica to confirm manual failover timeout.
+        wait_for_log_messages 0 {"*Manual failover timed out*"} 0 1000 50
+        wait_for_log_messages -3 {"*Manual failover timed out*"} 0 1000 50
+        set loglines1 [count_log_lines 0]
+        set loglines2 [count_log_lines -3]
+
+        # Undo packet drop, so that replica can win the next election.
+        R 1 debug drop-cluster-packet-filter $CLUSTER_PACKET_TYPE_NONE
+        R 2 debug drop-cluster-packet-filter $CLUSTER_PACKET_TYPE_NONE
+
+        # Replica doing the manual failover again.
+        R 3 cluster failover
+
+        # Make sure the election is reset.
+        wait_for_log_messages -3 {"*Failover election in progress*Resetting the election*"} $loglines2 1000 50
+
+        # Wait for failover.
+        wait_for_condition 1000 50 {
+            [s -3 role] == "master"
+        } else {
+            fail "No failover detected"
+        }
+
+        # Make sure that the second manual failover does not time out.
+        verify_no_log_message 0 "*Manual failover timed out*" $loglines1
+        verify_no_log_message -3 "*Manual failover timed out*" $loglines2
+    }
+} ;# start_cluster