Skip to content

Commit aa6e331

Browse files
committed
mqbc: Construct merged state with uncommitted CSLs before send
Signed-off-by: Yuan Jing Vincent Yan <[email protected]>
1 parent d487452 commit aa6e331

File tree

5 files changed

+67
-34
lines changed

5 files changed

+67
-34
lines changed

src/groups/mqb/mqbblp/mqbblp_clusterorchestrator.cpp

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1084,14 +1084,10 @@ void ClusterOrchestrator::processNodeStatusAdvisory(
10841084
// thread scheduling, 'source' may see that heartbeat after the
10851085
// first advisory.
10861086

1087-
bsl::vector<bmqp_ctrlmsg::PartitionPrimaryInfo> partitions;
1088-
mqbc::ClusterUtil::loadPartitionsInfo(&partitions,
1089-
*clusterState());
10901087
d_stateManager_mp->sendClusterState(
10911088
true, // sendPartitionPrimaryInfo
10921089
true, // sendQueuesInfo
1093-
source,
1094-
partitions);
1090+
source);
10951091
}
10961092
}
10971093
else if (d_clusterConfig.clusterAttributes().isFSMWorkflow() &&
@@ -1201,14 +1197,10 @@ void ClusterOrchestrator::processNodeStateChangeEvent(
12011197
<< ": leader (self) is ACTIVE; will send leader"
12021198
<< " advisory to new node: " << node->nodeDescription();
12031199

1204-
bsl::vector<bmqp_ctrlmsg::PartitionPrimaryInfo> partitions;
1205-
mqbc::ClusterUtil::loadPartitionsInfo(&partitions,
1206-
*clusterState());
12071200
d_stateManager_mp->sendClusterState(
12081201
true, // sendPartitionPrimaryInfo
12091202
true, // sendQueuesInfo
1210-
node,
1211-
partitions);
1203+
node);
12121204
}
12131205
}
12141206

@@ -1827,12 +1819,9 @@ void ClusterOrchestrator::processLeaderPassiveNotification(
18271819
}
18281820

18291821
// Self is an ACTIVE leader - broadcast 'LeaaderAdvisory'
1830-
bsl::vector<bmqp_ctrlmsg::PartitionPrimaryInfo> partitions;
1831-
mqbc::ClusterUtil::loadPartitionsInfo(&partitions, *clusterState());
18321822
d_stateManager_mp->sendClusterState(true, // sendPartitionPrimaryInfo
18331823
true, // sendQueuesInfo
1834-
0,
1835-
partitions);
1824+
0);
18361825
}
18371826

18381827
void ClusterOrchestrator::onRecoverySuccess()

src/groups/mqb/mqbblp/mqbblp_clusterstatemanager.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -911,6 +911,7 @@ void ClusterStateManager::sendClusterState(
911911
*d_state_p,
912912
sendPartitionPrimaryInfo,
913913
sendQueuesInfo,
914+
d_allocator_p,
914915
node,
915916
partitions);
916917
}

src/groups/mqb/mqbc/mqbc_clusterstatemanager.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1542,6 +1542,7 @@ void ClusterStateManager::sendClusterState(
15421542
*d_state_p,
15431543
sendPartitionPrimaryInfo,
15441544
sendQueuesInfo,
1545+
d_allocator_p,
15451546
node,
15461547
partitions);
15471548
}

src/groups/mqb/mqbc/mqbc_clusterutil.cpp

Lines changed: 53 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
// limitations under the License.
1515

1616
// mqbc_clusterutil.cpp -*-C++-*-
17+
#include "mqbc_clusterstateledger.h"
1718
#include <mqbc_clusterutil.h>
1819

1920
#include <mqbscm_version.h>
@@ -1394,8 +1395,9 @@ void ClusterUtil::sendClusterState(
13941395
const ClusterState& clusterState,
13951396
bool sendPartitionPrimaryInfo,
13961397
bool sendQueuesInfo,
1398+
bslma::Allocator* allocator,
13971399
mqbnet::ClusterNode* node,
1398-
const bsl::vector<bmqp_ctrlmsg::PartitionPrimaryInfo>& partitions)
1400+
const bsl::vector<bmqp_ctrlmsg::PartitionPrimaryInfo>& newlyAssigned)
13991401
{
14001402
// executed by the cluster *DISPATCHER* thread
14011403

@@ -1406,6 +1408,13 @@ void ClusterUtil::sendClusterState(
14061408
BSLS_ASSERT_SAFE(mqbnet::ElectorState::e_LEADER ==
14071409
clusterData->electorInfo().electorState());
14081410
BSLS_ASSERT_SAFE(ledger && ledger->isOpen());
1411+
if (!sendPartitionPrimaryInfo) {
1412+
BSLS_ASSERT_SAFE(newlyAssigned.empty());
1413+
}
1414+
else if (!newlyAssigned.empty()) {
1415+
BSLS_ASSERT_SAFE(newlyAssigned.size() ==
1416+
clusterState.partitions().size());
1417+
}
14091418

14101419
if (clusterData->clusterConfig().clusterAttributes().isFSMWorkflow()) {
14111420
// In FSM mode, the *only* possible caller of this method is
@@ -1414,14 +1423,6 @@ void ClusterUtil::sendClusterState(
14141423
BSLS_ASSERT_SAFE(sendPartitionPrimaryInfo && !sendQueuesInfo);
14151424
}
14161425

1417-
if (sendPartitionPrimaryInfo) {
1418-
BSLS_ASSERT_SAFE(partitions.size() ==
1419-
clusterState.partitions().size());
1420-
}
1421-
else {
1422-
BSLS_ASSERT_SAFE(partitions.empty());
1423-
};
1424-
14251426
if (bmqp_ctrlmsg::NodeStatus::E_STOPPING ==
14261427
clusterData->membership().selfNodeStatus()) {
14271428
// No need to send cluster state since self is stopping. After self
@@ -1445,8 +1446,47 @@ void ClusterUtil::sendClusterState(
14451446
clusterData->electorInfo().nextLeaderMessageSequence(
14461447
&advisory.sequenceNumber());
14471448

1448-
advisory.partitions() = partitions;
1449-
loadQueuesInfo(&advisory.queues(), clusterState);
1449+
if (!newlyAssigned.empty()) {
1450+
// If we are sending a cluster state snapshot and there are new
1451+
// assignments, it implies that this will be the first leader
1452+
// advisory and thus there won't be any uncommitted CSL advisories.
1453+
advisory.partitions() = newlyAssigned;
1454+
loadQueuesInfo(&advisory.queues(), clusterState);
1455+
}
1456+
else {
1457+
// We construct a "merged state" of current cluster state and
1458+
// information from uncommitted CSL advisories, before sending over
1459+
// to follower(s).
1460+
//
1461+
// NOTE: This code path is *only* reachable in non-FSM mode.
1462+
ClusterState tempState(
1463+
&clusterData->cluster(),
1464+
clusterData->clusterConfig().partitionConfig().numPartitions(),
1465+
allocator);
1466+
apply(&tempState, clusterMessage, *clusterData);
1467+
const int rc = load(&tempState,
1468+
ledger->getIterator().get(),
1469+
*clusterData,
1470+
allocator);
1471+
if (rc != 0) {
1472+
BALL_LOG_ERROR
1473+
<< clusterData->identity().description()
1474+
<< ": Failed to load CSL content to temp cluster "
1475+
<< "state as part of 'sendClusterState', rc: " << rc;
1476+
}
1477+
1478+
ClusterStateLedger::ClusterMessageCRefList uncommittedAdvisories;
1479+
ledger->uncommittedAdvisories(&uncommittedAdvisories);
1480+
for (ClusterStateLedger::ClusterMessageCRefList::const_iterator
1481+
cit = uncommittedAdvisories.begin();
1482+
cit != uncommittedAdvisories.end();
1483+
++cit) {
1484+
apply(&tempState, *cit, *clusterData);
1485+
}
1486+
1487+
loadPartitionsInfo(&advisory.partitions(), tempState);
1488+
loadQueuesInfo(&advisory.queues(), tempState);
1489+
}
14501490
}
14511491
else if (sendPartitionPrimaryInfo) {
14521492
bmqp_ctrlmsg::PartitionPrimaryAdvisory& advisory =
@@ -1455,7 +1495,7 @@ void ClusterUtil::sendClusterState(
14551495
clusterData->electorInfo().nextLeaderMessageSequence(
14561496
&advisory.sequenceNumber());
14571497

1458-
advisory.partitions() = partitions;
1498+
advisory.partitions() = newlyAssigned;
14591499
}
14601500
else {
14611501
BSLS_ASSERT_SAFE(sendQueuesInfo);
@@ -1960,7 +2000,7 @@ void ClusterUtil::loadPartitionsInfo(
19602000
const ClusterState& state)
19612001
{
19622002
// PRECONDITIONS
1963-
BSLS_ASSERT_SAFE(out);
2003+
BSLS_ASSERT_SAFE(out && out->empty());
19642004

19652005
for (int pid = 0; pid < static_cast<int>(state.partitions().size());
19662006
++pid) {

src/groups/mqb/mqbc/mqbc_clusterutil.h

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -272,14 +272,15 @@ struct ClusterUtil {
272272
bslma::Allocator* allocator);
273273

274274
/// Send the current cluster state to follower nodes. If the specified
275-
/// `sendPartitionPrimaryInfo` is true, the specified partition-primary
276-
/// mapping `partitions` will be included. If the specified
277-
/// `sendQueuesInfo` is true, queue-partition assignments will be
275+
/// `sendPartitionPrimaryInfo` is true, the partition-primary assignments
276+
/// will be included. The optionally specified `newlyAssigned` will be
277+
/// sent instead of the current assignments. If the specified
278+
/// `sendQueuesInfo` is true, the queue-partition assignments will be
278279
/// included. If the optionally specified `node` is non-null, send the
279280
/// cluster state to that `node` only. Otherwise, broadcast to all
280-
/// followers. Behavior is undefined unless this node is the leader,
281-
/// and at least one of `sendPartitionPrimaryInfo` or `sendQueuesInfo`
282-
/// is true.
281+
/// followers. Use the specified `allocator` for memory allocations.
282+
/// Behavior is undefined unless this node is the leader, and at least one
283+
/// of `sendPartitionPrimaryInfo` or `sendQueuesInfo` is true.
283284
///
284285
/// THREAD: This method is invoked in the associated cluster's
285286
/// dispatcher thread.
@@ -289,8 +290,9 @@ struct ClusterUtil {
289290
const ClusterState& clusterState,
290291
bool sendPartitionPrimaryInfo,
291292
bool sendQueuesInfo,
293+
bslma::Allocator* allocator,
292294
mqbnet::ClusterNode* node = 0,
293-
const bsl::vector<bmqp_ctrlmsg::PartitionPrimaryInfo>& partitions =
295+
const bsl::vector<bmqp_ctrlmsg::PartitionPrimaryInfo>& newlyAssigned =
294296
bsl::vector<bmqp_ctrlmsg::PartitionPrimaryInfo>());
295297

296298
/// Append to the specified `out` a newly created cluster node

0 commit comments

Comments
 (0)