Skip to content

Commit 7599a24

Browse files
authored
Fix issue with rebalancing never happens with auto-sharding (#531)
* Fix issue with rebalancing never happens with auto-sharding * Add test for rebalancing scheme generation to make sure it works fine * Fix various critical issues in auto sharding rebalancing logic * Fix sharding rebalancing: recreate distributed table not just drop * Fix issue with cluster script * Update schema in proper time to fix issue with 5 nodes sharding rebalancing * Remove wrong added script * Remove debugging information * Fix: activate auto sharding on new node appeared in the cluster * feat(sharding): add shard creation for new nodes during rebalance - Log distinct messages for rebalancing due to inactive vs new nodes - Detect new nodes joining cluster and create missing shard tables - Set up replication from existing nodes to new nodes for new shards - Improve shard map initialization to avoid undefined keys * feat(sharding): skip rebalancing when replication factor is 1 - Prevent data movement and replication setup if original RF=1 to avoid data loss - Only add new nodes without redistributing shards when RF=1 - Perform full rebalancing and replication setup only if RF > 1 - Add method to calculate original replication factor from schema - Update rebalance logic and shard creation to respect RF for safe operations * Fix test * Fix cognitive complexity
1 parent 15ed357 commit 7599a24

File tree

5 files changed

+439
-42
lines changed

5 files changed

+439
-42
lines changed

bin/cluster

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ if ((nodes < 2)); then
1010
echo "Number of nodes must be greater than 1"
1111
exit 1
1212
fi
13-
if ((nodes > 4)); then
14-
echo "Number of nodes must be less than or equal to 555"
13+
if ((nodes > 5)); then
14+
echo "Number of nodes must be less than or equal to 5"
1515
exit 1
1616
fi
1717

src/Plugin/Sharding/Operator.php

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,11 +100,9 @@ public function checkMaster(): static {
100100
public function checkBalance(): static {
101101
$cluster = $this->getCluster();
102102
$queue = $this->getQueue();
103+
// We get inactive nodes to exclude them from the rebalance in case of outages
104+
// It may be an empty list if we're adding a new node to the cluster, which is fine
103105
$inactiveNodes = $cluster->getInactiveNodes();
104-
// Do rebalance in case we have inactive nodes
105-
if ($inactiveNodes->isEmpty()) {
106-
return $this;
107-
}
108106

109107
// Do exclude repeated rebalancing we take hash of active nodes
110108
// and if the same, we do nothing, otherwise, it shows the change
@@ -115,7 +113,12 @@ public function checkBalance(): static {
115113
if ($clusterHash === $currentHash) {
116114
return $this;
117115
}
118-
Buddy::info("Rebalancing due to inactive nodes: {$inactiveNodes->join(', ')}");
116+
117+
if ($inactiveNodes->count() > 0) {
118+
Buddy::info("Rebalancing due to inactive nodes: {$inactiveNodes->join(', ')}");
119+
} else {
120+
Buddy::info('Rebalancing due to new nodes joined');
121+
}
119122

120123
// Get all tables from the state we have
121124
$list = $this->state->listRegex('table:.+');

src/Plugin/Sharding/Table.php

Lines changed: 151 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -113,14 +113,19 @@ public function getConnectedNodes(Set $shards): Set {
113113
* @return Vector<array{node:string,shards:Set<int>}>
114114
*/
115115
public function getExternalNodeShards(Set $shards): Vector {
116+
// There is a case when we have ONLY distributed table with shards
117+
// but all shards are remote agents and nothing locally
118+
$shardsCond = $shards->count() > 0
119+
? "AND ANY(shards) not in ({$shards->join(',')})"
120+
: '';
121+
116122
$query = "
117123
SELECT node, shards FROM {$this->table}
118124
WHERE
119125
cluster = '{$this->cluster->name}'
120126
AND
121127
table = '{$this->name}'
122-
AND
123-
ANY(shards) not in ({$shards->join(',')})
128+
$shardsCond
124129
ORDER BY id ASC
125130
";
126131

@@ -192,27 +197,21 @@ public function shard(
192197
$nodes = new Set;
193198

194199
$schema = $this->configureNodeShards($shardCount, $replicationFactor);
195-
$reduceFn = function (Map $clusterMap, array $row) use ($queue, $replicationFactor, &$nodes, &$nodeShardsMap) {
200+
$reduceFn = function (Map $clusterMap, array $row) use ($queue, &$nodes, &$nodeShardsMap) {
196201
/** @var Map<string,Cluster> $clusterMap */
197202
$nodes->add($row['node']);
198203
$nodeShardsMap[$row['node']] = $row['shards'];
199204

200205
foreach ($row['shards'] as $shard) {
201206
$connectedNodes = $this->getConnectedNodes(new Set([$shard]));
202207

203-
if ($replicationFactor > 1) {
204-
$clusterMap = $this->handleReplication(
205-
$row['node'],
206-
$queue,
207-
$connectedNodes,
208-
$clusterMap,
209-
$shard
210-
);
211-
} else {
212-
// If no replication, add create table shard SQL to queue
213-
$sql = $this->getCreateTableShardSQL($shard);
214-
$queue->add($row['node'], $sql);
215-
}
208+
$clusterMap = $this->handleReplication(
209+
$row['node'],
210+
$queue,
211+
$connectedNodes,
212+
$clusterMap,
213+
$shard
214+
);
216215
}
217216

218217
return $clusterMap;
@@ -227,10 +226,7 @@ public function shard(
227226
/** @var Set<int> */
228227
$queueIds = new Set;
229228
foreach ($nodeShardsMap as $node => $shards) {
230-
// Do nothing when no shards present for this node
231-
if (!$shards->count()) {
232-
continue;
233-
}
229+
// Even when no shards, we still create distributed table
234230
$sql = $this->getCreateShardedTableSQL($shards);
235231
$queueId = $queue->add($node, $sql);
236232
$queueIds->add($queueId);
@@ -311,6 +307,13 @@ protected function handleReplication(
311307
Map $clusterMap,
312308
int $shard
313309
): Map {
310+
// If no replication, add create table shard SQL to queue
311+
if ($connectedNodes->count() === 1) {
312+
$sql = $this->getCreateTableShardSQL($shard);
313+
$queue->add($node, $sql);
314+
return $clusterMap;
315+
}
316+
314317
$clusterName = static::getClusterName($connectedNodes);
315318
$hasCluster = isset($clusterMap[$clusterName]);
316319
if ($hasCluster) {
@@ -354,9 +357,6 @@ public function rebalance(Queue $queue): void {
354357
$schema = $this->getShardSchema();
355358
$allNodes = $this->cluster->getNodes();
356359
$inactiveNodes = $this->cluster->getInactiveNodes();
357-
if (!$inactiveNodes->count()) {
358-
return;
359-
}
360360
$activeNodes = $allNodes->diff($inactiveNodes);
361361
$newSchema = Util::rebalanceShardingScheme($schema, $activeNodes);
362362

@@ -376,6 +376,7 @@ public function rebalance(Queue $queue): void {
376376
$clusterMap[$clusterName] = $cluster;
377377
}
378378

379+
// Get affected schema with nodes that are out
379380
$affectedSchema = $schema->filter(
380381
fn ($row) => $inactiveNodes->contains($row['node'])
381382
);
@@ -387,7 +388,7 @@ public function rebalance(Queue $queue): void {
387388
// First thing first, remove from inactive node using the queue
388389
$this->cleanUpNode($queue, $row['node'], $row['shards'], $processedTables);
389390

390-
// Do real rebaliance now
391+
// Do real rebalancing now
391392
foreach ($row['shards'] as $shard) {
392393
/** @var Set<string> */
393394
$nodesForShard = new Set;
@@ -438,20 +439,34 @@ public function rebalance(Queue $queue): void {
438439

439440
/** @var Set<int> */
440441
$queueIds = new Set;
442+
foreach ($clusterMap as $cluster) {
443+
$cluster->processPendingTables($queue);
444+
}
445+
446+
// Handle new nodes that need shard creation
447+
$originalNodes = new Set($schema->map(fn($row) => $row['node']));
448+
$newNodes = $activeNodes->diff($originalNodes);
449+
450+
451+
452+
if ($newNodes->count() > 0) {
453+
$this->handleShardCreationForRebalancing($queue, $schema, $newSchema, $clusterMap);
454+
}
455+
456+
// At this case we update schema
457+
// before creating distributed table
458+
$this->updateScheme($newSchema);
441459
foreach ($newSchema as $row) {
442-
$sql = "DROP TABLE {$this->name} OPTION force=1";
460+
// We should drop distributed table everywhere
461+
// even when node has ONLY it but may have no shards on it
462+
$sql = "DROP TABLE IF EXISTS {$this->name} OPTION force=1";
443463
$queueId = $queue->add($row['node'], $sql);
444464
$queueIds->add($queueId);
445-
// Do nothing when no shards present for this node
446-
if (!$row['shards']->count()) {
447-
continue;
448-
}
465+
449466
$sql = $this->getCreateShardedTableSQL($row['shards']);
450467
$queueId = $queue->add($row['node'], $sql);
451468
$queueIds->add($queueId);
452469
}
453-
454-
$this->updateScheme($newSchema);
455470
} catch (\Throwable $t) {
456471
var_dump($t->getMessage());
457472
}
@@ -659,10 +674,12 @@ protected function getCreateShardedTableSQL(Set $shards): string {
659674
$map = new Map;
660675
foreach ($nodes as $row) {
661676
foreach ($row['shards'] as $shard) {
662-
$map[$shard] ??= new Set;
677+
if (!$map->hasKey($shard)) {
678+
$map[$shard] = new Set();
679+
}
663680
$shardName = $this->getShardName($shard);
664-
// @phpstan-ignore-next-line
665-
$map[$shard]->add("{$row['node']}:{$shardName}");
681+
682+
$map[$shard]?->add("{$row['node']}:{$shardName}");
666683
}
667684
}
668685

@@ -738,6 +755,105 @@ public function setup(): void {
738755
* @return Set<int>
739756
*/
740757
protected static function parseShards(string $shards): Set {
741-
return new Set(array_map('intval', explode(',', $shards)));
758+
return trim($shards) !== ''
759+
? new Set(array_map('intval', explode(',', $shards)))
760+
: new Set
761+
;
762+
}
763+
764+
/**
765+
* Handle shard creation for rebalancing (all nodes that need new shards)
766+
*
767+
* SAFETY: Respects original replication factor - with RF=1, only creates shard tables
768+
* but does NOT set up replication to prevent data movement and potential data loss.
769+
*
770+
* @param Queue $queue
771+
* @param Vector<array{node:string,shards:Set<int>,connections:Set<string>}> $oldSchema
772+
* @param Vector<array{node:string,shards:Set<int>,connections:Set<string>}> $newSchema
773+
* @param Map<string,Cluster> $clusterMap
774+
* @return void
775+
*/
776+
protected function handleShardCreationForRebalancing(
777+
Queue $queue,
778+
Vector $oldSchema,
779+
Vector $newSchema,
780+
Map $clusterMap
781+
): void {
782+
// Calculate original replication factor to ensure safe operations
783+
$originalRf = $this->calculateReplicationFactor($oldSchema);
784+
785+
// Create map of old schema for comparison
786+
/** @var Map<string,Set<int>> */
787+
$oldShardMap = new Map();
788+
foreach ($oldSchema as $row) {
789+
$oldShardMap[$row['node']] = $row['shards'];
790+
}
791+
792+
foreach ($newSchema as $row) {
793+
$oldShards = $oldShardMap->get($row['node'], new Set());
794+
$newShards = $row['shards'];
795+
$shardsToCreate = $newShards->diff($oldShards);
796+
797+
if ($shardsToCreate->isEmpty()) {
798+
continue;
799+
}
800+
801+
// Create missing shard tables on this node
802+
foreach ($shardsToCreate as $shard) {
803+
$sql = $this->getCreateTableShardSQL($shard);
804+
$queue->add($row['node'], $sql);
805+
806+
// Find nodes that already have this shard in old schema for replication
807+
$existingNodesWithShard = $oldSchema->filter(
808+
fn($existingRow) => $existingRow['shards']->contains($shard)
809+
);
810+
811+
// If there are existing nodes with this shard, set up replication
812+
// BUT only if original RF > 1 (safe to replicate)
813+
if ($existingNodesWithShard->count() <= 0 || $originalRf === 1) {
814+
continue;
815+
}
816+
817+
$sourceNode = $existingNodesWithShard->first()['node'];
818+
$connectedNodes = new Set([$row['node'], $sourceNode]);
819+
820+
// Set up cluster replication for this shard
821+
$this->handleReplication(
822+
$sourceNode,
823+
$queue,
824+
$connectedNodes,
825+
$clusterMap,
826+
$shard
827+
);
828+
}
829+
}
830+
}
831+
832+
/**
833+
* Calculate the original replication factor from the schema
834+
* @param Vector<array{node:string,shards:Set<int>,connections:Set<string>}> $schema
835+
* @return int
836+
*/
837+
private function calculateReplicationFactor(Vector $schema): int {
838+
if ($schema->isEmpty()) {
839+
return 1;
840+
}
841+
842+
// Count how many nodes have each shard
843+
$shardCounts = new Map();
844+
845+
foreach ($schema as $row) {
846+
foreach ($row['shards'] as $shard) {
847+
$currentCount = $shardCounts->get($shard, 0);
848+
$shardCounts->put($shard, $currentCount + 1);
849+
}
850+
}
851+
852+
if ($shardCounts->isEmpty()) {
853+
return 1;
854+
}
855+
856+
// The replication factor is the maximum count of any shard
857+
return max($shardCounts->values()->toArray());
742858
}
743859
}

0 commit comments

Comments
 (0)