Skip to content

Commit 13fb462

Browse files
committed
ws solution A
1 parent d88739c commit 13fb462

File tree

6 files changed

+96
-12
lines changed

6 files changed

+96
-12
lines changed

api/v2/cassandracluster_types.go

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ var (
5555
ClusterPhaseRunning = ClusterStateInfo{2, "Running"}
5656
ClusterPhasePending = ClusterStateInfo{3, "Pending"}
5757

58+
//Indicates whether all racks have at least one node Ready
59+
ClusterFirstLayerInitial = ClusterStateInfo{1, "Initializing"}
60+
ClusterFirstLayerRunning = ClusterStateInfo{2, "Running"}
61+
5862
//Available actions
5963
ActionUpdateConfigMap = ClusterStateInfo{1, "UpdateConfigMap"}
6064
ActionUpdateDockerImage = ClusterStateInfo{2, "UpdateDockerImage"}
@@ -179,6 +183,10 @@ func (cc *CassandraCluster) SetDefaults() bool {
179183
}
180184
changed = true
181185
}
186+
if len(cc.Status.FirstLayerPhase) == 0 {
187+
cc.Status.FirstLayerPhase = ClusterFirstLayerInitial.Name
188+
changed = true
189+
}
182190
if ccs.MaxPodUnavailable == 0 {
183191
ccs.MaxPodUnavailable = defaultMaxPodUnavailable
184192
changed = true
@@ -306,7 +314,8 @@ func (cc *CassandraCluster) initTopology(dcName string, rackName string) {
306314
func (cc *CassandraCluster) InitCassandraRackStatus(status *CassandraClusterStatus, dcName string, rackName string) {
307315
dcRackName := cc.GetDCRackName(dcName, rackName)
308316
rackStatus := CassandraRackStatus{
309-
Phase: ClusterPhaseInitial.Name,
317+
Phase: ClusterPhaseInitial.Name,
318+
FirstLayerPhase: ClusterFirstLayerInitial.Name,
310319
CassandraLastAction: CassandraLastAction{
311320
Name: ClusterPhaseInitial.Name,
312321
Status: StatusOngoing,
@@ -655,6 +664,10 @@ func (rack *RackSlice) Remove(idx int) {
655664
*rack = append((*rack)[:idx], (*rack)[idx+1:]...)
656665
}
657666

667+
func (in *CassandraClusterStatus) IsFirstLayerDuringInitialization() bool {
668+
return in.FirstLayerPhase == ClusterFirstLayerInitial.Name
669+
}
670+
658671
// CassandraClusterSpec defines the configuration of CassandraCluster
659672

660673
type CassandraClusterSpec struct {
@@ -938,6 +951,12 @@ type CassandraRackStatus struct {
938951
// Initial -> Running <-> updating
939952
Phase string `json:"phase,omitempty"`
940953

954+
// FirstLayerPhase indicates whether the rack has at least one node Ready so further initial scale-out might be allowed
955+
// Needed to correctly handle `allocate_tokens_for_local_replication_factor` introduced in Cassandra 4.0
956+
// FirstLayerPhase transitions one way only, as shown below:
957+
// Initial -> Running
958+
FirstLayerPhase string `json:"firstLayerPhase,omitempty"`
959+
941960
// CassandraLastAction is the set of Cassandra State & Actions: Active, Standby..
942961
CassandraLastAction CassandraLastAction `json:"cassandraLastAction,omitempty"`
943962

@@ -952,6 +971,12 @@ type CassandraClusterStatus struct {
952971
// Initial -> Running <-> updating
953972
Phase string `json:"phase,omitempty"`
954973

974+
// FirstLayerPhase indicates whether all racks have at least one node Ready so further initial scale-out might be allowed
975+
// Needed to correctly handle `allocate_tokens_for_local_replication_factor` introduced in Cassandra 4.0
976+
// FirstLayerPhase transitions one way only, as shown below:
977+
// Initial -> Running
978+
FirstLayerPhase string `json:"firstLayerPhase,omitempty"`
979+
955980
// Store last action at cluster level
956981
LastClusterAction string `json:"lastClusterAction,omitempty"`
957982
LastClusterActionStatus string `json:"lastClusterActionStatus,omitempty"`

charts/casskop/crds/db.orange.com_cassandraclusters.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2270,6 +2270,9 @@ spec:
22702270
type: array
22712271
items:
22722272
type: string
2273+
firstLayerPhase:
2274+
description: 'FirstLayerPhase indicates whether the rack has at least one node Ready so further initial scale-out might be allowed Needed to correctly handle `allocate_tokens_for_local_replication_factor` introduced in Cassandra 4.0 FirstLayerPhase transitions one way only, as shown below: Initial -> Running'
2275+
type: string
22732276
phase:
22742277
description: |-
22752278
Phase indicates the state this Cassandra cluster jumps in.
@@ -2313,6 +2316,9 @@ spec:
23132316
type: string
23142317
lastClusterActionStatus:
23152318
type: string
2319+
firstLayerPhase:
2320+
description: 'FirstLayerPhase indicates whether all racks have at least one node Ready so further initial scale-out might be allowed Needed to correctly handle `allocate_tokens_for_local_replication_factor` introduced in Cassandra 4.0 FirstLayerPhase transitions one way only, as shown below: Initial -> Running'
2321+
type: string
23162322
phase:
23172323
description: |-
23182324
Phase indicates the state this Cassandra cluster jumps in.

config/crd/bases/db.orange.com_cassandraclusters.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2270,6 +2270,9 @@ spec:
22702270
type: array
22712271
items:
22722272
type: string
2273+
firstLayerPhase:
2274+
description: 'FirstLayerPhase indicates whether the rack has at least one node Ready so further initial scale-out might be allowed Needed to correctly handle `allocate_tokens_for_local_replication_factor` introduced in Cassandra 4.0 FirstLayerPhase transitions one way only, as shown below: Initial -> Running'
2275+
type: string
22732276
phase:
22742277
description: |-
22752278
Phase indicates the state this Cassandra cluster jumps in.
@@ -2308,6 +2311,9 @@ spec:
23082311
format: date-time
23092312
status:
23102313
type: string
2314+
firstLayerPhase:
2315+
description: 'FirstLayerPhase indicates whether all racks have at least one node Ready so further initial scale-out might be allowed Needed to correctly handle `allocate_tokens_for_local_replication_factor` introduced in Cassandra 4.0 FirstLayerPhase transitions one way only, as shown below: Initial -> Running'
2316+
type: string
23112317
lastClusterAction:
23122318
description: Store last action at cluster level
23132319
type: string

controllers/cassandracluster/cassandra_status.go

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -457,20 +457,35 @@ func (rcc *CassandraClusterReconciler) UpdateCassandraRackStatusPhase(ctx contex
457457
if err != nil || len(podsList.Items) < 1 {
458458
return
459459
}
460-
if len(podsList.Items) < int(nodesPerRacks) {
461-
logrus.WithFields(logrusFields).Infof("StatefulSet is scaling up")
462-
}
463-
pod := podsList.Items[nodesPerRacks-1]
464-
if cassandraPodIsReady(&pod) {
465-
status.CassandraRackStatus[dcRackName].Phase = api.ClusterPhaseRunning.Name
466-
ClusterPhaseMetric.set(api.ClusterPhaseRunning, cc.Name)
467-
now := metav1.Now()
468-
lastAction.EndTime = &now
469-
lastAction.Status = api.StatusDone
470-
logrus.WithFields(logrusFields).Infof("StatefulSet: Replicas count is okay")
460+
if cc.Status.IsFirstLayerDuringInitialization() {
461+
if len(podsList.Items) >= 1 {
462+
//TODO: maybe also check cassandraPodIsReady() on pod 0 (currently only the pod's existence is checked)
463+
status.CassandraRackStatus[dcRackName].FirstLayerPhase = api.ClusterFirstLayerRunning.Name
464+
logrus.WithFields(logrusFields).Infof("StatefulSet: first layer replicas count is okay")
465+
}
466+
} else {
467+
if len(podsList.Items) < int(nodesPerRacks) {
468+
logrus.WithFields(logrusFields).Infof("StatefulSet is scaling up")
469+
return
470+
}
471+
//TODO: confirm nodesPerRacks >= 1 here, otherwise the index below is out of range
472+
pod := podsList.Items[nodesPerRacks-1]
473+
if cassandraPodIsReady(&pod) {
474+
status.CassandraRackStatus[dcRackName].Phase = api.ClusterPhaseRunning.Name
475+
ClusterPhaseMetric.set(api.ClusterPhaseRunning, cc.Name)
476+
now := metav1.Now()
477+
lastAction.EndTime = &now
478+
lastAction.Status = api.StatusDone
479+
logrus.WithFields(logrusFields).Infof("StatefulSet: Replicas count is okay")
480+
}
471481
}
472482
}
473483

484+
if cc.Status.IsFirstLayerDuringInitialization() {
485+
logrus.WithFields(logrusFields).Infof("StatefulSet: do not change rack phase while first layer is initializing")
486+
return
487+
}
488+
474489
//No more in Initializing state
475490
if isStatefulSetNotReady(storedStatefulSet) {
476491
logrus.WithFields(logrusFields).Infof("StatefulSet: Replicas count is not okay")

controllers/cassandracluster/generator.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,10 @@ func generateCassandraStatefulSet(cc *api.CassandraCluster, status *api.Cassandr
347347

348348
nodeAffinity := createNodeAffinity(nodeSelector)
349349
nodesPerRacks := cc.GetNodesPerRacks(dcRackName)
350+
if cc.Status.IsFirstLayerDuringInitialization() {
351+
nodesPerRacks = 1
352+
}
353+
350354
rollingPartition := cc.GetRollingPartitionPerRacks(dcRackName)
351355
terminationPeriod := int64(api.DefaultTerminationGracePeriodSeconds)
352356
var annotations = map[string]string{}

controllers/cassandracluster/reconcile.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,14 @@ func (rcc *CassandraClusterReconciler) ReconcileRack(ctx context.Context, cc *ap
542542
return nil
543543
}
544544

545+
// in first layer (pre-initial) phase move to next rack as soon as current rack has 1 ready replica
546+
if status.IsFirstLayerDuringInitialization() {
547+
dcRackStatus := status.CassandraRackStatus[dcRackName]
548+
if dcRackStatus.FirstLayerPhase == api.ClusterFirstLayerRunning.Name {
549+
continue
550+
}
551+
}
552+
545553
//If the Phase is not running then we won't check on Next Racks so we return
546554
//We don't want to make any changes in 2 racks at the same time
547555
if dcRackStatus.Phase != api.ClusterPhaseRunning.Name ||
@@ -556,6 +564,26 @@ func (rcc *CassandraClusterReconciler) ReconcileRack(ctx context.Context, cc *ap
556564

557565
}
558566

567+
if status.IsFirstLayerDuringInitialization() {
568+
allReady := true
569+
for dc := 0; dc < cc.GetDCSize(); dc++ {
570+
dcName := cc.GetDCName(dc)
571+
for rack := 0; rack < cc.GetRackSize(dc); rack++ {
572+
rackName := cc.GetRackName(dc, rack)
573+
dcRackName := cc.GetDCRackName(dcName, rackName)
574+
dcRackStatus := status.CassandraRackStatus[dcRackName]
575+
if dcRackStatus.FirstLayerPhase != api.ClusterFirstLayerRunning.Name {
576+
allReady = false
577+
break
578+
}
579+
}
580+
}
581+
if allReady {
582+
logrus.WithFields(logrus.Fields{"cluster": cc.Name}).Info("FirstLayer is now Ready on all racks")
583+
status.FirstLayerPhase = api.ClusterFirstLayerRunning.Name
584+
}
585+
}
586+
559587
if newStatus {
560588
return nil
561589
}

0 commit comments

Comments
 (0)