27 changes: 26 additions & 1 deletion api/v2/cassandracluster_types.go
@@ -55,6 +55,10 @@
ClusterPhaseRunning = ClusterStateInfo{2, "Running"}
ClusterPhasePending = ClusterStateInfo{3, "Pending"}

//Indicates whether all racks have at least one node Ready
ClusterFirstLayerInitial = ClusterStateInfo{1, "Initializing"}
ClusterFirstLayerRunning = ClusterStateInfo{2, "Running"}

//Available actions
ActionUpdateConfigMap = ClusterStateInfo{1, "UpdateConfigMap"}
ActionUpdateDockerImage = ClusterStateInfo{2, "UpdateDockerImage"}
@@ -172,13 +176,17 @@
if len(cc.Status.Phase) == 0 {
cc.Status.Phase = ClusterPhaseInitial.Name
if cc.InitCassandraRackList() < 1 {
logrus.Errorf("[%s]: We should have at list One Rack, Please correct the Error", cc.Name)

Check failure on line 179 in api/v2/cassandracluster_types.go (GitHub Actions / lint): cc.Name undefined (type *CassandraCluster has no field or method Name) (typecheck)
}
if cc.Status.SeedList == nil {
cc.Status.SeedList = cc.InitSeedList()
}
changed = true
}
if len(cc.Status.FirstLayerPhase) == 0 {
cc.Status.FirstLayerPhase = ClusterFirstLayerInitial.Name
changed = true
}
if ccs.MaxPodUnavailable == 0 {
ccs.MaxPodUnavailable = defaultMaxPodUnavailable
changed = true
@@ -306,7 +314,8 @@
func (cc *CassandraCluster) InitCassandraRackStatus(status *CassandraClusterStatus, dcName string, rackName string) {
dcRackName := cc.GetDCRackName(dcName, rackName)
rackStatus := CassandraRackStatus{
Phase: ClusterPhaseInitial.Name,
Phase: ClusterPhaseInitial.Name,
FirstLayerPhase: ClusterFirstLayerInitial.Name,
CassandraLastAction: CassandraLastAction{
Name: ClusterPhaseInitial.Name,
Status: StatusOngoing,
@@ -655,6 +664,10 @@
*rack = append((*rack)[:idx], (*rack)[idx+1:]...)
}

func (in *CassandraClusterStatus) IsFirstLayerDuringInitialization() bool {
return in.FirstLayerPhase == ClusterFirstLayerInitial.Name
}

// CassandraClusterSpec defines the configuration of CassandraCluster

type CassandraClusterSpec struct {
@@ -938,6 +951,12 @@
// Initial -> Running <-> updating
Phase string `json:"phase,omitempty"`

// FirstLayerPhase indicates whether the rack has at least one node Ready, so that further initial scale-out can be allowed
// Needed to correctly handle `allocate_tokens_for_local_replication_factor` introduced in Cassandra 4.0
// FirstLayerPhase transitions one way only:
// Initial -> Running
FirstLayerPhase string `json:"firstLayerPhase,omitempty"`

// CassandraLastAction is the set of Cassandra State & Actions: Active, Standby..
CassandraLastAction CassandraLastAction `json:"cassandraLastAction,omitempty"`

@@ -952,6 +971,12 @@
// Initial -> Running <-> updating
Phase string `json:"phase,omitempty"`

// FirstLayerPhase indicates whether all racks have at least one node Ready, so that further initial scale-out can be allowed
// Needed to correctly handle `allocate_tokens_for_local_replication_factor` introduced in Cassandra 4.0
// FirstLayerPhase transitions one way only:
// Initial -> Running
FirstLayerPhase string `json:"firstLayerPhase,omitempty"`

// Store last action at cluster level
LastClusterAction string `json:"lastClusterAction,omitempty"`
LastClusterActionStatus string `json:"lastClusterActionStatus,omitempty"`
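For reviewers, a minimal sketch of how the new one-way FirstLayerPhase transition and the `IsFirstLayerDuringInitialization` helper are meant to be used; the module path and the standalone `main` are assumptions for illustration, not part of this diff:

```go
package main

import (
	"fmt"

	api "github.com/cscetbon/casskop/api/v2" // assumed module path
)

func main() {
	status := &api.CassandraClusterStatus{}

	// Defaulting mirrors the change above: an empty FirstLayerPhase starts as Initializing.
	if len(status.FirstLayerPhase) == 0 {
		status.FirstLayerPhase = api.ClusterFirstLayerInitial.Name // "Initializing"
	}
	fmt.Println(status.IsFirstLayerDuringInitialization()) // true

	// The phase moves one way only: once every rack has a Ready node it becomes
	// Running and never returns to Initializing.
	status.FirstLayerPhase = api.ClusterFirstLayerRunning.Name // "Running"
	fmt.Println(status.IsFirstLayerDuringInitialization()) // false
}
```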
6 changes: 6 additions & 0 deletions charts/casskop/crds/db.orange.com_cassandraclusters.yaml
@@ -2270,6 +2270,9 @@ spec:
type: array
items:
type: string
firstLayerPhase:
description: 'FirstLayerPhase indicates whether the rack has at least one node Ready, so that further initial scale-out can be allowed. Needed to correctly handle `allocate_tokens_for_local_replication_factor` introduced in Cassandra 4.0. FirstLayerPhase transitions one way only: Initial -> Running'
type: string
phase:
description: |-
Phase indicates the state this Cassandra cluster jumps in.
@@ -2313,6 +2316,9 @@
type: string
lastClusterActionStatus:
type: string
firstLayerPhase:
description: 'FirstLayerPhase indicates whether all racks have at least one node Ready, so that further initial scale-out can be allowed. Needed to correctly handle `allocate_tokens_for_local_replication_factor` introduced in Cassandra 4.0. FirstLayerPhase transitions one way only: Initial -> Running'
type: string
phase:
description: |-
Phase indicates the state this Cassandra cluster jumps in.
6 changes: 6 additions & 0 deletions config/crd/bases/db.orange.com_cassandraclusters.yaml
@@ -2270,6 +2270,9 @@ spec:
type: array
items:
type: string
firstLayerPhase:
description: 'FirstLayerPhase indicates whether the rack has at least one node Ready, so that further initial scale-out can be allowed. Needed to correctly handle `allocate_tokens_for_local_replication_factor` introduced in Cassandra 4.0. FirstLayerPhase transitions one way only: Initial -> Running'
type: string
phase:
description: |-
Phase indicates the state this Cassandra cluster jumps in.
@@ -2308,6 +2311,9 @@
format: date-time
status:
type: string
firstLayerPhase:
description: 'FirstLayerPhase indicates whether all racks have at least one node Ready, so that further initial scale-out can be allowed. Needed to correctly handle `allocate_tokens_for_local_replication_factor` introduced in Cassandra 4.0. FirstLayerPhase transitions one way only: Initial -> Running'
type: string
lastClusterAction:
description: Store last action at cluster level
type: string
37 changes: 26 additions & 11 deletions controllers/cassandracluster/cassandra_status.go
@@ -457,20 +457,35 @@ func (rcc *CassandraClusterReconciler) UpdateCassandraRackStatusPhase(ctx contex
if err != nil || len(podsList.Items) < 1 {
return
}
if len(podsList.Items) < int(nodesPerRacks) {
logrus.WithFields(logrusFields).Infof("StatefulSet is scaling up")
}
pod := podsList.Items[nodesPerRacks-1]
if cassandraPodIsReady(&pod) {
status.CassandraRackStatus[dcRackName].Phase = api.ClusterPhaseRunning.Name
ClusterPhaseMetric.set(api.ClusterPhaseRunning, cc.Name)
now := metav1.Now()
lastAction.EndTime = &now
lastAction.Status = api.StatusDone
logrus.WithFields(logrusFields).Infof("StatefulSet: Replicas count is okay")
if cc.Status.IsFirstLayerDuringInitialization() {
if len(podsList.Items) >= 1 {
//TODO: maybe also check cassandraPodIsReady() on pod 0
status.CassandraRackStatus[dcRackName].FirstLayerPhase = api.ClusterFirstLayerRunning.Name
logrus.WithFields(logrusFields).Infof("StatefulSet: first layer replicas count is okay")
}
} else {
if len(podsList.Items) < int(nodesPerRacks) {
logrus.WithFields(logrusFields).Infof("StatefulSet is scaling up")
return
}
//TODO: possible index out of range if the pod list is shorter than nodesPerRacks?
pod := podsList.Items[nodesPerRacks-1]
if cassandraPodIsReady(&pod) {
status.CassandraRackStatus[dcRackName].Phase = api.ClusterPhaseRunning.Name
ClusterPhaseMetric.set(api.ClusterPhaseRunning, cc.Name)
now := metav1.Now()
lastAction.EndTime = &now
lastAction.Status = api.StatusDone
logrus.WithFields(logrusFields).Infof("StatefulSet: Replicas count is okay")
}
}
}

if cc.Status.IsFirstLayerDuringInitialization() {
logrus.WithFields(logrusFields).Infof("StatefulSet: do not change rack phase while first layer is initializing")
return
}

//No more in Initializing state
if isStatefulSetNotReady(storedStatefulSet) {
logrus.WithFields(logrusFields).Infof("StatefulSet: Replicas count is not okay")
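On the `//TODO` about a possible index out of range above: a defensive helper along these lines would avoid indexing past the pod list when nodesPerRacks exceeds the number of pods returned. This is a sketch under that assumption, not code from the PR:

```go
package cassandracluster

import (
	corev1 "k8s.io/api/core/v1"
)

// podAtRackIndex returns the pod expected at position nodesPerRacks-1, or false
// when the pod list is shorter than nodesPerRacks (for example while the
// StatefulSet is still scaling up). Illustrative only.
func podAtRackIndex(pods []corev1.Pod, nodesPerRacks int32) (*corev1.Pod, bool) {
	idx := int(nodesPerRacks) - 1
	if idx < 0 || idx >= len(pods) {
		return nil, false
	}
	return &pods[idx], true
}
```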
4 changes: 4 additions & 0 deletions controllers/cassandracluster/generator.go
@@ -347,6 +347,10 @@ func generateCassandraStatefulSet(cc *api.CassandraCluster, status *api.Cassandr

nodeAffinity := createNodeAffinity(nodeSelector)
nodesPerRacks := cc.GetNodesPerRacks(dcRackName)
if cc.Status.IsFirstLayerDuringInitialization() {
nodesPerRacks = 1
}

rollingPartition := cc.GetRollingPartitionPerRacks(dcRackName)
terminationPeriod := int64(api.DefaultTerminationGracePeriodSeconds)
var annotations = map[string]string{}
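The generator change above starts each rack's StatefulSet with a single replica while the cluster's first layer is initializing, so further scale-out only happens once every rack has its first Ready node. A sketch of that clamp as a standalone helper; the PR applies it inline and the helper name is invented for illustration:

```go
package cassandracluster

// firstLayerReplicas returns the replica count to use for a rack's StatefulSet:
// one node per rack while the cluster-level first layer is still initializing,
// the configured nodesPerRacks otherwise. Illustrative helper, not the PR's code.
func firstLayerReplicas(nodesPerRacks int32, firstLayerInitializing bool) int32 {
	if firstLayerInitializing {
		return 1
	}
	return nodesPerRacks
}
```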
28 changes: 28 additions & 0 deletions controllers/cassandracluster/reconcile.go
@@ -542,6 +542,14 @@ func (rcc *CassandraClusterReconciler) ReconcileRack(ctx context.Context, cc *ap
return nil
}

// In the first layer (pre-initial) phase, move to the next rack as soon as the current rack has 1 Ready replica
if status.IsFirstLayerDuringInitialization() {
dcRackStatus := status.CassandraRackStatus[dcRackName]
if dcRackStatus.FirstLayerPhase == api.ClusterFirstLayerRunning.Name {
continue
}
}

//If the Phase is not running then we won't check on Next Racks so we return
//We don't want to make any changes in 2 racks at the same time
if dcRackStatus.Phase != api.ClusterPhaseRunning.Name ||
@@ -556,6 +564,26 @@ func (rcc *CassandraClusterReconciler) ReconcileRack(ctx context.Context, cc *ap

}

if status.IsFirstLayerDuringInitialization() {
allReady := true
for dc := 0; dc < cc.GetDCSize(); dc++ {
dcName := cc.GetDCName(dc)
for rack := 0; rack < cc.GetRackSize(dc); rack++ {
rackName := cc.GetRackName(dc, rack)
dcRackName := cc.GetDCRackName(dcName, rackName)
dcRackStatus := status.CassandraRackStatus[dcRackName]
if dcRackStatus.FirstLayerPhase != api.ClusterFirstLayerRunning.Name {
allReady = false
break
}
}
}
if allReady {
logrus.WithFields(logrus.Fields{"cluster": cc.Name}).Info("FirstLayer is now Ready on all racks")
status.FirstLayerPhase = api.ClusterFirstLayerRunning.Name
}
}

if newStatus {
return nil
}
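To make the cluster-level aggregation concrete, a sketch of the same check expressed over the rack status map; it ranges over `CassandraRackStatus` directly instead of iterating DCs and racks by index as the PR does, and the `main` wrapper plus module path are assumptions:

```go
package main

import (
	"fmt"

	api "github.com/cscetbon/casskop/api/v2" // assumed module path
)

// allRacksFirstLayerReady reports whether every rack has reached the Running
// first-layer phase, i.e. has at least one Ready node.
func allRacksFirstLayerReady(status *api.CassandraClusterStatus) bool {
	for _, rackStatus := range status.CassandraRackStatus {
		if rackStatus.FirstLayerPhase != api.ClusterFirstLayerRunning.Name {
			return false
		}
	}
	return true
}

func main() {
	status := &api.CassandraClusterStatus{
		FirstLayerPhase: api.ClusterFirstLayerInitial.Name,
		CassandraRackStatus: map[string]*api.CassandraRackStatus{
			"dc1-rack1": {FirstLayerPhase: api.ClusterFirstLayerRunning.Name},
			"dc1-rack2": {FirstLayerPhase: api.ClusterFirstLayerRunning.Name},
		},
	}
	if allRacksFirstLayerReady(status) {
		status.FirstLayerPhase = api.ClusterFirstLayerRunning.Name
	}
	fmt.Println(status.FirstLayerPhase) // Running
}
```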