|
26 | 26 | import java.util.Set; |
27 | 27 | import java.util.concurrent.Callable; |
28 | 28 | import java.util.concurrent.ExecutorService; |
| 29 | +import java.util.stream.Collectors; |
29 | 30 |
|
30 | 31 | import org.apache.helix.HelixDataAccessor; |
31 | 32 | import org.apache.helix.HelixDefinedState; |
|
57 | 58 | * Compares the currentState, pendingState with IdealState and generate messages |
58 | 59 | */ |
59 | 60 | public class MessageGenerationPhase extends AbstractBaseStage { |
60 | | - private final static String NO_DESIRED_STATE = "NoDesiredState"; |
| 61 | + private static final String NO_DESIRED_STATE = "NoDesiredState"; |
61 | 62 |
|
62 | 63 | // If we see there is any invalid pending message leaving on host, i.e. message |
63 | 64 | // tells participant to change from SLAVE to MASTER, and the participant is already |
64 | 65 | // at MASTER state, we wait for timeout and if the message is still not cleaned up by |
65 | 66 | // participant, controller will cleanup them proactively to unblock further state |
66 | 67 | // transition |
67 | | - public final static long DEFAULT_OBSELETE_MSG_PURGE_DELAY = HelixUtil |
| 68 | + public static final long DEFAULT_OBSELETE_MSG_PURGE_DELAY = HelixUtil |
68 | 69 | .getSystemPropertyAsLong(SystemPropertyKeys.CONTROLLER_MESSAGE_PURGE_DELAY, 60 * 1000); |
69 | | - private final static String PENDING_MESSAGE = "pending message"; |
70 | | - private final static String STALE_MESSAGE = "stale message"; |
| 70 | + private static final String PENDING_MESSAGE = "pending message"; |
| 71 | + private static final String STALE_MESSAGE = "stale message"; |
| 72 | + private static final String OFFLINE = "OFFLINE"; |
71 | 73 |
|
72 | 74 | private static Logger logger = LoggerFactory.getLogger(MessageGenerationPhase.class); |
73 | 75 |
|
@@ -163,6 +165,18 @@ private void generateMessage(final Resource resource, final BaseControllerDataPr |
163 | 165 | // desired-state->list of generated-messages |
164 | 166 | Map<String, List<Message>> messageMap = new HashMap<>(); |
165 | 167 |
|
| 168 | + /* |
| 169 | + * Calculate the current active replica count based on state model type. |
| 170 | + * This represents the number of replicas currently serving traffic for this partition |
| 171 | + * Active replicas include: top states, secondary top states(excluding OFFLINE) and ERROR |
| 172 | + * states. |
| 173 | + * Active replicas exclude: OFFLINE and DROPPED states. |
| 174 | + * All qualifying state transitions for this partition will receive this same value, |
| 175 | + * allowing clients to understand the current availability level and prioritize accordingly. |
| 176 | + */ |
| 177 | + int currentActiveReplicaCount = |
| 178 | + calculateCurrentActiveReplicaCount(currentStateMap, stateModelDef); |
| 179 | + |
166 | 180 | for (String instanceName : instanceStateMap.keySet()) { |
167 | 181 |
|
168 | 182 | Set<Message> staleMessages = cache.getStaleMessagesByInstance(instanceName); |
@@ -250,17 +264,39 @@ private void generateMessage(final Resource resource, final BaseControllerDataPr |
250 | 264 | pendingMessage, manager, resource, partition, sessionIdMap, instanceName, |
251 | 265 | stateModelDef, cancellationMessage, isCancellationEnabled); |
252 | 266 | } else { |
| 267 | + // Set currentActiveReplicaNumber to provide metadata for potential message |
| 268 | + // prioritization by participant. |
| 269 | + // Assign the current active replica count to all qualifying upward transitions for this |
| 270 | + // partition. |
| 271 | + // This ensures consistent prioritization metadata across concurrent state transitions. |
| 272 | + // -1 indicates no prioritization metadata; e.g., downward ST messages get -1. |
| 273 | + int currentActiveReplicaNumber = -1; |
| 274 | + |
| 275 | + /* |
| 276 | + * Assign currentActiveReplicaNumber for qualifying upward state transitions. |
| 277 | + * Criteria for assignment: |
| 278 | + * - Must be an upward state transition according to state model |
| 279 | + * - Target state must be considered active (according to state model type) |
| 280 | + */ |
| 281 | + if (stateModelDef.isUpwardStateTransition(currentState, nextState) |
| 282 | + && isStateActive(nextState, stateModelDef)) { |
| 283 | + |
| 284 | + // All qualifying transitions for this partition get the same |
| 285 | + // currentActiveReplicaNumber |
| 286 | + currentActiveReplicaNumber = currentActiveReplicaCount; |
| 287 | + } |
| 288 | + |
253 | 289 | // Create new state transition message |
254 | | - message = MessageUtil |
255 | | - .createStateTransitionMessage(manager.getInstanceName(), manager.getSessionId(), |
256 | | - resource, partition.getPartitionName(), instanceName, currentState, nextState, |
257 | | - sessionIdMap.get(instanceName), stateModelDef.getId()); |
| 290 | + message = MessageUtil.createStateTransitionMessage(manager.getInstanceName(), |
| 291 | + manager.getSessionId(), resource, partition.getPartitionName(), instanceName, |
| 292 | + currentState, nextState, sessionIdMap.get(instanceName), stateModelDef.getId(), |
| 293 | + currentActiveReplicaNumber); |
258 | 294 |
|
259 | 295 | if (logger.isDebugEnabled()) { |
260 | 296 | LogUtil.logDebug(logger, _eventId, String.format( |
261 | | - "Resource %s partition %s for instance %s with currentState %s and nextState %s", |
| 297 | + "Resource %s partition %s for instance %s with currentState %s, nextState %s and currentActiveReplicaNumber %d", |
262 | 298 | resource.getResourceName(), partition.getPartitionName(), instanceName, |
263 | | - currentState, nextState)); |
| 299 | + currentState, nextState, currentActiveReplicaNumber)); |
264 | 300 | } |
265 | 301 | } |
266 | 302 | } |
@@ -290,7 +326,66 @@ private void generateMessage(final Resource resource, final BaseControllerDataPr |
290 | 326 | } // end of for-each-partition |
291 | 327 | } |
292 | 328 |
|
293 | | - private boolean shouldCreateSTCancellation(Message pendingMessage, String desiredState, |
| 329 | + /** |
| 330 | + * Calculate the current active replica count based on state model type. |
| 331 | + * The count includes replicas in top states, secondary top states (excluding OFFLINE), |
| 332 | + * and ERROR states since helix considers them active.Count excludes OFFLINE and DROPPED states. |
| 333 | + * @param currentStateMap |
| 334 | + * @param stateModelDef |
| 335 | + * @return The number of replicas currently in active states, used to determine the |
| 336 | + * currentActiveReplicaNumber for the partition. |
| 337 | + */ |
| 338 | + private int calculateCurrentActiveReplicaCount(Map<String, String> currentStateMap, |
| 339 | + StateModelDefinition stateModelDef) { |
| 340 | + return (int) currentStateMap.values().stream() |
| 341 | + .filter(state -> stateModelDef.getTopState().contains(state) // Top states (MASTER, ONLINE, |
| 342 | + // LEADER) |
| 343 | + || getActiveSecondaryTopStates(stateModelDef).contains(state) // Active secondary states |
| 344 | + // (SLAVE, STANDBY, |
| 345 | + // BOOTSTRAP) |
| 346 | + || HelixDefinedState.ERROR.name().equals(state) // ERROR states (still considered |
| 347 | + // active) |
| 348 | + // DROPPED and OFFLINE are automatically excluded by getActiveSecondaryTopStates() |
| 349 | + ).count(); |
| 350 | + } |
| 351 | + |
| 352 | + /** |
| 353 | + * Get active secondary top states - states that are not non-serving states like OFFLINE and |
| 354 | + * DROPPED. |
| 355 | + * Reasons for elimination: |
| 356 | + * - getSecondTopStates() can include OFFLINE as a secondary top state in some state models. |
| 357 | + * Example - OnlineOffline: |
| 358 | + * getSecondTopStates() = ["OFFLINE"] as it transitions to ONLINE. |
| 359 | + * After filtering: activeSecondaryTopStates=[] (removes "OFFLINE" as it's not a serving state). |
| 360 | + * @param stateModelDef |
| 361 | + */ |
| 362 | + private List<String> getActiveSecondaryTopStates(StateModelDefinition stateModelDef) { |
| 363 | + return stateModelDef.getSecondTopStates().stream() |
| 364 | + // Remove non-serving states |
| 365 | + .filter(state -> !OFFLINE.equals(state) && !HelixDefinedState.DROPPED.name().equals(state)) |
| 366 | + .collect(Collectors.toList()); |
| 367 | + } |
| 368 | + |
| 369 | + /** |
| 370 | + * Determines if the given state is considered active based on the state model type. |
| 371 | + * Active states include: top states, active secondary top states (excluding OFFLINE), |
| 372 | + * and ERROR states. Active states exclude OFFLINE and DROPPED states. |
| 373 | + * ERROR state replicas are always considered active in HELIX as they do not |
| 374 | + * affect availability. |
| 375 | + * @param state |
| 376 | + * @param stateModelDef |
| 377 | + * @return true if the state is considered active, false otherwise |
| 378 | + */ |
| 379 | + private boolean isStateActive(String state, StateModelDefinition stateModelDef) { |
| 380 | + // ERROR state is always considered active regardless of state model type |
| 381 | + if (HelixDefinedState.ERROR.name().equals(state)) { |
| 382 | + return true; |
| 383 | + } |
| 384 | + return stateModelDef.getTopState().contains(state) |
| 385 | + || getActiveSecondaryTopStates(stateModelDef).contains(state); |
| 386 | + } |
| 387 | + |
| 388 | + private boolean shouldCreateSTCancellation(Message pendingMessage, String desiredState, |
294 | 389 | String initialState) { |
295 | 390 | if (pendingMessage == null) { |
296 | 391 | return false; |
|
0 commit comments