-
Notifications
You must be signed in to change notification settings - Fork 108
[server] Add DoL loopback to ensure new leader is fully caught up on VT #2314
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,89 @@ | ||||||||||||||||
| package com.linkedin.davinci.kafka.consumer; | ||||||||||||||||
|
|
||||||||||||||||
| import com.linkedin.venice.pubsub.api.PubSubProduceResult; | ||||||||||||||||
| import java.util.concurrent.CompletableFuture; | ||||||||||||||||
|
|
||||||||||||||||
|
|
||||||||||||||||
| /** | ||||||||||||||||
| * Tracks Declaration of Leadership (DoL) state during STANDBY to LEADER transition. | ||||||||||||||||
| * DoL mechanism ensures the new leader is fully caught up with VT before switching to remote VT or RT. | ||||||||||||||||
| */ | ||||||||||||||||
| public class DolStamp { | ||||||||||||||||
| private final long leadershipTerm; | ||||||||||||||||
| private final String hostId; | ||||||||||||||||
| private final long produceStartTimeMs; // Timestamp when DoL production started | ||||||||||||||||
| private volatile boolean dolProduced; // DoL message was acked by broker | ||||||||||||||||
| private volatile boolean dolConsumed; // DoL message was consumed back by this replica | ||||||||||||||||
| private volatile CompletableFuture<PubSubProduceResult> dolProduceFuture; // Future tracking DoL produce result | ||||||||||||||||
|
|
||||||||||||||||
| public DolStamp(long leadershipTerm, String hostId) { | ||||||||||||||||
| this.leadershipTerm = leadershipTerm; | ||||||||||||||||
| this.hostId = hostId; | ||||||||||||||||
| this.produceStartTimeMs = System.currentTimeMillis(); | ||||||||||||||||
| this.dolProduced = false; | ||||||||||||||||
| this.dolConsumed = false; | ||||||||||||||||
| this.dolProduceFuture = null; | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| public long getLeadershipTerm() { | ||||||||||||||||
| return leadershipTerm; | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| public String getHostId() { | ||||||||||||||||
| return hostId; | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| public boolean isDolProduced() { | ||||||||||||||||
| return dolProduced; | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| public void setDolProduced(boolean dolProduced) { | ||||||||||||||||
| this.dolProduced = dolProduced; | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| public boolean isDolConsumed() { | ||||||||||||||||
| return dolConsumed; | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| public void setDolConsumed(boolean dolConsumed) { | ||||||||||||||||
| this.dolConsumed = dolConsumed; | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| public CompletableFuture<PubSubProduceResult> getDolProduceFuture() { | ||||||||||||||||
| return dolProduceFuture; | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| public void setDolProduceFuture(CompletableFuture<PubSubProduceResult> dolProduceFuture) { | ||||||||||||||||
| this.dolProduceFuture = dolProduceFuture; | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| public boolean isReady() { | ||||||||||||||||
| return dolProduced && dolConsumed; | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| public long getProduceStartTimeMs() { | ||||||||||||||||
| return produceStartTimeMs; | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| /** | ||||||||||||||||
| * Calculate latency from DoL production start to now. | ||||||||||||||||
| * @return latency in milliseconds | ||||||||||||||||
| */ | ||||||||||||||||
| public long getLatencyMs() { | ||||||||||||||||
| return System.currentTimeMillis() - produceStartTimeMs; | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| @Override | ||||||||||||||||
| public String toString() { | ||||||||||||||||
| String produceResult = ""; | ||||||||||||||||
| if (dolProduceFuture != null && dolProduceFuture.isDone()) { | ||||||||||||||||
| try { | ||||||||||||||||
| produceResult = ", offset=" + dolProduceFuture.get().getPubSubPosition(); | ||||||||||||||||
| } catch (Exception e) { | ||||||||||||||||
| // Ignore, keep empty | ||||||||||||||||
|
Comment on lines
+80
to
+83
|
||||||||||||||||
| try { | |
| produceResult = ", offset=" + dolProduceFuture.get().getPubSubPosition(); | |
| } catch (Exception e) { | |
| // Ignore, keep empty | |
| PubSubProduceResult result = dolProduceFuture.getNow(null); | |
| if (result != null) { | |
| produceResult = ", offset=" + result.getPubSubPosition(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `isReady()` method reads two volatile boolean fields (`dolProduced` and `dolConsumed`) without synchronization. While each field read is atomic, the combined read is not atomic, which could lead to a race condition where the state appears inconsistent. For example, between reading `dolProduced` and `dolConsumed`, another thread could update one of these values. Consider either:
This is especially important since this method is used to determine when to switch the leader topic, which is a critical operation.