Skip to content

Commit 03d399a

Browse files
committed
Addressed more review comments
1 parent 747e9e5 commit 03d399a

File tree

3 files changed

+84
-45
lines changed

3 files changed

+84
-45
lines changed

clients/da-vinci-client/src/main/java/com/linkedin/davinci/consumer/ChangelogClientConfig.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ public class ChangelogClientConfig<T extends SpecificRecord> {
9191
* Total region count used for version swap in A/A setup. Each subscribed partition need to receive this many
9292
* corresponding version swap messages before it can safely go to the new version to ensure data completeness.
9393
*/
94-
private int totalRegionCount = 1;
94+
private int totalRegionCount = 0;
9595
/**
9696
* Version swap timeout in milliseconds. If the version swap is not completed within this time, the consumer will swap
9797
* to the new version and resume normal consumption from EOP for any incomplete partitions. Default is 30 minutes.

clients/da-vinci-client/src/main/java/com/linkedin/davinci/consumer/VeniceChangelogConsumerImpl.java

Lines changed: 51 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
import static com.linkedin.venice.ConfigKeys.DATA_BASE_PATH;
88
import static com.linkedin.venice.ConfigKeys.KAFKA_BOOTSTRAP_SERVERS;
99
import static com.linkedin.venice.ConfigKeys.ZOOKEEPER_ADDRESS;
10-
import static com.linkedin.venice.VeniceConstants.ENVIRONMENT_CONFIG_KEY_FOR_REGION_NAME;
11-
import static com.linkedin.venice.VeniceConstants.SYSTEM_PROPERTY_FOR_APP_RUNNING_REGION;
1210
import static com.linkedin.venice.kafka.protocol.enums.ControlMessageType.START_OF_SEGMENT;
1311
import static com.linkedin.venice.schema.rmd.RmdConstants.REPLICATION_CHECKPOINT_VECTOR_FIELD_POS;
1412
import static com.linkedin.venice.stats.dimensions.VeniceResponseStatusCategory.FAIL;
@@ -98,13 +96,15 @@
9896
import java.util.Set;
9997
import java.util.concurrent.CompletableFuture;
10098
import java.util.concurrent.ConcurrentHashMap;
99+
import java.util.concurrent.CountDownLatch;
101100
import java.util.concurrent.ExecutionException;
102101
import java.util.concurrent.ExecutorService;
103102
import java.util.concurrent.Executors;
104103
import java.util.concurrent.TimeUnit;
105104
import java.util.concurrent.TimeoutException;
106105
import java.util.concurrent.atomic.AtomicBoolean;
107106
import java.util.concurrent.atomic.AtomicLong;
107+
import java.util.concurrent.atomic.AtomicReference;
108108
import java.util.concurrent.locks.ReadWriteLock;
109109
import java.util.concurrent.locks.ReentrantReadWriteLock;
110110
import java.util.stream.Collectors;
@@ -185,6 +185,7 @@ public class VeniceChangelogConsumerImpl<K, V> implements VeniceChangelogConsume
185185
protected final String clientRegionName;
186186
protected final int totalRegionCount;
187187
protected final long versionSwapTimeoutInMs;
188+
protected final AtomicReference<CountDownLatch> onGoingVersionSwapSignal = new AtomicReference<>();
188189
/**
189190
* Interaction of this field should acquire the subscriptionLock.readLock()
190191
*/
@@ -219,29 +220,24 @@ public VeniceChangelogConsumerImpl(
219220
this.pubSubMessageDeserializer = pubSubMessageDeserializer;
220221
this.versionSwapByControlMessage = changelogClientConfig.isVersionSwapByControlMessageEnabled();
221222
this.totalRegionCount = changelogClientConfig.getTotalRegionCount();
223+
this.clientRegionName = changelogClientConfig.getClientRegionName();
222224
this.versionSwapTimeoutInMs = changelogClientConfig.getVersionSwapTimeoutInMs();
223225
this.time = new SystemTime();
226+
this.onGoingVersionSwapSignal.set(new CountDownLatch(0));
224227
if (versionSwapByControlMessage) {
225-
String clientRegionNameFromConfig = changelogClientConfig.getClientRegionName();
226-
if (clientRegionNameFromConfig.isEmpty()) {
227-
String regionFromEnv = System.getenv(ENVIRONMENT_CONFIG_KEY_FOR_REGION_NAME);
228-
if (regionFromEnv == null) {
229-
regionFromEnv = System.getProperty(SYSTEM_PROPERTY_FOR_APP_RUNNING_REGION);
230-
}
231-
if (regionFromEnv == null) {
232-
throw new VeniceException(
233-
"Failed to enable version swap by control message because cannot resolve client region name from config, environment or system property");
234-
}
235-
clientRegionName = regionFromEnv;
236-
} else {
237-
clientRegionName = clientRegionNameFromConfig;
228+
// Version swap related configs should all be resolved or explicitly set at this point.
229+
if (this.clientRegionName.isEmpty()) {
230+
throw new VeniceException(
231+
"Failed to enable version swap by control message because client region name is missing");
232+
}
233+
if (this.totalRegionCount <= 0) {
234+
throw new VeniceException(
235+
"Failed to enable version swap by control message because total region count is not set");
238236
}
239237
LOGGER.info(
240238
"VeniceChangelogConsumer version swap by control message is enabled. Client region name: {}, total region count: {}",
241239
clientRegionName,
242240
totalRegionCount);
243-
} else {
244-
clientRegionName = "";
245241
}
246242

247243
seekExecutorService = Executors.newFixedThreadPool(10, new DaemonThreadFactory(getClass().getSimpleName()));
@@ -382,21 +378,39 @@ protected CompletableFuture<Void> internalSubscribe(Set<Integer> partitions, Pub
382378
throw new RuntimeException(e);
383379
}
384380

385-
PubSubTopic topicToSubscribe;
386-
if (topic == null) {
387-
topicToSubscribe = getCurrentServingVersionTopic();
381+
if (versionSwapByControlMessage) {
382+
boolean lockAcquiredAndNoOngoingVersionSwap = false;
383+
for (int i = 0; i <= MAX_SUBSCRIBE_RETRIES; i++) {
384+
// If version swap is in progress, wait for it to finish
385+
try {
386+
onGoingVersionSwapSignal.get().await();
387+
} catch (InterruptedException e) {
388+
throw new RuntimeException(e);
389+
}
390+
subscriptionLock.writeLock().lock();
391+
if (versionSwapMessageState != null) {
392+
// A new version swap is in progress, wait for it to finish again
393+
subscriptionLock.writeLock().unlock();
394+
} else {
395+
// No version swap is in progress, proceed with subscription
396+
lockAcquiredAndNoOngoingVersionSwap = true;
397+
break;
398+
}
399+
}
400+
if (!lockAcquiredAndNoOngoingVersionSwap) {
401+
// This should be extremely rare where the subscribe request is constantly conflicting with new version swaps
402+
throw new VeniceException("Unable to subscribe to new partitions due to conflicting version swaps");
403+
}
388404
} else {
389-
topicToSubscribe = topic;
405+
subscriptionLock.writeLock().lock();
390406
}
391407

392-
subscriptionLock.writeLock().lock();
393408
try {
394-
if (versionSwapByControlMessage && versionSwapMessageState != null) {
395-
throw new VeniceException(
396-
String.format(
397-
"Unable to subscribe to new partitions while the changelog consumer is undergoing version swap from topic %s to topic %s",
398-
versionSwapMessageState.getOldVersionTopic(),
399-
versionSwapMessageState.getNewVersionTopic()));
409+
PubSubTopic topicToSubscribe;
410+
if (topic == null) {
411+
topicToSubscribe = getCurrentServingVersionTopic();
412+
} else {
413+
topicToSubscribe = topic;
400414
}
401415
Set<PubSubTopicPartition> topicPartitionSet = getTopicAssignment();
402416
for (PubSubTopicPartition topicPartition: topicPartitionSet) {
@@ -841,6 +855,7 @@ protected Collection<PubSubMessage<K, ChangeEvent<V>, VeniceChangeCoordinate>> i
841855
changeCaptureStats.emitVersionSwapCountMetrics(SUCCESS);
842856
changeCaptureStats.setUndergoingVersionSwap(0);
843857
versionSwapMessageState = null;
858+
onGoingVersionSwapSignal.get().countDown();
844859
} else {
845860
return Collections.emptyList();
846861
}
@@ -860,7 +875,13 @@ protected Collection<PubSubMessage<K, ChangeEvent<V>, VeniceChangeCoordinate>> i
860875
changeCaptureStats.emitVersionSwapCountMetrics(SUCCESS);
861876
changeCaptureStats.setUndergoingVersionSwap(0);
862877
versionSwapMessageState = null;
878+
onGoingVersionSwapSignal.get().countDown();
863879
} else {
880+
LOGGER.warn(
881+
"Version swap from topic: {} to topic: {}, generation id: {} already timed out but still unable to find new topic checkpoints to go to.",
882+
versionSwapMessageState.getOldVersionTopic(),
883+
versionSwapMessageState.getNewVersionTopic(),
884+
versionSwapMessageState.getVersionSwapGenerationId());
864885
return Collections.emptyList();
865886
}
866887
}
@@ -974,6 +995,7 @@ private boolean isNewVersionCheckpointsReady(long timeoutInMs) throws Interrupte
974995
versionSwapMessageState.getNewVersionTopic(),
975996
versionSwapMessageState.getVersionSwapGenerationId(),
976997
versionSwapMessageState.getAssignedPartitions()));
998+
return false;
977999
}
9781000
return true;
9791001
}
@@ -1441,6 +1463,7 @@ protected boolean handleVersionSwapMessageInVT(
14411463
Set<PubSubTopicPartition> currentAssignment = getTopicAssignment();
14421464
versionSwapMessageState =
14431465
new VersionSwapMessageState(versionSwap, totalRegionCount, currentAssignment, time.getMilliseconds());
1466+
onGoingVersionSwapSignal.set(new CountDownLatch(1));
14441467
changeCaptureStats.setUndergoingVersionSwap(1);
14451468
LOGGER.info(
14461469
"New version detected for store: {} through version swap messages. Performing version swap from topic: {} to topic: {}, generation id: {}",

clients/da-vinci-client/src/main/java/com/linkedin/davinci/consumer/VersionSwapMessageState.java

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
package com.linkedin.davinci.consumer;
22

3-
import com.linkedin.venice.annotation.NotThreadsafe;
43
import com.linkedin.venice.exceptions.VeniceException;
54
import com.linkedin.venice.kafka.protocol.VersionSwap;
65
import com.linkedin.venice.meta.Version;
76
import com.linkedin.venice.pubsub.api.PubSubPosition;
87
import com.linkedin.venice.pubsub.api.PubSubTopicPartition;
8+
import java.util.Collections;
99
import java.util.HashMap;
1010
import java.util.HashSet;
1111
import java.util.Map;
@@ -15,10 +15,13 @@
1515

1616
/**
1717
* A class initialized to indicate that the changelog consumer is undergoing version swap. The class is also keeping
18-
* various states about this version swap. This class is NOT thread safe and the intended caller should at least
19-
* acquire the subscriptionLock.readLock().
18+
* various states about this version swap. This class is thread safe by having all methods that change or read a state
19+
* that's mutable synchronized. However, it's important to keep in mind that race can still occur if the caller is
20+
* trying to perform a sequence of events that depend on each other. In those scenarios an external lock is required.
21+
* E.g. (1) getFindNewTopicCheckpointFuture() and if the future is complete call (2) getNewTopicVersionSwapCheckpoints()
22+
* By the time (2) is called it's possible that the result from (1) is no longer valid. A different thread slipped in
23+
* between and changed the state.
2024
*/
21-
@NotThreadsafe
2225
public class VersionSwapMessageState {
2326
private final String oldVersionTopic;
2427
private final String newVersionTopic;
@@ -55,7 +58,17 @@ public VersionSwapMessageState(
5558
this.versionSwapStartTimestamp = versionSwapStartTimestamp;
5659
}
5760

58-
public PubSubPosition getVersionSwapLowWatermarkPosition(String topic, int partitionId) {
61+
/**
62+
* Get the pub sub position of the first relevant version swap message for the given partition. Null will be returned
63+
* if the partition have not consumed its version swap yet. This is acceptable because different partitions could be
64+
* making progress towards version swap at different pace. e.g. partition 0 consumed its version swap message already
65+
* but partition 1 could still be consuming regular messages from the old version topic before encountering any
66+
* version swap messages.
67+
* @param topic where the version swap message originated from
68+
* @param partitionId of the version topic
69+
* @return the pub sub position or null
70+
*/
71+
public synchronized PubSubPosition getVersionSwapLowWatermarkPosition(String topic, int partitionId) {
5972
if (oldVersionTopic.equals(topic)) {
6073
return partitionToVersionSwapLowWatermarkPositionMap.get(partitionId);
6174
} else {
@@ -75,23 +88,26 @@ public long getVersionSwapGenerationId() {
7588
return versionSwapGenerationId;
7689
}
7790

78-
public void setFindNewTopicCheckpointFuture(CompletableFuture<Void> findNewTopicCheckpointFuture) {
91+
public synchronized void setFindNewTopicCheckpointFuture(CompletableFuture<Void> findNewTopicCheckpointFuture) {
7992
this.findNewTopicCheckpointFuture = findNewTopicCheckpointFuture;
93+
this.newTopicEOPCheckpoints.clear();
94+
this.newTopicVersionSwapCheckpoints.clear();
8095
}
8196

82-
public CompletableFuture<Void> getFindNewTopicCheckpointFuture() {
97+
public synchronized CompletableFuture<Void> getFindNewTopicCheckpointFuture() {
8398
return findNewTopicCheckpointFuture;
8499
}
85100

86-
public void setNewTopicVersionSwapCheckpoints(Map<Integer, VeniceChangeCoordinate> newTopicVersionSwapCheckpoints) {
101+
public synchronized void setNewTopicVersionSwapCheckpoints(
102+
Map<Integer, VeniceChangeCoordinate> newTopicVersionSwapCheckpoints) {
87103
this.newTopicVersionSwapCheckpoints = newTopicVersionSwapCheckpoints;
88104
}
89105

90-
public void setNewTopicEOPCheckpoints(Map<Integer, VeniceChangeCoordinate> newTopicEOPCheckpoints) {
106+
public synchronized void setNewTopicEOPCheckpoints(Map<Integer, VeniceChangeCoordinate> newTopicEOPCheckpoints) {
91107
this.newTopicEOPCheckpoints = newTopicEOPCheckpoints;
92108
}
93109

94-
public Set<VeniceChangeCoordinate> getNewTopicVersionSwapCheckpoints() {
110+
public synchronized Set<VeniceChangeCoordinate> getNewTopicVersionSwapCheckpoints() {
95111
// Defensive coding
96112
if (findNewTopicCheckpointFuture == null || !findNewTopicCheckpointFuture.isDone()) {
97113
throw new VeniceException("New topic checkpoints are not available yet");
@@ -109,7 +125,7 @@ public Set<VeniceChangeCoordinate> getNewTopicVersionSwapCheckpoints() {
109125
* Intended to be used as a backup strategy if any partition still did not complete version swap within the timeout.
110126
* Remaining partitions will be resumed from EOP instead of first relevant version swap message in the new topic.
111127
*/
112-
public Set<VeniceChangeCoordinate> getNewTopicCheckpointsWithEOPAsBackup() {
128+
public synchronized Set<VeniceChangeCoordinate> getNewTopicCheckpointsWithEOPAsBackup() {
113129
Set<VeniceChangeCoordinate> checkpoints = getNewTopicVersionSwapCheckpoints();
114130
for (Integer partition: getIncompletePartitions()) {
115131
if (newTopicEOPCheckpoints.containsKey(partition)) {
@@ -123,10 +139,10 @@ public Set<VeniceChangeCoordinate> getNewTopicCheckpointsWithEOPAsBackup() {
123139
}
124140

125141
public Set<Integer> getAssignedPartitions() {
126-
return assignedPartitions;
142+
return Collections.unmodifiableSet(assignedPartitions);
127143
}
128144

129-
public Set<Integer> getIncompletePartitions() {
145+
public synchronized Set<Integer> getIncompletePartitions() {
130146
Set<Integer> incompletePartitions = new HashSet<>(assignedPartitions);
131147
incompletePartitions.removeAll(completedPartitions);
132148
return incompletePartitions;
@@ -149,7 +165,7 @@ public long getVersionSwapStartTimestamp() {
149165
* This means we can subscribe to the new version topic and resume normal consumption from the first relevant version
150166
* swap message.
151167
*/
152-
public boolean isVersionSwapMessagesReceivedForAllPartitions() {
168+
public synchronized boolean isVersionSwapMessagesReceivedForAllPartitions() {
153169
return completedPartitions.size() == receivedVersionSwapPartitionToRegionsMap.size();
154170
}
155171

@@ -161,7 +177,7 @@ public boolean isVersionSwapMessagesReceivedForAllPartitions() {
161177
* @param position of the version swap message.
162178
* @return true if all version swap messages related to this version swap event have been received.
163179
*/
164-
public boolean handleVersionSwap(
180+
public synchronized boolean handleVersionSwap(
165181
VersionSwap versionSwap,
166182
PubSubTopicPartition pubSubTopicPartition,
167183
PubSubPosition position) {
@@ -197,7 +213,7 @@ public boolean handleVersionSwap(
197213
* Remove unsubscribed partitions from the ongoing version swap states.
198214
* @param partitions to unsubscribe
199215
*/
200-
public void handleUnsubscribe(Set<Integer> partitions) {
216+
public synchronized void handleUnsubscribe(Set<Integer> partitions) {
201217
for (Integer partition: partitions) {
202218
receivedVersionSwapPartitionToRegionsMap.remove(partition);
203219
partitionToVersionSwapLowWatermarkPositionMap.remove(partition);

0 commit comments

Comments
 (0)