Skip to content

Commit b7b7c97

Browse files
committed
Add more information to NoNodeAvailableException and AllNodesFailedException
Sometimes users see these exceptions in production in rare, elusive cases. Since it is production, it is impossible to enable DEBUG logs. To debug these cases, we need these errors to contain all the information that can help investigate what is happening. This information needs to be as recent as possible and collected as close as possible to the point where the driver reads it.
1 parent 3446aae commit b7b7c97

28 files changed

+817
-28
lines changed

core/src/main/java/com/datastax/dse/driver/internal/core/cql/continuous/ContinuousRequestHandlerBase.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
import com.datastax.oss.driver.internal.core.session.DefaultSession;
6666
import com.datastax.oss.driver.internal.core.session.RepreparePayload;
6767
import com.datastax.oss.driver.internal.core.util.Loggers;
68+
import com.datastax.oss.driver.internal.core.util.collection.DebugQueryPlan;
6869
import com.datastax.oss.driver.internal.core.util.collection.SimpleQueryPlan;
6970
import com.datastax.oss.driver.shaded.guava.common.annotations.VisibleForTesting;
7071
import com.datastax.oss.protocol.internal.Frame;
@@ -360,7 +361,11 @@ private void sendRequest(
360361
// We've reached the end of the query plan without finding any node to write to; abort the
361362
// continuous paging session.
362363
if (activeExecutionsCount.decrementAndGet() == 0) {
363-
abortGlobalRequestOrChosenCallback(AllNodesFailedException.fromErrors(errors));
364+
if (queryPlan instanceof DebugQueryPlan) {
365+
abortGlobalRequestOrChosenCallback(AllNodesFailedException.fromErrors(errors, queryPlan));
366+
} else {
367+
abortGlobalRequestOrChosenCallback(AllNodesFailedException.fromErrors(errors));
368+
}
364369
}
365370
} else if (!chosenCallback.isDone()) {
366371
NodeResponseCallback nodeResponseCallback =

core/src/main/java/com/datastax/dse/driver/internal/core/graph/GraphRequestHandler.java

+8-5
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
import com.datastax.oss.driver.internal.core.tracker.NoopRequestTracker;
6161
import com.datastax.oss.driver.internal.core.tracker.RequestLogger;
6262
import com.datastax.oss.driver.internal.core.util.Loggers;
63+
import com.datastax.oss.driver.internal.core.util.collection.DebugQueryPlan;
6364
import com.datastax.oss.driver.internal.core.util.collection.SimpleQueryPlan;
6465
import com.datastax.oss.protocol.internal.Frame;
6566
import com.datastax.oss.protocol.internal.Message;
@@ -265,11 +266,13 @@ private void sendRequest(
265266
// We've reached the end of the query plan without finding any node to write to
266267
if (!result.isDone() && activeExecutionsCount.decrementAndGet() == 0) {
267268
// We're the last execution so fail the result
268-
setFinalError(
269-
statement,
270-
AllNodesFailedException.fromErrors(this.errors),
271-
null,
272-
NO_SUCCESSFUL_EXECUTION);
269+
AllNodesFailedException exception;
270+
if (queryPlan instanceof DebugQueryPlan) {
271+
exception = AllNodesFailedException.fromErrors(this.errors, queryPlan);
272+
} else {
273+
exception = AllNodesFailedException.fromErrors(this.errors);
274+
}
275+
setFinalError(statement, exception, null, NO_SUCCESSFUL_EXECUTION);
273276
}
274277
} else {
275278
NodeResponseCallback nodeResponseCallback =

core/src/main/java/com/datastax/oss/driver/api/core/AllNodesFailedException.java

+54-7
Original file line numberDiff line numberDiff line change
@@ -30,22 +30,32 @@
3030
import java.util.List;
3131
import java.util.Map;
3232
import java.util.Map.Entry;
33+
import java.util.Queue;
3334

3435
/**
3536
* Thrown when a query failed on all the coordinators it was tried on. This exception may wrap
3637
* multiple errors, that are available either as {@linkplain #getSuppressed() suppressed
3738
* exceptions}, or via {@link #getAllErrors()} where they are grouped by node.
3839
*/
3940
public class AllNodesFailedException extends DriverException {
40-
4141
/** @deprecated Use {@link #fromErrors(List)} instead. */
4242
@NonNull
4343
@Deprecated
4444
public static AllNodesFailedException fromErrors(@Nullable Map<Node, Throwable> errors) {
4545
if (errors == null || errors.isEmpty()) {
4646
return new NoNodeAvailableException();
4747
} else {
48-
return new AllNodesFailedException(groupByNode(errors));
48+
return new AllNodesFailedException(groupByNode(errors), null);
49+
}
50+
}
51+
52+
@NonNull
53+
public static AllNodesFailedException fromErrors(
54+
@Nullable List<Entry<Node, Throwable>> errors, Queue<Node> queryPlan) {
55+
if (errors == null || errors.isEmpty()) {
56+
return new NoNodeAvailableException(queryPlan);
57+
} else {
58+
return new AllNodesFailedException(groupByNode(errors), queryPlan);
4959
}
5060
}
5161

@@ -59,6 +69,7 @@ public static AllNodesFailedException fromErrors(@Nullable List<Entry<Node, Thro
5969
}
6070

6171
private final Map<Node, List<Throwable>> errors;
72+
private final Queue<Node> queryPlan;
6273

6374
/** @deprecated Use {@link #AllNodesFailedException(String, ExecutionInfo, Iterable)} instead. */
6475
@Deprecated
@@ -68,6 +79,7 @@ protected AllNodesFailedException(
6879
@NonNull Map<Node, Throwable> errors) {
6980
super(message, executionInfo, null, true);
7081
this.errors = toDeepImmutableMap(groupByNode(errors));
82+
this.queryPlan = null;
7183
addSuppressedErrors();
7284
}
7385

@@ -77,6 +89,18 @@ protected AllNodesFailedException(
7789
@NonNull Iterable<Entry<Node, List<Throwable>>> errors) {
7890
super(message, executionInfo, null, true);
7991
this.errors = toDeepImmutableMap(errors);
92+
this.queryPlan = null;
93+
addSuppressedErrors();
94+
}
95+
96+
protected AllNodesFailedException(
97+
@NonNull String message,
98+
@Nullable ExecutionInfo executionInfo,
99+
@NonNull Iterable<Entry<Node, List<Throwable>>> errors,
100+
@Nullable Queue<Node> queryPlan) {
101+
super(message, executionInfo, null, true);
102+
this.errors = toDeepImmutableMap(errors);
103+
this.queryPlan = queryPlan;
80104
addSuppressedErrors();
81105
}
82106

@@ -91,12 +115,26 @@ private void addSuppressedErrors() {
91115
private AllNodesFailedException(Map<Node, List<Throwable>> errors) {
92116
this(
93117
buildMessage(
94-
String.format("All %d node(s) tried for the query failed", errors.size()), errors),
118+
String.format("All %d node(s) tried for the query failed", errors.size()),
119+
errors,
120+
null),
95121
null,
96122
errors.entrySet());
97123
}
98124

99-
private static String buildMessage(String baseMessage, Map<Node, List<Throwable>> errors) {
125+
private AllNodesFailedException(Map<Node, List<Throwable>> errors, Queue<Node> queryPlan) {
126+
this(
127+
buildMessage(
128+
String.format("All %d node(s) tried for the query failed", errors.size()),
129+
errors,
130+
queryPlan),
131+
null,
132+
errors.entrySet(),
133+
queryPlan);
134+
}
135+
136+
private static String buildMessage(
137+
String baseMessage, Map<Node, List<Throwable>> errors, Queue<Node> queryPlan) {
100138
int limit = Math.min(errors.size(), 3);
101139
Iterator<Entry<Node, List<Throwable>>> iterator =
102140
Iterables.limit(errors.entrySet(), limit).iterator();
@@ -108,9 +146,14 @@ private static String buildMessage(String baseMessage, Map<Node, List<Throwable>
108146
details.append(", ");
109147
}
110148
}
149+
if (queryPlan == null) {
150+
return String.format(
151+
"%s (showing first %d nodes, use getAllErrors() for more): %s",
152+
baseMessage, limit, details);
153+
}
111154
return String.format(
112-
"%s (showing first %d nodes, use getAllErrors() for more): %s",
113-
baseMessage, limit, details);
155+
"%s\nQuery Plan: %s\n(showing first %d nodes, use getAllErrors() for more): %s",
156+
baseMessage, queryPlan, limit, details);
114157
}
115158

116159
/**
@@ -131,6 +174,10 @@ public Map<Node, Throwable> getErrors() {
131174
return builder.build();
132175
}
133176

177+
protected Queue<Node> getQueryPlan() {
178+
return this.queryPlan;
179+
}
180+
134181
/** An immutable map containing all errors on each tried node. */
135182
@NonNull
136183
public Map<Node, List<Throwable>> getAllErrors() {
@@ -146,7 +193,7 @@ public DriverException copy() {
146193
@NonNull
147194
public AllNodesFailedException reword(String newMessage) {
148195
return new AllNodesFailedException(
149-
buildMessage(newMessage, errors), getExecutionInfo(), errors.entrySet());
196+
buildMessage(newMessage, errors, queryPlan), getExecutionInfo(), errors.entrySet());
150197
}
151198

152199
private static Map<Node, List<Throwable>> groupByNode(Map<Node, Throwable> errors) {

core/src/main/java/com/datastax/oss/driver/api/core/NoNodeAvailableException.java

+17-3
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@
1818
package com.datastax.oss.driver.api.core;
1919

2020
import com.datastax.oss.driver.api.core.cql.ExecutionInfo;
21+
import com.datastax.oss.driver.api.core.metadata.Node;
2122
import edu.umd.cs.findbugs.annotations.NonNull;
2223
import java.util.Collections;
24+
import java.util.Queue;
2325

2426
/**
2527
* Specialization of {@code AllNodesFailedException} when no coordinators were tried.
@@ -32,13 +34,25 @@ public NoNodeAvailableException() {
3234
this(null);
3335
}
3436

35-
private NoNodeAvailableException(ExecutionInfo executionInfo) {
36-
super("No node was available to execute the query", executionInfo, Collections.emptySet());
37+
private NoNodeAvailableException(
38+
String message, ExecutionInfo executionInfo, Queue<Node> queryPlan) {
39+
super(message, executionInfo, Collections.emptySet(), queryPlan);
40+
}
41+
42+
public NoNodeAvailableException(Queue<Node> queryPlan) {
43+
this(buildMessage(queryPlan), null, queryPlan);
44+
}
45+
46+
private static String buildMessage(Queue<Node> queryPlan) {
47+
if (queryPlan == null) {
48+
return "No node was available to execute the query";
49+
}
50+
return "No node was available to execute the query. Query Plan: " + queryPlan;
3751
}
3852

3953
@NonNull
4054
@Override
4155
public DriverException copy() {
42-
return new NoNodeAvailableException(getExecutionInfo());
56+
return new NoNodeAvailableException(getMessage(), getExecutionInfo(), getQueryPlan());
4357
}
4458
}

core/src/main/java/com/datastax/oss/driver/api/core/config/DefaultDriverOption.java

+6
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,12 @@ public enum DefaultDriverOption implements DriverOption {
141141
* <p>Value-type: boolean
142142
*/
143143
CONNECTION_WARN_INIT_ERROR("advanced.connection.warn-on-init-error"),
144+
/**
145+
* Provide more details when query execution has failed.
146+
*
147+
* <p>Value-type: boolean
148+
*/
149+
CONNECTION_QUERY_PLAN_EXCEPTIONS("advanced.connection.detailed-query-plan-exceptions"),
144150
/**
145151
* The number of connections in the LOCAL pool.
146152
*

core/src/main/java/com/datastax/oss/driver/api/core/config/OptionsMap.java

+1
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,7 @@ protected static void fillWithDriverDefaults(OptionsMap map) {
381381
map.put(TypedDriverOption.LOAD_BALANCING_DC_FAILOVER_MAX_NODES_PER_REMOTE_DC, 0);
382382
map.put(TypedDriverOption.LOAD_BALANCING_DC_FAILOVER_ALLOW_FOR_LOCAL_CONSISTENCY_LEVELS, false);
383383
map.put(TypedDriverOption.METRICS_GENERATE_AGGREGABLE_HISTOGRAMS, true);
384+
map.put(TypedDriverOption.CONNECTION_QUERY_PLAN_EXCEPTIONS, false);
384385
}
385386

386387
@Immutable

core/src/main/java/com/datastax/oss/driver/api/core/config/TypedDriverOption.java

+5
Original file line numberDiff line numberDiff line change
@@ -889,6 +889,11 @@ public String toString() {
889889
DefaultDriverOption.LOAD_BALANCING_DC_FAILOVER_ALLOW_FOR_LOCAL_CONSISTENCY_LEVELS,
890890
GenericType.BOOLEAN);
891891

892+
/** Whether to provide more details when query execution has failed. */
893+
public static final TypedDriverOption<Boolean> CONNECTION_QUERY_PLAN_EXCEPTIONS =
894+
new TypedDriverOption<>(
895+
DefaultDriverOption.CONNECTION_QUERY_PLAN_EXCEPTIONS, GenericType.BOOLEAN);
896+
892897
private static Iterable<TypedDriverOption<?>> introspectBuiltInValues() {
893898
try {
894899
ImmutableList.Builder<TypedDriverOption<?>> result = ImmutableList.builder();

core/src/main/java/com/datastax/oss/driver/internal/core/control/ControlConnection.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import com.datastax.oss.driver.internal.core.metadata.NodeStateEvent;
3838
import com.datastax.oss.driver.internal.core.metadata.TopologyEvent;
3939
import com.datastax.oss.driver.internal.core.util.Loggers;
40+
import com.datastax.oss.driver.internal.core.util.collection.DebugQueryPlan;
4041
import com.datastax.oss.driver.internal.core.util.concurrent.CompletableFutures;
4142
import com.datastax.oss.driver.internal.core.util.concurrent.Reconnection;
4243
import com.datastax.oss.driver.internal.core.util.concurrent.RunOrSchedule;
@@ -357,7 +358,11 @@ private void connect(
357358
assert adminExecutor.inEventLoop();
358359
Node node = nodes.poll();
359360
if (node == null) {
360-
onFailure.accept(AllNodesFailedException.fromErrors(errors));
361+
if (nodes instanceof DebugQueryPlan) {
362+
onFailure.accept(AllNodesFailedException.fromErrors(errors, nodes));
363+
} else {
364+
onFailure.accept(AllNodesFailedException.fromErrors(errors));
365+
}
361366
} else {
362367
LOG.debug("[{}] Trying to establish a connection to {}", logPrefix, node);
363368
context

core/src/main/java/com/datastax/oss/driver/internal/core/cql/CqlPrepareHandler.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import com.datastax.oss.driver.internal.core.context.InternalDriverContext;
4848
import com.datastax.oss.driver.internal.core.session.DefaultSession;
4949
import com.datastax.oss.driver.internal.core.util.Loggers;
50+
import com.datastax.oss.driver.internal.core.util.collection.DebugQueryPlan;
5051
import com.datastax.oss.driver.internal.core.util.concurrent.CompletableFutures;
5152
import com.datastax.oss.protocol.internal.Frame;
5253
import com.datastax.oss.protocol.internal.Message;
@@ -197,7 +198,11 @@ private void sendRequest(PrepareRequest request, Node node, int retryCount) {
197198
}
198199
}
199200
if (channel == null) {
200-
setFinalError(AllNodesFailedException.fromErrors(this.errors));
201+
if (queryPlan instanceof DebugQueryPlan) {
202+
setFinalError(AllNodesFailedException.fromErrors(this.errors, queryPlan));
203+
} else {
204+
setFinalError(AllNodesFailedException.fromErrors(this.errors));
205+
}
201206
} else {
202207
InitialPrepareCallback initialPrepareCallback =
203208
new InitialPrepareCallback(request, node, channel, retryCount);

core/src/main/java/com/datastax/oss/driver/internal/core/cql/CqlRequestHandler.java

+7-1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
import com.datastax.oss.driver.internal.core.tracker.NoopRequestTracker;
7373
import com.datastax.oss.driver.internal.core.tracker.RequestLogger;
7474
import com.datastax.oss.driver.internal.core.util.Loggers;
75+
import com.datastax.oss.driver.internal.core.util.collection.DebugQueryPlan;
7576
import com.datastax.oss.driver.internal.core.util.collection.SimpleQueryPlan;
7677
import com.datastax.oss.protocol.internal.Frame;
7778
import com.datastax.oss.protocol.internal.Message;
@@ -385,7 +386,12 @@ private void sendRequest(
385386
// We've reached the end of the query plan without finding any node to write to
386387
if (!result.isDone() && activeExecutionsCount.decrementAndGet() == 0) {
387388
// We're the last execution so fail the result
388-
setFinalError(statement, AllNodesFailedException.fromErrors(this.errors), null, -1);
389+
if (queryPlan instanceof DebugQueryPlan) {
390+
setFinalError(
391+
statement, AllNodesFailedException.fromErrors(this.errors, queryPlan), null, -1);
392+
} else {
393+
setFinalError(statement, AllNodesFailedException.fromErrors(this.errors), null, -1);
394+
}
389395
}
390396
} else {
391397
NodeResponseCallback nodeResponseCallback =

0 commit comments

Comments
 (0)