Skip to content

Commit 72b2998

Browse files
committed
[fix](cloud) Add more prompt information when compute group is invalid (#56946)
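If a user sets a default compute group and that compute group is later deleted, the resulting error message is unclear and confusing. This commit fixes that: the message now names the compute group that could not be found and states how that compute group was selected.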
1 parent be1b122 commit 72b2998

File tree

9 files changed

+225
-21
lines changed

9 files changed

+225
-21
lines changed

fe/fe-core/src/main/java/org/apache/doris/cloud/load/CloudBrokerLoadJob.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
import org.apache.doris.qe.OriginStatement;
4747
import org.apache.doris.qe.QeProcessorImpl;
4848
import org.apache.doris.qe.StmtExecutor;
49+
import org.apache.doris.resource.computegroup.ComputeGroupMgr;
4950
import org.apache.doris.system.Backend;
5051
import org.apache.doris.thrift.TStatusCode;
5152
import org.apache.doris.thrift.TUniqueId;
@@ -111,8 +112,9 @@ private void setCloudClusterId() throws MetaNotFoundException {
111112
((CloudSystemInfoService) Env.getCurrentSystemInfo()).getCloudClusterIdByName(clusterName);
112113
}
113114
if (Strings.isNullOrEmpty(this.cloudClusterId)) {
114-
LOG.warn("cluster id is empty, cluster name {}", clusterName);
115-
throw new MetaNotFoundException("cluster id is empty, cluster name: " + clusterName);
115+
LOG.warn("can not find compute group: {}", clusterName);
116+
String computeGroupHints = ComputeGroupMgr.computeGroupNotFoundPromptMsg(clusterName);
117+
throw new MetaNotFoundException(computeGroupHints);
116118
}
117119
sessionVariables.put(CLOUD_CLUSTER_ID, this.cloudClusterId);
118120
}

fe/fe-core/src/main/java/org/apache/doris/cloud/qe/CloudCoordinator.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.apache.doris.planner.ScanNode;
3030
import org.apache.doris.qe.ConnectContext;
3131
import org.apache.doris.qe.Coordinator;
32+
import org.apache.doris.resource.computegroup.ComputeGroupMgr;
3233
import org.apache.doris.thrift.TUniqueId;
3334

3435
import com.google.common.base.Strings;
@@ -86,10 +87,8 @@ protected void prepare() throws UserException {
8687

8788
if (idToBackend == null || idToBackend.isEmpty()) {
8889
LOG.warn("no available backends, idToBackend {}", idToBackend);
89-
String clusterName = ConnectContext.get() != null
90-
? ConnectContext.get().getCloudCluster() : "ctx empty cant get clusterName";
91-
throw new UserException("no available backends, the cluster maybe not be set or been dropped clusterName = "
92-
+ clusterName);
90+
String computeGroupHints = ComputeGroupMgr.computeGroupNotFoundPromptMsg(cluster);
91+
throw new UserException(computeGroupHints);
9392
}
9493
}
9594
}

fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/JdbcExternalCatalog.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.apache.doris.catalog.JdbcTable;
2424
import org.apache.doris.catalog.TableIf.TableType;
2525
import org.apache.doris.common.AnalysisException;
26+
import org.apache.doris.common.Config;
2627
import org.apache.doris.common.DdlException;
2728
import org.apache.doris.common.FeConstants;
2829
import org.apache.doris.datasource.CatalogProperty;
@@ -40,6 +41,7 @@
4041
import org.apache.doris.proto.InternalService;
4142
import org.apache.doris.proto.InternalService.PJdbcTestConnectionRequest;
4243
import org.apache.doris.proto.InternalService.PJdbcTestConnectionResult;
44+
import org.apache.doris.resource.computegroup.ComputeGroupMgr;
4345
import org.apache.doris.rpc.BackendServiceProxy;
4446
import org.apache.doris.rpc.RpcException;
4547
import org.apache.doris.system.Backend;
@@ -411,7 +413,12 @@ private void testBeToJdbcConnection(JdbcClient testClient) throws DdlException {
411413
throw new DdlException(e.getMessage());
412414
}
413415
if (aliveBe == null) {
414-
throw new DdlException("Test BE Connection to JDBC Failed: No Alive backends");
416+
String computeGroupHints = "";
417+
if (Config.isCloudMode()) {
418+
// null: computeGroupNotFoundPromptMsg select cluster for hint msg
419+
computeGroupHints = ComputeGroupMgr.computeGroupNotFoundPromptMsg(null);
420+
}
421+
throw new DdlException("Test BE Connection to JDBC Failed: No Alive backends" + computeGroupHints);
415422
}
416423
TNetworkAddress address = new TNetworkAddress(aliveBe.getHost(), aliveBe.getBrpcPort());
417424
try {

fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java

Lines changed: 61 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import org.apache.doris.common.DdlException;
4242
import org.apache.doris.common.ErrorCode;
4343
import org.apache.doris.common.FeConstants;
44+
import org.apache.doris.common.Pair;
4445
import org.apache.doris.common.Status;
4546
import org.apache.doris.common.UserException;
4647
import org.apache.doris.common.util.DebugUtil;
@@ -1391,6 +1392,30 @@ public String getCloudCluster(boolean updateErr) throws ComputeGroupException {
13911392

13921393
String cluster = null;
13931394
String choseWay = null;
1395+
// 1 get cluster from session
1396+
String sessionCluster = getSessionVariable().getCloudCluster();
1397+
if (!Strings.isNullOrEmpty(sessionCluster)) {
1398+
choseWay = "use session";
1399+
if (LOG.isDebugEnabled()) {
1400+
LOG.debug("finally set context compute group name {} for user {} with chose way '{}'",
1401+
sessionCluster, getCurrentUserIdentity(), choseWay);
1402+
}
1403+
return sessionCluster;
1404+
}
1405+
1406+
// 2 get cluster from user
1407+
String userPropCluster = getDefaultCloudClusterFromUser(true);
1408+
if (!StringUtils.isEmpty(userPropCluster)) {
1409+
choseWay = "user property";
1410+
if (LOG.isDebugEnabled()) {
1411+
LOG.debug("finally set context compute group name {} for user {} with chose way '{}'", userPropCluster,
1412+
getCurrentUserIdentity(), choseWay);
1413+
}
1414+
return userPropCluster;
1415+
}
1416+
1417+
// 3 get cluster from a cached variable in connect context
1418+
// this value comes from a cluster selection policy
13941419
if (!Strings.isNullOrEmpty(this.cloudCluster)) {
13951420
cluster = this.cloudCluster;
13961421
choseWay = "use context cluster";
@@ -1429,17 +1454,49 @@ public String getCloudCluster(boolean updateErr) throws ComputeGroupException {
14291454
return cluster;
14301455
}
14311456

1432-
// TODO implement this function
1433-
public String getDefaultCloudCluster() {
1434-
List<String> cloudClusterNames = ((CloudSystemInfoService) Env.getCurrentSystemInfo()).getCloudClusterNames();
1457+
private String getDefaultCloudClusterFromUser(boolean checkExist) {
14351458
String defaultCluster = Env.getCurrentEnv().getAuth().getDefaultCloudCluster(getQualifiedUser());
1436-
if (!Strings.isNullOrEmpty(defaultCluster) && cloudClusterNames.contains(defaultCluster)) {
1459+
if (Strings.isNullOrEmpty(defaultCluster)) {
1460+
return null;
1461+
}
1462+
if (!checkExist) {
1463+
// default cluster may be dropped.
14371464
return defaultCluster;
14381465
}
14391466

1467+
// Validate cluster existence
1468+
List<String> cloudClusterNames = ((CloudSystemInfoService) Env.getCurrentSystemInfo()).getCloudClusterNames();
1469+
if (cloudClusterNames.contains(defaultCluster)) {
1470+
return defaultCluster;
1471+
}
1472+
LOG.warn("default compute group {} of user {} is invalid, all cluster: {}", defaultCluster,
1473+
getQualifiedUser(), cloudClusterNames);
14401474
return null;
14411475
}
14421476

1477+
// for log use, compute group name and the way to get it
1478+
// the way may be context policy, session, default compute group from user
1479+
public static Pair<String, String> computeGroupFromHintMsg() {
1480+
String clusterName = "";
1481+
try {
1482+
if (ConnectContext.get() != null) {
1483+
clusterName = ConnectContext.get().getCloudCluster();
1484+
}
1485+
} catch (Exception e) {
1486+
clusterName = "ctx empty cant get clusterName";
1487+
1488+
}
1489+
String fromSession = ConnectContext.get().getSessionVariable().getCloudCluster();
1490+
String fromDefaultComputeGroup = ConnectContext.get().getDefaultCloudClusterFromUser(false);
1491+
String clusterFrom = "context policy";
1492+
if (clusterName.equalsIgnoreCase(fromSession)) {
1493+
clusterFrom = "session variable";
1494+
} else if (clusterName.equalsIgnoreCase(fromDefaultComputeGroup)) {
1495+
clusterFrom = "default compute group from user";
1496+
}
1497+
return Pair.of(clusterName, clusterFrom);
1498+
}
1499+
14431500
public StatsErrorEstimator getStatsErrorEstimator() {
14441501
return statsErrorEstimator;
14451502
}

fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,8 +188,7 @@
188188
import org.apache.doris.qe.cache.CacheAnalyzer;
189189
import org.apache.doris.qe.cache.CacheAnalyzer.CacheMode;
190190
import org.apache.doris.qe.cache.SqlCache;
191-
import org.apache.doris.rewrite.ExprRewriter;
192-
import org.apache.doris.rewrite.mvrewrite.MVSelectFailedException;
191+
import org.apache.doris.resource.computegroup.ComputeGroupMgr;
193192
import org.apache.doris.rpc.BackendServiceProxy;
194193
import org.apache.doris.rpc.RpcException;
195194
import org.apache.doris.service.ExecuteEnv;
@@ -2096,7 +2095,12 @@ private void outfileWriteSuccess(OutFileClause outFileClause) throws Exception {
20962095
}
20972096
}
20982097
if (address == null) {
2099-
throw new AnalysisException("No Alive backends");
2098+
String computeGroupHints = "";
2099+
if (Config.isCloudMode()) {
2100+
// null: computeGroupNotFoundPromptMsg select cluster for hint msg
2101+
computeGroupHints = ComputeGroupMgr.computeGroupNotFoundPromptMsg(null);
2102+
}
2103+
throw new AnalysisException("No Alive backends" + computeGroupHints);
21002104
}
21012105

21022106
// 5. send rpc to BE
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
package org.apache.doris.resource.computegroup;
19+
20+
import org.apache.doris.catalog.Env;
21+
import org.apache.doris.cloud.system.CloudSystemInfoService;
22+
import org.apache.doris.common.Config;
23+
import org.apache.doris.common.Pair;
24+
import org.apache.doris.common.UserException;
25+
import org.apache.doris.qe.ConnectContext;
26+
import org.apache.doris.resource.Tag;
27+
import org.apache.doris.system.Backend;
28+
import org.apache.doris.system.SystemInfoService;
29+
30+
import com.google.common.collect.Sets;
31+
import org.apache.commons.lang3.StringUtils;
32+
33+
import java.util.Set;
34+
35+
public class ComputeGroupMgr {
36+
37+
private SystemInfoService systemInfoService;
38+
39+
public ComputeGroupMgr(SystemInfoService systemInfoService) {
40+
this.systemInfoService = systemInfoService;
41+
}
42+
43+
public static String computeGroupNotFoundPromptMsg(String physicalClusterName) {
44+
StringBuilder sb = new StringBuilder();
45+
Pair<String, String> computeGroupInfos = ConnectContext.computeGroupFromHintMsg();
46+
sb.append(" Unable to find the compute group: ");
47+
sb.append("<");
48+
if (physicalClusterName == null) {
49+
sb.append(computeGroupInfos.first);
50+
} else {
51+
sb.append(physicalClusterName);
52+
}
53+
sb.append(">");
54+
sb.append(". Please check if the compute group has been deleted. how this compute group is selected: ");
55+
sb.append(computeGroupInfos.second);
56+
return sb.toString();
57+
}
58+
59+
public ComputeGroup getComputeGroupByName(String name) throws UserException {
60+
if (Config.isCloudMode()) {
61+
CloudSystemInfoService cloudSystemInfoService = (CloudSystemInfoService) systemInfoService;
62+
String physicalClusterName = ((CloudSystemInfoService) Env.getCurrentSystemInfo())
63+
.getPhysicalCluster(name);
64+
String clusterId = cloudSystemInfoService.getCloudClusterIdByName(physicalClusterName);
65+
if (StringUtils.isEmpty(clusterId)) {
66+
String computeGroupHints = ComputeGroupMgr.computeGroupNotFoundPromptMsg(physicalClusterName);
67+
throw new UserException(computeGroupHints);
68+
}
69+
return new CloudComputeGroup(clusterId, physicalClusterName, cloudSystemInfoService);
70+
} else {
71+
return new ComputeGroup(name, name, systemInfoService);
72+
}
73+
}
74+
75+
public ComputeGroup getComputeGroup(Set<Tag> rgTags) {
76+
Set<String> tagStrSet = Sets.newHashSet();
77+
for (Tag tag : rgTags) {
78+
tagStrSet.add(tag.value);
79+
}
80+
return new MergedComputeGroup(String.join(",", tagStrSet), tagStrSet, systemInfoService);
81+
}
82+
83+
// to be compatible with resource tag's logic, if root/admin user not specify a resource tag,
84+
// which means return all backends.
85+
public ComputeGroup getAllBackendComputeGroup() {
86+
return new AllBackendComputeGroup(systemInfoService);
87+
}
88+
89+
public Set<String> getAllComputeGroupIds() {
90+
Set<String> ret = Sets.newHashSet();
91+
for (Backend backend : systemInfoService.getAllClusterBackendsNoException().values()) {
92+
ret.add(backend.getComputeGroup());
93+
}
94+
return ret;
95+
}
96+
97+
}

fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import org.apache.doris.catalog.Table;
3333
import org.apache.doris.catalog.Type;
3434
import org.apache.doris.common.AnalysisException;
35+
import org.apache.doris.common.Config;
3536
import org.apache.doris.common.ErrorCode;
3637
import org.apache.doris.common.Pair;
3738
import org.apache.doris.common.UserException;
@@ -57,6 +58,7 @@
5758
import org.apache.doris.proto.Types.PTypeNode;
5859
import org.apache.doris.qe.ConnectContext;
5960
import org.apache.doris.qe.SessionVariable;
61+
import org.apache.doris.resource.computegroup.ComputeGroupMgr;
6062
import org.apache.doris.rpc.BackendServiceProxy;
6163
import org.apache.doris.rpc.RpcException;
6264
import org.apache.doris.system.Backend;
@@ -234,7 +236,12 @@ public List<Column> getTableColumns() throws AnalysisException {
234236
columns = Lists.newArrayList();
235237
Backend be = getBackend();
236238
if (be == null) {
237-
throw new AnalysisException("No Alive backends");
239+
String computeGroupHints = "";
240+
if (Config.isCloudMode()) {
241+
// null: computeGroupNotFoundPromptMsg select cluster for hint msg
242+
computeGroupHints = ComputeGroupMgr.computeGroupNotFoundPromptMsg(null);
243+
}
244+
throw new AnalysisException("No Alive backends" + computeGroupHints);
238245
}
239246

240247
if (fileFormatProperties.getFileFormatType() == TFileFormatType.FORMAT_WAL) {

fe/fe-core/src/main/java/org/apache/doris/tablefunction/NumbersTableValuedFunction.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
import org.apache.doris.catalog.Env;
2222
import org.apache.doris.catalog.PrimitiveType;
2323
import org.apache.doris.common.AnalysisException;
24+
import org.apache.doris.common.Config;
25+
import org.apache.doris.resource.computegroup.ComputeGroupMgr;
2426
import org.apache.doris.system.Backend;
2527
import org.apache.doris.thrift.TDataGenFunctionName;
2628
import org.apache.doris.thrift.TDataGenScanRange;
@@ -122,7 +124,12 @@ public List<TableValuedFunctionTask> getTasks() throws AnalysisException {
122124
}
123125
}
124126
if (backendList.isEmpty()) {
125-
throw new AnalysisException("No Alive backends");
127+
String computeGroupHints = "";
128+
if (Config.isCloudMode()) {
129+
// null: computeGroupNotFoundPromptMsg select cluster for hint msg
130+
computeGroupHints = ComputeGroupMgr.computeGroupNotFoundPromptMsg(null);
131+
}
132+
throw new AnalysisException("No Alive backends" + computeGroupHints);
126133
}
127134

128135
Collections.shuffle(backendList);

regression-test/suites/cloud_p0/multi_cluster/test_no_cluster_hits.groovy

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ suite('test_no_cluster_hits', 'multi_cluster, docker') {
134134
assertTrue(e.getMessage().contains("ComputeGroupException: CURRENT_USER_NO_AUTH_TO_USE_COMPUTE_GROUP"))
135135
assertTrue(e.getMessage().contains("set default compute group failed"))
136136
}
137+
sql """SET PROPERTY FOR 'root' 'default_cloud_cluster' = ${currentCluster.cluster}"""
137138

138139
// no cluster
139140
def tag = getCloudBeTagByName(currentCluster.cluster)
@@ -153,14 +154,37 @@ suite('test_no_cluster_hits', 'multi_cluster, docker') {
153154
result.size() == 0
154155
}
155156

157+
cluster.addBackend(1, "testCluster")
158+
156159
try {
157-
// errCode = 2, detailMessage = The current compute group compute_cluster is not registered in the system
158-
sql """
159-
select * from $table
160-
"""
160+
// test root's default cluster invalid
161+
connectInDocker('root', '') {
162+
sql """insert into $table values (3, 3)"""
163+
}
164+
} catch (Exception e) {
165+
logger.info("exception: {}", e.getMessage())
166+
assertTrue(e.getMessage().contains("Unable to find the compute group: <compute_cluster>"))
167+
}
168+
169+
try {
170+
connectInDocker('root', '') {
171+
sql """select * from $table"""
172+
}
161173
} catch (Exception e) {
162174
logger.info("exception: {}", e.getMessage())
163-
assertTrue(e.getMessage().contains("The current compute group compute_cluster is not registered in the system"))
175+
assertTrue(e.getMessage().contains("Unable to find the compute group: <compute_cluster>"))
164176
}
177+
178+
179+
try {
180+
// test tvf
181+
connectInDocker('root', '') {
182+
sql """select * from numbers("number" = "100")"""
183+
}
184+
} catch (Exception e) {
185+
logger.info("exception: {}", e.getMessage())
186+
assertTrue(e.getMessage().contains("how this compute group is selected: default compute group from user"))
187+
}
188+
165189
}
166190
}

0 commit comments

Comments
 (0)