Skip to content

Commit 24fbabf

Browse files
[b456377401] Implement Spark History discovery
Add SparkHistoryDiscoveryService and supporting DTOs to automatically resolve Spark History Server URLs via Knox by probing Spark 2 and Spark 3 endpoints.
1 parent 2d929d1 commit 24fbabf

33 files changed

+852
-86
lines changed

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/AbstractClouderaYarnApplicationTask.java

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import com.fasterxml.jackson.databind.JsonNode;
2020
import com.fasterxml.jackson.databind.node.ArrayNode;
2121
import com.google.common.base.Preconditions;
22-
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.dto.ApiYARNApplicationDTO;
22+
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.dto.ApiYarnApplicationDto;
2323
import com.google.edwmigration.dumper.application.dumper.task.TaskCategory;
2424
import java.io.IOException;
2525
import java.net.URI;
@@ -83,20 +83,20 @@ public PaginatedClouderaYarnApplicationsLoader(ClouderaManagerHandle handle, int
8383
toAppCreationDate = toDate.format(dtFormatter);
8484
}
8585

86-
public int load(String clusterName, Consumer<List<ApiYARNApplicationDTO>> onPageLoad) {
86+
public int load(String clusterName, Consumer<List<ApiYarnApplicationDto>> onPageLoad) {
8787
return load(clusterName, null, onPageLoad);
8888
}
8989

9090
public int load(
9191
String clusterName,
9292
@Nullable String appType,
93-
Consumer<List<ApiYARNApplicationDTO>> onPageLoad) {
93+
Consumer<List<ApiYarnApplicationDto>> onPageLoad) {
9494
offset = 0;
9595
boolean fetchedNewApps;
9696
do {
9797
fetchedNewApps = false;
9898
URI yarnAppsURI = buildNextYARNApplicationPageURI(clusterName, appType);
99-
List<ApiYARNApplicationDTO> newLoad = load(yarnAppsURI);
99+
List<ApiYarnApplicationDto> newLoad = load(yarnAppsURI);
100100
if (!newLoad.isEmpty()) {
101101
onPageLoad.accept(newLoad);
102102
offset += newLoad.size();
@@ -106,7 +106,7 @@ public int load(
106106
return offset;
107107
}
108108

109-
private List<ApiYARNApplicationDTO> load(URI yarnAppURI) {
109+
private List<ApiYarnApplicationDto> load(URI yarnAppURI) {
110110
try (CloseableHttpResponse resp = httpClient.execute(new HttpGet(yarnAppURI))) {
111111
int statusCode = resp.getStatusLine().getStatusCode();
112112
if (!isStatusCodeOK(statusCode)) {
@@ -130,10 +130,10 @@ private List<ApiYARNApplicationDTO> load(URI yarnAppURI) {
130130
}
131131
}
132132

133-
private List<ApiYARNApplicationDTO> toDTOs(ArrayNode applicationsArray) {
134-
List<ApiYARNApplicationDTO> yarnApplicationDTOs = new ArrayList<>();
133+
private List<ApiYarnApplicationDto> toDTOs(ArrayNode applicationsArray) {
134+
List<ApiYarnApplicationDto> yarnApplicationDTOs = new ArrayList<>();
135135
for (JsonNode application : applicationsArray) {
136-
yarnApplicationDTOs.add(new ApiYARNApplicationDTO(application));
136+
yarnApplicationDTOs.add(new ApiYarnApplicationDto(application));
137137
}
138138

139139
return yarnApplicationDTOs;

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaAPIHostsTask.java renamed to dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaApiHostsTask.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121
import com.google.edwmigration.dumper.application.dumper.MetadataDumperUsageException;
2222
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.ClouderaManagerHandle.ClouderaClusterDTO;
2323
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.ClouderaManagerHandle.ClouderaHostDTO;
24-
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.dto.ApiHostDTO;
25-
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.dto.ApiHostListDTO;
24+
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.dto.ApiHostDto;
25+
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.dto.ApiHostListDto;
2626
import com.google.edwmigration.dumper.application.dumper.task.TaskRunContext;
2727
import java.io.Writer;
2828
import java.nio.charset.StandardCharsets;
@@ -38,9 +38,9 @@
3838
* href="https://archive.cloudera.com/cm7/7.11.3.0/generic/jar/cm_api/apidocs/json_ApiHostList.html">Hosts
3939
* API</a> which doesn't contain usage and disk data and collected as a fallback.
4040
*/
41-
public class ClouderaAPIHostsTask extends AbstractClouderaManagerTask {
41+
public class ClouderaApiHostsTask extends AbstractClouderaManagerTask {
4242

43-
public ClouderaAPIHostsTask() {
43+
public ClouderaApiHostsTask() {
4444
super("api-hosts.jsonl");
4545
}
4646

@@ -75,8 +75,8 @@ protected void doRun(
7575
writer.write(stringifiedHosts);
7676
writer.write('\n');
7777

78-
ApiHostListDTO apiHosts = parseJsonStringToObject(stringifiedHosts, ApiHostListDTO.class);
79-
for (ApiHostDTO apiHost : apiHosts.getHosts()) {
78+
ApiHostListDto apiHosts = parseJsonStringToObject(stringifiedHosts, ApiHostListDto.class);
79+
for (ApiHostDto apiHost : apiHosts.getHosts()) {
8080
hosts.add(ClouderaHostDTO.create(apiHost.getId(), apiHost.getName()));
8181
}
8282
}

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaClusterCPUChartTask.java renamed to dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaClusterCpuChartTask.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,12 @@
4040
* href="https://docs.cloudera.com/documentation/enterprise/latest/topics/cm_dg_tsquery.html">tsquery</a>
4141
* language.
4242
*/
43-
public class ClouderaClusterCPUChartTask extends AbstractClouderaTimeSeriesTask {
44-
private static final Logger logger = LoggerFactory.getLogger(ClouderaClusterCPUChartTask.class);
43+
public class ClouderaClusterCpuChartTask extends AbstractClouderaTimeSeriesTask {
44+
private static final Logger logger = LoggerFactory.getLogger(ClouderaClusterCpuChartTask.class);
4545
private static final String TS_CPU_QUERY_TEMPLATE =
4646
"SELECT cpu_percent_across_hosts WHERE entityName = \"%s\" AND category = CLUSTER";
4747

48-
public ClouderaClusterCPUChartTask(
48+
public ClouderaClusterCpuChartTask(
4949
ZonedDateTime startDate,
5050
ZonedDateTime endDate,
5151
TimeSeriesAggregation tsAggregation,

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaClustersTask.java

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
import com.google.common.collect.ImmutableList;
2121
import com.google.common.io.ByteSink;
2222
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.ClouderaManagerHandle.ClouderaClusterDTO;
23-
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.dto.ApiClusterDTO;
24-
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.dto.ApiClusterListDTO;
23+
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.dto.ApiClusterDto;
24+
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.dto.ApiClusterListDto;
2525
import com.google.edwmigration.dumper.application.dumper.task.TaskRunContext;
2626
import java.io.Writer;
2727
import java.net.URI;
@@ -52,18 +52,18 @@ protected void doRun(
5252
throws Exception {
5353
CloseableHttpClient httpClient = handle.getClouderaManagerHttpClient();
5454

55-
ApiClusterListDTO clusterList;
55+
ApiClusterListDto clusterList;
5656

5757
if (context.getArguments().getCluster() != null) {
5858
final String clusterName = context.getArguments().getCluster();
5959
try (CloseableHttpResponse clusterResponse =
6060
httpClient.execute(new HttpGet(handle.getApiURI() + "/clusters/" + clusterName))) {
6161

62-
ApiClusterDTO cluster =
62+
ApiClusterDto cluster =
6363
parseJsonStringToObject(
64-
EntityUtils.toString(clusterResponse.getEntity()), ApiClusterDTO.class);
64+
EntityUtils.toString(clusterResponse.getEntity()), ApiClusterDto.class);
6565

66-
clusterList = new ApiClusterListDTO();
66+
clusterList = new ApiClusterListDto();
6767
clusterList.setClusters(ImmutableList.of(cluster));
6868
}
6969
} else {
@@ -74,7 +74,7 @@ protected void doRun(
7474
try (CloseableHttpResponse clustersResponse =
7575
httpClient.execute(new HttpGet(handle.getApiURI() + "/clusters?clusterType=ANY"))) {
7676
String clustersJson = EntityUtils.toString(clustersResponse.getEntity());
77-
clusterList = parseJsonStringToObject(clustersJson, ApiClusterListDTO.class);
77+
clusterList = parseJsonStringToObject(clustersJson, ApiClusterListDto.class);
7878
}
7979
}
8080

@@ -83,7 +83,7 @@ protected void doRun(
8383
}
8484

8585
List<ClouderaClusterDTO> clusters = new ArrayList<>();
86-
for (ApiClusterDTO item : clusterList.getClusters()) {
86+
for (ApiClusterDto item : clusterList.getClusters()) {
8787
String clusterId = requestClusterIdByName(httpClient, handle.getBaseURI(), item.getName());
8888
clusters.add(ClouderaClusterDTO.create(clusterId, item.getName()));
8989
}

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaCMFHostsTask.java renamed to dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaCmfHostsTask.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@
3737
* The task collects hosts from Cloudera Manager {@code /cmf/} urls. These API contains
3838
* well-structured data but is not well documented.
3939
*/
40-
public class ClouderaCMFHostsTask extends AbstractClouderaManagerTask {
40+
public class ClouderaCmfHostsTask extends AbstractClouderaManagerTask {
4141

42-
private static final Logger logger = LoggerFactory.getLogger(ClouderaCMFHostsTask.class);
42+
private static final Logger logger = LoggerFactory.getLogger(ClouderaCmfHostsTask.class);
4343

44-
public ClouderaCMFHostsTask() {
44+
public ClouderaCmfHostsTask() {
4545
super("cmf-hosts.jsonl");
4646
}
4747

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaHostRAMChartTask.java renamed to dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaHostRamChartTask.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,14 @@
3939
* href="https://docs.cloudera.com/documentation/enterprise/latest/topics/cm_dg_tsquery.html">tsquery</a>
4040
* language.
4141
*/
42-
public class ClouderaHostRAMChartTask extends AbstractClouderaTimeSeriesTask {
42+
public class ClouderaHostRamChartTask extends AbstractClouderaTimeSeriesTask {
4343

44-
private static final Logger logger = LoggerFactory.getLogger(ClouderaHostRAMChartTask.class);
44+
private static final Logger logger = LoggerFactory.getLogger(ClouderaHostRamChartTask.class);
4545

4646
private static final String TS_RAM_QUERY_TEMPLATE =
4747
"select swap_used, physical_memory_used, physical_memory_total, physical_memory_cached, physical_memory_buffers where entityName = \"%s\"";
4848

49-
public ClouderaHostRAMChartTask(
49+
public ClouderaHostRamChartTask(
5050
ZonedDateTime startDate,
5151
ZonedDateTime endDate,
5252
TimeSeriesAggregation tsAggregation,

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaManagerConnector.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,8 @@ public void addTasksTo(@Nonnull List<? super Task<?>> out, @Nonnull ConnectorArg
9191
out.add(new DumpMetadataTask(arguments, FORMAT_NAME));
9292
out.add(new FormatTask(FORMAT_NAME));
9393
out.add(new ClouderaClustersTask());
94-
out.add(new ClouderaCMFHostsTask());
95-
out.add(new ClouderaAPIHostsTask());
94+
out.add(new ClouderaCmfHostsTask());
95+
out.add(new ClouderaApiHostsTask());
9696
out.add(new ClouderaServicesTask());
9797
out.add(new ClouderaHostComponentsTask());
9898

@@ -107,8 +107,8 @@ public void addTasksTo(@Nonnull List<? super Task<?>> out, @Nonnull ConnectorArg
107107
endDate = arguments.getEndDate();
108108
}
109109

110-
out.add(new ClouderaClusterCPUChartTask(startDate, endDate, DAILY, REQUIRED));
111-
out.add(new ClouderaHostRAMChartTask(startDate, endDate, DAILY, REQUIRED));
110+
out.add(new ClouderaClusterCpuChartTask(startDate, endDate, DAILY, REQUIRED));
111+
out.add(new ClouderaHostRamChartTask(startDate, endDate, DAILY, REQUIRED));
112112
out.add(new ClouderaServiceResourceAllocationChartTask(startDate, endDate, HOURLY, OPTIONAL));
113113
out.add(new ClouderaYarnApplicationsTask(startDate, endDate, OPTIONAL));
114114
out.add(new ClouderaYarnApplicationTypeTask(startDate, endDate, OPTIONAL));

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaManagerHandle.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import com.google.auto.value.AutoValue;
2020
import com.google.common.base.Preconditions;
2121
import com.google.common.collect.ImmutableList;
22+
import com.google.common.io.Closer;
2223
import com.google.edwmigration.dumper.application.dumper.handle.Handle;
2324
import java.io.IOException;
2425
import java.net.URI;
@@ -35,6 +36,7 @@ public class ClouderaManagerHandle implements Handle {
3536
private final URI apiURI;
3637
private final CloseableHttpClient clouderaManagerHttpClient;
3738
private final CloseableHttpClient basicAuthHttpClient;
39+
private final Closer closer;
3840

3941
private ImmutableList<ClouderaClusterDTO> clusters;
4042
private ImmutableList<ClouderaHostDTO> hosts;
@@ -52,6 +54,9 @@ public ClouderaManagerHandle(
5254
this.apiURI = unify(apiURI);
5355
this.clouderaManagerHttpClient = clouderaManagerHttpClient;
5456
this.basicAuthHttpClient = basicAuthHttpClient;
57+
this.closer = Closer.create();
58+
closer.register(this.clouderaManagerHttpClient);
59+
closer.register(this.basicAuthHttpClient);
5560
}
5661

5762
/** 1. Remove query params and url fragments 2. Add trailing slash for safety */
@@ -130,10 +135,9 @@ public synchronized void initSparkYarnApplications(
130135
}
131136

132137
@Override
133-
public void close() throws IOException {
138+
public synchronized void close() throws IOException {
134139
try {
135-
clouderaManagerHttpClient.close();
136-
basicAuthHttpClient.close();
140+
closer.close();
137141
} catch (IOException ignore) {
138142
// The intention is to do graceful shutdown and try to release the resource.
139143
// In case of errors we do not need to interrupt the execution flow

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaYarnApplicationTypeTask.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
import com.google.edwmigration.dumper.application.dumper.MetadataDumperUsageException;
2828
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.ClouderaManagerHandle.ClouderaClusterDTO;
2929
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.ClouderaManagerHandle.ClouderaYarnApplicationDTO;
30-
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.dto.ApiYARNApplicationDTO;
30+
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.dto.ApiYarnApplicationDto;
3131
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.model.YarnApplicationType;
3232
import com.google.edwmigration.dumper.application.dumper.task.TaskCategory;
3333
import com.google.edwmigration.dumper.application.dumper.task.TaskRunContext;
@@ -108,9 +108,9 @@ protected void doRun(
108108
}
109109

110110
private void writeYarnAppTypes(
111-
Writer writer, List<ApiYARNApplicationDTO> yarnApps, String appType, String clusterName) {
111+
Writer writer, List<ApiYarnApplicationDto> yarnApps, String appType, String clusterName) {
112112
List<ApplicationTypeToYarnApplication> yarnAppTypeMappings = new ArrayList<>();
113-
for (ApiYARNApplicationDTO yarnApp : yarnApps) {
113+
for (ApiYarnApplicationDto yarnApp : yarnApps) {
114114
yarnAppTypeMappings.add(
115115
new ApplicationTypeToYarnApplication(yarnApp.getApplicationId(), appType, clusterName));
116116
}

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaYarnApplicationsTask.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
import com.google.common.collect.ImmutableMap;
2121
import com.google.common.io.ByteSink;
2222
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.ClouderaManagerHandle.ClouderaClusterDTO;
23-
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.dto.ApiYARNApplicationDTO;
23+
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.dto.ApiYarnApplicationDto;
2424
import com.google.edwmigration.dumper.application.dumper.task.TaskCategory;
2525
import com.google.edwmigration.dumper.application.dumper.task.TaskRunContext;
2626
import java.io.IOException;
@@ -67,8 +67,8 @@ protected void doRun(
6767
}
6868

6969
private void writeYarnApplications(
70-
Writer writer, List<ApiYARNApplicationDTO> yarnApps, String clusterName) {
71-
for (ApiYARNApplicationDTO yarnApp : yarnApps) {
70+
Writer writer, List<ApiYarnApplicationDto> yarnApps, String clusterName) {
71+
for (ApiYarnApplicationDto yarnApp : yarnApps) {
7272
yarnApp.setClusterName(clusterName);
7373
}
7474
try {

0 commit comments

Comments
 (0)