Skip to content

Commit 5e1f94a

Browse files
authored
percentile histograms on by default -- which enables for catalog metadata latency (#374)
## Summary the catalog metadata latency metric has high variance and is affected by size of request. so measuring by max, which is the only available metric type, does not give enough information for load testing. also, percentiles has a hidden key word "all" which enables turning on histogram by default for all @timed annotated functions. so this is a bit of a refactor which will also help other metrics such as authentication timers. ## Changes - [ ] Client-facing API Changes - [ ] Internal API Changes - [X] Bug Fixes - [ ] New Features - [X] Performance Improvements - [ ] Code Style - [ ] Refactoring - [ ] Documentation - [X] Tests ## Testing Done - [X] Manually Tested on local docker setup. Please include commands ran, and their output. - [X] Added new tests for the changes made. - [ ] Updated existing tests to reflect the changes made. - [ ] No tests added or updated. Please explain why. If unsure, please feel free to ask for help. - [ ] Some other form of testing like staging or soak time in production. Please explain. example: <img width="1932" height="563" alt="image" src="https://github.com/user-attachments/assets/51865d80-05b3-4230-a86a-9155a45d3bf7" /> # Additional Information - [ ] Breaking Changes - [ ] Deprecations - [ ] Large PR broken into smaller PRs, and PR plan linked in the description.
1 parent 6fbdc41 commit 5e1f94a

File tree

4 files changed

+117
-17
lines changed

4 files changed

+117
-17
lines changed

iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -989,6 +989,22 @@ void testCommitMetadataUpdateIncludesDatabaseTag() {
989989
"Timer should not have table tag (only database dimension should be included)");
990990
}
991991

992+
@Test
993+
void testRefreshMetadataLatencyHasHistogramBuckets() {
994+
testMetricHasHistogramBuckets(
995+
InternalCatalogMetricsConstant.METADATA_RETRIEVAL_LATENCY,
996+
this::setupRefreshMetadataTest,
997+
this::executeRefreshMetadata);
998+
}
999+
1000+
@Test
1001+
void testCommitMetadataUpdateLatencyHasHistogramBuckets() {
1002+
testMetricHasHistogramBuckets(
1003+
InternalCatalogMetricsConstant.METADATA_UPDATE_LATENCY,
1004+
this::setupCommitMetadataTest,
1005+
this::executeCommitMetadata);
1006+
}
1007+
9921008
/**
9931009
* Common test method for verifying metrics include database tag but not table tag.
9941010
*
@@ -1029,6 +1045,69 @@ private void testMetricIncludesDatabaseTag(
10291045
verifyMetricTags(meterRegistry, expectedMetricSuffix, noTableTagMessage);
10301046
}
10311047

1048+
/**
1049+
* Common test method for verifying that Timer metrics have histogram buckets configured.
1050+
*
1051+
* @param expectedMetricSuffix The metric name suffix (without catalog prefix)
1052+
* @param setupFunction Function to set up test-specific mocks
1053+
* @param executeFunction Function to execute the operation that should record metrics
1054+
*/
1055+
private void testMetricHasHistogramBuckets(
1056+
String expectedMetricSuffix,
1057+
Consumer<OpenHouseInternalTableOperations> setupFunction,
1058+
Consumer<OpenHouseInternalTableOperations> executeFunction) {
1059+
1060+
// Create a real SimpleMeterRegistry with histogram configuration
1061+
SimpleMeterRegistry meterRegistry = new SimpleMeterRegistry();
1062+
1063+
// Configure the registry to enable histogram buckets for all timers
1064+
// This mimics the application.properties setting:
1065+
// management.metrics.distribution.percentiles-histogram.all=true
1066+
meterRegistry
1067+
.config()
1068+
.meterFilter(
1069+
new io.micrometer.core.instrument.config.MeterFilter() {
1070+
@Override
1071+
public io.micrometer.core.instrument.distribution.DistributionStatisticConfig
1072+
configure(
1073+
io.micrometer.core.instrument.Meter.Id id,
1074+
io.micrometer.core.instrument.distribution.DistributionStatisticConfig
1075+
config) {
1076+
if (id.getType() == io.micrometer.core.instrument.Meter.Type.TIMER) {
1077+
return io.micrometer.core.instrument.distribution.DistributionStatisticConfig
1078+
.builder()
1079+
.percentilesHistogram(true)
1080+
.build()
1081+
.merge(config);
1082+
}
1083+
return config;
1084+
}
1085+
});
1086+
1087+
MetricsReporter realMetricsReporter =
1088+
new MetricsReporter(meterRegistry, "TEST_CATALOG", Lists.newArrayList());
1089+
1090+
// Create instance with real metrics reporter
1091+
OpenHouseInternalTableOperations operationsWithRealMetrics =
1092+
new OpenHouseInternalTableOperations(
1093+
mockHouseTableRepository,
1094+
new HadoopFileIO(new Configuration()),
1095+
Mockito.mock(SnapshotInspector.class),
1096+
mockHouseTableMapper,
1097+
TEST_TABLE_IDENTIFIER,
1098+
realMetricsReporter,
1099+
fileIOManager);
1100+
1101+
// Setup test-specific mocks
1102+
setupFunction.accept(operationsWithRealMetrics);
1103+
1104+
// Execute the operation that should record the metric
1105+
executeFunction.accept(operationsWithRealMetrics);
1106+
1107+
// Verify the metric has histogram buckets
1108+
verifyMetricHistogramBuckets(meterRegistry, expectedMetricSuffix);
1109+
}
1110+
10321111
/** Sets up mocks specific to refresh metadata tests. */
10331112
private void setupRefreshMetadataTest(OpenHouseInternalTableOperations operations) {
10341113
HouseTablePrimaryKey primaryKey =
@@ -1111,4 +1190,39 @@ private void verifyMetricTags(
11111190
// Verify the timer was actually used (count > 0)
11121191
Assertions.assertTrue(timer.count() > 0, "Timer should have been used at least once");
11131192
}
1193+
1194+
/**
1195+
* Verifies that a Timer metric has histogram buckets configured.
1196+
*
1197+
* @param meterRegistry The meter registry to search for metrics
1198+
* @param expectedMetricSuffix The expected metric name suffix
1199+
*/
1200+
private void verifyMetricHistogramBuckets(
1201+
SimpleMeterRegistry meterRegistry, String expectedMetricSuffix) {
1202+
String expectedMetricName = "TEST_CATALOG_" + expectedMetricSuffix;
1203+
1204+
// Find the timer in the registry
1205+
io.micrometer.core.instrument.Timer timer = meterRegistry.find(expectedMetricName).timer();
1206+
Assertions.assertNotNull(timer, "Timer should be created");
1207+
1208+
// Verify the timer was actually used (count > 0)
1209+
Assertions.assertTrue(timer.count() > 0, "Timer should have been used at least once");
1210+
1211+
// Get the timer's snapshot to access histogram data
1212+
io.micrometer.core.instrument.distribution.HistogramSnapshot snapshot = timer.takeSnapshot();
1213+
1214+
// Verify histogram buckets are present
1215+
io.micrometer.core.instrument.distribution.CountAtBucket[] buckets = snapshot.histogramCounts();
1216+
Assertions.assertNotNull(buckets, "Timer should have histogram buckets");
1217+
1218+
// Verify that basic histogram statistics are available
1219+
// Check that total time and max are not null (and not NaN)
1220+
double totalTime = timer.totalTime(java.util.concurrent.TimeUnit.NANOSECONDS);
1221+
double maxTime = timer.max(java.util.concurrent.TimeUnit.NANOSECONDS);
1222+
1223+
// In Micrometer, totalTime and max return primitive doubles, never null, but may be 0 or NaN.
1224+
// So, assertNotNull is not meaningful for primitives; instead, check for NaN.
1225+
Assertions.assertFalse(Double.isNaN(totalTime), "Timer total time should not be NaN");
1226+
Assertions.assertFalse(Double.isNaN(maxTime), "Timer max time should not be NaN");
1227+
}
11141228
}

services/housetables/src/main/resources/application.properties

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,5 @@ management.endpoint.health.enabled=true
1212
management.endpoint.shutdown.enabled=true
1313
management.endpoint.prometheus.enabled=true
1414
management.endpoint.beans.enabled=true
15-
management.metrics.web.server.request.autotime.percentiles=0.5,0.75,0.9,0.95,0.99
16-
management.metrics.distribution.percentiles-histogram.http.server.requests=true
17-
management.metrics.distribution.percentiles-histogram.jvm.gc.pause=true
18-
management.metrics.distribution.percentiles-histogram.hikaricp.connections.acquire=true
19-
management.metrics.distribution.percentiles-histogram.hikaricp.connections.usage=true
20-
management.metrics.distribution.percentiles-histogram.hikaricp.connections.creation=true
2115
spring.datasource.hikari.maximum-pool-size=20
22-
management.metrics.distribution.percentiles.http.server.requests=
16+
management.metrics.distribution.percentiles-histogram.all=true

services/jobs/src/main/resources/application.properties

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,4 @@ management.endpoint.health.enabled=true
1414
management.endpoint.shutdown.enabled=true
1515
management.endpoint.prometheus.enabled=true
1616
management.endpoint.beans.enabled=true
17-
management.metrics.web.server.request.autotime.percentiles=0.5,0.75,0.9,0.95,0.99
18-
management.metrics.distribution.percentiles-histogram.http.server.requests=true
19-
management.metrics.distribution.percentiles.http.server.requests=
17+
management.metrics.distribution.percentiles-histogram.all=true

services/tables/src/main/resources/application.properties

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,4 @@ management.endpoint.health.enabled=true
1717
management.endpoint.shutdown.enabled=true
1818
management.endpoint.prometheus.enabled=true
1919
management.endpoint.beans.enabled=true
20-
management.metrics.web.server.request.autotime.percentiles=0.5,0.75,0.9,0.95,0.99
21-
management.metrics.distribution.percentiles-histogram.http.server.requests=true
22-
management.metrics.distribution.percentiles-histogram.jvm.gc.pause=true
23-
management.metrics.distribution.percentiles-histogram.hikaricp.connections.acquire=true
24-
management.metrics.distribution.percentiles-histogram.hikaricp.connections.usage=true
25-
management.metrics.distribution.percentiles-histogram.hikaricp.connections.creation=true
26-
management.metrics.distribution.percentiles.http.server.requests=
20+
management.metrics.distribution.percentiles-histogram.all=true

0 commit comments

Comments
 (0)