Skip to content

Commit b0fab0e

Browse files
authored
Refactor aggregate query virtual column references (groupby) (#589)
1 parent 9de150c commit b0fab0e

File tree

6 files changed

+229
-43
lines changed

6 files changed

+229
-43
lines changed

dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/EbeanLocalAccess.java

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import com.linkedin.metadata.dao.utils.EBeanDAOUtils;
1111
import com.linkedin.metadata.dao.utils.ModelUtils;
1212
import com.linkedin.metadata.dao.utils.RecordUtils;
13-
import com.linkedin.metadata.dao.utils.SQLSchemaUtils;
13+
import com.linkedin.metadata.dao.utils.SQLIndexFilterUtils;
1414
import com.linkedin.metadata.dao.utils.SQLStatementUtils;
1515
import com.linkedin.metadata.dao.utils.SchemaValidatorUtil;
1616
import com.linkedin.metadata.events.IngestionTrackingContext;
@@ -457,12 +457,11 @@ public <ASPECT extends RecordTemplate> ListResult<ASPECT> list(@Nonnull Class<AS
457457
@Override
458458
public Map<String, Long> countAggregate(@Nullable IndexFilter indexFilter,
459459
@Nonnull IndexGroupByCriterion indexGroupByCriterion) {
460-
final String tableName = SQLSchemaUtils.getTableName(_entityType);
461-
final String groupByColumn =
462-
getGeneratedColumnName(_entityType, indexGroupByCriterion.getAspect(), indexGroupByCriterion.getPath(),
463-
_nonDollarVirtualColumnsEnabled);
464-
// first, check for existence of the column we want to GROUP BY
465-
if (!validator.columnExists(tableName, groupByColumn)) {
460+
final String indexedExpressionOrColumn =
461+
SQLIndexFilterUtils.getIndexedExpressionOrColumn(_entityType, indexGroupByCriterion.getAspect(), indexGroupByCriterion.getPath(),
462+
_nonDollarVirtualColumnsEnabled, validator);
463+
// first, check for existence of the column we want to GROUP BY --> null from the previous method call means this
464+
if (indexedExpressionOrColumn == null) {
466465
// if we are trying to GROUP BY the results on a column that does not exist, just return an empty map
467466
return Collections.emptyMap();
468467
}
@@ -497,7 +496,7 @@ private SqlQuery createFilterSqlQuery(@Nullable IndexFilter indexFilter,
497496
StringBuilder filterSql = new StringBuilder();
498497
filterSql.append(SQLStatementUtils.createFilterSql(_entityType, indexFilter, true, _nonDollarVirtualColumnsEnabled, validator));
499498
filterSql.append("\n");
500-
filterSql.append(parseSortCriteria(_entityType, indexSortCriterion, _nonDollarVirtualColumnsEnabled));
499+
filterSql.append(parseSortCriteria(_entityType, indexSortCriterion, _nonDollarVirtualColumnsEnabled, validator));
501500
filterSql.append(String.format(" LIMIT %d", Math.max(pageSize, 0)));
502501
filterSql.append(String.format(" OFFSET %d", Math.max(offset, 0)));
503502
return _server.createSqlQuery(filterSql.toString());
@@ -520,7 +519,7 @@ private SqlQuery createFilterSqlQuery(@Nullable IndexFilter indexFilter,
520519
}
521520

522521
filterSql.append("\n");
523-
filterSql.append(parseSortCriteria(_entityType, indexSortCriterion, _nonDollarVirtualColumnsEnabled));
522+
filterSql.append(parseSortCriteria(_entityType, indexSortCriterion, _nonDollarVirtualColumnsEnabled, validator));
524523
filterSql.append(String.format(" LIMIT %d", Math.max(pageSize, 0)));
525524
return _server.createSqlQuery(filterSql.toString());
526525
}

dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/utils/SQLIndexFilterUtils.java

Lines changed: 40 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,30 @@ private static String parseIndexValue(@Nullable IndexValue indexValue) {
6565
}
6666
}
6767

68+
/**
69+
* Get the expression index "identifier", if it exists, otherwise retrieve the generated column name.
70+
* The idea behind this is that whatever is returned from this method can be used verbatim to query the database;
71+
* it's either the expression index itself (new approach) or the virtual column (old approach).
72+
*/
73+
@Nullable
74+
public static String getIndexedExpressionOrColumn(@Nonnull String assetType, @Nonnull String aspect, @Nonnull String path,
75+
boolean nonDollarVirtualColumnsEnabled, @Nonnull SchemaValidatorUtil schemaValidator) {
76+
final String indexColumn = getGeneratedColumnName(assetType, aspect, path, nonDollarVirtualColumnsEnabled);
77+
final String tableName = getTableName(assetType);
78+
79+
// Check if an expression-based index exists... if it does, use that
80+
final String expressionIndexName = getExpressionIndexName(assetType, aspect, path);
81+
final String indexExpression = schemaValidator.getIndexExpression(tableName, expressionIndexName);
82+
if (indexExpression != null) {
83+
log.info("Using expression index '{}' in table '{}' with expression '{}'", expressionIndexName, tableName, indexExpression);
84+
return indexExpression;
85+
} else if (schemaValidator.columnExists(tableName, indexColumn)) {
86+
// (Pre-functional-index logic) Check for existence of (virtual) column
87+
return indexColumn;
88+
} else {
89+
return null;
90+
}
91+
}
6892

6993
/**
7094
* Parse {@link IndexSortCriterion} into SQL syntax.
@@ -74,19 +98,20 @@ private static String parseIndexValue(@Nullable IndexValue indexValue) {
7498
* @return SQL statement of sorting, e.g. ORDER BY ... DESC ..etc.
7599
*/
76100
public static String parseSortCriteria(@Nonnull String entityType, @Nullable IndexSortCriterion indexSortCriterion,
77-
boolean nonDollarVirtualColumnsEnabled) {
101+
boolean nonDollarVirtualColumnsEnabled, @Nonnull SchemaValidatorUtil validator) {
78102
if (indexSortCriterion == null) {
79103
// Default to order by urn if user does not provide sort criterion.
80104
return "ORDER BY URN";
81105
}
82-
final String indexColumn =
83-
SQLSchemaUtils.getGeneratedColumnName(entityType, indexSortCriterion.getAspect(), indexSortCriterion.getPath(),
84-
nonDollarVirtualColumnsEnabled);
106+
107+
final String indexedExpressionOrColumn =
108+
getIndexedExpressionOrColumn(entityType, indexSortCriterion.getAspect(), indexSortCriterion.getPath(),
109+
nonDollarVirtualColumnsEnabled, validator);
85110

86111
if (!indexSortCriterion.hasOrder()) {
87-
return "ORDER BY " + indexColumn;
112+
return "ORDER BY " + indexedExpressionOrColumn;
88113
} else {
89-
return "ORDER BY " + indexColumn + " " + (indexSortCriterion.getOrder() == SortOrder.ASCENDING ? "ASC" : "DESC");
114+
return "ORDER BY " + indexedExpressionOrColumn + " " + (indexSortCriterion.getOrder() == SortOrder.ASCENDING ? "ASC" : "DESC");
90115
}
91116
}
92117

@@ -116,22 +141,16 @@ public static String parseIndexFilter(@Nonnull String entityType, @Nullable Inde
116141
if (pathParams != null) {
117142
validateConditionAndValue(indexCriterion);
118143
final Condition condition = pathParams.getCondition();
119-
final String indexColumn = getGeneratedColumnName(entityType, aspect, pathParams.getPath(), nonDollarVirtualColumnsEnabled);
120-
final String tableName = SQLSchemaUtils.getTableName(entityType);
121-
122-
// NEW: Check if an expression-based index exists, if it does, use the new logic
123-
final String expressionIndexName = getExpressionIndexName(entityType, aspect, pathParams.getPath());
124-
final String indexExpression = schemaValidator.getIndexExpression(tableName, expressionIndexName);
125-
if (indexExpression != null) {
126-
log.debug("Using expression index '{}' in table '{}' with expression '{}'", expressionIndexName, tableName, indexExpression);
127-
sqlFilters.add(parseSqlFilter(indexExpression, condition, pathParams.getValue()));
128-
} else if (schemaValidator.columnExists(tableName, indexColumn)) {
129-
// (Pre-functional-index logic) Check for existence of (virtual) column
130-
sqlFilters.add(parseSqlFilter(indexColumn, condition, pathParams.getValue()));
131-
} else {
132-
// (Pre-functional-index logic) Skip filter if column doesn't exist
133-
log.warn("Skipping filter: virtual column '{}' not found in table '{}'", indexColumn, tableName);
144+
145+
final String indexedExpressionOrColumn =
146+
getIndexedExpressionOrColumn(entityType, aspect, pathParams.getPath(), nonDollarVirtualColumnsEnabled, schemaValidator);
147+
if (indexedExpressionOrColumn == null) {
148+
log.warn("Skipping filter: Neither expression index nor virtual column found for Aspect '{}' and Path '{}' for Asset '{}'",
149+
aspect, pathParams.getPath(), entityType);
150+
continue;
134151
}
152+
153+
sqlFilters.add(parseSqlFilter(indexedExpressionOrColumn, condition, pathParams.getValue()));
135154
}
136155
}
137156
}

dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/utils/SQLSchemaUtils.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,10 @@ private static String getExpectedNameHelper(
171171

172172
/**
173173
* Get generated column name from aspect and path.
174+
* DEPRECATED, when attempting to obtain an indexed value, please use
175+
* {@link SQLIndexFilterUtils#getIndexedExpressionOrColumn(String, String, String, boolean, SchemaValidatorUtil)} instead.
174176
*/
177+
@Deprecated
175178
@Nonnull
176179
public static String getGeneratedColumnName(@Nonnull String assetType, @Nonnull String aspect, @Nonnull String path,
177180
boolean nonDollarVirtualColumnsEnabled) {

dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/utils/SQLStatementUtils.java

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -362,21 +362,21 @@ public static String createFilterSql(String entityType, @Nullable IndexFilter in
362362
*/
363363
public static String createGroupBySql(String entityType, @Nullable IndexFilter indexFilter,
364364
@Nonnull IndexGroupByCriterion indexGroupByCriterion, boolean nonDollarVirtualColumnsEnabled, @Nonnull SchemaValidatorUtil schemaValidator) {
365-
final String tableName = getTableName(entityType);
366-
final String columnName =
367-
getGeneratedColumnName(entityType, indexGroupByCriterion.getAspect(), indexGroupByCriterion.getPath(),
368-
nonDollarVirtualColumnsEnabled);
369-
// Check if the column exists in the schema
370-
if (!schemaValidator.columnExists(tableName, columnName)) {
371-
log.warn("Skipping group-by: column '{}' not found in table '{}'", columnName, tableName);
365+
final String indexedExpressionOrColumn =
366+
SQLIndexFilterUtils.getIndexedExpressionOrColumn(entityType, indexGroupByCriterion.getAspect(), indexGroupByCriterion.getPath(),
367+
nonDollarVirtualColumnsEnabled, schemaValidator);
368+
if (indexedExpressionOrColumn == null) {
369+
log.warn("Skipping group-by: Neither expression index nor virtual column found for Aspect '{}' and Path '{}' for Asset '{}'",
370+
indexGroupByCriterion.getAspect(), indexGroupByCriterion.getPath(), entityType);
372371
return ""; // skip query generation
373372
}
373+
374374
StringBuilder sb = new StringBuilder();
375-
sb.append(String.format(INDEX_GROUP_BY_CRITERION, columnName, tableName));
375+
sb.append(String.format(INDEX_GROUP_BY_CRITERION, indexedExpressionOrColumn, getTableName(entityType)));
376376
sb.append("\n");
377377
sb.append(parseIndexFilter(entityType, indexFilter, nonDollarVirtualColumnsEnabled, schemaValidator));
378378
sb.append("\nGROUP BY ");
379-
sb.append(columnName);
379+
sb.append(indexedExpressionOrColumn);
380380
return sb.toString();
381381
}
382382

dao-impl/ebean-dao/src/test/java/com/linkedin/metadata/dao/utils/SQLIndexFilterUtilsTest.java

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,18 +55,30 @@ public void testParseSortCriteria() throws URISyntaxException {
5555
assertEquals(indexSortCriterion.getOrder(), SortOrder.ASCENDING);
5656
assertEquals(indexSortCriterion.getAspect(), AspectFoo.class.getCanonicalName());
5757

58-
String sql1 = SQLIndexFilterUtils.parseSortCriteria(fooUrn.getEntityType(), indexSortCriterion, false);
58+
String sql1 = SQLIndexFilterUtils.parseSortCriteria(fooUrn.getEntityType(), indexSortCriterion, false, mockValidator);
5959
assertEquals(sql1, "ORDER BY i_aspectfoo$id ASC");
6060

61-
String sql2 = SQLIndexFilterUtils.parseSortCriteria(fooUrn.getEntityType(), indexSortCriterion, true);
61+
String sql2 = SQLIndexFilterUtils.parseSortCriteria(fooUrn.getEntityType(), indexSortCriterion, true, mockValidator);
6262
assertEquals(sql2, "ORDER BY i_aspectfoo0id ASC");
6363

6464
indexSortCriterion.setOrder(SortOrder.DESCENDING);
65-
sql1 = SQLIndexFilterUtils.parseSortCriteria(fooUrn.getEntityType(), indexSortCriterion, false);
65+
sql1 = SQLIndexFilterUtils.parseSortCriteria(fooUrn.getEntityType(), indexSortCriterion, false, mockValidator);
6666
assertEquals(sql1, "ORDER BY i_aspectfoo$id DESC");
6767

68-
sql2 = SQLIndexFilterUtils.parseSortCriteria(fooUrn.getEntityType(), indexSortCriterion, true);
68+
sql2 = SQLIndexFilterUtils.parseSortCriteria(fooUrn.getEntityType(), indexSortCriterion, true, mockValidator);
6969
assertEquals(sql2, "ORDER BY i_aspectfoo0id DESC");
70+
71+
72+
// new tests that test with functional index, additionally ensuring sql parsability
73+
indexSortCriterion = SQLIndexFilterUtils.createIndexSortCriterion(AspectBar.class, "value", SortOrder.ASCENDING);
74+
sql1 = SQLIndexFilterUtils.parseSortCriteria(fooUrn.getEntityType(), indexSortCriterion, false, mockValidator);
75+
assertEquals(sql1, "ORDER BY (cast(json_extract(`a_aspectbar`, '$.aspect.value') as char(1024))) ASC");
76+
assertValidSql(sql1);
77+
78+
// results should not change even with "nonDollar..." enabled... the functional index will always be '0'-delimited
79+
sql2 = SQLIndexFilterUtils.parseSortCriteria(fooUrn.getEntityType(), indexSortCriterion, true, mockValidator);
80+
assertEquals(sql2, "ORDER BY (cast(json_extract(`a_aspectbar`, '$.aspect.value') as char(1024))) ASC");
81+
assertValidSql(sql2);
7082
}
7183

7284
@Test
@@ -210,4 +222,22 @@ public void testParseIndexFilterWithFunctionalIndex() {
210222
assertEquals(SQLIndexFilterUtils.parseIndexFilter(FooUrn.ENTITY_TYPE, indexFilter, true, mockValidator),
211223
expectedSql10);
212224
}
225+
226+
@Test
227+
public void testGetIndexedExpressionOrColumn() {
228+
// Get something that is NOT an expression (not mocked) -- '$' variant
229+
assertEquals(SQLIndexFilterUtils.getIndexedExpressionOrColumn(FooUrn.ENTITY_TYPE, AspectFoo.class.getCanonicalName(), "value",
230+
false, mockValidator),
231+
"i_aspectfoo$value");
232+
233+
// Get something that is NOT an expression (not mocked) -- '0' variant
234+
assertEquals(SQLIndexFilterUtils.getIndexedExpressionOrColumn(FooUrn.ENTITY_TYPE, AspectFoo.class.getCanonicalName(), "value",
235+
true, mockValidator),
236+
"i_aspectfoo0value");
237+
238+
// Get something that is an expression (mocked)
239+
assertEquals(SQLIndexFilterUtils.getIndexedExpressionOrColumn(FooUrn.ENTITY_TYPE, AspectBar.class.getCanonicalName(), "value",
240+
false, mockValidator),
241+
"(cast(json_extract(`a_aspectbar`, '$.aspect.value') as char(1024)))");
242+
}
213243
}

0 commit comments

Comments
 (0)