Skip to content

Commit 00d3ff1

Browse files
authored
Wire up Functional Index logic + testing (#586)
1 parent 1ae7f1a commit 00d3ff1

File tree

6 files changed

+193
-30
lines changed

6 files changed

+193
-30
lines changed

build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ project.ext.spec = [
3434

3535
project.ext.externalDependency = [
3636
'assertJ': 'org.assertj:assertj-core:3.11.1',
37+
'calcite': 'org.apache.calcite:calcite-core:1.32.0',
3738
'commonsIo': 'commons-io:commons-io:2.16.1',
3839
'commonsLang': 'commons-lang:commons-lang:2.6',
3940
'caffeine': 'com.github.ben-manes.caffeine:caffeine:2.9.2',

dao-impl/ebean-dao/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ dependencies {
2929
testCompile externalDependency.mockito
3030
testCompile externalDependency.mockitoInline
3131
testCompile externalDependency.maria4j
32+
testCompile externalDependency.calcite
3233
enhance externalDependency.ebeanAgent
3334
}
3435

dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/utils/SQLIndexFilterUtils.java

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -119,22 +119,20 @@ public static String parseIndexFilter(@Nonnull String entityType, @Nullable Inde
119119
final String indexColumn = getGeneratedColumnName(entityType, aspect, pathParams.getPath(), nonDollarVirtualColumnsEnabled);
120120
final String tableName = SQLSchemaUtils.getTableName(entityType);
121121

122-
// NEW / TODO: Check if an expression-based index exists, if it does, use the new logic
122+
// NEW: Check if an expression-based index exists, if it does, use the new logic
123+
// NOTE: With functional indexes, we establish that the index name is EXPECTED to follow the same naming
124+
// convention as old column naming to keep things consistent.
123125
final String indexExpression = schemaValidator.getIndexExpression(tableName, indexColumn);
124126
if (indexExpression != null) {
125127
log.debug("Using expression index '{}' in table '{}' with expression '{}'", indexColumn, tableName, indexExpression);
126-
//// Commenting this out for now... to be extra safe, will not currently make this queryable yet
127-
//// and should verify that the above debug log is printed to properly acknoledge an expression.
128-
// sqlFilters.add(parseSqlFilter(indexExpression, condition, pathParams.getValue()));
129-
}
130-
131-
// FOR NOW: keep old logic to allow parallel usage of new indices and validation
132-
if (!schemaValidator.columnExists(tableName, indexColumn)) {
133-
// Else: (old logic) Skip filter if column doesn't exist
128+
sqlFilters.add(parseSqlFilter(indexExpression, condition, pathParams.getValue()));
129+
} else if (schemaValidator.columnExists(tableName, indexColumn)) {
130+
// (Pre-functional-index logic) Check for existence of (virtual) column
131+
sqlFilters.add(parseSqlFilter(indexColumn, condition, pathParams.getValue()));
132+
} else {
133+
// (Pre-functional-index logic) Skip filter if column doesn't exist
134134
log.warn("Skipping filter: virtual column '{}' not found in table '{}'", indexColumn, tableName);
135-
continue;
136135
}
137-
sqlFilters.add(parseSqlFilter(indexColumn, condition, pathParams.getValue()));
138136
}
139137
}
140138
}
@@ -148,42 +146,43 @@ public static String parseIndexFilter(@Nonnull String entityType, @Nullable Inde
148146

149147
/**
150148
* Parse condition expression.
151-
* @param indexColumn the virtual generated column
149+
* @param index the name of the virtual generated column OR the actual expression of a functional index
150+
* (TODO: is this valid?) Note: the functional index is expected to be wrapped in parentheses already.
152151
* @param condition {@link Condition} filter condition
153152
* @param indexValue {@link IndexValue} index value
154153
* @return SQL expression of the condition expression
155154
*/
156-
private static String parseSqlFilter(String indexColumn, Condition condition, IndexValue indexValue) {
155+
private static String parseSqlFilter(String index, Condition condition, IndexValue indexValue) {
157156
switch (condition) {
158157
// TODO: add validation to check that the index column value is an array type
159158
case ARRAY_CONTAINS:
160-
return String.format("'%s' MEMBER OF(%s)", parseIndexValue(indexValue), indexColumn);
159+
return String.format("'%s' MEMBER OF(%s)", parseIndexValue(indexValue), index); // JSON Array
161160
case CONTAIN:
162-
return String.format("JSON_SEARCH(%s, 'one', '%s') IS NOT NULL", indexColumn, parseIndexValue(indexValue));
161+
return String.format("JSON_SEARCH(%s, 'one', '%s') IS NOT NULL", index, parseIndexValue(indexValue)); // JSON String, Array, Struct
163162
case IN:
164-
return indexColumn + " IN " + parseIndexValue(indexValue);
163+
return index + " IN " + parseIndexValue(indexValue); // note the usage here is "swapped": indexValue IN (input)
165164
case EQUAL:
166165
if (indexValue.isString() || indexValue.isBoolean()) {
167-
return indexColumn + " = '" + parseIndexValue(indexValue) + "'";
166+
return index + " = '" + parseIndexValue(indexValue) + "'";
168167
}
169168

170169
if (indexValue.isArray()) {
171-
return indexColumn + " = '" + convertToJsonArray(indexValue.getArray()) + "'";
170+
return index + " = '" + convertToJsonArray(indexValue.getArray()) + "'";
172171
}
173172

174-
return indexColumn + " = " + parseIndexValue(indexValue);
173+
return index + " = " + parseIndexValue(indexValue);
175174
case START_WITH:
176-
return indexColumn + " LIKE '" + parseIndexValue(indexValue) + "%'";
175+
return index + " LIKE '" + parseIndexValue(indexValue) + "%'";
177176
case END_WITH:
178-
return indexColumn + " LIKE '%" + parseIndexValue(indexValue) + "'";
177+
return index + " LIKE '%" + parseIndexValue(indexValue) + "'";
179178
case GREATER_THAN_OR_EQUAL_TO:
180-
return indexColumn + " >= " + parseIndexValue(indexValue);
179+
return index + " >= " + parseIndexValue(indexValue);
181180
case GREATER_THAN:
182-
return indexColumn + " > " + parseIndexValue(indexValue);
181+
return index + " > " + parseIndexValue(indexValue);
183182
case LESS_THAN_OR_EQUAL_TO:
184-
return indexColumn + " <= " + parseIndexValue(indexValue);
183+
return index + " <= " + parseIndexValue(indexValue);
185184
case LESS_THAN:
186-
return indexColumn + " < " + parseIndexValue(indexValue);
185+
return index + " < " + parseIndexValue(indexValue);
187186
default:
188187
throw new UnsupportedOperationException("Unsupported condition operation: " + condition);
189188
}

dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/utils/SchemaValidatorUtil.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,22 +119,22 @@ public boolean indexExists(@Nonnull String tableName, @Nonnull String indexName)
119119

120120

121121
/**
122-
* Cleans SQL expression by removing MySQL-specific encoding artifacts.
123-
* Removes _utf8mb4 charset prefix, unescapes quotes, and removes newlines.
122+
* Cleans SQL expression by removing MySQL-specific encoding artifacts that otherwise result in unrecognized syntax.
123+
* Removes _utf8mb4 and _utf8mb3 charset prefix, unescapes quotes, and removes newlines.
124124
* MySQL team is the POC for questions about this since there is preprocessing needed to transform the as-is
125125
* index expression from the index table to a (string) expression that is usable directly in an indexed query.
126126
*
127127
* @param expression Raw SQL expression from database
128128
* @return Cleaned expression string, with enclosing parentheses
129129
*/
130-
@VisibleForTesting
131-
protected String cleanIndexExpression(@Nullable String expression) {
130+
public static String cleanIndexExpression(@Nullable String expression) {
132131
if (expression == null) {
133132
return null;
134133
}
135134

136135
return "(" + expression
137136
.replace("_utf8mb4\\'", "'")
137+
.replace("_utf8mb3\\'", "'")
138138
.replace("\\'", "'")
139139
.replace("\\\"", "\"")
140140
.replace("\n", "") + ")";

dao-impl/ebean-dao/src/test/java/com/linkedin/metadata/dao/utils/SQLIndexFilterUtilsTest.java

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,19 @@
11
package com.linkedin.metadata.dao.utils;
22

3+
import com.linkedin.data.template.StringArray;
34
import com.linkedin.metadata.query.Condition;
45
import com.linkedin.metadata.query.IndexCriterion;
56
import com.linkedin.metadata.query.IndexCriterionArray;
67
import com.linkedin.metadata.query.IndexFilter;
78
import com.linkedin.metadata.query.IndexSortCriterion;
89
import com.linkedin.metadata.query.IndexValue;
910
import com.linkedin.metadata.query.SortOrder;
11+
import com.linkedin.testing.AspectBar;
1012
import com.linkedin.testing.AspectFoo;
1113
import com.linkedin.testing.urn.FooUrn;
1214
import java.net.URISyntaxException;
15+
import org.apache.calcite.config.Lex;
16+
import org.apache.calcite.sql.parser.SqlParser;
1317
import org.testng.annotations.BeforeClass;
1418
import org.testng.annotations.Test;
1519

@@ -26,6 +30,21 @@ public class SQLIndexFilterUtilsTest {
2630
public void setupValidator() {
2731
mockValidator = mock(SchemaValidatorUtil.class);
2832
when(mockValidator.columnExists(anyString(), anyString())).thenReturn(true);
33+
34+
/////// NEW MOCKS for functional index testing
35+
// NOTE that " charset utf8mb4" is appended after "char(1024)" for some of the (real) use cases in our DB's
36+
// but does NOT pass Calcite's syntax checker. However, in actual queries, it is appended as a part of the index
37+
// and works just fine: we will omit it here so that we can check the syntax otherwise.
38+
39+
// "AspectBar" as the aspect with a functional index and "value" as the field (path) to be indexed
40+
when(mockValidator.getIndexExpression(anyString(), matches("i_aspectbar[$0]value")))
41+
.thenReturn("(cast(json_extract(`a_aspectbar`, '$.aspect.value') as char(1024)))");
42+
// This is an existing new way of Array extraction (AssetLabels.derived_labels)
43+
when(mockValidator.getIndexExpression(anyString(), matches("i_aspectbar[$0]value_array")))
44+
.thenReturn("(cast(json_extract(`a_aspectbar`, '$.aspect.value_array') as char(128) array))");
45+
// This is an existing legacy way of array extraction, casting to a string (DataPolicyInfo.annotation.ontologyIris)
46+
when(mockValidator.getIndexExpression(anyString(), matches("i_aspectbar[$0]annotation[$0]ontologyIris")))
47+
.thenReturn("(cast(replace(json_unquote(json_extract(`a_aspectbar`,'$.aspect.annotation.ontologyIris[*]')),'\"','') as char(255)))");
2948
}
3049

3150
@Test
@@ -67,4 +86,128 @@ public void testParseIndexFilter() {
6786
assertEquals(sql,
6887
"WHERE a_aspectfoo IS NOT NULL\nAND JSON_EXTRACT(a_aspectfoo, '$.gma_deleted') IS NULL\nAND i_aspectfoo0id < 12\nAND deleted_ts IS NULL");
6988
}
89+
90+
private static void assertValidSql(String sql) {
91+
final String sqlPrefix = "SELECT * FROM metadata_entity_fakeplaceholder\n";
92+
try {
93+
SqlParser.create(sqlPrefix + sql, SqlParser.config().withLex(Lex.MYSQL)).parseQuery();
94+
} catch (Exception e) {
95+
System.err.println("\nINPUT: " + sqlPrefix + sql);
96+
throw new AssertionError("Expected valid SQL but got exception: " + e.getMessage());
97+
}
98+
}
99+
100+
// Note that the other tests passing with the existing mocks inherently test the non-functional index case
101+
@SuppressWarnings("checkstyle:LineLength")
102+
@Test
103+
public void testParseIndexFilterWithFunctionalIndex() {
104+
// set up reusable index filter code
105+
IndexFilter indexFilter = new IndexFilter();
106+
IndexCriterionArray indexCriterionArray = new IndexCriterionArray();
107+
indexFilter.setCriteria(indexCriterionArray);
108+
109+
// Test 1: ARRAY_CONTAINS
110+
indexCriterionArray.add(0,
111+
SQLIndexFilterUtils.createIndexCriterion(AspectBar.class, "value_array", Condition.ARRAY_CONTAINS,
112+
IndexValue.create(12L)));
113+
final String expectedSql1 =
114+
"WHERE a_aspectbar IS NOT NULL\nAND JSON_EXTRACT(a_aspectbar, '$.gma_deleted') IS NULL\nAND '12' MEMBER OF((cast(json_extract(`a_aspectbar`, '$.aspect.value_array') as char(128) array)))\nAND deleted_ts IS NULL";
115+
assertValidSql(expectedSql1); // assert that the expected SQL is valid to begin with
116+
assertEquals(SQLIndexFilterUtils.parseIndexFilter(FooUrn.ENTITY_TYPE, indexFilter, false, mockValidator),
117+
expectedSql1);
118+
assertEquals(SQLIndexFilterUtils.parseIndexFilter(FooUrn.ENTITY_TYPE, indexFilter, true, mockValidator),
119+
expectedSql1);
120+
121+
// Test 2.0: CONTAIN with simple string value
122+
indexCriterionArray.set(0,
123+
SQLIndexFilterUtils.createIndexCriterion(AspectBar.class, "value", Condition.CONTAIN, IndexValue.create(12L)));
124+
final String expectedSql20 =
125+
"WHERE a_aspectbar IS NOT NULL\nAND JSON_EXTRACT(a_aspectbar, '$.gma_deleted') IS NULL\nAND JSON_SEARCH((cast(json_extract(`a_aspectbar`, '$.aspect.value') as char(1024))), 'one', '12') IS NOT NULL\nAND deleted_ts IS NULL";
126+
assertValidSql(expectedSql20); // assert that the expected SQL is valid to begin with
127+
assertEquals(SQLIndexFilterUtils.parseIndexFilter(FooUrn.ENTITY_TYPE, indexFilter, false, mockValidator),
128+
expectedSql20);
129+
assertEquals(SQLIndexFilterUtils.parseIndexFilter(FooUrn.ENTITY_TYPE, indexFilter, true, mockValidator),
130+
expectedSql20);
131+
132+
// Test 2.1: CONTAIN with array value
133+
indexCriterionArray.set(0,
134+
SQLIndexFilterUtils.createIndexCriterion(AspectBar.class, "value_array", Condition.CONTAIN,
135+
IndexValue.create(12L)));
136+
final String expectedSql21 =
137+
"WHERE a_aspectbar IS NOT NULL\nAND JSON_EXTRACT(a_aspectbar, '$.gma_deleted') IS NULL\nAND JSON_SEARCH((cast(json_extract(`a_aspectbar`, '$.aspect.value_array') as char(128) array)), 'one', '12') IS NOT NULL\nAND deleted_ts IS NULL";
138+
assertValidSql(expectedSql21); // assert that the expected SQL is valid to begin with
139+
assertEquals(SQLIndexFilterUtils.parseIndexFilter(FooUrn.ENTITY_TYPE, indexFilter, false, mockValidator),
140+
expectedSql21);
141+
assertEquals(SQLIndexFilterUtils.parseIndexFilter(FooUrn.ENTITY_TYPE, indexFilter, true, mockValidator),
142+
expectedSql21);
143+
144+
// Test 2.2: CONTAIN with string value (note that this is a legacy way of array extraction, which results in a string)
145+
indexCriterionArray.set(0,
146+
SQLIndexFilterUtils.createIndexCriterion(AspectBar.class, "annotation/ontologyIris", Condition.CONTAIN,
147+
IndexValue.create(12L)));
148+
final String expectedSql22 =
149+
"WHERE a_aspectbar IS NOT NULL\nAND JSON_EXTRACT(a_aspectbar, '$.gma_deleted') IS NULL\nAND JSON_SEARCH((cast(replace(json_unquote(json_extract(`a_aspectbar`,'$.aspect.annotation.ontologyIris[*]')),'\"','') as char(255))), 'one', '12') IS NOT NULL\nAND deleted_ts IS NULL";
150+
assertValidSql(expectedSql22); // assert that the expected SQL is valid to begin with
151+
assertEquals(SQLIndexFilterUtils.parseIndexFilter(FooUrn.ENTITY_TYPE, indexFilter, false, mockValidator),
152+
expectedSql22);
153+
assertEquals(SQLIndexFilterUtils.parseIndexFilter(FooUrn.ENTITY_TYPE, indexFilter, true, mockValidator),
154+
expectedSql22);
155+
156+
// Test 3: IN
157+
indexCriterionArray.set(0, SQLIndexFilterUtils.createIndexCriterion(AspectBar.class, "value", Condition.IN,
158+
IndexValue.create(new StringArray("six", "seven"))));
159+
final String expectedSql3 =
160+
"WHERE a_aspectbar IS NOT NULL\nAND JSON_EXTRACT(a_aspectbar, '$.gma_deleted') IS NULL\nAND (cast(json_extract(`a_aspectbar`, '$.aspect.value') as char(1024))) IN ('six', 'seven')\nAND deleted_ts IS NULL";
161+
assertValidSql(expectedSql3); // assert that the expected SQL is valid to begin with
162+
assertEquals(SQLIndexFilterUtils.parseIndexFilter(FooUrn.ENTITY_TYPE, indexFilter, false, mockValidator),
163+
expectedSql3);
164+
assertEquals(SQLIndexFilterUtils.parseIndexFilter(FooUrn.ENTITY_TYPE, indexFilter, true, mockValidator),
165+
expectedSql3);
166+
167+
// Test 4.1: EQUAL -- with string
168+
indexCriterionArray.set(0,
169+
SQLIndexFilterUtils.createIndexCriterion(AspectBar.class, "value", Condition.EQUAL, IndexValue.create("six")));
170+
final String expectedSql41 =
171+
"WHERE a_aspectbar IS NOT NULL\nAND JSON_EXTRACT(a_aspectbar, '$.gma_deleted') IS NULL\nAND (cast(json_extract(`a_aspectbar`, '$.aspect.value') as char(1024))) = 'six'\nAND deleted_ts IS NULL";
172+
assertValidSql(expectedSql41); // assert that the expected SQL is valid to begin with
173+
assertEquals(SQLIndexFilterUtils.parseIndexFilter(FooUrn.ENTITY_TYPE, indexFilter, false, mockValidator),
174+
expectedSql41);
175+
assertEquals(SQLIndexFilterUtils.parseIndexFilter(FooUrn.ENTITY_TYPE, indexFilter, true, mockValidator),
176+
expectedSql41);
177+
178+
// Test 4.2: EQUAL -- with JSON array
179+
indexCriterionArray.set(0,
180+
SQLIndexFilterUtils.createIndexCriterion(AspectBar.class, "value_array", Condition.EQUAL,
181+
IndexValue.create(new StringArray("six", "seven"))));
182+
final String expectedSql42 =
183+
"WHERE a_aspectbar IS NOT NULL\nAND JSON_EXTRACT(a_aspectbar, '$.gma_deleted') IS NULL\nAND (cast(json_extract(`a_aspectbar`, '$.aspect.value_array') as char(128) array)) = '[\"six\", \"seven\"]'\nAND deleted_ts IS NULL";
184+
assertValidSql(expectedSql42); // assert that the expected SQL is valid to begin with
185+
assertEquals(SQLIndexFilterUtils.parseIndexFilter(FooUrn.ENTITY_TYPE, indexFilter, false, mockValidator),
186+
expectedSql42);
187+
assertEquals(SQLIndexFilterUtils.parseIndexFilter(FooUrn.ENTITY_TYPE, indexFilter, true, mockValidator),
188+
expectedSql42);
189+
190+
// Test 5: START_WITH -- will arbitrarily use "annotation/ontologyIris", the more complex string option
191+
indexCriterionArray.set(0,
192+
SQLIndexFilterUtils.createIndexCriterion(AspectBar.class, "annotation/ontologyIris", Condition.START_WITH,
193+
IndexValue.create("six")));
194+
final String expectedSql5 =
195+
"WHERE a_aspectbar IS NOT NULL\nAND JSON_EXTRACT(a_aspectbar, '$.gma_deleted') IS NULL\nAND (cast(replace(json_unquote(json_extract(`a_aspectbar`,'$.aspect.annotation.ontologyIris[*]')),'\"','') as char(255))) LIKE 'six%'\nAND deleted_ts IS NULL";
196+
assertValidSql(expectedSql5); // assert that the expected SQL is valid to begin with
197+
assertEquals(SQLIndexFilterUtils.parseIndexFilter(FooUrn.ENTITY_TYPE, indexFilter, false, mockValidator),
198+
expectedSql5);
199+
assertEquals(SQLIndexFilterUtils.parseIndexFilter(FooUrn.ENTITY_TYPE, indexFilter, true, mockValidator),
200+
expectedSql5);
201+
202+
// Test 10: LESS_THAN
203+
indexCriterionArray.set(0, SQLIndexFilterUtils.createIndexCriterion(AspectBar.class, "value", Condition.LESS_THAN,
204+
IndexValue.create(12L)));
205+
final String expectedSql10 =
206+
"WHERE a_aspectbar IS NOT NULL\nAND JSON_EXTRACT(a_aspectbar, '$.gma_deleted') IS NULL\nAND (cast(json_extract(`a_aspectbar`, '$.aspect.value') as char(1024))) < 12\nAND deleted_ts IS NULL";
207+
assertValidSql(expectedSql10); // assert that the expected SQL is valid to begin with
208+
assertEquals(SQLIndexFilterUtils.parseIndexFilter(FooUrn.ENTITY_TYPE, indexFilter, false, mockValidator),
209+
expectedSql10);
210+
assertEquals(SQLIndexFilterUtils.parseIndexFilter(FooUrn.ENTITY_TYPE, indexFilter, true, mockValidator),
211+
expectedSql10);
212+
}
70213
}

0 commit comments

Comments
 (0)