Skip to content

Commit 0732056

Browse files
authored
(feat) Add Expression Index Support Utility (#585)
1 parent fe4bc21 commit 0732056

File tree

3 files changed

+173
-2
lines changed

3 files changed

+173
-2
lines changed

dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/utils/SQLIndexFilterUtils.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,19 @@ public static String parseIndexFilter(@Nonnull String entityType, @Nullable Inde
118118
final Condition condition = pathParams.getCondition();
119119
final String indexColumn = getGeneratedColumnName(entityType, aspect, pathParams.getPath(), nonDollarVirtualColumnsEnabled);
120120
final String tableName = SQLSchemaUtils.getTableName(entityType);
121-
// New: Skip filter if column doesn't exist
121+
122+
// NEW / TODO: Check if an expression-based index exists, if it does, use the new logic
123+
final String indexExpression = schemaValidator.getIndexExpression(tableName, indexColumn);
124+
if (indexExpression != null) {
125+
log.debug("Using expression index '{}' in table '{}' with expression '{}'", indexColumn, tableName, indexExpression);
126+
//// Commenting this out for now... to be extra safe, will not currently make this queryable yet
127+
//// and should verify that the above debug log is printed to properly acknowledge an expression.
128+
// sqlFilters.add(parseSqlFilter(indexExpression, condition, pathParams.getValue()));
129+
}
130+
131+
// FOR NOW: keep old logic to allow parallel usage of new indices and validation
122132
if (!schemaValidator.columnExists(tableName, indexColumn)) {
133+
// Else: (old logic) Skip filter if column doesn't exist
123134
log.warn("Skipping filter: virtual column '{}' not found in table '{}'", indexColumn, tableName);
124135
continue;
125136
}

dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/utils/SchemaValidatorUtil.java

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,14 @@
55
import com.google.common.annotations.VisibleForTesting;
66
import io.ebean.EbeanServer;
77
import io.ebean.SqlRow;
8+
import java.util.HashMap;
89
import java.util.HashSet;
910
import java.util.List;
11+
import java.util.Map;
1012
import java.util.Set;
1113
import java.util.concurrent.TimeUnit;
1214
import javax.annotation.Nonnull;
15+
import javax.annotation.Nullable;
1316
import lombok.extern.slf4j.Slf4j;
1417

1518

@@ -39,10 +42,27 @@ public class SchemaValidatorUtil {
3942
.maximumSize(1000)
4043
.build();
4144

45+
// Cache: tableName → Map of (index name → expression that defines the index); expression-based indexes are used as a replacement for creating an index on virtual columns
46+
// Configuration:
47+
// - expireAfterWrite(10 minutes): Ensures that newly added indexes (e.g., via Pretzel) are picked up automatically
48+
// without requiring a service restart. After 10 minutes, the next request will trigger a DB refresh.
49+
// - maximumSize(1000): Limits cache memory footprint by retaining entries for up to 1000 distinct tables.
50+
// Least recently used entries are evicted when the size limit is reached.
51+
// ** THIS IS NEEDED ** because of local testing limitations by MariaDB: expression-based indexes are not supported,
52+
// so no existing logic should depend on anything introduced by the support of this. Otherwise, we'd need to mock
53+
// all indexing code in the test DB, which I want to avoid if possible.
54+
// TODO: This can become the only cache needed for indexes once we are 100% migrated over to this logic.
55+
private final Cache<String, Map<String, String>> indexExpressionCache = Caffeine.newBuilder()
56+
.expireAfterWrite(10, TimeUnit.MINUTES)
57+
.maximumSize(1000)
58+
.build();
59+
4260
private static final String SQL_GET_ALL_COLUMNS =
4361
"SELECT COLUMN_NAME FROM information_schema.COLUMNS WHERE TABLE_SCHEMA = database() AND TABLE_NAME = '%s'";
4462
private static final String SQL_GET_ALL_INDEXES =
4563
"SELECT DISTINCT INDEX_NAME FROM information_schema.STATISTICS WHERE TABLE_SCHEMA = database() AND TABLE_NAME = '%s'";
64+
private static final String SQL_GET_ALL_INDEXES_WITH_EXPRESSIONS =
65+
"SELECT DISTINCT INDEX_NAME, EXPRESSION FROM information_schema.STATISTICS WHERE TABLE_SCHEMA = database() AND TABLE_NAME = '%s'";
4666

4767
public SchemaValidatorUtil(EbeanServer server) {
4868
this.server = server;
@@ -97,6 +117,60 @@ public boolean indexExists(@Nonnull String tableName, @Nonnull String indexName)
97117
return indexes.contains(lowerIndex);
98118
}
99119

120+
121+
/**
122+
* Cleans SQL expression by removing MySQL-specific encoding artifacts.
123+
* Removes _utf8mb4 charset prefix, unescapes quotes, and removes newlines.
124+
* MySQL team is the POC for questions about this since there is preprocessing needed to transform the as-is
125+
* index expression from the index table to a (string) expression that is usable directly in an indexed query.
126+
*
127+
* @param expression Raw SQL expression from database
128+
* @return Cleaned expression string, with enclosing parentheses
129+
*/
130+
@VisibleForTesting
131+
protected String cleanIndexExpression(@Nullable String expression) {
132+
if (expression == null) {
133+
return null;
134+
}
135+
136+
return "(" + expression
137+
.replace("_utf8mb4\\'", "'")
138+
.replace("\\'", "'")
139+
.replace("\\\"", "\"")
140+
.replace("\n", "") + ")";
141+
}
142+
143+
144+
/**
145+
* Retrieves the expression associated with the given index.
146+
*
147+
* <p>NULL doesn't necessarily mean that an index doesn't exist, use {@link #indexExists(String, String)} to check for index existence.
148+
*
149+
* @param tableName Table name
150+
* @param indexName Index name
151+
* @return Expression string, or null if index does not exist OR is not created on an expression; will be enclosed in
152+
* parentheses '()'
153+
*/
154+
@Nullable
155+
public String getIndexExpression(@Nonnull String tableName, @Nonnull String indexName) {
156+
String lowerTable = tableName.toLowerCase();
157+
String lowerIndex = indexName.toLowerCase();
158+
159+
try {
160+
Map<String, String> indexes = indexExpressionCache.get(lowerTable, tbl -> {
161+
log.info("Refreshing index cache for table '{}' from expression retrieval call", tbl);
162+
return loadIndexesAndExpressions(tbl);
163+
});
164+
165+
// This will also return null if the Expression column is null itself
166+
return cleanIndexExpression(indexes.getOrDefault(lowerIndex, null));
167+
} catch (Exception e) {
168+
// MariaDB for local testing doesn't support EXPRESSION column - gracefully degrade
169+
log.debug("Unable to load index expressions for table '{}': {}", lowerTable, e.getMessage());
170+
return null; // same logic as "no expression exists", which is good (handled gracefully)
171+
}
172+
}
173+
100174
/**
101175
* Loads all columns for the given table from information_schema.
102176
*
@@ -127,4 +201,21 @@ private Set<String> loadIndexes(String tableName) {
127201
return indexes;
128202
}
129203

204+
/**
205+
* Loads all index names and expressions for the given table from information_schema.
206+
* See the comment for indexExpressionCache for more details.
207+
*
208+
* @param tableName Table to query
209+
* @return Map of lowercase index names -> expressions
210+
*/
211+
private Map<String, String> loadIndexesAndExpressions(String tableName) {
212+
List<SqlRow> rows = server.createSqlQuery(String.format(SQL_GET_ALL_INDEXES_WITH_EXPRESSIONS, tableName)).findList();
213+
Map<String, String> indexes = new HashMap<>();
214+
for (SqlRow row : rows) {
215+
// The Expression value will be null if the index is not created on an expression
216+
indexes.put(row.getString("INDEX_NAME").toLowerCase(), row.getString("EXPRESSION"));
217+
}
218+
return indexes;
219+
}
220+
130221
}

dao-impl/ebean-dao/src/test/java/com/linkedin/metadata/dao/utils/SchemaValidatorUtilTest.java

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,12 @@
44
import com.linkedin.metadata.dao.EBeanDAOConfig;
55
import io.ebean.Ebean;
66
import io.ebean.EbeanServer;
7+
import io.ebean.SqlQuery;
8+
import io.ebean.SqlRow;
79
import java.io.IOException;
810
import java.nio.charset.StandardCharsets;
11+
import java.util.ArrayList;
12+
import java.util.List;
913
import org.testng.annotations.BeforeClass;
1014
import org.testng.annotations.BeforeMethod;
1115
import org.testng.annotations.DataProvider;
@@ -14,6 +18,7 @@
1418

1519
import static com.linkedin.common.AuditStamps.*;
1620
import static com.linkedin.testing.TestUtils.*;
21+
import static org.mockito.Mockito.*;
1722
import static org.testng.AssertJUnit.*;
1823

1924

@@ -38,7 +43,9 @@ public static Object[][] inputList() {
3843

3944
@BeforeClass
4045
public void init() {
41-
server = EmbeddedMariaInstance.getServer(SchemaValidatorUtilTest.class.getSimpleName());
46+
// need to mock this since we will be stubbing in for the EXPRESSION column retrieval for that test since
47+
// MariaDB doesn't support functional indexes
48+
server = spy(EmbeddedMariaInstance.getServer(SchemaValidatorUtilTest.class.getSimpleName()));
4249
}
4350

4451
@BeforeMethod
@@ -77,4 +84,66 @@ public void testCheckIndexExists() {
7784
}
7885
}
7986

87+
@Test
88+
public void testCleanExpression() {
89+
90+
}
91+
92+
/**
93+
* These tests require mocking because MariaDB, our embedded test database, does not support functional indexes, which
94+
* the code under test is trying to access.
95+
*/
96+
@Test
97+
public void testGetIndexExpression() {
98+
// NEED to set up all mocks for DB access BEFORE running ANY tests because it will be cached
99+
SqlQuery sqlQuery = mock(SqlQuery.class);
100+
List<SqlRow> indexTable = new ArrayList<>();
101+
102+
when(sqlQuery.findList()).thenReturn(indexTable);
103+
when(server.createSqlQuery(anyString())).thenReturn(sqlQuery);
104+
105+
// setup mock for the LEGACY index use case: no expression-based index, but the index still exists!
106+
SqlRow row1 = mock(SqlRow.class);
107+
indexTable.add(row1);
108+
when(row1.getString("EXPRESSION")).thenReturn(null);
109+
110+
// setup mock for the EXPRESSION index use case
111+
SqlRow row2 = mock(SqlRow.class);
112+
indexTable.add(row2);
113+
when(row2.getString("EXPRESSION")).thenReturn(
114+
"cast(json_extract(`a_aspectfoo`, '$.aspect.value') as char(1024) charset utf8mb4)");
115+
116+
if (!ebeanConfig.isNonDollarVirtualColumnsEnabled()) {
117+
when(row1.getString("INDEX_NAME")).thenReturn("i_aspectfoo$value");
118+
when(row2.getString("INDEX_NAME")).thenReturn("idx_aspectfoo$value");
119+
} else {
120+
when(row1.getString("INDEX_NAME")).thenReturn("i_aspectfoo0value");
121+
when(row2.getString("INDEX_NAME")).thenReturn("idx_aspectfoo0value");
122+
}
123+
124+
125+
// NONEXISTENT test
126+
assertNull(validator.getIndexExpression("metadata_entity_burger", "idx_fake"));
127+
128+
if (!ebeanConfig.isNonDollarVirtualColumnsEnabled()) {
129+
/// Verify!
130+
assertNotNull(validator.getIndexExpression("metadata_entity_burger", "idx_aspectfoo$value"));
131+
assertEquals("(cast(json_extract(`a_aspectfoo`, '$.aspect.value') as char(1024) charset utf8mb4))",
132+
validator.getIndexExpression("metadata_entity_burger", "idx_aspectfoo$value"));
133+
134+
// Make sure that retrieving a "legacy" column-based index still returns true but returns null
135+
assertTrue(validator.indexExists("metadata_entity_foo", "i_aspectfoo$value"));
136+
assertNull(validator.getIndexExpression("metadata_entity_foo", "i_aspectfoo$value"));
137+
} else {
138+
/// Verify!
139+
assertNotNull(validator.getIndexExpression("metadata_entity_burger", "idx_aspectfoo0value"));
140+
assertEquals("(cast(json_extract(`a_aspectfoo`, '$.aspect.value') as char(1024) charset utf8mb4))",
141+
validator.getIndexExpression("metadata_entity_burger", "idx_aspectfoo0value"));
142+
143+
// Make sure that retrieving a "legacy" column-based index still returns true but returns null
144+
assertTrue(validator.indexExists("metadata_entity_foo", "i_aspectfoo0value"));
145+
assertNull(validator.getIndexExpression("metadata_entity_foo", "i_aspectfoo0value"));
146+
}
147+
}
148+
80149
}

0 commit comments

Comments
 (0)