diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java index 04fdbeca250..c35cf7c2c68 100644 --- a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java +++ b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java @@ -178,6 +178,8 @@ import datawave.query.planner.comparator.GeoWaveQueryPlanComparator; import datawave.query.planner.pushdown.PushDownVisitor; import datawave.query.planner.pushdown.rules.PushDownRule; +import datawave.query.planner.replacement.FieldReplacementVisitor; +import datawave.query.planner.replacement.rules.FieldReplacementRule; import datawave.query.planner.rules.FieldTransformRule; import datawave.query.planner.rules.FieldTransformRuleVisitor; import datawave.query.planner.rules.NodeTransformRule; @@ -227,6 +229,13 @@ public class DefaultQueryPlanner extends QueryPlanner implements Cloneable { */ protected boolean disableTestNonExistentFields = false; + /** + * Disables Field Replacement rules + * + * @see FieldReplacementVisitor + */ + protected boolean disableFieldReplacementRules = false; + /** * Disables Whindex (value-specific) field mappings for GeoWave functions. * @@ -274,6 +283,8 @@ public class DefaultQueryPlanner extends QueryPlanner implements Cloneable { // force certain regex patterns to be pushed down to evaluation private List transformRules = Lists.newArrayList(); + private List replacementRules = Lists.newArrayList(); + protected Class> queryIteratorClazz = QueryIterator.class; protected String plannedScript = null; @@ -402,6 +413,7 @@ protected DefaultQueryPlanner(DefaultQueryPlanner other) { setPushdownThreshold(other.getPushdownThreshold()); setVisitorManager(other.getVisitorManager()); setTransformRules(other.getTransformRules() == null ? null : new ArrayList<>(other.transformRules)); + setReplacementRules(other.getReplacementRules() == null ? null : new ArrayList<>(other.replacementRules)); } public void setMetadataHelper(final MetadataHelper metadataHelper) { @@ -625,12 +637,12 @@ protected CloseableIterable process(ScannerFactory scannerFactory, Me /** * This method can be used to recreate a range stream based on plan in the configuration. The plan will be adjusted if needed for executability. * - * @see DatePartitionedQueryPlanner * @param config * @param settings * @param scannerFactory * @return a range stream * @throws DatawaveQueryException + * @see DatePartitionedQueryPlanner */ public CloseableIterable reprocess(ShardQueryConfiguration config, Query settings, ScannerFactory scannerFactory) throws DatawaveQueryException { @@ -1174,6 +1186,11 @@ protected ASTJexlScript processTree(final ASTJexlScript originalQueryTree, Shard // need to fetch field to datatype map first timedFetchDatatypes(timers, "Fetch Required Datatypes", config.getQueryTree(), config); + if (!disableFieldReplacementRules) { + // apply the configured field replacements + config.setQueryTree(timedApplyFieldReplacementRules(timers, config.getQueryTree(), replacementRules)); + } + if (!disableWhindexFieldMappings) { // apply the value-specific field mappings for GeoWave functions config.setQueryTree(timedApplyWhindexFieldMappings(timers, config.getQueryTree(), config, metadataHelper, settings)); @@ -1753,6 +1770,11 @@ protected ASTJexlScript expandRegexFunctionNodes(final ASTJexlScript script, Sha return visitorManager.validateAndVisit(() -> (RegexFunctionVisitor.expandRegex(config, metadataHelper, indexOnlyFields, script))); } + protected ASTJexlScript timedApplyFieldReplacementRules(QueryStopwatch timers, final ASTJexlScript script, List fieldReplacementRules) + throws DatawaveQueryException { + return visitorManager.timedVisit(timers, "Apply Replacement Field Mappings", () -> (FieldReplacementVisitor.apply(script, fieldReplacementRules))); + } + protected ASTJexlScript timedApplyWhindexFieldMappings(QueryStopwatch timers, final ASTJexlScript script, ShardQueryConfiguration config, MetadataHelper metadataHelper, Query settings) throws DatawaveQueryException { try { @@ -3347,6 +3369,14 @@ public void setTransformRules(List transformRules) { this.transformRules.addAll(transformRules); } + public List getReplacementRules() { + return Collections.unmodifiableList(replacementRules); + } + + public void setReplacementRules(List replacementRules) { + this.replacementRules.addAll(replacementRules); + } + /* * (non-Javadoc) * @@ -3426,6 +3456,14 @@ public boolean getDisableTestNonExistentFields() { return disableTestNonExistentFields; } + public void setDisableFieldReplacementRules(boolean disableFieldReplacementRules) { + this.disableFieldReplacementRules = disableFieldReplacementRules; + } + + public boolean getDisableFieldReplacementRules() { + return disableFieldReplacementRules; + } + public void setDisableWhindexFieldMappings(boolean disableWhindexFieldMappings) { this.disableWhindexFieldMappings = disableWhindexFieldMappings; } diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/replacement/FieldReplacementVisitor.java b/warehouse/query-core/src/main/java/datawave/query/planner/replacement/FieldReplacementVisitor.java new file mode 100644 index 00000000000..d73b38ab638 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/planner/replacement/FieldReplacementVisitor.java @@ -0,0 +1,48 @@ +package datawave.query.planner.replacement; + +import java.util.List; + +import org.apache.commons.jexl3.parser.ASTAndNode; +import org.apache.commons.jexl3.parser.ASTIdentifier; +import org.apache.commons.jexl3.parser.ASTJexlScript; +import org.apache.commons.jexl3.parser.JexlNode; +import org.apache.log4j.Logger; + +import datawave.core.common.logging.ThreadConfigurableLogger; +import datawave.query.jexl.visitors.RebuildingVisitor; +import datawave.query.planner.replacement.rules.FieldReplacementRule; + +public class FieldReplacementVisitor extends RebuildingVisitor { + private static final Logger log = ThreadConfigurableLogger.getLogger(FieldReplacementVisitor.class); + private final List rules; + + public FieldReplacementVisitor(List rules) { + this.rules = rules; + } + + public static ASTJexlScript apply(ASTJexlScript script, List rules) { + FieldReplacementVisitor visitor = new FieldReplacementVisitor(rules); + + return visitor.apply(script); + } + + @Override + public Object visit(ASTAndNode node, Object data) { + return applyRules(super.visit(node, data)); + } + + @Override + public Object visit(ASTIdentifier node, Object data) { + return applyRules(super.visit(node, data)); + } + + public JexlNode applyRules(Object node) { + JexlNode jexlNode = (JexlNode) node; + for (FieldReplacementRule rule : rules) { + if (rule.matches(jexlNode)) { + jexlNode = rule.apply(jexlNode); + } + } + return jexlNode; + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/replacement/rules/DirectFieldReplacementRule.java b/warehouse/query-core/src/main/java/datawave/query/planner/replacement/rules/DirectFieldReplacementRule.java new file mode 100644 index 00000000000..5af363e6a44 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/planner/replacement/rules/DirectFieldReplacementRule.java @@ -0,0 +1,43 @@ +package datawave.query.planner.replacement.rules; + +import org.apache.commons.jexl3.parser.ASTIdentifier; +import org.apache.commons.jexl3.parser.JexlNode; + +import datawave.query.jexl.JexlASTHelper; + +public class DirectFieldReplacementRule implements FieldReplacementRule { + private String field = null; + private String replacement = null; + + public DirectFieldReplacementRule() {} + + public DirectFieldReplacementRule(String field, String replacement) { + this.field = field; + this.replacement = replacement; + } + + public boolean matches(JexlNode node) { + return node instanceof ASTIdentifier && ((ASTIdentifier) node).getName().equals(field); + } + + public JexlNode apply(JexlNode node) { + JexlASTHelper.setField(node, replacement); + return node; + } + + public String getField() { + return field; + } + + public void setField(String field) { + this.field = field; + } + + public String getReplacement() { + return replacement; + } + + public void setReplacement(String replacement) { + this.replacement = replacement; + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/replacement/rules/FieldReplacementRule.java b/warehouse/query-core/src/main/java/datawave/query/planner/replacement/rules/FieldReplacementRule.java new file mode 100644 index 00000000000..b29daa63d2a --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/planner/replacement/rules/FieldReplacementRule.java @@ -0,0 +1,9 @@ +package datawave.query.planner.replacement.rules; + +import org.apache.commons.jexl3.parser.JexlNode; + +public interface FieldReplacementRule { + boolean matches(JexlNode node); + + JexlNode apply(JexlNode node); +} diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/replacement/rules/RangeFieldReplacementRule.java b/warehouse/query-core/src/main/java/datawave/query/planner/replacement/rules/RangeFieldReplacementRule.java new file mode 100644 index 00000000000..843b45a80c9 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/planner/replacement/rules/RangeFieldReplacementRule.java @@ -0,0 +1,85 @@ +package datawave.query.planner.replacement.rules; + +import static datawave.query.jexl.nodes.QueryPropertyMarker.MarkerType.EVALUATION_ONLY; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.commons.jexl3.parser.ASTAndNode; +import org.apache.commons.jexl3.parser.ASTIdentifier; +import org.apache.commons.jexl3.parser.ASTReferenceExpression; +import org.apache.commons.jexl3.parser.JexlNode; +import org.apache.commons.jexl3.parser.JexlNodes; +import org.apache.commons.jexl3.parser.ParserTreeConstants; + +import datawave.query.jexl.JexlASTHelper; +import datawave.query.jexl.JexlNodeFactory; +import datawave.query.jexl.LiteralRange; +import datawave.query.jexl.nodes.QueryPropertyMarker; +import datawave.query.jexl.visitors.RebuildingVisitor; + +public class RangeFieldReplacementRule implements FieldReplacementRule { + private Map fieldMap = new HashMap<>(); + + public RangeFieldReplacementRule() {} + + public RangeFieldReplacementRule(Map fieldMap) { + this.fieldMap = fieldMap; + } + + @Override + public boolean matches(JexlNode node) { + LiteralRange range = JexlASTHelper.findRange().getRange(node); + return range != null && fieldMap.containsKey(range.getFieldName()); + } + + @Override + public JexlNode apply(JexlNode node) { + LiteralRange range = JexlASTHelper.findRange().getRange(node); + + if (range == null || !fieldMap.containsKey(range.getFieldName())) { + return node; + } + + // Create a copy and mark it as "evaluationOnly" + JexlNode copy = RebuildingVisitor.copy(node); + JexlNode evalNode = QueryPropertyMarker.create(copy, EVALUATION_ONLY); + + // rename the field for the range nodes + replaceField(range.getLowerNode(), fieldMap.get(range.getFieldName())); + replaceField(range.getUpperNode(), fieldMap.get(range.getFieldName())); + + // Create a Reference Expression for the original node and top level AND for both the ref and the eval node + ASTReferenceExpression ref = JexlNodes.makeRefExp(); + ASTAndNode topLevel = new ASTAndNode(ParserTreeConstants.JJTANDNODE); + node.jjtSetParent(ref); + ref.jjtSetParent(topLevel); + evalNode.jjtSetParent(topLevel); + ref.jjtAddChild(node, 0); + topLevel.jjtAddChild(evalNode, 0); + topLevel.jjtAddChild(ref, 1); + + return topLevel; + } + + private void replaceField(JexlNode node, String newName) { + for (int i = 0; i < node.jjtGetNumChildren(); i++) { + JexlNode child = node.jjtGetChild(i); + if (child instanceof ASTIdentifier) { + ASTIdentifier newId = JexlNodeFactory.buildIdentifier(newName); + node.jjtAddChild(newId, i); + newId.jjtSetParent(node); + } else { + replaceField(child, newName); + } + } + } + + public void setFieldMap(Map fieldMap) { + this.fieldMap = fieldMap; + } + + public Map getFieldMap() { + return fieldMap; + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/planner/replacement/FieldReplacementVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/planner/replacement/FieldReplacementVisitorTest.java new file mode 100644 index 00000000000..f63f742af82 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/planner/replacement/FieldReplacementVisitorTest.java @@ -0,0 +1,186 @@ +package datawave.query.planner.replacement; + +import static org.junit.Assert.assertTrue; + +import java.util.List; +import java.util.Map; + +import org.apache.commons.jexl3.parser.ASTAndNode; +import org.apache.commons.jexl3.parser.ASTJexlScript; +import org.apache.commons.jexl3.parser.JexlNode; +import org.apache.commons.jexl3.parser.ParseException; +import org.apache.log4j.Logger; +import org.junit.Assert; +import org.junit.Test; + +import datawave.query.jexl.JexlASTHelper; +import datawave.query.jexl.visitors.PrintingVisitor; +import datawave.query.jexl.visitors.RebuildingVisitor; +import datawave.query.jexl.visitors.TreeEqualityVisitor; +import datawave.query.planner.replacement.rules.DirectFieldReplacementRule; +import datawave.query.planner.replacement.rules.FieldReplacementRule; +import datawave.query.planner.replacement.rules.RangeFieldReplacementRule; + +public class FieldReplacementVisitorTest { + private static final Logger log = Logger.getLogger(FieldReplacementVisitorTest.class); + private static final DirectFieldReplacementRule dfrRule = new DirectFieldReplacementRule("ABC", "XYZ"); + private static final Map rangeMap = Map.of("AA", "BB", "CC", "DD"); + private static final RangeFieldReplacementRule rfrRule = new RangeFieldReplacementRule(rangeMap); + + private void testReplacement(String original, String expected, List rules, boolean checkRange) throws Exception { + // create a query tree + ASTJexlScript originalScript = JexlASTHelper.parseJexlQuery(original); + + // apply the visitor + ASTJexlScript resultScript = FieldReplacementVisitor.apply(originalScript, rules); + + // Verify the script is as expected, and has a valid lineage. + assertScriptEquality(resultScript, expected); + assertLineage(resultScript); + + // Verify the original script was not modified, and still has a valid lineage. + assertScriptEquality(originalScript, original); + assertLineage(originalScript); + + if (checkRange) { + RangeVerificationVisitor resultVisitor = new RangeVerificationVisitor(); + resultScript.jjtAccept(resultVisitor, null); + + ASTJexlScript expectedScript = JexlASTHelper.parseJexlQuery(expected); + RangeVerificationVisitor expectedVistor = new RangeVerificationVisitor(); + expectedScript.jjtAccept(expectedVistor, null); + + Assert.assertEquals(expectedVistor.getRangesFound(), resultVisitor.getRangesFound()); + } + + } + + private void assertScriptEquality(ASTJexlScript actualScript, String expected) throws ParseException { + ASTJexlScript expectedScript = JexlASTHelper.parseJexlQuery(expected); + TreeEqualityVisitor.Comparison comparison = TreeEqualityVisitor.checkEquality(expectedScript, actualScript); + if (!comparison.isEqual()) { + log.error("Expected " + PrintingVisitor.formattedQueryString(expectedScript)); + log.error("Actual " + PrintingVisitor.formattedQueryString(actualScript)); + } + assertTrue(comparison.getReason(), comparison.isEqual()); + } + + private void assertLineage(JexlNode node) { + assertTrue(JexlASTHelper.validateLineage(node, true)); + } + + @Test + public void rangeFieldReplacementTest() throws Exception { + // @formatter:off + String query = "(_Bounded_ = true) && (AA >= '2' && AA <= '4')"; + String expected = "((_Eval_ = true) && ((_Bounded_ = true) && (AA >= '2' && AA <= '4'))) && " + + "((_Bounded_ = true) && (BB >= '2' && BB <= '4'))" ; + // @formatter:on + testReplacement(query, expected, List.of(rfrRule), true); + } + + @Test + public void rangeFieldReplacementWithDecimalsTest() throws Exception { + // @formatter:off + String query = "(_Bounded_ = true) && (AA >= '2.12' && AA <= '2.24')"; + String expected = "((_Eval_ = true) && ((_Bounded_ = true) && (AA >= '2.12' && AA <= '2.24'))) &&" + + "((_Bounded_ = true) && (BB >= '2.12' && BB <= '2.24'))" ; + // @formatter:on + testReplacement(query, expected, List.of(rfrRule), true); + } + + @Test + public void rangeFieldReplacementInLargerQueryTest() throws Exception { + // @formatter:off + String query = "(AA == '6') || ((_Bounded_ = true) && (AA >= '2' && AA <= '4'))"; + String expected = "(AA == '6') || " + + "(((_Eval_ = true) && ((_Bounded_ = true) && (AA >= '2' && AA <= '4'))) && " + + "((_Bounded_ = true) && (BB >= '2' && BB <= '4')))" ; + // @formatter:on + testReplacement(query, expected, List.of(rfrRule), true); + } + + @Test + public void rangeFieldReplacementWithMultipleRangesTest() throws Exception { + // @formatter:off + String query = "((_Bounded_ = true) && (CC >= '2' && CC <= '4')) || ((_Bounded_ = true) && (AA >= '2' && AA <= '4'))"; + String expected = "(((_Eval_ = true) && ((_Bounded_ = true) && (CC >= '2' && CC <= '4'))) && " + + "((_Bounded_ = true) && (DD >= '2' && DD <= '4'))) || " + + "(((_Eval_ = true) && ((_Bounded_ = true) && (AA >= '2' && AA <= '4'))) && " + + "((_Bounded_ = true) && (BB >= '2' && BB <= '4')))" ; + // @formatter:on + testReplacement(query, expected, List.of(rfrRule), true); + } + + @Test + public void directFieldReplacementTest() throws Exception { + // @formatter:off + String query = "ABC == 'x'"; + String expected = "XYZ == 'x'" ; + // @formatter:on + testReplacement(query, expected, List.of(dfrRule), false); + } + + @Test + public void multiRuleReplacementTest() throws Exception { + // @formatter:off + String query = "(ABC = 6) || ((_Bounded_ = true) && (AA >= '2' && AA <= '4'))"; + String expected = "(XYZ = 6) || " + + "(((_Eval_ = true) && ((_Bounded_ = true) && (AA >= '2' && AA <= '4'))) && " + + "((_Bounded_ = true) && (BB >= '2' && BB <= '4')))" ; + // @formatter:on + testReplacement(query, expected, List.of(rfrRule, dfrRule), true); + } + + @Test + public void onlyExactStringsAreReplacedTest() throws Exception { + // @formatter:off + String query = "ABCD == 'x'"; + String expected = "ABCD == 'x'" ; + // @formatter:on + testReplacement(query, expected, List.of(dfrRule), false); + + // @formatter:off + query = "TABC == 'x'"; + expected = "TABC == 'x'" ; + // @formatter:on + testReplacement(query, expected, List.of(dfrRule), false); + + // @formatter:off + query = "(_Bounded_ = true) && (AA1 >= '2' && AA1 <= '4')"; + expected = "(_Bounded_ = true) && (AA1 >= '2' && AA1 <= '4')" ; + // @formatter:on + testReplacement(query, expected, List.of(rfrRule), false); + + // @formatter:off + query = "(_Bounded_ = true) && (RAA >= '2' && RAA <= '4')"; + expected = "(_Bounded_ = true) && (RAA >= '2' && RAA <= '4')" ; + // @formatter:on + testReplacement(query, expected, List.of(rfrRule), false); + } + + @Test + public void onlyBoundedRangesAreReplacedTest() throws Exception { + // @formatter:off + String query = "(AA >= '2' && AA <= '4')"; + String expected = "(AA >= '2' && AA <= '4')" ; + // @formatter:on + testReplacement(query, expected, List.of(rfrRule), false); + } + + public class RangeVerificationVisitor extends RebuildingVisitor { + private int rangesFound = 0; + + @Override + public Object visit(ASTAndNode node, Object data) { + if (JexlASTHelper.findRange().isRange(node)) { + rangesFound++; + } + return super.visit(node, data); + } + + public int getRangesFound() { + return rangesFound; + } + } +}