-
Notifications
You must be signed in to change notification settings - Fork 271
Create a Field Replacement Visitor for replacing fields in ranges #3211
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: integration
Are you sure you want to change the base?
Changes from all commits
48c61a1
e9283b4
de2dbd4
bd84764
51e5b64
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -178,6 +178,8 @@ | |
| import datawave.query.planner.comparator.GeoWaveQueryPlanComparator; | ||
| import datawave.query.planner.pushdown.PushDownVisitor; | ||
| import datawave.query.planner.pushdown.rules.PushDownRule; | ||
| import datawave.query.planner.replacement.FieldReplacementVisitor; | ||
| import datawave.query.planner.replacement.rules.FieldReplacementRule; | ||
| import datawave.query.planner.rules.FieldTransformRule; | ||
| import datawave.query.planner.rules.FieldTransformRuleVisitor; | ||
| import datawave.query.planner.rules.NodeTransformRule; | ||
|
|
@@ -227,6 +229,13 @@ public class DefaultQueryPlanner extends QueryPlanner implements Cloneable { | |
| */ | ||
| protected boolean disableTestNonExistentFields = false; | ||
|
|
||
| /** | ||
| * Disables Field Replacement rules | ||
| * | ||
| * @see FieldReplacementVisitor | ||
| */ | ||
| protected boolean disableFieldReplacementRules = false; | ||
|
|
||
| /** | ||
| * Disables Whindex (value-specific) field mappings for GeoWave functions. | ||
| * | ||
|
|
@@ -274,6 +283,8 @@ public class DefaultQueryPlanner extends QueryPlanner implements Cloneable { | |
| // force certain regex patterns to be pushed down to evaluation | ||
| private List<NodeTransformRule> transformRules = Lists.newArrayList(); | ||
|
|
||
| private List<FieldReplacementRule> replacementRules = Lists.newArrayList(); | ||
|
|
||
| protected Class<? extends SortedKeyValueIterator<Key,Value>> queryIteratorClazz = QueryIterator.class; | ||
|
|
||
| protected String plannedScript = null; | ||
|
|
@@ -402,6 +413,7 @@ protected DefaultQueryPlanner(DefaultQueryPlanner other) { | |
| setPushdownThreshold(other.getPushdownThreshold()); | ||
| setVisitorManager(other.getVisitorManager()); | ||
| setTransformRules(other.getTransformRules() == null ? null : new ArrayList<>(other.transformRules)); | ||
| setReplacementRules(other.getReplacementRules() == null ? null : new ArrayList<>(other.replacementRules)); | ||
| } | ||
|
|
||
| public void setMetadataHelper(final MetadataHelper metadataHelper) { | ||
|
|
@@ -625,12 +637,12 @@ protected CloseableIterable<QueryData> process(ScannerFactory scannerFactory, Me | |
| /** | ||
| * This method can be used to recreate a range stream based on plan in the configuration. The plan will be adjusted if needed for executability. | ||
| * | ||
| * @see DatePartitionedQueryPlanner | ||
| * @param config | ||
| * @param settings | ||
| * @param scannerFactory | ||
| * @return a range stream | ||
| * @throws DatawaveQueryException | ||
| * @see DatePartitionedQueryPlanner | ||
| */ | ||
| public CloseableIterable<QueryData> reprocess(ShardQueryConfiguration config, Query settings, ScannerFactory scannerFactory) throws DatawaveQueryException { | ||
|
|
||
|
|
@@ -1174,6 +1186,11 @@ protected ASTJexlScript processTree(final ASTJexlScript originalQueryTree, Shard | |
| // need to fetch field to datatype map first | ||
| timedFetchDatatypes(timers, "Fetch Required Datatypes", config.getQueryTree(), config); | ||
|
|
||
| if (!disableFieldReplacementRules) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. perhaps a comment here for our future selves about how this must be called before expand multi normalized terms, so no one accidentally refactors this in the wrong order
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That would be extremely helpful |
||
| // apply the configured field replacements | ||
| config.setQueryTree(timedApplyFieldReplacementRules(timers, config.getQueryTree(), replacementRules)); | ||
| } | ||
|
|
||
| if (!disableWhindexFieldMappings) { | ||
| // apply the value-specific field mappings for GeoWave functions | ||
| config.setQueryTree(timedApplyWhindexFieldMappings(timers, config.getQueryTree(), config, metadataHelper, settings)); | ||
|
|
@@ -1753,6 +1770,11 @@ protected ASTJexlScript expandRegexFunctionNodes(final ASTJexlScript script, Sha | |
| return visitorManager.validateAndVisit(() -> (RegexFunctionVisitor.expandRegex(config, metadataHelper, indexOnlyFields, script))); | ||
| } | ||
|
|
||
| protected ASTJexlScript timedApplyFieldReplacementRules(QueryStopwatch timers, final ASTJexlScript script, List<FieldReplacementRule> fieldReplacementRules) | ||
| throws DatawaveQueryException { | ||
| return visitorManager.timedVisit(timers, "Apply Replacement Field Mappings", () -> (FieldReplacementVisitor.apply(script, fieldReplacementRules))); | ||
| } | ||
|
|
||
| protected ASTJexlScript timedApplyWhindexFieldMappings(QueryStopwatch timers, final ASTJexlScript script, ShardQueryConfiguration config, | ||
| MetadataHelper metadataHelper, Query settings) throws DatawaveQueryException { | ||
| try { | ||
|
|
@@ -3347,6 +3369,14 @@ public void setTransformRules(List<NodeTransformRule> transformRules) { | |
| this.transformRules.addAll(transformRules); | ||
| } | ||
|
|
||
| public List<FieldReplacementRule> getReplacementRules() { | ||
| return Collections.unmodifiableList(replacementRules); | ||
| } | ||
|
|
||
| public void setReplacementRules(List<FieldReplacementRule> replacementRules) { | ||
| this.replacementRules.addAll(replacementRules); | ||
| } | ||
|
|
||
| /* | ||
| * (non-Javadoc) | ||
| * | ||
|
|
@@ -3426,6 +3456,14 @@ public boolean getDisableTestNonExistentFields() { | |
| return disableTestNonExistentFields; | ||
| } | ||
|
|
||
| public void setDisableFieldReplacementRules(boolean disableFieldReplacementRules) { | ||
| this.disableFieldReplacementRules = disableFieldReplacementRules; | ||
| } | ||
|
|
||
| public boolean getDisableFieldReplacementRules() { | ||
| return disableFieldReplacementRules; | ||
| } | ||
|
|
||
| public void setDisableWhindexFieldMappings(boolean disableWhindexFieldMappings) { | ||
| this.disableWhindexFieldMappings = disableWhindexFieldMappings; | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| package datawave.query.planner.replacement; | ||
|
|
||
| import java.util.List; | ||
|
|
||
| import org.apache.commons.jexl3.parser.ASTAndNode; | ||
| import org.apache.commons.jexl3.parser.ASTIdentifier; | ||
| import org.apache.commons.jexl3.parser.ASTJexlScript; | ||
| import org.apache.commons.jexl3.parser.JexlNode; | ||
| import org.apache.log4j.Logger; | ||
|
|
||
| import datawave.core.common.logging.ThreadConfigurableLogger; | ||
| import datawave.query.jexl.visitors.RebuildingVisitor; | ||
| import datawave.query.planner.replacement.rules.FieldReplacementRule; | ||
|
|
||
| public class FieldReplacementVisitor extends RebuildingVisitor { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add class level docs |
||
| private static final Logger log = ThreadConfigurableLogger.getLogger(FieldReplacementVisitor.class); | ||
| private final List<FieldReplacementRule> rules; | ||
|
|
||
| public FieldReplacementVisitor(List<FieldReplacementRule> rules) { | ||
| this.rules = rules; | ||
| } | ||
|
|
||
| public static ASTJexlScript apply(ASTJexlScript script, List<FieldReplacementRule> rules) { | ||
| FieldReplacementVisitor visitor = new FieldReplacementVisitor(rules); | ||
|
|
||
| return visitor.apply(script); | ||
| } | ||
|
|
||
| @Override | ||
| public Object visit(ASTAndNode node, Object data) { | ||
| return applyRules(super.visit(node, data)); | ||
| } | ||
|
|
||
| @Override | ||
| public Object visit(ASTIdentifier node, Object data) { | ||
| return applyRules(super.visit(node, data)); | ||
| } | ||
|
Comment on lines
+35
to
+37
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We typically avoid overriding the visit method to an identifier node. Instead you may want to override the specific nodes you care about, EQ, NE, ER, RN, LT, LE, GT, GE, etc. |
||
|
|
||
| public JexlNode applyRules(Object node) { | ||
| JexlNode jexlNode = (JexlNode) node; | ||
| for (FieldReplacementRule rule : rules) { | ||
| if (rule.matches(jexlNode)) { | ||
| jexlNode = rule.apply(jexlNode); | ||
| } | ||
| } | ||
| return jexlNode; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,43 @@ | ||
| package datawave.query.planner.replacement.rules; | ||
|
|
||
| import org.apache.commons.jexl3.parser.ASTIdentifier; | ||
| import org.apache.commons.jexl3.parser.JexlNode; | ||
|
|
||
| import datawave.query.jexl.JexlASTHelper; | ||
|
|
||
| public class DirectFieldReplacementRule implements FieldReplacementRule { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add class level documentation for what the expected behavior is, what types of nodes are affected, etc |
||
| private String field = null; | ||
| private String replacement = null; | ||
|
Comment on lines
+9
to
+10
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These can be final |
||
|
|
||
| public DirectFieldReplacementRule() {} | ||
|
|
||
| public DirectFieldReplacementRule(String field, String replacement) { | ||
| this.field = field; | ||
| this.replacement = replacement; | ||
| } | ||
|
|
||
| public boolean matches(JexlNode node) { | ||
| return node instanceof ASTIdentifier && ((ASTIdentifier) node).getName().equals(field); | ||
| } | ||
|
|
||
| public JexlNode apply(JexlNode node) { | ||
| JexlASTHelper.setField(node, replacement); | ||
| return node; | ||
| } | ||
|
|
||
| public String getField() { | ||
| return field; | ||
| } | ||
|
|
||
| public void setField(String field) { | ||
| this.field = field; | ||
| } | ||
|
|
||
| public String getReplacement() { | ||
| return replacement; | ||
| } | ||
|
|
||
| public void setReplacement(String replacement) { | ||
| this.replacement = replacement; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| package datawave.query.planner.replacement.rules; | ||
|
|
||
| import org.apache.commons.jexl3.parser.JexlNode; | ||
|
|
||
| public interface FieldReplacementRule { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add brief class level doc |
||
| boolean matches(JexlNode node); | ||
|
|
||
| JexlNode apply(JexlNode node); | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,85 @@ | ||
| package datawave.query.planner.replacement.rules; | ||
|
|
||
| import static datawave.query.jexl.nodes.QueryPropertyMarker.MarkerType.EVALUATION_ONLY; | ||
|
|
||
| import java.util.HashMap; | ||
| import java.util.Map; | ||
|
|
||
| import org.apache.commons.jexl3.parser.ASTAndNode; | ||
| import org.apache.commons.jexl3.parser.ASTIdentifier; | ||
| import org.apache.commons.jexl3.parser.ASTReferenceExpression; | ||
| import org.apache.commons.jexl3.parser.JexlNode; | ||
| import org.apache.commons.jexl3.parser.JexlNodes; | ||
| import org.apache.commons.jexl3.parser.ParserTreeConstants; | ||
|
|
||
| import datawave.query.jexl.JexlASTHelper; | ||
| import datawave.query.jexl.JexlNodeFactory; | ||
| import datawave.query.jexl.LiteralRange; | ||
| import datawave.query.jexl.nodes.QueryPropertyMarker; | ||
| import datawave.query.jexl.visitors.RebuildingVisitor; | ||
|
|
||
| public class RangeFieldReplacementRule implements FieldReplacementRule { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Visitor name should be updated to include "BoundedRange", range on it's own could be any range operator (LT, LE, GT, GE). The direct field replacement rule actually replaces the field. This visitor doesn't actually replace the bounded range, consider an alternate name. |
||
| private Map<String,String> fieldMap = new HashMap<>(); | ||
|
|
||
| public RangeFieldReplacementRule() {} | ||
|
|
||
| public RangeFieldReplacementRule(Map<String,String> fieldMap) { | ||
| this.fieldMap = fieldMap; | ||
| } | ||
|
|
||
| @Override | ||
| public boolean matches(JexlNode node) { | ||
| LiteralRange<?> range = JexlASTHelper.findRange().getRange(node); | ||
| return range != null && fieldMap.containsKey(range.getFieldName()); | ||
| } | ||
|
|
||
| @Override | ||
| public JexlNode apply(JexlNode node) { | ||
| LiteralRange<?> range = JexlASTHelper.findRange().getRange(node); | ||
|
|
||
| if (range == null || !fieldMap.containsKey(range.getFieldName())) { | ||
| return node; | ||
| } | ||
|
|
||
| // Create a copy and mark it as "evaluationOnly" | ||
| JexlNode copy = RebuildingVisitor.copy(node); | ||
| JexlNode evalNode = QueryPropertyMarker.create(copy, EVALUATION_ONLY); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why mark the original as evaluation only? Are the replacement fields always indexed? If you had a case where the original field is indexed and the replacement field is not indexed then the executability of the query was just destroyed, in cases where the bounded range is an anchor term. |
||
|
|
||
| // rename the field for the range nodes | ||
| replaceField(range.getLowerNode(), fieldMap.get(range.getFieldName())); | ||
| replaceField(range.getUpperNode(), fieldMap.get(range.getFieldName())); | ||
|
|
||
| // Create a Reference Expression for the original node and top level AND for both the ref and the eval node | ||
| ASTReferenceExpression ref = JexlNodes.makeRefExp(); | ||
| ASTAndNode topLevel = new ASTAndNode(ParserTreeConstants.JJTANDNODE); | ||
| node.jjtSetParent(ref); | ||
| ref.jjtSetParent(topLevel); | ||
| evalNode.jjtSetParent(topLevel); | ||
| ref.jjtAddChild(node, 0); | ||
| topLevel.jjtAddChild(evalNode, 0); | ||
| topLevel.jjtAddChild(ref, 1); | ||
|
|
||
| return topLevel; | ||
| } | ||
|
|
||
| private void replaceField(JexlNode node, String newName) { | ||
| for (int i = 0; i < node.jjtGetNumChildren(); i++) { | ||
| JexlNode child = node.jjtGetChild(i); | ||
| if (child instanceof ASTIdentifier) { | ||
| ASTIdentifier newId = JexlNodeFactory.buildIdentifier(newName); | ||
| node.jjtAddChild(newId, i); | ||
| newId.jjtSetParent(node); | ||
| } else { | ||
| replaceField(child, newName); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| public void setFieldMap(Map<String,String> fieldMap) { | ||
| this.fieldMap = fieldMap; | ||
| } | ||
|
|
||
| public Map<String,String> getFieldMap() { | ||
| return fieldMap; | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
double negatives have gotten us into trouble in the past. This should be one of the following