Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,8 @@
import datawave.query.planner.comparator.GeoWaveQueryPlanComparator;
import datawave.query.planner.pushdown.PushDownVisitor;
import datawave.query.planner.pushdown.rules.PushDownRule;
import datawave.query.planner.replacement.FieldReplacementVisitor;
import datawave.query.planner.replacement.rules.FieldReplacementRule;
import datawave.query.planner.rules.FieldTransformRule;
import datawave.query.planner.rules.FieldTransformRuleVisitor;
import datawave.query.planner.rules.NodeTransformRule;
Expand Down Expand Up @@ -227,6 +229,13 @@ public class DefaultQueryPlanner extends QueryPlanner implements Cloneable {
*/
protected boolean disableTestNonExistentFields = false;

/**
* Disables Field Replacement rules
*
* @see FieldReplacementVisitor
*/
protected boolean disableFieldReplacementRules = false;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

double negatives have gotten us into trouble in the past. This should be one of the following

fieldReplacementRulesEnabled = false

enableFieldReplacementRules = false


/**
* Disables Whindex (value-specific) field mappings for GeoWave functions.
*
Expand Down Expand Up @@ -274,6 +283,8 @@ public class DefaultQueryPlanner extends QueryPlanner implements Cloneable {
// force certain regex patterns to be pushed down to evaluation
private List<NodeTransformRule> transformRules = Lists.newArrayList();

private List<FieldReplacementRule> replacementRules = Lists.newArrayList();

protected Class<? extends SortedKeyValueIterator<Key,Value>> queryIteratorClazz = QueryIterator.class;

protected String plannedScript = null;
Expand Down Expand Up @@ -402,6 +413,7 @@ protected DefaultQueryPlanner(DefaultQueryPlanner other) {
setPushdownThreshold(other.getPushdownThreshold());
setVisitorManager(other.getVisitorManager());
setTransformRules(other.getTransformRules() == null ? null : new ArrayList<>(other.transformRules));
setReplacementRules(other.getReplacementRules() == null ? null : new ArrayList<>(other.replacementRules));
}

public void setMetadataHelper(final MetadataHelper metadataHelper) {
Expand Down Expand Up @@ -625,12 +637,12 @@ protected CloseableIterable<QueryData> process(ScannerFactory scannerFactory, Me
/**
* This method can be used to recreate a range stream based on plan in the configuration. The plan will be adjusted if needed for executability.
*
* @see DatePartitionedQueryPlanner
* @param config
* @param settings
* @param scannerFactory
* @return a range stream
* @throws DatawaveQueryException
* @see DatePartitionedQueryPlanner
*/
public CloseableIterable<QueryData> reprocess(ShardQueryConfiguration config, Query settings, ScannerFactory scannerFactory) throws DatawaveQueryException {

Expand Down Expand Up @@ -1174,6 +1186,11 @@ protected ASTJexlScript processTree(final ASTJexlScript originalQueryTree, Shard
// need to fetch field to datatype map first
timedFetchDatatypes(timers, "Fetch Required Datatypes", config.getQueryTree(), config);

if (!disableFieldReplacementRules) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

perhaps a comment here for our future selves about how this must be called before expand multi normalized terms, so no one accidentally refactors this in the wrong order

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would be extremely helpful

// apply the configured field replacements
config.setQueryTree(timedApplyFieldReplacementRules(timers, config.getQueryTree(), replacementRules));
}

if (!disableWhindexFieldMappings) {
// apply the value-specific field mappings for GeoWave functions
config.setQueryTree(timedApplyWhindexFieldMappings(timers, config.getQueryTree(), config, metadataHelper, settings));
Expand Down Expand Up @@ -1753,6 +1770,11 @@ protected ASTJexlScript expandRegexFunctionNodes(final ASTJexlScript script, Sha
return visitorManager.validateAndVisit(() -> (RegexFunctionVisitor.expandRegex(config, metadataHelper, indexOnlyFields, script)));
}

protected ASTJexlScript timedApplyFieldReplacementRules(QueryStopwatch timers, final ASTJexlScript script, List<FieldReplacementRule> fieldReplacementRules)
throws DatawaveQueryException {
return visitorManager.timedVisit(timers, "Apply Replacement Field Mappings", () -> (FieldReplacementVisitor.apply(script, fieldReplacementRules)));
}

protected ASTJexlScript timedApplyWhindexFieldMappings(QueryStopwatch timers, final ASTJexlScript script, ShardQueryConfiguration config,
MetadataHelper metadataHelper, Query settings) throws DatawaveQueryException {
try {
Expand Down Expand Up @@ -3347,6 +3369,14 @@ public void setTransformRules(List<NodeTransformRule> transformRules) {
this.transformRules.addAll(transformRules);
}

public List<FieldReplacementRule> getReplacementRules() {
return Collections.unmodifiableList(replacementRules);
}

public void setReplacementRules(List<FieldReplacementRule> replacementRules) {
this.replacementRules.addAll(replacementRules);
}

/*
* (non-Javadoc)
*
Expand Down Expand Up @@ -3426,6 +3456,14 @@ public boolean getDisableTestNonExistentFields() {
return disableTestNonExistentFields;
}

public void setDisableFieldReplacementRules(boolean disableFieldReplacementRules) {
this.disableFieldReplacementRules = disableFieldReplacementRules;
}

public boolean getDisableFieldReplacementRules() {
return disableFieldReplacementRules;
}

public void setDisableWhindexFieldMappings(boolean disableWhindexFieldMappings) {
this.disableWhindexFieldMappings = disableWhindexFieldMappings;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package datawave.query.planner.replacement;

import java.util.List;

import org.apache.commons.jexl3.parser.ASTAndNode;
import org.apache.commons.jexl3.parser.ASTIdentifier;
import org.apache.commons.jexl3.parser.ASTJexlScript;
import org.apache.commons.jexl3.parser.JexlNode;
import org.apache.log4j.Logger;

import datawave.core.common.logging.ThreadConfigurableLogger;
import datawave.query.jexl.visitors.RebuildingVisitor;
import datawave.query.planner.replacement.rules.FieldReplacementRule;

public class FieldReplacementVisitor extends RebuildingVisitor {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add class level docs

private static final Logger log = ThreadConfigurableLogger.getLogger(FieldReplacementVisitor.class);
private final List<FieldReplacementRule> rules;

public FieldReplacementVisitor(List<FieldReplacementRule> rules) {
this.rules = rules;
}

public static ASTJexlScript apply(ASTJexlScript script, List<FieldReplacementRule> rules) {
FieldReplacementVisitor visitor = new FieldReplacementVisitor(rules);

return visitor.apply(script);
}

@Override
public Object visit(ASTAndNode node, Object data) {
return applyRules(super.visit(node, data));
}

@Override
public Object visit(ASTIdentifier node, Object data) {
return applyRules(super.visit(node, data));
}
Comment on lines +35 to +37
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We typically avoid overriding the visit method to an identifier node. Instead you may want to override the specific nodes you care about, EQ, NE, ER, RN, LT, LE, GT, GE, etc.


public JexlNode applyRules(Object node) {
JexlNode jexlNode = (JexlNode) node;
for (FieldReplacementRule rule : rules) {
if (rule.matches(jexlNode)) {
jexlNode = rule.apply(jexlNode);
}
}
return jexlNode;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package datawave.query.planner.replacement.rules;

import org.apache.commons.jexl3.parser.ASTIdentifier;
import org.apache.commons.jexl3.parser.JexlNode;

import datawave.query.jexl.JexlASTHelper;

public class DirectFieldReplacementRule implements FieldReplacementRule {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add class level documentation for what the expected behavior is, what types of nodes are affected, etc

private String field = null;
private String replacement = null;
Comment on lines +9 to +10
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These can be final


public DirectFieldReplacementRule() {}

public DirectFieldReplacementRule(String field, String replacement) {
this.field = field;
this.replacement = replacement;
}

public boolean matches(JexlNode node) {
return node instanceof ASTIdentifier && ((ASTIdentifier) node).getName().equals(field);
}

public JexlNode apply(JexlNode node) {
JexlASTHelper.setField(node, replacement);
return node;
}

public String getField() {
return field;
}

public void setField(String field) {
this.field = field;
}

public String getReplacement() {
return replacement;
}

public void setReplacement(String replacement) {
this.replacement = replacement;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package datawave.query.planner.replacement.rules;

import org.apache.commons.jexl3.parser.JexlNode;

public interface FieldReplacementRule {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add brief class level doc

boolean matches(JexlNode node);

JexlNode apply(JexlNode node);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
package datawave.query.planner.replacement.rules;

import static datawave.query.jexl.nodes.QueryPropertyMarker.MarkerType.EVALUATION_ONLY;

import java.util.HashMap;
import java.util.Map;

import org.apache.commons.jexl3.parser.ASTAndNode;
import org.apache.commons.jexl3.parser.ASTIdentifier;
import org.apache.commons.jexl3.parser.ASTReferenceExpression;
import org.apache.commons.jexl3.parser.JexlNode;
import org.apache.commons.jexl3.parser.JexlNodes;
import org.apache.commons.jexl3.parser.ParserTreeConstants;

import datawave.query.jexl.JexlASTHelper;
import datawave.query.jexl.JexlNodeFactory;
import datawave.query.jexl.LiteralRange;
import datawave.query.jexl.nodes.QueryPropertyMarker;
import datawave.query.jexl.visitors.RebuildingVisitor;

public class RangeFieldReplacementRule implements FieldReplacementRule {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Visitor name should be updated to include "BoundedRange", range on it's own could be any range operator (LT, LE, GT, GE).

The direct field replacement rule actually replaces the field. This visitor doesn't actually replace the bounded range, consider an alternate name.

private Map<String,String> fieldMap = new HashMap<>();

public RangeFieldReplacementRule() {}

public RangeFieldReplacementRule(Map<String,String> fieldMap) {
this.fieldMap = fieldMap;
}

@Override
public boolean matches(JexlNode node) {
LiteralRange<?> range = JexlASTHelper.findRange().getRange(node);
return range != null && fieldMap.containsKey(range.getFieldName());
}

@Override
public JexlNode apply(JexlNode node) {
LiteralRange<?> range = JexlASTHelper.findRange().getRange(node);

if (range == null || !fieldMap.containsKey(range.getFieldName())) {
return node;
}

// Create a copy and mark it as "evaluationOnly"
JexlNode copy = RebuildingVisitor.copy(node);
JexlNode evalNode = QueryPropertyMarker.create(copy, EVALUATION_ONLY);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why mark the original as evaluation only? Are the replacement fields always indexed?

If you had a case where the original field is indexed and the replacement field is not indexed then the executability of the query was just destroyed, in cases where the bounded range is an anchor term.


// rename the field for the range nodes
replaceField(range.getLowerNode(), fieldMap.get(range.getFieldName()));
replaceField(range.getUpperNode(), fieldMap.get(range.getFieldName()));

// Create a Reference Expression for the original node and top level AND for both the ref and the eval node
ASTReferenceExpression ref = JexlNodes.makeRefExp();
ASTAndNode topLevel = new ASTAndNode(ParserTreeConstants.JJTANDNODE);
node.jjtSetParent(ref);
ref.jjtSetParent(topLevel);
evalNode.jjtSetParent(topLevel);
ref.jjtAddChild(node, 0);
topLevel.jjtAddChild(evalNode, 0);
topLevel.jjtAddChild(ref, 1);

return topLevel;
}

private void replaceField(JexlNode node, String newName) {
for (int i = 0; i < node.jjtGetNumChildren(); i++) {
JexlNode child = node.jjtGetChild(i);
if (child instanceof ASTIdentifier) {
ASTIdentifier newId = JexlNodeFactory.buildIdentifier(newName);
node.jjtAddChild(newId, i);
newId.jjtSetParent(node);
} else {
replaceField(child, newName);
}
}
}

public void setFieldMap(Map<String,String> fieldMap) {
this.fieldMap = fieldMap;
}

public Map<String,String> getFieldMap() {
return fieldMap;
}
}
Loading
Loading