Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@
import datawave.ingest.protobuf.TermWeight;
import datawave.iterators.FrequencyMetadataAggregator;
import datawave.query.iterator.SortedListKeyValueIterator;
import datawave.util.accumulo.RFileUtil;
import datawave.query.model.DateFrequencyMap;
import datawave.util.accumulo.RFileUtil;

public class ShardReindexMapperTest extends EasyMockSupport {
private Configuration conf;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
package datawave.query.jexl.visitors;

import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Set;

import org.apache.commons.jexl3.parser.ASTEQNode;
import org.apache.commons.jexl3.parser.ASTERNode;
import org.apache.commons.jexl3.parser.ASTFunctionNode;
import org.apache.commons.jexl3.parser.ASTGENode;
import org.apache.commons.jexl3.parser.ASTGTNode;
import org.apache.commons.jexl3.parser.ASTIdentifier;
import org.apache.commons.jexl3.parser.ASTJexlScript;
import org.apache.commons.jexl3.parser.ASTLENode;
import org.apache.commons.jexl3.parser.ASTLTNode;
import org.apache.commons.jexl3.parser.ASTNENode;
import org.apache.commons.jexl3.parser.ASTNRNode;
import org.apache.commons.jexl3.parser.ASTOrNode;
import org.apache.commons.jexl3.parser.JexlNode;

import datawave.microservice.query.Query;
import datawave.query.jexl.JexlASTHelper;
import datawave.query.jexl.functions.JexlFunctionArgumentDescriptorFactory;
import datawave.query.jexl.functions.arguments.JexlArgumentDescriptor;
import datawave.query.util.MetadataHelper;

/**
 * Visitor that collects the query fields for which no data was ingested during the query's date range.
 *
 * <pre>
 * 1. If a datatype filter was specified, then the existence check is limited to only those datatypes
 * 2. If a datatype filter is NOT specified (null or empty), this implies ALL datatypes.
 * 3. If querySettings is NOT specified (null), it will not report any missing fields. This is due to no begin or end date being provided.
 * 4. Fields contained in specialFields are never reported.
 * 5. The fields beneath an OR node are evaluated as a group: they are only reported when their combined count for the date range is zero.
 * </pre>
 */
public class FieldMissingFromDateRangeVisitor extends ShortCircuitBaseVisitor {

    private final MetadataHelper helper;
    private final Set<String> datatypeFilter;
    private final Query querySettings;
    private final Set<String> specialFields;

    /**
     * @param helper
     *            the metadata helper used to look up ingest counts
     * @param datatypeFilter
     *            the datatypes to restrict the lookup to; null is treated as an empty set
     * @param specialFields
     *            fields that must never be reported; null is treated as an empty set
     * @param querySettings
     *            supplies the begin and end date; when null no fields are reported
     */
    public FieldMissingFromDateRangeVisitor(MetadataHelper helper, Set<String> datatypeFilter, Set<String> specialFields, Query querySettings) {
        this.helper = helper;
        this.querySettings = querySettings;
        // a null set of special fields means there are none; normalising here avoids a NullPointerException on the first lookup
        this.specialFields = (specialFields == null) ? Collections.emptySet() : specialFields;
        // if given datatypeFilter is empty or null, assume that means ALL datatypes
        this.datatypeFilter = (datatypeFilter == null) ? Collections.emptySet() : datatypeFilter;
    }

    /**
     * Run the visitor over the given script.
     *
     * @param helper
     *            the metadata helper used to look up ingest counts
     * @param script
     *            the query tree
     * @param datatypes
     *            the datatype filter, may be null
     * @param specialFields
     *            fields that must never be reported, may be null
     * @param querySettings
     *            supplies the begin and end date, may be null
     * @return the names of the fields that were not ingested during the date range, in the order they were found
     */
    @SuppressWarnings("unchecked")
    public static Set<String> getNonIngestedFields(MetadataHelper helper, ASTJexlScript script, Set<String> datatypes, Set<String> specialFields,
                    Query querySettings) {
        FieldMissingFromDateRangeVisitor visitor = new FieldMissingFromDateRangeVisitor(helper, datatypes, specialFields, querySettings);
        // Maintain insertion order.
        return (Set<String>) script.jjtAccept(visitor, new LinkedHashSet<>());
    }

    /**
     * Interpret the visitor's data argument as the accumulating set of field names.
     *
     * @param data
     *            the data object handed to a visit method, may be null
     * @return the given set, or a new insertion-ordered set when data is null
     */
    @SuppressWarnings("unchecked")
    private static Set<String> asFieldSet(Object data) {
        return (data == null) ? new LinkedHashSet<>() : (Set<String>) data;
    }

    /**
     * Fetch the identifiers beneath a node.
     *
     * @param node
     *            Jexl node
     * @return the identifiers, or an empty list when the lookup throws NoSuchElementException
     */
    private static List<ASTIdentifier> identifiersOf(JexlNode node) {
        // A node could be literal == literal in terms of an identityQuery
        try {
            return JexlASTHelper.getIdentifiers(node);
        } catch (NoSuchElementException e) {
            return Collections.emptyList();
        }
    }

    /**
     * Determine whether a single field has no counts in the date range.
     *
     * @param fieldName
     *            the deconstructed field name
     * @return true when query settings are present and the count for the field is less than one
     */
    private boolean isMissingFromDateRange(String fieldName) {
        if (this.querySettings == null) {
            // no begin or end date available, so nothing can be reported
            return false;
        }
        long occurrences = helper.getCountsByFieldForDays(fieldName, this.querySettings.getBeginDate(), this.querySettings.getEndDate(),
                        this.datatypeFilter);
        return occurrences < 1;
    }

    /**
     * Evaluate all fields beneath an OR node as a group. The fields are only added when the sum of their counts is less than one.
     *
     * @param node
     *            Jexl node
     * @param data
     *            The set of names which we have determined have not been ingested during the date range.
     * @return the updated set of names which have not been ingested during the date range.
     */
    private Object findMissingFields(ASTOrNode node, Object data) {
        Set<String> nonIngestedFieldNames = asFieldSet(data);
        if (this.querySettings == null) {
            return nonIngestedFieldNames;
        }

        // insertion-ordered so that fields are reported in the order they appear in the query
        Set<String> fieldNamesToTestDateRange = new LinkedHashSet<>();
        int numChildren = node.jjtGetNumChildren();
        for (int i = 0; i < numChildren; i++) {
            List<ASTIdentifier> identifiers = identifiersOf(node.jjtGetChild(i));
            if (identifiers.isEmpty()) {
                // Catch cases where we have two literals
                // essentially everything but identifier op literal
                return nonIngestedFieldNames;
            }
            for (ASTIdentifier identifier : identifiers) {
                fieldNamesToTestDateRange.add(JexlASTHelper.deconstructIdentifier(identifier));
            }
        }

        if (fieldNamesToTestDateRange.isEmpty()) {
            // nothing to look up, nothing to report
            return nonIngestedFieldNames;
        }

        // Find the amount of times the fields have been ingested during the date range for all fields in the OR.
        long occurrences = helper.getCountsForFieldsInDateRange(fieldNamesToTestDateRange, this.datatypeFilter, this.querySettings.getBeginDate(),
                        this.querySettings.getEndDate()).values().stream().mapToLong(Long::longValue).sum();
        if (occurrences < 1) {
            for (String fieldName : fieldNamesToTestDateRange) {
                // special fields are never reported, matching the handling of leaf and function nodes
                if (!specialFields.contains(fieldName)) {
                    nonIngestedFieldNames.add(fieldName);
                }
            }
        }
        // always hand back the set itself; Collection.addAll returns a boolean, which is not what callers expect
        return nonIngestedFieldNames;
    }

    /**
     * Evaluate each field beneath a leaf node individually.
     *
     * @param node
     *            Jexl node
     * @param data
     *            The set of names which we have determined have not been ingested during the date range.
     * @return the updated set of names which have not been ingested during the date range.
     */
    private Object findMissingFields(JexlNode node, Object data) {
        Set<String> nonIngestedFieldNames = asFieldSet(data);

        // an empty list covers the cases where we have two literals, essentially everything but identifier op literal
        for (ASTIdentifier identifier : identifiersOf(node)) {
            String fieldName = JexlASTHelper.deconstructIdentifier(identifier);
            // check the special fields first so that no lookup is made for a field that is never reported
            if (!specialFields.contains(fieldName) && isMissingFromDateRange(fieldName)) {
                nonIngestedFieldNames.add(fieldName);
            }
        }
        return nonIngestedFieldNames;
    }

    @Override
    public Object visit(ASTERNode node, Object data) {
        return findMissingFields(node, data);
    }

    @Override
    public Object visit(ASTNRNode node, Object data) {
        return findMissingFields(node, data);
    }

    @Override
    public Object visit(ASTEQNode node, Object data) {
        return findMissingFields(node, data);
    }

    @Override
    public Object visit(ASTNENode node, Object data) {
        return findMissingFields(node, data);
    }

    @Override
    public Object visit(ASTGENode node, Object data) {
        return findMissingFields(node, data);
    }

    @Override
    public Object visit(ASTGTNode node, Object data) {
        return findMissingFields(node, data);
    }

    @Override
    public Object visit(ASTLENode node, Object data) {
        return findMissingFields(node, data);
    }

    @Override
    public Object visit(ASTLTNode node, Object data) {
        return findMissingFields(node, data);
    }

    /**
     * Evaluate each field named by the function's argument descriptor individually.
     *
     * @param node
     *            the function node
     * @param data
     *            The set of names which we have determined have not been ingested during the date range.
     * @return the updated set of names which have not been ingested during the date range.
     */
    @Override
    public Object visit(ASTFunctionNode node, Object data) {
        JexlArgumentDescriptor desc = JexlFunctionArgumentDescriptorFactory.F.getArgumentDescriptor(node);
        Set<String> nonIngestedFieldNames = asFieldSet(data);

        for (String fieldName : desc.fields(this.helper, this.datatypeFilter)) {
            // deconstruct the identifier
            final String testFieldName = JexlASTHelper.deconstructIdentifier(fieldName);
            // changed to allow _ANYFIELD_ in functions; either form of the name marks the field as special
            if (specialFields.contains(fieldName) || specialFields.contains(testFieldName)) {
                continue;
            }
            // look up the same (deconstructed) name that gets reported
            if (isMissingFromDateRange(testFieldName)) {
                nonIngestedFieldNames.add(testFieldName);
            }
        }

        return nonIngestedFieldNames;
    }

    // Descend through these nodes
    @Override
    public Object visit(ASTJexlScript node, Object data) {
        // make sure the children share one set even when no set was supplied
        Set<String> nonIngestedFieldNames = asFieldSet(data);
        node.childrenAccept(this, nonIngestedFieldNames);
        return nonIngestedFieldNames;
    }

    @Override
    public Object visit(ASTOrNode node, Object data) {
        return findMissingFields(node, data);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.apache.commons.jexl3.parser.ASTJexlScript;
import org.apache.log4j.Logger;

import datawave.query.jexl.visitors.FieldMissingFromDateRangeVisitor;
import datawave.query.jexl.visitors.FieldMissingFromSchemaVisitor;

/**
Expand Down Expand Up @@ -77,10 +78,15 @@ public QueryRuleResult validate(QueryValidationConfiguration ruleConfiguration)
ASTJexlScript jexlQuery = (ASTJexlScript) ruleConfig.getParsedQuery();
Set<String> nonExistentFields = FieldMissingFromSchemaVisitor.getNonExistentFields(ruleConfig.getMetadataHelper(), jexlQuery,
Collections.emptySet(), getSpecialFields());
Set<String> nonIngestedFieldsForDateRange = FieldMissingFromDateRangeVisitor.getNonIngestedFields(ruleConfig.getMetadataHelper(), jexlQuery,
Collections.emptySet(), getSpecialFields(), ruleConfig.getQuerySettings());
// If any non-existent fields were found, add them to the result.
if (!nonExistentFields.isEmpty()) {
result.addMessage("Fields not found in data dictionary: " + String.join(", ", nonExistentFields));
}
if (!nonIngestedFieldsForDateRange.isEmpty()) {
result.addMessage("Fields not ingested in provided date range: " + String.join(", ", nonIngestedFieldsForDateRange));
}
} catch (Exception e) {
// If an exception occurred, log and preserve it in the result.
log.error("Error occurred when validating against instance '" + getName() + "' of " + getClass(), e);
Expand Down
Loading
Loading