-
Notifications
You must be signed in to change notification settings - Fork 271
Make /validate check for field existence in date range #3174
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
foster33
wants to merge
18
commits into
integration
Choose a base branch
from
task/validate-nonIngested-fields
base: integration
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+867
−34
Open
Changes from 8 commits
Commits
Show all changes
18 commits
Select commit
Hold shift + click to select a range
360c2ca
Add validation for if field ingested during date range
foster33 5ecf26b
Small improvement
foster33 7294446
Feedback
foster33 494cb07
Improvements and new tests
foster33 46e27ee
Improve proficiency of visitor & other fixes
foster33 38e1cd3
Merge branch 'integration' into task/validate-nonIngested-fields
foster33 845e3e0
Improve missing fields method & improve/fix tests
foster33 f838523
Improve handling of special fields & improve/fix tests
foster33 faac031
Add support for non-aggregated rows & other feedback improvements
foster33 056862f
Improve iterator logic & fix issue with FirstEntryInRowIterator
foster33 be6c183
remove iterator option
foster33 40943f8
feedback improvement
foster33 992619a
Merge branch 'integration' into task/validate-nonIngested-fields
foster33 4bc884f
Push latest changes for testing, WIP
foster33 447cbd9
Fix issues with visitor (#3249)
lbschanno 1160258
Correct test expect message
foster33 7cb963f
Merge branch 'integration' into task/validate-nonIngested-fields
foster33 09da8b4
Merge branch 'integration' into task/validate-nonIngested-fields
foster33 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
262 changes: 262 additions & 0 deletions
262
...ery-core/src/main/java/datawave/query/jexl/visitors/FieldMissingFromDateRangeVisitor.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,262 @@ | ||
| package datawave.query.jexl.visitors; | ||
|
|
||
| import java.text.SimpleDateFormat; | ||
| import java.util.Collections; | ||
| import java.util.HashSet; | ||
| import java.util.LinkedHashSet; | ||
| import java.util.List; | ||
| import java.util.NoSuchElementException; | ||
| import java.util.Set; | ||
|
|
||
| import org.apache.accumulo.core.client.TableNotFoundException; | ||
| import org.apache.commons.jexl3.parser.ASTEQNode; | ||
| import org.apache.commons.jexl3.parser.ASTERNode; | ||
| import org.apache.commons.jexl3.parser.ASTFunctionNode; | ||
| import org.apache.commons.jexl3.parser.ASTGENode; | ||
| import org.apache.commons.jexl3.parser.ASTGTNode; | ||
| import org.apache.commons.jexl3.parser.ASTIdentifier; | ||
| import org.apache.commons.jexl3.parser.ASTJexlScript; | ||
| import org.apache.commons.jexl3.parser.ASTLENode; | ||
| import org.apache.commons.jexl3.parser.ASTLTNode; | ||
| import org.apache.commons.jexl3.parser.ASTNENode; | ||
| import org.apache.commons.jexl3.parser.ASTNRNode; | ||
| import org.apache.commons.jexl3.parser.ASTOrNode; | ||
| import org.apache.commons.jexl3.parser.JexlNode; | ||
|
|
||
| import datawave.microservice.query.Query; | ||
| import datawave.query.jexl.JexlASTHelper; | ||
| import datawave.query.jexl.functions.JexlFunctionArgumentDescriptorFactory; | ||
| import datawave.query.jexl.functions.arguments.JexlArgumentDescriptor; | ||
| import datawave.query.util.MetadataHelper; | ||
|
|
||
| /** | ||
| * Class to check that each query node contains a field which exists in the schema for the given date range. | ||
| * | ||
| * <pre> | ||
| * 1. If a datatype filter was specified, then the existence check is limited to only those datatypes | ||
| * 2. If a datatype filter is NOT specified (null or empty), this implies ALL datatypes. | ||
| * 3. If querySettings is NOT specified (null), it will not report any missing fields. This is due to no begin or end date being provided. | ||
| * </pre> | ||
| */ | ||
| public class FieldMissingFromDateRangeVisitor extends ShortCircuitBaseVisitor { | ||
foster33 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| private final MetadataHelper helper; | ||
| private final Set<String> datatypeFilter; | ||
| private final Query querySettings; | ||
| private final Set<String> specialFields; | ||
|
|
||
| /** | ||
| * Creates a visitor that reports query fields which are missing from the date range of the query. | ||
| * | ||
| * @param helper | ||
| * the metadata helper that is asked which fields are missing in the date range | ||
| * @param datatypeFilter | ||
| * the datatypes to limit the check to; null or empty means ALL datatypes | ||
| * @param specialFields | ||
| * field names that are never checked; null is treated as no special fields | ||
| * @param querySettings | ||
| * the query settings that supply the begin and end date | ||
| */ | ||
| public FieldMissingFromDateRangeVisitor(MetadataHelper helper, Set<String> datatypeFilter, Set<String> specialFields, Query querySettings) { | ||
| this.helper = helper; | ||
| this.querySettings = querySettings; | ||
| // a null set of special fields would otherwise fail on the first specialFields.contains(...) call | ||
| this.specialFields = (specialFields == null) ? Collections.emptySet() : specialFields; | ||
| // if given datatypeFilter is empty or null, assume that means ALL datatypes | ||
| this.datatypeFilter = (datatypeFilter == null) ? Collections.emptySet() : datatypeFilter; | ||
| } | ||
|
|
||
| /** | ||
| * Runs a {@link FieldMissingFromDateRangeVisitor} over the given script and returns the set it accumulated. | ||
| * | ||
| * @param helper | ||
| * the metadata helper handed to the visitor | ||
| * @param script | ||
| * the query tree to visit | ||
| * @param datatypes | ||
| * the datatype filter handed to the visitor | ||
| * @param specialFields | ||
| * the field names the visitor does not check | ||
| * @param querySettings | ||
| * the query settings handed to the visitor | ||
| * @return the result of visiting the script, cast to a set of field names | ||
| */ | ||
| public static Set<String> getNonIngestedFields(MetadataHelper helper, ASTJexlScript script, Set<String> datatypes, Set<String> specialFields, | ||
| Query querySettings) { | ||
| // A LinkedHashSet is used as the accumulator so that insertion order is maintained. | ||
| Set<String> accumulator = new LinkedHashSet<>(); | ||
| FieldMissingFromDateRangeVisitor visitor = new FieldMissingFromDateRangeVisitor(helper, datatypes, specialFields, querySettings); | ||
| @SuppressWarnings("unchecked") | ||
| Set<String> result = (Set<String>) script.jjtAccept(visitor, accumulator); | ||
| return result; | ||
| } | ||
|
|
||
| /** | ||
| * Checks the fields referenced by the children of an OR node as one group. The group is only reported when every tested (non-special) field is | ||
| * missing from the date range; if at least one tested field is present, nothing is added. | ||
| * | ||
| * @param node | ||
| * Jexl OR node | ||
| * @param data | ||
| * The set of names which we have determined have not been ingested during the date range. A new set is created when null. | ||
| * @return the updated set of names which have not been ingested during the date range. | ||
| * @throws TableNotFoundException | ||
| * if thrown by the metadata helper lookup | ||
| */ | ||
| private Object findMissingFields(ASTOrNode node, Object data) throws TableNotFoundException { | ||
| @SuppressWarnings("unchecked") | ||
| Set<String> nonExistentFieldNames = (null == data) ? new LinkedHashSet<>() : (Set<String>) data; | ||
| // without query settings there is no begin or end date, so no missing fields are reported | ||
| if (this.querySettings == null) { | ||
| return nonExistentFieldNames; | ||
| } | ||
| Set<String> fieldNamesToTestDateRange = new HashSet<>(); | ||
| int numChildren = node.jjtGetNumChildren(); | ||
| for (int i = 0; i < numChildren; i++) { | ||
| JexlNode child = node.jjtGetChild(i); | ||
| List<ASTIdentifier> identifiers; | ||
| // A node could be literal == literal in terms of an identityQuery | ||
| try { | ||
| identifiers = JexlASTHelper.getIdentifiers(child); | ||
| } catch (NoSuchElementException e) { | ||
| return nonExistentFieldNames; | ||
| } | ||
| if (identifiers.isEmpty()) { | ||
| // Catch cases where we have two literals | ||
| // essentially everything but identifier op literal | ||
| return nonExistentFieldNames; | ||
| } | ||
| for (ASTIdentifier identifier : identifiers) { | ||
| String fieldName = JexlASTHelper.deconstructIdentifier(identifier); | ||
| if (!specialFields.contains(fieldName)) { | ||
| fieldNamesToTestDateRange.add(fieldName); | ||
| } | ||
| } | ||
| } | ||
| SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMdd"); | ||
| Set<String> missingFields = helper.getMissingFieldsInDateRange(fieldNamesToTestDateRange, datatypeFilter, | ||
| formatter.format(this.querySettings.getBeginDate()), formatter.format(this.querySettings.getEndDate()), specialFields); | ||
| if (missingFields.containsAll(fieldNamesToTestDateRange)) { | ||
| // addAll returns a boolean, so it must not be used as the return value; always hand back the set itself | ||
| nonExistentFieldNames.addAll(missingFields); | ||
| } | ||
| return nonExistentFieldNames; | ||
| } | ||
|
|
||
| /** | ||
| * Checks each non-special field referenced by the given node individually and records those that are missing from the date range. | ||
| * | ||
| * @param node | ||
| * Jexl node | ||
| * @param data | ||
| * The set of names which we have determined have not been ingested during the date range. A new set is created when null. | ||
| * @return the updated set of names which have not been ingested during the date range. | ||
| * @throws TableNotFoundException | ||
| * if thrown by the metadata helper lookup | ||
| */ | ||
| private Object findMissingFields(JexlNode node, Object data) throws TableNotFoundException { | ||
| // LinkedHashSet keeps insertion order, matching the set created by getNonIngestedFields and the OR-node variant | ||
| @SuppressWarnings("unchecked") | ||
| Set<String> nonIngestedFieldNames = (null == data) ? new LinkedHashSet<>() : (Set<String>) data; | ||
| // without query settings there is no begin or end date, so no missing fields are reported | ||
| if (this.querySettings == null) { | ||
| return nonIngestedFieldNames; | ||
| } | ||
| List<ASTIdentifier> identifiers; | ||
| // A node could be literal == literal in terms of an identityQuery | ||
| try { | ||
| identifiers = JexlASTHelper.getIdentifiers(node); | ||
| } catch (NoSuchElementException e) { | ||
| return nonIngestedFieldNames; | ||
| } | ||
| if (identifiers.isEmpty()) { | ||
| // Catch cases where we have two literals | ||
| // essentially everything but identifier op literal | ||
| return nonIngestedFieldNames; | ||
| } | ||
| SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMdd"); | ||
| for (ASTIdentifier identifier : identifiers) { | ||
| String fieldName = JexlASTHelper.deconstructIdentifier(identifier); | ||
| if (specialFields.contains(fieldName)) { | ||
| continue; | ||
| } | ||
| nonIngestedFieldNames.addAll(helper.getMissingFieldsInDateRange(Set.of(fieldName), datatypeFilter, | ||
| formatter.format(this.querySettings.getBeginDate()), formatter.format(this.querySettings.getEndDate()), specialFields)); | ||
| } | ||
| return nonIngestedFieldNames; | ||
| } | ||
|
|
||
| /** | ||
| * Checks the fields of a regex-match ({@code =~}) node via {@code findMissingFields}, rethrowing a {@link TableNotFoundException} unchecked. | ||
| */ | ||
| @Override | ||
| public Object visit(ASTERNode node, Object data) { | ||
| final Object updated; | ||
| try { | ||
| updated = findMissingFields(node, data); | ||
| } catch (TableNotFoundException tableMissing) { | ||
| throw new RuntimeException(tableMissing); | ||
| } | ||
| return updated; | ||
| } | ||
|
|
||
| /** | ||
| * Checks the fields of a negated regex-match ({@code !~}) node via {@code findMissingFields}, rethrowing a {@link TableNotFoundException} unchecked. | ||
| */ | ||
| @Override | ||
| public Object visit(ASTNRNode node, Object data) { | ||
| final Object updated; | ||
| try { | ||
| updated = findMissingFields(node, data); | ||
| } catch (TableNotFoundException tableMissing) { | ||
| throw new RuntimeException(tableMissing); | ||
| } | ||
| return updated; | ||
| } | ||
|
|
||
| /** | ||
| * Checks the fields of an equality ({@code ==}) node via {@code findMissingFields}, rethrowing a {@link TableNotFoundException} unchecked. | ||
| */ | ||
| @Override | ||
| public Object visit(ASTEQNode node, Object data) { | ||
| final Object updated; | ||
| try { | ||
| updated = findMissingFields(node, data); | ||
| } catch (TableNotFoundException tableMissing) { | ||
| throw new RuntimeException(tableMissing); | ||
| } | ||
| return updated; | ||
| } | ||
|
|
||
| /** | ||
| * Checks the fields of a not-equal ({@code !=}) node via {@code findMissingFields}, rethrowing a {@link TableNotFoundException} unchecked. | ||
| */ | ||
| @Override | ||
| public Object visit(ASTNENode node, Object data) { | ||
| final Object updated; | ||
| try { | ||
| updated = findMissingFields(node, data); | ||
| } catch (TableNotFoundException tableMissing) { | ||
| throw new RuntimeException(tableMissing); | ||
| } | ||
| return updated; | ||
| } | ||
|
|
||
| /** | ||
| * Checks the fields of a greater-than-or-equal ({@code >=}) node via {@code findMissingFields}, rethrowing a {@link TableNotFoundException} unchecked. | ||
| */ | ||
| @Override | ||
| public Object visit(ASTGENode node, Object data) { | ||
| final Object updated; | ||
| try { | ||
| updated = findMissingFields(node, data); | ||
| } catch (TableNotFoundException tableMissing) { | ||
| throw new RuntimeException(tableMissing); | ||
| } | ||
| return updated; | ||
| } | ||
|
|
||
| /** | ||
| * Checks the fields of a greater-than ({@code >}) node via {@code findMissingFields}, rethrowing a {@link TableNotFoundException} unchecked. | ||
| */ | ||
| @Override | ||
| public Object visit(ASTGTNode node, Object data) { | ||
| final Object updated; | ||
| try { | ||
| updated = findMissingFields(node, data); | ||
| } catch (TableNotFoundException tableMissing) { | ||
| throw new RuntimeException(tableMissing); | ||
| } | ||
| return updated; | ||
| } | ||
|
|
||
| /** | ||
| * Checks the fields of a less-than-or-equal ({@code <=}) node via {@code findMissingFields}, rethrowing a {@link TableNotFoundException} unchecked. | ||
| */ | ||
| @Override | ||
| public Object visit(ASTLENode node, Object data) { | ||
| final Object updated; | ||
| try { | ||
| updated = findMissingFields(node, data); | ||
| } catch (TableNotFoundException tableMissing) { | ||
| throw new RuntimeException(tableMissing); | ||
| } | ||
| return updated; | ||
| } | ||
|
|
||
| /** | ||
| * Checks the fields of a less-than ({@code <}) node via {@code findMissingFields}, rethrowing a {@link TableNotFoundException} unchecked. | ||
| */ | ||
| @Override | ||
| public Object visit(ASTLTNode node, Object data) { | ||
| final Object updated; | ||
| try { | ||
| updated = findMissingFields(node, data); | ||
| } catch (TableNotFoundException tableMissing) { | ||
| throw new RuntimeException(tableMissing); | ||
| } | ||
| return updated; | ||
| } | ||
|
|
||
| /** | ||
| * Checks every field reported by the function's argument descriptor and records those that are missing from the date range. | ||
| * | ||
| * @param node | ||
| * the function node | ||
| * @param data | ||
| * the set of field names already found to be missing; a new set is created when null | ||
| * @return the set of missing field names, including any found for this function | ||
| */ | ||
| @Override | ||
| public Object visit(ASTFunctionNode node, Object data) { | ||
| JexlArgumentDescriptor desc = JexlFunctionArgumentDescriptorFactory.F.getArgumentDescriptor(node); | ||
| // LinkedHashSet keeps insertion order, matching the set created by getNonIngestedFields | ||
| @SuppressWarnings("unchecked") | ||
| Set<String> nonIngestedFieldNames = (null == data) ? new LinkedHashSet<>() : (Set<String>) data; | ||
| // without query settings there is no begin or end date, so no missing fields are reported | ||
| if (this.querySettings == null) { | ||
| return nonIngestedFieldNames; | ||
| } | ||
| SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMdd"); | ||
| try { | ||
| for (String fieldName : desc.fields(this.helper, this.datatypeFilter)) { | ||
| // deconstruct the identifier | ||
| final String testFieldName = JexlASTHelper.deconstructIdentifier(fieldName); | ||
| // allow special fields such as _ANYFIELD_ in functions, whether given in raw or deconstructed form | ||
| if (specialFields.contains(fieldName) || specialFields.contains(testFieldName)) { | ||
| continue; | ||
| } | ||
| nonIngestedFieldNames.addAll(helper.getMissingFieldsInDateRange(Set.of(testFieldName), datatypeFilter, | ||
| formatter.format(this.querySettings.getBeginDate()), formatter.format(this.querySettings.getEndDate()), specialFields)); | ||
| } | ||
| } catch (TableNotFoundException e) { | ||
| throw new RuntimeException(e); | ||
| } | ||
| return nonIngestedFieldNames; | ||
| } | ||
|
|
||
| /** | ||
| * Descends through the script node: every child is visited with the same {@code data} object, and that same object is returned. The return values | ||
| * of the child visits are not used here. | ||
| * | ||
| * @param node | ||
| * the script node | ||
| * @param data | ||
| * the object passed to every child visit | ||
| * @return the {@code data} argument | ||
| */ | ||
| @Override | ||
| public Object visit(ASTJexlScript node, Object data) { | ||
| node.childrenAccept(this, data); | ||
| return data; | ||
| } | ||
|
|
||
| /** | ||
| * Checks the fields beneath an OR node via the OR-specific {@code findMissingFields}, rethrowing a {@link TableNotFoundException} unchecked. | ||
| */ | ||
| @Override | ||
| public Object visit(ASTOrNode node, Object data) { | ||
| final Object updated; | ||
| try { | ||
| updated = findMissingFields(node, data); | ||
| } catch (TableNotFoundException tableMissing) { | ||
| throw new RuntimeException(tableMissing); | ||
| } | ||
| return updated; | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.