Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import datawave.data.normalizer.regex.AnyCharNode;
import datawave.data.normalizer.regex.EncodedPatternNode;
import datawave.data.normalizer.regex.EscapedSingleCharNode;
import datawave.data.normalizer.regex.ExpressionNode;
import datawave.data.normalizer.regex.GroupNode;
import datawave.data.normalizer.regex.IntegerNode;
import datawave.data.normalizer.regex.IntegerRangeNode;
Expand All @@ -32,7 +31,7 @@ public class ZeroTrimmer extends CopyVisitor {

/**
* Return a copy of the node tree in which the leading zeros of partially encoded regex patterns have been trimmed and/or consolidated.
*
*
* @param node
* the node
* @return the trimmed tree
Expand All @@ -46,7 +45,7 @@ public static Node trim(Node node) {
}

public static ZeroRegexStatus getStatus(List<Node> encodedRegexNodes) {
if (hasPossiblyLeadingZeroes(encodedRegexNodes)) {
if (hasLeadingZeroes(encodedRegexNodes)) {
return ZeroRegexStatus.LEADING;
} else if (hasTrailingZeroes(encodedRegexNodes)) {
return ZeroRegexStatus.TRAILING;
Expand All @@ -59,45 +58,41 @@ private static boolean hasTrailingZeroes(List<Node> encodedRegexNodes) {
Collections.reverse(encodedRegexNodes);

NodeListIterator iter = new NodeListIterator(encodedRegexNodes);
return checkZeroes(iter);
}

/**
 * Run {@code checkZeroes} over the given nodes in their existing order.
 *
 * @param encodedRegexNodes
 *            the nodes to examine; the list is wrapped in a new {@code NodeListIterator}
 * @return whatever {@code checkZeroes} reports for an iterator over the nodes
 */
private static boolean hasLeadingZeroes(List<Node> encodedRegexNodes) {
    return checkZeroes(new NodeListIterator(encodedRegexNodes));
}

/**
 * Walk the nodes supplied by the iterator, in iteration order, and decide whether a zero is found.
 * <p>
 * On every pass the iterator is first advanced with {@code seekPastQuestionMarks()}, {@code seekPastQuantifiers()} and
 * {@code seekPastQuestionMarks()} again, then the next node is inspected:
 * <ul>
 * <li>if no node remains after seeking, the result is {@code false};</li>
 * <li>if {@code RegexUtils.matchesZero} rejects the node, the result is {@code false};</li>
 * <li>if {@code RegexUtils.matchesZeroExplicitly} accepts the node, the result is {@code true};</li>
 * <li>otherwise the node is consumed and the scan continues.</li>
 * </ul>
 * If the iterator has no nodes left at the top of a pass (including when it starts out empty), the result is {@code true}.
 *
 * @param iter
 *            the iterator to scan; it is advanced as a side effect
 * @return true or false according to the rules above
 */
private static boolean checkZeroes(NodeListIterator iter) {
    while (iter.hasNext()) {
        iter.seekPastQuestionMarks();
        iter.seekPastQuantifiers();
        iter.seekPastQuestionMarks();

        // Seeking may have moved the iterator past its last node, so re-check before peeking.
        if (!iter.hasNext()) {
            return false;
        }

        // Declared once, in the narrowest scope; the earlier method-level declaration clashed with this one.
        Node next = iter.peekNext();

        if (!RegexUtils.matchesZero(next)) {
            return false;
        }
        if (RegexUtils.matchesZeroExplicitly(next)) {
            return true;
        }

        // The node passed matchesZero but not matchesZeroExplicitly: consume it and keep scanning.
        iter.next();
    }
    return true;
}

/**
 * Scan the given nodes from the front and report whether a node accepted by {@code RegexUtils.matchesZero} is reached before any
 * other kind of node, stepping over hyphen characters and escaped periods along the way.
 *
 * @param encodedRegexNodes
 *            the nodes to examine
 * @return true if a node accepted by {@code RegexUtils.matchesZero} is reached, or if every node was a hyphen or escaped period (including
 *         the empty list); false as soon as any other node is encountered
 */
private static boolean hasPossiblyLeadingZeroes(List<Node> encodedRegexNodes) {
    final NodeListIterator nodes = new NodeListIterator(encodedRegexNodes);

    while (nodes.hasNext()) {
        final Node candidate = nodes.peekNext();

        if (RegexUtils.matchesZero(candidate)) {
            return true;
        }

        // Only a hyphen or an escaped period may be stepped over; anything else ends the scan.
        final boolean skippable = RegexUtils.isChar(candidate, RegexConstants.HYPHEN)
                        || candidate.equals(new EscapedSingleCharNode(RegexConstants.PERIOD));
        if (!skippable) {
            return false;
        }

        nodes.next();
    }

    return true;
}

@Override
Expand Down Expand Up @@ -142,7 +137,7 @@ public Object visitEncodedPattern(EncodedPatternNode node, Object data) {

/**
* Trim/consolidate leading zeros.
*
*
* @param nodes
* the nodes to trim
* @return the trimmed nodes
Expand All @@ -154,7 +149,7 @@ private List<Node> trimLeadingZeros(List<Node> nodes) {

/**
* Trim/consolidate trailing zeros.
*
*
* @param nodes
* the nodes to trim
* @return the trimmed nodes
Expand All @@ -171,7 +166,7 @@ private List<Node> trimTrailingZeros(List<Node> nodes) {

/**
* Return true if the given list consists only of one regex element that may or may not be followed by a quantifier or question mark.
*
*
* @param nodes
* the nodes
* @return true if the list consists of a single element pattern, or false otherwise
Expand All @@ -186,7 +181,7 @@ private boolean isSingleElementPattern(List<Node> nodes) {

/**
* Trim all leading nodes that only match zero. Trimming will stop once the first element that can match something other than zero is seen.
*
*
* @param nodes
* the nodes
* @return a list of trimmed nodes
Expand All @@ -211,7 +206,7 @@ private List<Node> trimLeadingZeroOnlyElements(List<Node> nodes) {

/**
* Return a list with all possible leading zeros consolidated, and any elements made optional as needed.
*
*
* @param nodes
* the nodes to consolidate
* @return a list of consolidated nodes
Expand Down Expand Up @@ -250,7 +245,7 @@ private List<Node> consolidatePossibleLeadingZeros(List<Node> nodes) {

/**
* Consolidate any leading elements that can possibly match zero.
*
*
* @param iter
* the iterator
* @return the consolidated nodes.
Expand Down Expand Up @@ -337,7 +332,7 @@ private List<Node> consolidateLeadingMatchesZero(NodeListIterator iter) {

/**
* Consolidate the next consecutive elements that can only match zero.
*
*
* @param iter
* the iterator
* @return a list of the consolidated nodes
Expand Down Expand Up @@ -430,7 +425,7 @@ private List<Node> consolidateLeadingMatchesZeroOnly(NodeListIterator iter) {

/**
* Trim all trailing nodes that explicitly only match zero. Trimming will stop once the first element that can match something other than zero is seen.
*
*
* @param nodes
* the nodes
* @return a list of trimmed nodes
Expand Down Expand Up @@ -461,7 +456,7 @@ private List<Node> trimTrailingZeroOnlyElements(List<Node> nodes) {

/**
* Return a list with all possible trailing zeros consolidated, and any elements made optional as needed.
*
*
* @param nodes
* the nodes to consolidate
* @return a list of consolidated nodes
Expand Down Expand Up @@ -525,7 +520,7 @@ private List<Node> consolidatePossibleTrailingZeros(List<Node> nodes) {

/**
* Consolidate any trailing elements that can possibly match zero.
*
*
* @param iter
* the iterator
* @return the consolidated nodes.
Expand Down Expand Up @@ -623,7 +618,7 @@ private List<Node> consolidateTrailingMatchesZero(NodeListIterator iter) {

/**
* Consolidate the next consecutive elements that can only match zero.
*
*
* @param iter
* the iterator
* @return a list of the consolidated nodes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -311,20 +311,21 @@ void testStatus() {
assertStatus("300.*000[1-9]", status);
assertStatus("45.*", status);
assertStatus("-45.*", status);
assertStatus(".*?11", status);
assertStatus(".*?abc", status);

status = ZeroRegexStatus.LEADING;
assertStatus(".*", status);
assertStatus(".*?", status);
assertStatus(".*?11", status);
assertStatus("[04][05][06]", status);
assertStatus("[04]{1,3}[05][06]", status);
assertStatus("\\d{3}", status);
assertStatus(".\\.000034.*", status);
assertStatus("00345.*", status);
assertStatus("\\.000034.*", status);
assertStatus("-00345.*", status);
assertStatus(".\\.000034.*", status);
assertStatus("\\.000034.*", status);

status = ZeroRegexStatus.TRAILING;
assertStatus("\\d{3}", status);
assertStatus(".*", status);
assertStatus(".*?", status);
assertStatus("3.*0{0,}[01]", status);
assertStatus("3.*?0{0,}[01]", status);
assertStatus("3400\\.0000.", status);
Expand All @@ -350,7 +351,7 @@ void testMixedAlternation() {
}

/**
 * Parse the given pattern and assert that {@code ZeroTrimmer.getStatus} returns the given status for the parsed tree's children.
 *
 * @param pattern
 *            the regex pattern to parse
 * @param status
 *            the status the pattern is expected to produce
 */
private void assertStatus(String pattern, ZeroRegexStatus status) {
    // A single check, expected value first. NOTE(review): confirm that Assert.equals takes (expected, actual) in that order.
    Assert.equals(status, ZeroTrimmer.getStatus(RegexParser.parse(pattern).getChildren()));
}

private void assertTrimmedTo(String pattern, String expectedPattern) {
Expand Down