Skip to content

Commit

Permalink
Merge branch 'integration' into dockerquickstart-issue2611
Browse files Browse the repository at this point in the history
  • Loading branch information
apmoriarty authored Oct 30, 2024
2 parents 607d919 + 3d7d40d commit 3b5f05c
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -617,7 +617,15 @@ public long process(KEYIN key, RawRecordContainer event, Multimap<String,Normali
excludedGroups = arithmetic.getExcludedGroups();

for (Entry excluded : excludedGroups.entrySet()) {
matchingGroups.remove(excluded.getKey(), excluded.getValue());
for (Object value : (HashSet) excluded.getValue()) {
if (matchingGroups.containsKey(excluded.getKey())) {
matchingGroups.get(excluded.getKey()).remove(value);
if (matchingGroups.get(excluded.getKey()).isEmpty()) {
matchingGroups.remove(excluded.getKey());
}
}
}

}

if (log.isTraceEnabled()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -335,15 +335,46 @@ public void testAwarePreconSameGroup() {

}

@Test
public void testAwareTwoNegated() {
    // Scenario as originally noted: CHEESE != 'apple' AND WINE != 'chianti'
    // NOTE(review): the fixture below populates FRUIT rather than CHEESE -- confirm which field the
    // configured precondition actually names.
    // Purpose: negated terms must not yield the cross product of groups that each contained a non-matching value.

    // Only the values tagged "2" (orange / cabernet) are expected to show up in the edge keys;
    // "apple" is tagged "0" and "chianti" is tagged "1".
    Set<String> wantedKeys = new HashSet<>();
    wantedKeys.add("cabernet");
    wantedKeys.add("cabernet%00;orange");
    wantedKeys.add("orange");
    wantedKeys.add("orange%00;cabernet");

    // event-level fields
    fields.put("EVENT_DATE", new BaseNormalizedContent("EVENT_DATE", "2022-10-26T01:31:53Z"));
    fields.put("UUID", new BaseNormalizedContent("UUID", "0016dd72-0000-827d-dd4d-001b2163ba09"));

    // three FRUIT and three WINE values, all under "FOOD", tagged "0", "1" and "2"
    fields.put("FRUIT", new NormalizedFieldAndValue("FRUIT", "apple", "FOOD", "0"));
    fields.put("FRUIT", new NormalizedFieldAndValue("FRUIT", "pear", "FOOD", "1"));
    fields.put("FRUIT", new NormalizedFieldAndValue("FRUIT", "orange", "FOOD", "2"));
    fields.put("WINE", new NormalizedFieldAndValue("WINE", "pinot noir", "FOOD", "0"));
    fields.put("WINE", new NormalizedFieldAndValue("WINE", "chianti", "FOOD", "1"));
    fields.put("WINE", new NormalizedFieldAndValue("WINE", "cabernet", "FOOD", "2"));

    ProtobufEdgeDataTypeHandler<Text,BulkIngestKey,Value> handler = new ProtobufEdgeDataTypeHandler<>();
    handler.setup(new TaskAttemptContextImpl(conf, new TaskAttemptID()));

    // run the handler over the fixture, then compare the produced edge keys with the expected set
    EdgeHandlerTestUtil.processEvent(fields, handler, getEvent(conf), 4, true, false);
    Assert.assertEquals(wantedKeys, EdgeHandlerTestUtil.edgeKeyResults.keySet());
}

@Test
public void testAwareAllNegated() {
// CHEESE != 'apple' AND WINE != 'chianti'
// make sure negations don't take the cross products of groups that each contained things that don't match

fields.put("EVENT_DATE", new BaseNormalizedContent("EVENT_DATE", "2022-10-26T01:31:53Z"));
fields.put("UUID", new BaseNormalizedContent("UUID", "0016dd72-0000-827d-dd4d-001b2163ba09"));
fields.put("CHEESE", new NormalizedFieldAndValue("FRUIT", "apple", "FOOD", "0"));
fields.put("CHEESE", new NormalizedFieldAndValue("FRUIT", "pear", "FOOD", "1"));
fields.put("FRUIT", new NormalizedFieldAndValue("FRUIT", "apple", "FOOD", "0"));
fields.put("FRUIT", new NormalizedFieldAndValue("FRUIT", "pear", "FOOD", "1"));
fields.put("WINE", new NormalizedFieldAndValue("WINE", "pinot noir", "FOOD", "0"));
fields.put("WINE", new NormalizedFieldAndValue("WINE", "chianti", "FOOD", "1"));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ http://www.springframework.org/schema/util/spring-util-4.0.xsd">
<list>
<bean class="datawave.ingest.mapreduce.handler.edge.define.EdgeNode">
<!-- (data-driven key component) for a given CSV record, use its EDGE_VERTEX_FROM value for the "SOURCE" component of the DataWave edge key -->
<property name="selector" value="CHEESE.FOOD"/>
<property name="selector" value="FRUIT.FOOD"/>
<!-- (config-driven key component) use this value to denote how this vertex is related to the "SINK" vertex,
defined by 'protobufedge.table.relationships' bean below -->
<property name="relationship" value="FROM"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,17 +217,25 @@ private JexlNode pruneNode(JexlNode node, Set<String> fields) {
* @return the original node, or null if it is pruned
*/
private JexlNode pruneUnion(JexlNode node, Set<String> fields) {
// NOTE(review): this span comes from a rendered diff without +/- markers. It appears to interleave the
// pre-change and post-change bodies (the braces do not balance as shown) -- confirm against the real file.
//
// Visible contract: returns 'node' unchanged when the union is kept, or null when the union is pruned.

// if there is a isNotNull in the union, and we know we have an equality node involving one of the isNotNull nodes,
// we have the means to prune the entire union.
boolean willPrune = false;

for (int i = 0; i < node.jjtGetNumChildren(); i++) {
// inspect each child of the union after passing it through JexlASTHelper.dereference
JexlNode deref = JexlASTHelper.dereference(node.jjtGetChild(i));
// presumably the pre-change check: keep the union as soon as one child is not an 'is not null' function
if (!isIsNotNullFunction(deref)) {
return node;
// presumably the post-change check: until a match has been found, look for an 'is not null' child
// whose field is contained in 'fields'; one such child is enough to mark the union as prunable
if (isIsNotNullFunction(deref) && !willPrune) {
String field = fieldForNode(deref);
if (fields.contains(field)) {
willPrune = true;
}
}

// presumably the remainder of the pre-change body: keep the union when the child's field is not in 'fields'
String field = fieldForNode(deref);
if (!fields.contains(field)) {
return node;
}
}

// no child qualified, so the union is returned untouched
if (!willPrune) {
return node;
}

// null signals to the caller that the whole union was pruned
return null;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -493,13 +493,13 @@ public void testFutureCase_PartialPruneOfUnionViaUnion() {

// a union of terms on the same field (FOO) implies FOO is not null, so the whole 'is not null' union is pruned
String query = "(!(FOO == null) || !(FOO2 == null)) && (FOO == 'bar' || FOO == 'baz')";
// String expected = "!(FOO2 == null) && (FOO == 'bar' || FOO == 'baz')";
test(query, query);
String expected = "(FOO == 'bar' || FOO == 'baz')";
test(query, expected);

// should also work for filter:includeRegex
query = "(!(FOO == null) || !(FOO2 == null)) && (filter:includeRegex(FOO, 'bar.*') || filter:includeRegex(FOO, 'baz.*'))";
// expected = "!(FOO2 == null) && (filter:includeRegex(FOO, 'bar.*') || filter:includeRegex(FOO, 'baz.*'))";
test(query, query);
expected = "(filter:includeRegex(FOO, 'bar.*') || filter:includeRegex(FOO, 'baz.*'))";
test(query, expected);
}

// test cases where nothing should be done
Expand Down Expand Up @@ -537,10 +537,12 @@ public void testNoOpCases() {

// the term on FOO already implies FOO is not null, so the entire 'is not null' union is pruned
query = "(!(FOO == null) || !(FOO2 == null)) && FOO == 'bar'";
test(query, query);
String expected = "FOO == 'bar'";
test(query, expected);

query = "(!(FOO == null) || !(FOO2 == null)) && FOO =~ 'ba.*'";
test(query, query);
expected = "FOO =~ 'ba.*'";
test(query, expected);
}

@Test
Expand Down Expand Up @@ -575,6 +577,22 @@ public void testNoOpQueryPropertyMarkers() {
test(query, query);
}

@Test
public void testPruningNestedUnionOfIsNotNullFunctions() {
    // FOO == 'bar' already guarantees that FOO is not null, so a union containing !(FOO == null)
    // adds nothing and is expected to be removed in full, however many other fields it lists.
    final String original = "FOO == 'bar' && (!(FOO == null) || !(FOO2 == null) || !(FOO3 == null) || !(FOO4 == null))";

    test(original, "FOO == 'bar'");
}

@Test
public void testPruningNestedUnionOfIsNotNullFunctions_Two() {
    // The union only mentions FOO2 and FOO4; FOO is absent from it, so the query must come back unchanged.
    final String untouched = "FOO == 'bar' && (!(FOO2 == null) || !(FOO4 == null))";
    test(untouched, untouched);
}

private void test(String query, String expected) {
try {
ASTJexlScript script = JexlASTHelper.parseAndFlattenJexlQuery(query);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,9 +245,9 @@ public void testComposites() {
compositeMetadata.setCompositeFieldMappingByType(ingestType, "MAKE_COLOR", Arrays.asList("MAKE", "COLOR"));
compositeMetadata.setCompositeFieldMappingByType(ingestType, "COLOR_WHEELS", Arrays.asList("MAKE", "COLOR"));
}

TypeMetadata typeMetadata = new TypeMetadata(
"MAKE:[beep:datawave.data.type.LcNoDiacriticsType];MAKE_COLOR:[beep:datawave.data.type.NoOpType];START_DATE:[beep:datawave.data.type.DateType];TYPE_NOEVAL:[beep:datawave.data.type.LcNoDiacriticsType];IP_ADDR:[beep:datawave.data.type.IpAddressType];WHEELS:[beep:datawave.data.type.LcNoDiacriticsType,datawave.data.type.NumberType];COLOR:[beep:datawave.data.type.LcNoDiacriticsType];COLOR_WHEELS:[beep:datawave.data.type.NoOpType];TYPE:[beep:datawave.data.type.LcNoDiacriticsType]");
"dts:[0:beep];types:[0:datawave.data.type.DateType,1:datawave.data.type.IpAddressType,2:datawave.data.type.LcNoDiacriticsType,3:datawave.data.type.NoOpType,4:datawave.data.type.NumberType];MAKE:[0:2];MAKE_COLOR:[0:3];START_DATE:[0:0];TYPE_NOEVAL:[0:2];IP_ADDR:[0:1];WHEELS:[0:2,0:4];COLOR:[0:2];COLOR_WHEELS:[0:3];TYPE:[0:2]");

MarkingFunctions markingFunctions = new MarkingFunctions.Default();
ValueToAttributes valueToAttributes = new ValueToAttributes(compositeMetadata, typeMetadata, null, markingFunctions, true);
}
Expand Down

0 comments on commit 3b5f05c

Please sign in to comment.