Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,21 @@ public <T> Boolean eq(Bound<T> term, Literal<T> lit) {
public <T> Boolean notEq(Bound<T> term, Literal<T> lit) {
  // because the bounds are not necessarily a min or max value, this cannot be answered using
  // them. notEq(col, X) with (X, Y) doesn't guarantee that X is a value in col.
  // However, when the lower and upper bounds are equal and the file has no nulls, every value
  // lies between two equal bounds and so must be that single value; the file can then be pruned
  // if that value equals the literal.
  int id = term.ref().fieldId();
  if (mayContainNull(id)) {
    // null rows satisfy the != predicate, so a file that may hold nulls has to be read
    return ROWS_MIGHT_MATCH;
  }

  T lower = lowerBound(term);
  T upper = upperBound(term);
  if (lower == null || upper == null) {
    // without both bounds nothing can be concluded
    return ROWS_MIGHT_MATCH;
  }

  // Bounds only describe non-null, non-NaN values, so a floating-point file whose bounds are
  // equal may still contain NaN rows, and those rows satisfy notEq. No NaN-count check is
  // visible from this method, so floating-point bounds are conservatively never used to prune.
  // NOTE(review): if a NaN-count helper exists in this class, prefer it over the type check.
  if (lower instanceof Float || lower instanceof Double) {
    return ROWS_MIGHT_MATCH;
  }

  // use the literal's comparator for both checks so bound equality follows the same ordering
  // as the comparison against the literal, instead of relying on Object.equals
  boolean singleValue = lit.comparator().compare(lower, upper) == 0;
  if (singleValue && lit.comparator().compare(lower, lit.value()) == 0) {
    return ROWS_CANNOT_MATCH;
  }

  return ROWS_MIGHT_MATCH;
}

Expand Down Expand Up @@ -381,6 +396,20 @@ public <T> Boolean in(Bound<T> term, Set<T> literalSet) {
public <T> Boolean notIn(Bound<T> term, Set<T> literalSet) {
  // because the bounds are not necessarily a min or max value, this cannot be answered using
  // them. notIn(col, {X, ...}) with (X, Y) doesn't guarantee that X is a value in col.
  // However, when the lower and upper bounds are equal and the file has no nulls, every value
  // lies between two equal bounds and so must be that single value; the file can then be pruned
  // if that value is in the exclusion set.
  int id = term.ref().fieldId();
  if (mayContainNull(id)) {
    // null rows satisfy the NOT IN predicate, so a file that may hold nulls has to be read
    return ROWS_MIGHT_MATCH;
  }

  T lower = lowerBound(term);
  T upper = upperBound(term);
  if (lower == null || upper == null) {
    // without both bounds nothing can be concluded
    return ROWS_MIGHT_MATCH;
  }

  // Bounds only describe non-null, non-NaN values, so a floating-point file whose bounds are
  // equal may still contain NaN rows, and those rows satisfy notIn. No NaN-count check is
  // visible from this method, so floating-point bounds are conservatively never used to prune.
  // NOTE(review): if a NaN-count helper exists in this class, prefer it over the type check.
  if (lower instanceof Float || lower instanceof Double) {
    return ROWS_MIGHT_MATCH;
  }

  if (lower.equals(upper) && literalSet.contains(lower)) {
    return ROWS_CANNOT_MATCH;
  }

  return ROWS_MIGHT_MATCH;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -970,4 +970,90 @@ public void testNotNullInNestedStruct() {
.as("Should not read: optional_address.optional_street2 is optional")
.isFalse();
}

@Test
public void testNotEqWithSingleValue() {
  // Field 3 ("required") has identical lower and upper bounds of "abc"; the second map holds 0
  // for that field (presumably the null count, going by the assertion messages below).
  DataFile uniformFile =
      new TestDataFile(
          "single_value.avro",
          Row.of(),
          10,
          ImmutableMap.of(3, 10L),
          ImmutableMap.of(3, 0L),
          null,
          ImmutableMap.of(3, toByteBuffer(StringType.get(), "abc")),
          ImmutableMap.of(3, toByteBuffer(StringType.get(), "abc")));

  // literal equals the only value in the file: nothing can satisfy !=
  assertThat(
          new InclusiveMetricsEvaluator(SCHEMA, notEqual("required", "abc")).eval(uniformFile))
      .as("Should prune: file contains single value equal to literal")
      .isFalse();

  // literal differs from the only value in the file: every row satisfies !=
  assertThat(
          new InclusiveMetricsEvaluator(SCHEMA, notEqual("required", "def")).eval(uniformFile))
      .as("Should read: file contains single value not equal to literal")
      .isTrue();

  // Same bounds, but the second map now holds 2 for field 3, so the file must still be read.
  DataFile uniformFileWithNulls =
      new TestDataFile(
          "single_value_nulls.avro",
          Row.of(),
          10,
          ImmutableMap.of(3, 10L),
          ImmutableMap.of(3, 2L),
          null,
          ImmutableMap.of(3, toByteBuffer(StringType.get(), "abc")),
          ImmutableMap.of(3, toByteBuffer(StringType.get(), "abc")));

  assertThat(
          new InclusiveMetricsEvaluator(SCHEMA, notEqual("required", "abc"))
              .eval(uniformFileWithNulls))
      .as("Should read: file has nulls which match != predicate")
      .isTrue();
}

@Test
public void testNotInWithSingleValue() {
  // Field 3 ("required") has identical lower and upper bounds of "abc"; the second map holds 0
  // for that field (presumably the null count, going by the assertion messages below).
  DataFile uniformFile =
      new TestDataFile(
          "single_value.avro",
          Row.of(),
          10,
          ImmutableMap.of(3, 10L),
          ImmutableMap.of(3, 0L),
          null,
          ImmutableMap.of(3, toByteBuffer(StringType.get(), "abc")),
          ImmutableMap.of(3, toByteBuffer(StringType.get(), "abc")));

  // the only value in the file is part of the exclusion list: no row can match
  assertThat(
          new InclusiveMetricsEvaluator(SCHEMA, notIn("required", "abc", "def"))
              .eval(uniformFile))
      .as("Should prune: file contains single value in exclusion list")
      .isFalse();

  // the only value in the file is outside the exclusion list: rows match
  assertThat(
          new InclusiveMetricsEvaluator(SCHEMA, notIn("required", "def", "ghi"))
              .eval(uniformFile))
      .as("Should read: file contains single value not in exclusion list")
      .isTrue();

  // Same bounds, but the second map now holds 2 for field 3, so the file must still be read.
  DataFile uniformFileWithNulls =
      new TestDataFile(
          "single_value_nulls.avro",
          Row.of(),
          10,
          ImmutableMap.of(3, 10L),
          ImmutableMap.of(3, 2L),
          null,
          ImmutableMap.of(3, toByteBuffer(StringType.get(), "abc")),
          ImmutableMap.of(3, toByteBuffer(StringType.get(), "abc")));

  assertThat(
          new InclusiveMetricsEvaluator(SCHEMA, notIn("required", "abc", "def"))
              .eval(uniformFileWithNulls))
      .as("Should read: file has nulls which match NOT IN predicate")
      .isTrue();
}
}
Loading