Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ public TestMetricsRowGroupFilter(String format) {
Types.MapType.ofRequired(12, 13, StringType.get(), IntegerType.get())),
optional(14, "all_nans", DoubleType.get()),
optional(15, "some_nans", FloatType.get()),
optional(16, "no_nans", DoubleType.get()));
optional(16, "no_nans", DoubleType.get()),
optional(17, "some_double_nans", DoubleType.get()));

private static final Types.StructType _structFieldType =
Types.StructType.of(Types.NestedField.required(8, "_int_field", IntegerType.get()));
Expand All @@ -137,7 +138,8 @@ public TestMetricsRowGroupFilter(String format) {
optional(10, "_str", StringType.get()),
optional(14, "_all_nans", Types.DoubleType.get()),
optional(15, "_some_nans", FloatType.get()),
optional(16, "_no_nans", Types.DoubleType.get()));
optional(16, "_no_nans", Types.DoubleType.get()),
optional(17, "_some_double_nans", Types.DoubleType.get()));

private static final String TOO_LONG_FOR_STATS_PARQUET;

Expand Down Expand Up @@ -198,6 +200,8 @@ public void createOrcInputFile() throws IOException {
record.setField("_str", i + "str" + i);
record.setField("_all_nans", Double.NaN); // never non-nan
record.setField("_some_nans", (i % 10 == 0) ? Float.NaN : 2F); // includes some nan values
record.setField(
"_some_double_nans", (i % 10 == 0) ? Double.NaN : 2D); // includes some nan values
record.setField("_no_nans", 3D); // optional, but always non-nan

GenericRecord structNotNull = GenericRecord.create(_structFieldType);
Expand Down Expand Up @@ -241,6 +245,8 @@ private void createParquetInputFile() throws IOException {
builder.set("_no_nulls", ""); // optional, but always non-null
builder.set("_all_nans", Double.NaN); // never non-nan
builder.set("_some_nans", (i % 10 == 0) ? Float.NaN : 2F); // includes some nan values
builder.set(
"_some_double_nans", (i % 10 == 0) ? Double.NaN : 2D); // includes some nan values
builder.set("_no_nans", 3D); // optional, but always non-nan
builder.set("_str", i + "str" + i);

Expand Down Expand Up @@ -303,7 +309,6 @@ public void testNoNulls() {
@Test
public void testFloatWithNan() {
// NaNs break Parquet's min/max stats, so we should read the row group in all cases
// Only ORC should be able to distinguish using min/max when NaN is present
boolean shouldRead = shouldRead(greaterThan("some_nans", 1.0));
Assert.assertTrue(shouldRead);

Expand All @@ -320,6 +325,24 @@ public void testFloatWithNan() {
Assert.assertTrue(shouldRead);
}

@Test
public void testDoubleWithNan() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1. If ORC doesn't support it either, this is OK with me.

boolean shouldRead = shouldRead(greaterThan("some_double_nans", 1.0));
Assert.assertTrue("Should read: column with some nans contains target value", shouldRead);

shouldRead = shouldRead(greaterThanOrEqual("some_double_nans", 1.0));
Assert.assertTrue("Should read: column with some nans contains the target value", shouldRead);

shouldRead = shouldRead(lessThan("some_double_nans", 3.0));
Assert.assertTrue("Should read: column with some nans contains target value", shouldRead);

shouldRead = shouldRead(lessThanOrEqual("some_double_nans", 1.0));
Assert.assertTrue("Should read: column with some nans contains target value", shouldRead);

shouldRead = shouldRead(equal("some_double_nans", 2.0));
Assert.assertTrue("Should read: column with some nans contains target value", shouldRead);
}

@Test
public void testIsNaN() {
boolean shouldRead = shouldRead(isNaN("all_nans"));
Expand Down