Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import static java.util.Optional.of;
import static org.apache.parquet.avro.AvroReadSupport.READ_INT96_AS_FIXED;
import static org.apache.parquet.avro.AvroReadSupport.READ_INT96_AS_FIXED_DEFAULT;
import static org.apache.parquet.avro.AvroRecordConverter.getRuntimeAvroVersion;
import static org.apache.parquet.avro.AvroWriteSupport.WRITE_FIXED_AS_INT96;
import static org.apache.parquet.avro.AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE;
import static org.apache.parquet.avro.AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE_DEFAULT;
Expand Down Expand Up @@ -488,15 +489,20 @@ private LogicalTypeAnnotation convertLogicalType(LogicalType logicalType) {
return timeType(true, MICROS);
} else if (logicalType instanceof LogicalTypes.TimestampMillis) {
return timestampType(true, MILLIS);
} else if (logicalType instanceof LogicalTypes.LocalTimestampMillis) {
return timestampType(false, MILLIS);
} else if (logicalType instanceof LogicalTypes.TimestampMicros) {
return timestampType(true, MICROS);
} else if (logicalType instanceof LogicalTypes.LocalTimestampMicros) {
return timestampType(false, MICROS);
} else if (logicalType.getName().equals(LogicalTypes.uuid().getName()) && writeParquetUUID) {
return uuidType();
}

if (avroVersionSupportsLocalTimestampTypes()) {
if (logicalType instanceof LogicalTypes.LocalTimestampMillis) {
return timestampType(false, MILLIS);
} else if (logicalType instanceof LogicalTypes.LocalTimestampMicros) {
return timestampType(false, MICROS);
}
}

return null;
}

Expand Down Expand Up @@ -538,7 +544,7 @@ public Optional<LogicalType> visit(
LogicalTypeAnnotation.TimeUnit unit = timestampLogicalType.getUnit();
boolean isAdjustedToUTC = timestampLogicalType.isAdjustedToUTC();

if (isAdjustedToUTC) {
if (isAdjustedToUTC || !avroVersionSupportsLocalTimestampTypes()) {
switch (unit) {
case MILLIS:
return of(LogicalTypes.timestampMillis());
Expand Down Expand Up @@ -605,4 +611,14 @@ private static String namespace(String name, Map<String, Integer> names) {
Integer nameCount = names.merge(name, 1, (oldValue, value) -> oldValue + 1);
return nameCount > 1 ? name + nameCount : null;
}

/* Avro <= 1.9 does not support conversions to LocalTimestamp{Micros, Millis} classes */
private static boolean avroVersionSupportsLocalTimestampTypes() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think of mocking this method to capture the behavior of supporting and not-supporting local timestamps.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll give it a try with Powermock!

I might start a thread on the mailing list this week discussing Avro version support more broadly, to formalize which Avro versions are currently supported/any planned EOL dates? I've been thinking it would also be good to have some automated testing of parquet-avro on different Avro versions... it would just be a little complicated to set up since we'd need either a separate Maven module per supported Avro version, or some very clever classloading :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey @clairemcginty, that's a good idea!

What do we consider the lower-bound? :D Avro 1.8.2 is May 2017, so that's pretty old, but I'm aware of some breaking API changes because we exposed some Jackson classes in the public API.

To do cross-testing, we can do something similar to we do with Hadoop 2, you can get some inspiration there:

parquet-java/pom.xml

Lines 638 to 643 in d4384d3

<profile>
<id>hadoop2</id>
<properties>
<hadoop.version>2.7.3</hadoop.version>
</properties>
</profile>

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think 1.8 is a reasonable lower bound! for context on our use case, we use the Beam library, which until very recently was tightly coupled with 1.8. Things are now more flexible, but there's a lot of inertia surrounding 1.8 -- the breaking changes surrounding logical types make it a non-trivial upgrade 😅

I'll take a look at the hadoop setup! Avro is a bit tricky because we need to both set the Avro-compiler version, and load the Avro core library, for whatever version of Avro we want to test...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the context, that helps. I see that 1.8.2 is still in there, so that sounds reasonable to me 👍

final String avroVersion = getRuntimeAvroVersion();

return avroVersion == null
|| !(avroVersion.startsWith("1.7.")
|| avroVersion.startsWith("1.8.")
|| avroVersion.startsWith("1.9."));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96;
import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
import static org.junit.Assert.assertEquals;
import static org.mockito.Mockito.CALLS_REAL_METHODS;

import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.io.Resources;
import java.util.Arrays;
Expand All @@ -53,19 +55,34 @@
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Types;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

import org.junit.runner.RunWith;
import org.mockito.Mockito;
import org.powermock.api.mockito.PowerMockito;
import org.powermock.core.classloader.annotations.PrepareForTest;
import org.powermock.modules.junit4.PowerMockRunner;

@RunWith(PowerMockRunner.class)
@PrepareForTest(AvroRecordConverter.class)
public class TestAvroSchemaConverter {

private static final Configuration NEW_BEHAVIOR = new Configuration(false);

@Before
public void setupMockito() {
PowerMockito.mockStatic(AvroRecordConverter.class, CALLS_REAL_METHODS);
}

@BeforeClass
public static void setupConf() {
NEW_BEHAVIOR.setBoolean("parquet.avro.add-list-element-records", false);
Expand Down Expand Up @@ -665,6 +682,27 @@ public void testTimestampMillisType() throws Exception {
testRoundTripConversion(
expected, "message myrecord {\n" + " required int64 timestamp (TIMESTAMP(MILLIS,true));\n" + "}\n");

// Test that conversions for timestamp types only use APIs that are available in the user's Avro version
for (String avroVersion : ImmutableSet.of("1.7.0", "1.8.0", "1.9.0", "1.10.0", "1.11.0")) {
Mockito.when(AvroRecordConverter.getRuntimeAvroVersion()).thenReturn(avroVersion);
final Schema converted = new AvroSchemaConverter()
.convert(Types.buildMessage()
.addField(Types.primitive(INT64, Type.Repetition.REQUIRED)
.as(LogicalTypeAnnotation.timestampType(
false, LogicalTypeAnnotation.TimeUnit.MILLIS))
.length(1)
.named("timestamp_type"))
.named("TestAvro"));

assertEquals(
avroVersion.matches("1\\.[789]\\.\\d+") ? "timestamp-millis" : "local-timestamp-millis",
converted
.getField("timestamp_type")
.schema()
.getLogicalType()
.getName());
}

for (PrimitiveTypeName primitive :
new PrimitiveTypeName[] {INT32, INT96, FLOAT, DOUBLE, BOOLEAN, BINARY, FIXED_LEN_BYTE_ARRAY}) {
final PrimitiveType type;
Expand Down Expand Up @@ -729,6 +767,27 @@ public void testTimestampMicrosType() throws Exception {
IllegalArgumentException.class,
() -> new AvroSchemaConverter().convert(message(type)));
}

// Test that conversions for timestamp types only use APIs that are available in the user's Avro version
for (String avroVersion : ImmutableSet.of("1.7.0", "1.8.0", "1.9.0", "1.10.0", "1.11.0")) {
Mockito.when(AvroRecordConverter.getRuntimeAvroVersion()).thenReturn(avroVersion);
final Schema converted = new AvroSchemaConverter()
.convert(Types.buildMessage()
.addField(Types.primitive(INT64, Type.Repetition.REQUIRED)
.as(LogicalTypeAnnotation.timestampType(
false, LogicalTypeAnnotation.TimeUnit.MICROS))
.length(1)
.named("timestamp_type"))
.named("TestAvro"));

assertEquals(
avroVersion.matches("1\\.[789]\\.\\d+") ? "timestamp-micros" : "local-timestamp-micros",
converted
.getField("timestamp_type")
.schema()
.getLogicalType()
.getName());
}
}

@Test
Expand Down