From 6b6e1da6d5a0b50ce56696692072625b6da0ad8a Mon Sep 17 00:00:00 2001 From: Claire McGinty Date: Thu, 2 Jan 2025 09:47:52 -0700 Subject: [PATCH 1/2] Fix LogicalType conversions for nested records on Avro <= 1.8 --- .../parquet/avro/AvroRecordConverter.java | 6 +-- .../parquet/avro/TestAvroRecordConverter.java | 37 +++++++++++++++++-- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroRecordConverter.java b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroRecordConverter.java index 441428bfa7..b5fc30f3db 100644 --- a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroRecordConverter.java +++ b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroRecordConverter.java @@ -194,12 +194,12 @@ private static void addLogicalTypeConversion(SpecificData model, Schema schema, model.addLogicalTypeConversion(conversion); } } - + } catch (NoSuchFieldException e) { + // Avro classes without logical types (denoted by the "conversions" field) + } finally { for (Schema.Field field : schema.getFields()) { addLogicalTypeConversion(model, field.schema(), seenSchemas); } - } catch (NoSuchFieldException e) { - // Avro classes without logical types (denoted by the "conversions" field) } } break; diff --git a/parquet-avro/src/test/java/org/apache/parquet/avro/TestAvroRecordConverter.java b/parquet-avro/src/test/java/org/apache/parquet/avro/TestAvroRecordConverter.java index 76e4b99d09..315320bbdc 100644 --- a/parquet-avro/src/test/java/org/apache/parquet/avro/TestAvroRecordConverter.java +++ b/parquet-avro/src/test/java/org/apache/parquet/avro/TestAvroRecordConverter.java @@ -88,14 +88,22 @@ public void testModelForGenericRecord() { public void testModelForSpecificRecordWithLogicalTypesWithDeprecatedAvro1_8() { Mockito.when(AvroRecordConverter.getRuntimeAvroVersion()).thenReturn("1.8.2"); - // Test that model is generated correctly - final SpecificData model = AvroRecordConverter.getModelForSchema(LogicalTypesTestDeprecated.SCHEMA$); + // Test that model is generated correctly when record contains both top-level and nested logical types + SpecificData model = AvroRecordConverter.getModelForSchema(LogicalTypesTestDeprecated.SCHEMA$); // Test that model is generated correctly Collection> conversions = model.getConversions(); - assertEquals(conversions.size(), 3); + assertEquals(3, conversions.size()); assertNotNull(model.getConversionByClass(Instant.class)); assertNotNull(model.getConversionByClass(LocalDate.class)); assertNotNull(model.getConversionByClass(LocalTime.class)); + + // Test that model is generated correctly when record contains only nested logical types + model = AvroRecordConverter.getModelForSchema(NestedOnlyLogicalTypesDeprecated.SCHEMA$); + // Test that model is generated correctly + conversions = model.getConversions(); + assertEquals(2, conversions.size()); + assertNotNull(model.getConversionByClass(LocalDate.class)); + assertNotNull(model.getConversionByClass(LocalTime.class)); } @Test @@ -147,6 +155,7 @@ public static org.apache.avro.Schema getClassSchema() { }; } + // An Avro class generated from Avro 1.8 that contains both nested and top-level logical type fields @org.apache.avro.specific.AvroGenerated public abstract static class LogicalTypesTestDeprecated extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { @@ -179,4 +188,26 @@ public static org.apache.avro.Schema getClassSchema() { new org.apache.avro.data.TimeConversions.TimestampMillisConversion(), null, null }; } + + // An Avro class generated from Avro 1.8 that contains only nested logical type fields + @org.apache.avro.specific.AvroGenerated + public abstract static class NestedOnlyLogicalTypesDeprecated extends org.apache.avro.specific.SpecificRecordBase + implements org.apache.avro.specific.SpecificRecord { + public static final org.apache.avro.Schema SCHEMA$ = SchemaBuilder.builder() + .record("NestedOnlyLogicalTypesDeprecated") + .namespace("org.apache.parquet.avro.TestAvroRecordConverter") + .fields() + .name("local_date_time") + .type(LocalDateTimeTestDeprecated.getClassSchema()) + .noDefault() + .endRecord(); + + public static org.apache.avro.Schema getClassSchema() { + return SCHEMA$; + } + + private static SpecificData MODEL$ = new SpecificData(); + + // No top-level conversions field, since logical types are all nested + } } From 3767c665896f97358e80a457de1653c753f85bca Mon Sep 17 00:00:00 2001 From: Claire McGinty Date: Mon, 6 Jan 2025 09:32:34 -0500 Subject: [PATCH 2/2] Move inner field traversal outside finally block --- .../java/org/apache/parquet/avro/AvroRecordConverter.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroRecordConverter.java b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroRecordConverter.java index b5fc30f3db..a82d0148cf 100644 --- a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroRecordConverter.java +++ b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroRecordConverter.java @@ -196,10 +196,10 @@ private static void addLogicalTypeConversion(SpecificData model, Schema schema, } } catch (NoSuchFieldException e) { // Avro classes without logical types (denoted by the "conversions" field) - } finally { - for (Schema.Field field : schema.getFields()) { - addLogicalTypeConversion(model, field.schema(), seenSchemas); - } + } + + for (Schema.Field field : schema.getFields()) { + addLogicalTypeConversion(model, field.schema(), seenSchemas); } } break;