From eccd8778745a46f5ca94e92857a0d946e6a67da0 Mon Sep 17 00:00:00 2001 From: aandres3 Date: Mon, 6 Jan 2025 09:15:03 +0000 Subject: [PATCH 1/5] Fix reading of proto Uint32Value --- .../org/apache/parquet/proto/ProtoMessageConverter.java | 7 +++++++ .../org/apache/parquet/proto/ProtoSchemaConverter.java | 2 +- .../java/org/apache/parquet/proto/ProtoWriteSupport.java | 2 +- .../org/apache/parquet/proto/ProtoSchemaConverterTest.java | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java index d446598f06..e9e42e24a0 100644 --- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java +++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java @@ -763,6 +763,13 @@ public ProtoUInt32ValueConverter(ParentValueContainer parent) { this.parent = parent; } + + @Override + public void addInt(int value) { + parent.add(UInt32Value.of(value)); + } + + // This is left for backward compatibility with the old implementation which used int64 for uint32 @Override public void addLong(long value) { parent.add(UInt32Value.of(Math.toIntExact(value))); diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java index 74bb4235a2..ff27b263e9 100644 --- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java +++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java @@ -260,7 +260,7 @@ private Builder>, GroupBuilder> addF return builder.primitive(INT32, getRepetition(descriptor)); } if (messageType.equals(UInt32Value.getDescriptor())) { - return builder.primitive(INT64, getRepetition(descriptor)); + return builder.primitive(INT32, getRepetition(descriptor)); } if (messageType.equals(BytesValue.getDescriptor())) { return builder.primitive(BINARY, getRepetition(descriptor)); diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java index 637f6fda91..b9fe4bf9e1 100644 --- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java +++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java @@ -763,7 +763,7 @@ void writeRawValue(Object value) { class UInt32ValueWriter extends FieldWriter { @Override void writeRawValue(Object value) { - recordConsumer.addLong(((UInt32Value) value).getValue()); + recordConsumer.addInteger(((UInt32Value) value).getValue()); } } diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java index 5240be5a36..e9f08f33f7 100644 --- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java +++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java @@ -400,7 +400,7 @@ public void testProto3ConvertWrappedMessageUnwrapped() throws Exception { + " optional int64 wrappedInt64 = 3;\n" + " optional int64 wrappedUInt64 = 4;\n" + " optional int32 wrappedInt32 = 5;\n" - + " optional int64 wrappedUInt32 = 6;\n" + + " optional int32 wrappedUInt32 = 6;\n" + " optional boolean wrappedBool = 7;\n" + " optional binary wrappedString (UTF8) = 8;\n" + " optional binary wrappedBytes = 9;\n" From 3f30f0acf2ebefcd6fff94ec3927907c5efb22a9 Mon Sep 17 00:00:00 2001 From: aandres3 Date: Tue, 7 Jan 2025 09:10:45 +0000 Subject: [PATCH 2/5] Add test for overflow --- .../parquet/proto/ProtoWriteSupportTest.java | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java index 360da8b741..309828488a 100644 --- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java +++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java @@ -1343,7 +1343,7 @@ public void testProto3WrappedMessageUnwrappedRoundTrip() throws Exception { msg.setWrappedInt64(Int64Value.of(1_000_000_000L * 4)); msg.setWrappedUInt64(UInt64Value.of(1_000_000_000L * 9)); msg.setWrappedInt32(Int32Value.of(1_000_000 * 3)); - msg.setWrappedUInt32(UInt32Value.of(1_000_000 * 8)); + msg.setWrappedUInt32(UInt32Value.of(Integer.MIN_VALUE)); msg.setWrappedBool(BoolValue.of(true)); msg.setWrappedString(StringValue.of("Good Will Hunting")); msg.setWrappedBytes(BytesValue.of(ByteString.copyFrom("someText", "UTF-8"))); @@ -1364,7 +1364,7 @@ public void testProto3WrappedMessageUnwrappedRoundTrip() throws Exception { assertEquals(1_000_000_000L * 4, gotBackFirst.getWrappedInt64().getValue()); assertEquals(1_000_000_000L * 9, gotBackFirst.getWrappedUInt64().getValue()); assertEquals(1_000_000 * 3, gotBackFirst.getWrappedInt32().getValue()); - assertEquals(1_000_000 * 8, gotBackFirst.getWrappedUInt32().getValue()); + assertEquals(Integer.MIN_VALUE, gotBackFirst.getWrappedUInt32().getValue()); assertEquals(BoolValue.of(true), gotBackFirst.getWrappedBool()); assertEquals("Good Will Hunting", gotBackFirst.getWrappedString().getValue()); assertEquals( @@ -1372,6 +1372,26 @@ public void testProto3WrappedMessageUnwrappedRoundTrip() throws Exception { gotBackFirst.getWrappedBytes().getValue()); } + @Test + public void testProto3WrappedMessageUnwrappedRoundTripUint32() throws Exception { + TestProto3.WrappedMessage msgMin = TestProto3.WrappedMessage.newBuilder().setWrappedUInt32(UInt32Value.of(Integer.MAX_VALUE)).build(); + TestProto3.WrappedMessage msgMax = TestProto3.WrappedMessage.newBuilder().setWrappedUInt32(UInt32Value.of(Integer.MIN_VALUE)).build(); + + Path tmpFilePath = TestUtils.someTemporaryFilePath(); + ParquetWriter writer = ProtoParquetWriter.builder(tmpFilePath) + .withMessage(TestProto3.WrappedMessage.class) + .config(ProtoWriteSupport.PB_UNWRAP_PROTO_WRAPPERS, "true") + .build(); + writer.write(msgMin); + writer.write(msgMax); + writer.close(); + List gotBack = TestUtils.readMessages(tmpFilePath, TestProto3.WrappedMessage.class); + + assertEquals(msgMin, gotBack.get(0)); + assertEquals(msgMax, gotBack.get(1)); + + } + @Test public void testProto3WrappedMessageWithNullsRoundTrip() throws Exception { TestProto3.WrappedMessage.Builder msg = TestProto3.WrappedMessage.newBuilder(); From 43367931fe956a595a1e425f61e3e12d21a47240 Mon Sep 17 00:00:00 2001 From: aandres3 Date: Tue, 7 Jan 2025 09:14:38 +0000 Subject: [PATCH 3/5] Apply spotless --- .../org/apache/parquet/proto/ProtoMessageConverter.java | 1 - .../org/apache/parquet/proto/ProtoWriteSupportTest.java | 9 ++++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java index e9e42e24a0..ff9e4ca33a 100644 --- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java +++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java @@ -763,7 +763,6 @@ public ProtoUInt32ValueConverter(ParentValueContainer parent) { this.parent = parent; } - @Override public void addInt(int value) { parent.add(UInt32Value.of(value)); diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java index 309828488a..58347c679f 100644 --- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java +++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java @@ -1374,8 +1374,12 @@ public void testProto3WrappedMessageUnwrappedRoundTrip() throws Exception { @Test public void testProto3WrappedMessageUnwrappedRoundTripUint32() throws Exception { - TestProto3.WrappedMessage msgMin = TestProto3.WrappedMessage.newBuilder().setWrappedUInt32(UInt32Value.of(Integer.MAX_VALUE)).build(); - TestProto3.WrappedMessage msgMax = TestProto3.WrappedMessage.newBuilder().setWrappedUInt32(UInt32Value.of(Integer.MIN_VALUE)).build(); + TestProto3.WrappedMessage msgMin = TestProto3.WrappedMessage.newBuilder() + .setWrappedUInt32(UInt32Value.of(Integer.MAX_VALUE)) + .build(); + TestProto3.WrappedMessage msgMax = TestProto3.WrappedMessage.newBuilder() + .setWrappedUInt32(UInt32Value.of(Integer.MIN_VALUE)) + .build(); Path tmpFilePath = TestUtils.someTemporaryFilePath(); ParquetWriter writer = ProtoParquetWriter.builder(tmpFilePath) @@ -1389,7 +1393,6 @@ public void testProto3WrappedMessageUnwrappedRoundTripUint32() throws Exception assertEquals(msgMin, gotBack.get(0)); assertEquals(msgMax, gotBack.get(1)); - } @Test From 29e0de258f0b6812167cd0135a3f01929d4f074a Mon Sep 17 00:00:00 2001 From: aandres3 Date: Tue, 7 Jan 2025 10:02:44 +0000 Subject: [PATCH 4/5] Roll back change to original test --- .../java/org/apache/parquet/proto/ProtoWriteSupportTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java index 58347c679f..b4754a24b5 100644 --- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java +++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java @@ -1343,7 +1343,7 @@ public void testProto3WrappedMessageUnwrappedRoundTrip() throws Exception { msg.setWrappedInt64(Int64Value.of(1_000_000_000L * 4)); msg.setWrappedUInt64(UInt64Value.of(1_000_000_000L * 9)); msg.setWrappedInt32(Int32Value.of(1_000_000 * 3)); - msg.setWrappedUInt32(UInt32Value.of(Integer.MIN_VALUE)); + msg.setWrappedUInt32(UInt32Value.of(1_000_000 * 8)); msg.setWrappedBool(BoolValue.of(true)); msg.setWrappedString(StringValue.of("Good Will Hunting")); msg.setWrappedBytes(BytesValue.of(ByteString.copyFrom("someText", "UTF-8"))); @@ -1364,7 +1364,7 @@ public void testProto3WrappedMessageUnwrappedRoundTrip() throws Exception { assertEquals(1_000_000_000L * 4, gotBackFirst.getWrappedInt64().getValue()); assertEquals(1_000_000_000L * 9, gotBackFirst.getWrappedUInt64().getValue()); assertEquals(1_000_000 * 3, gotBackFirst.getWrappedInt32().getValue()); - assertEquals(Integer.MIN_VALUE, gotBackFirst.getWrappedUInt32().getValue()); + assertEquals(1_000_000 * 8, gotBackFirst.getWrappedUInt32().getValue()); assertEquals(BoolValue.of(true), gotBackFirst.getWrappedBool()); assertEquals("Good Will Hunting", gotBackFirst.getWrappedString().getValue()); assertEquals( From e74433fb90b1aabafa853157b9078190f9d5720e Mon Sep 17 00:00:00 2001 From: aandres3 Date: Wed, 15 Jan 2025 18:02:02 +0000 Subject: [PATCH 5/5] Better uint32 example --- .../proto/ProtoInputOutputFormatTest.java | 28 ++++++++++++++++ .../parquet/proto/ProtoWriteSupportTest.java | 32 ++++++++----------- 2 files changed, 42 insertions(+), 18 deletions(-) diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java index 57ad4d4f08..ca59d3db5e 100644 --- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java +++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java @@ -678,6 +678,34 @@ public void testProto3WrappedMessageClass() throws Exception { assertEquals(msgNonEmpty, result.get(1)); } + @Test + public void testProto3Uint32Behaviour() throws Exception { + + TestProto3.SchemaConverterAllDatatypes intMin = TestProto3.SchemaConverterAllDatatypes.newBuilder() + .setOptionalUInt32(Integer.MIN_VALUE) + .build(); + assertEquals(intMin.toString(), "optionalUInt32: 2147483648\n"); + TestProto3.SchemaConverterAllDatatypes uintMin = TestProto3.SchemaConverterAllDatatypes.newBuilder() + .setOptionalUInt32(-1) + .build(); + assertEquals(uintMin.toString(), "optionalUInt32: 4294967295\n"); + TestProto3.SchemaConverterAllDatatypes uintMax = TestProto3.SchemaConverterAllDatatypes.newBuilder() + .setOptionalUInt32(Integer.MAX_VALUE) + .build(); + assertEquals(uintMax.toString(), "optionalUInt32: 2147483647\n"); + + Configuration conf = new Configuration(); + Path outputPath = new WriteUsingMR(conf).write(intMin, uintMin, uintMax); + ReadUsingMR readUsingMR = new ReadUsingMR(conf); + String customClass = TestProto3.SchemaConverterAllDatatypes.class.getName(); + ProtoReadSupport.setProtobufClass(readUsingMR.getConfiguration(), customClass); + List result = readUsingMR.read(outputPath); + + assertEquals(result.get(0), intMin); + assertEquals(result.get(1), uintMin); + assertEquals(result.get(2), uintMax); + } + /** * Runs job that writes input to file and then job reading data back. */ diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java index b4754a24b5..e80524d14b 100644 --- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java +++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java @@ -22,22 +22,7 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import com.google.protobuf.BoolValue; -import com.google.protobuf.ByteString; -import com.google.protobuf.BytesValue; -import com.google.protobuf.Descriptors; -import com.google.protobuf.DoubleValue; -import com.google.protobuf.DynamicMessage; -import com.google.protobuf.FloatValue; -import com.google.protobuf.Int32Value; -import com.google.protobuf.Int64Value; -import com.google.protobuf.Message; -import com.google.protobuf.MessageOrBuilder; -import com.google.protobuf.StringValue; -import com.google.protobuf.Timestamp; -import com.google.protobuf.UInt32Value; -import com.google.protobuf.UInt64Value; -import com.google.protobuf.Value; +import com.google.protobuf.*; import com.google.protobuf.util.Timestamps; import java.io.IOException; import java.time.LocalDate; @@ -1374,12 +1359,21 @@ public void testProto3WrappedMessageUnwrappedRoundTrip() throws Exception { @Test public void testProto3WrappedMessageUnwrappedRoundTripUint32() throws Exception { + TestProto3.WrappedMessage msgMin = TestProto3.WrappedMessage.newBuilder() - .setWrappedUInt32(UInt32Value.of(Integer.MAX_VALUE)) + .setWrappedUInt32(UInt32Value.of(Integer.MIN_VALUE)) .build(); + assertEquals(TextFormat.shortDebugString(msgMin), "wrappedUInt32 { value: 2147483648 }"); + TestProto3.WrappedMessage msgMax = TestProto3.WrappedMessage.newBuilder() - .setWrappedUInt32(UInt32Value.of(Integer.MIN_VALUE)) + .setWrappedUInt32(UInt32Value.of(Integer.MAX_VALUE)) + .build(); + assertEquals(TextFormat.shortDebugString(msgMax), "wrappedUInt32 { value: 2147483647 }"); + + TestProto3.WrappedMessage msgMinusOne = TestProto3.WrappedMessage.newBuilder() + .setWrappedUInt32(UInt32Value.of(-1)) .build(); + assertEquals(TextFormat.shortDebugString(msgMinusOne), "wrappedUInt32 { value: 4294967295 }"); Path tmpFilePath = TestUtils.someTemporaryFilePath(); ParquetWriter writer = ProtoParquetWriter.builder(tmpFilePath) @@ -1388,11 +1382,13 @@ public void testProto3WrappedMessageUnwrappedRoundTripUint32() throws Exception .build(); writer.write(msgMin); writer.write(msgMax); + writer.write(msgMinusOne); writer.close(); List gotBack = TestUtils.readMessages(tmpFilePath, TestProto3.WrappedMessage.class); assertEquals(msgMin, gotBack.get(0)); assertEquals(msgMax, gotBack.get(1)); + assertEquals(msgMinusOne, gotBack.get(2)); } @Test