diff --git a/parquet-benchmarks/pom.xml b/parquet-benchmarks/pom.xml
index 77df2c101d..7246e1a754 100644
--- a/parquet-benchmarks/pom.xml
+++ b/parquet-benchmarks/pom.xml
@@ -34,6 +34,8 @@
1.37
parquet-benchmarks
+ 3.25.5
+ 1.14.18
@@ -52,6 +54,11 @@
parquet-common
${project.version}
+
+ org.apache.parquet
+ parquet-protobuf
+ ${project.version}
+
org.apache.hadoop
hadoop-client
@@ -82,10 +89,32 @@
slf4j-api
${slf4j.version}
+
+ net.bytebuddy
+ byte-buddy
+ ${byte-buddy.version}
+
+
+ com.github.os72
+ protoc-jar-maven-plugin
+ 3.11.4
+
+
+ generate-sources
+ generate-sources
+
+ run
+
+
+ com.google.protobuf:protoc:${protobuf.version}
+
+
+
+
org.apache.maven.plugins
maven-compiler-plugin
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ProtoDataGenerator.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ProtoDataGenerator.java
new file mode 100644
index 0000000000..0a7c402e5b
--- /dev/null
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ProtoDataGenerator.java
@@ -0,0 +1,308 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.benchmarks;
+
+import static java.util.UUID.randomUUID;
+import static org.apache.parquet.benchmarks.BenchmarkConstants.DICT_PAGE_SIZE;
+import static org.apache.parquet.benchmarks.BenchmarkUtils.exists;
+
+import com.google.protobuf.ByteString;
+import com.google.protobuf.Message;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.function.IntFunction;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.parquet.benchmarks.Messages.Test1;
+import org.apache.parquet.benchmarks.Messages.Test100Int32;
+import org.apache.parquet.benchmarks.Messages.Test30Int32;
+import org.apache.parquet.benchmarks.Messages.Test30String;
+import org.apache.parquet.column.ParquetProperties;
+import org.apache.parquet.hadoop.ParquetWriter;
+import org.apache.parquet.hadoop.metadata.CompressionCodecName;
+import org.apache.parquet.proto.ProtoParquetWriter;
+import org.apache.parquet.proto.ProtoWriteSupport;
+
+public class ProtoDataGenerator extends DataGenerator {
+
+ private final Class protoClass;
+ private final ProtoWriteSupport.CodegenMode codegenMode;
+ private final RecordGeneratorFactory recordGeneratorFactory;
+
+ public ProtoDataGenerator(Class protoClass, ProtoWriteSupport.CodegenMode codegenMode) {
+ this.protoClass = protoClass;
+ this.codegenMode = codegenMode;
+ this.recordGeneratorFactory = (RecordGeneratorFactory) GENERATORS.get(protoClass);
+ }
+
+ private interface RecordGeneratorFactory {
+ RecordGenerator newRecordGenerator(int fixedLenByteArraySize);
+ }
+
+ private interface RecordGenerator extends IntFunction {}
+
+ private static final RecordGeneratorFactory TEST1 = fixedLenByteArraySize -> {
+ final Test1.Builder builder = Test1.newBuilder();
+ String fixedLenStr = generateFixedLenStr(fixedLenByteArraySize);
+
+ return i -> builder.setBinaryField(ByteString.copyFromUtf8(randomUUID().toString()))
+ .setInt32Field(i)
+ .setInt64Field(64L)
+ .setBooleanField(true)
+ .setFloatField(1.0f)
+ .setDoubleField(2.0d)
+ .setStringField(fixedLenStr);
+ };
+
+ private static final RecordGeneratorFactory TEST_30_INT32 = fixedLenByteArraySize -> {
+ final Test30Int32.Builder builder = Test30Int32.newBuilder();
+
+ return i -> builder.setField1(i)
+ .setField2(i)
+ .setField3(i)
+ .setField4(i)
+ .setField5(i)
+ .setField6(i)
+ .setField7(i)
+ .setField8(i)
+ .setField9(i)
+ .setField10(i)
+ .setField11(i)
+ .setField12(i)
+ .setField13(i)
+ .setField14(i)
+ .setField15(i)
+ .setField16(i)
+ .setField17(i)
+ .setField18(i)
+ .setField19(i)
+ .setField20(i)
+ .setField21(i)
+ .setField22(i)
+ .setField23(i)
+ .setField24(i)
+ .setField25(i)
+ .setField26(i)
+ .setField27(i)
+ .setField28(i)
+ .setField29(i)
+ .setField30(i);
+ };
+
+ private static final RecordGeneratorFactory TEST_100_INT32 = fixedLenByteArraySize -> {
+ final Test100Int32.Builder builder = Test100Int32.newBuilder();
+
+ return i -> builder.setF1(i)
+ .setF2(i)
+ .setF3(i)
+ .setF4(i)
+ .setF5(i)
+ .setF6(i)
+ .setF7(i)
+ .setF8(i)
+ .setF9(i)
+ .setF10(i)
+ .setF11(i)
+ .setF12(i)
+ .setF13(i)
+ .setF14(i)
+ .setF15(i)
+ .setF16(i)
+ .setF17(i)
+ .setF18(i)
+ .setF19(i)
+ .setF20(i)
+ .setF21(i)
+ .setF22(i)
+ .setF23(i)
+ .setF24(i)
+ .setF25(i)
+ .setF26(i)
+ .setF27(i)
+ .setF28(i)
+ .setF29(i)
+ .setF30(i)
+ .setF31(i)
+ .setF32(i)
+ .setF33(i)
+ .setF34(i)
+ .setF35(i)
+ .setF36(i)
+ .setF37(i)
+ .setF38(i)
+ .setF39(i)
+ .setF40(i)
+ .setF41(i)
+ .setF42(i)
+ .setF43(i)
+ .setF44(i)
+ .setF45(i)
+ .setF46(i)
+ .setF47(i)
+ .setF48(i)
+ .setF49(i)
+ .setF50(i)
+ .setF51(i)
+ .setF52(i)
+ .setF53(i)
+ .setF54(i)
+ .setF55(i)
+ .setF56(i)
+ .setF57(i)
+ .setF58(i)
+ .setF59(i)
+ .setF60(i)
+ .setF61(i)
+ .setF62(i)
+ .setF63(i)
+ .setF64(i)
+ .setF65(i)
+ .setF66(i)
+ .setF67(i)
+ .setF68(i)
+ .setF69(i)
+ .setF70(i)
+ .setF71(i)
+ .setF72(i)
+ .setF73(i)
+ .setF74(i)
+ .setF75(i)
+ .setF76(i)
+ .setF77(i)
+ .setF78(i)
+ .setF79(i)
+ .setF80(i)
+ .setF81(i)
+ .setF82(i)
+ .setF83(i)
+ .setF84(i)
+ .setF85(i)
+ .setF86(i)
+ .setF87(i)
+ .setF88(i)
+ .setF89(i)
+ .setF90(i)
+ .setF91(i)
+ .setF92(i)
+ .setF93(i)
+ .setF94(i)
+ .setF95(i)
+ .setF96(i)
+ .setF97(i)
+ .setF98(i)
+ .setF99(i)
+ .setF100(i);
+ };
+
+ private static final RecordGeneratorFactory TEST_30_STRING = fixedLenByteArraySize -> {
+ final Test30String.Builder builder = Test30String.newBuilder();
+
+ return i -> builder.setField1("setField1:" + i)
+ .setField2("setField2:" + i)
+ .setField3("setField3:" + i)
+ .setField4("setField4:" + i)
+ .setField5("setField5:" + i)
+ .setField6("setField6:" + i)
+ .setField7("setField7:" + i)
+ .setField8("setField8:" + i)
+ .setField9("setField9:" + i)
+ .setField10("setField10:" + i)
+ .setField11("setField11:" + i)
+ .setField12("setField12:" + i)
+ .setField13("setField13:" + i)
+ .setField14("setField14:" + i)
+ .setField15("setField15:" + i)
+ .setField16("setField16:" + i)
+ .setField17("setField17:" + i)
+ .setField18("setField18:" + i)
+ .setField19("setField19:" + i)
+ .setField20("setField20:" + i)
+ .setField21("setField21:" + i)
+ .setField22("setField22:" + i)
+ .setField23("setField23:" + i)
+ .setField24("setField24:" + i)
+ .setField25("setField25:" + i)
+ .setField26("setField26:" + i)
+ .setField27("setField27:" + i)
+ .setField28("setField28:" + i)
+ .setField29("setField29:" + i)
+ .setField30("setField30:" + i);
+ };
+
+ private static String generateFixedLenStr(int fixedLenByteArraySize) {
+ // generate some data for the fixed len byte array field
+ char[] chars = new char[fixedLenByteArraySize];
+ Arrays.fill(chars, '*');
+ return String.copyValueOf(chars);
+ }
+
+ private static final Map, RecordGeneratorFactory>> GENERATORS = new HashMap() {
+ {
+ put(Test1.class, TEST1);
+ put(Test30Int32.class, TEST_30_INT32);
+ put(Test30String.class, TEST_30_STRING);
+ put(Test100Int32.class, TEST_100_INT32);
+ }
+ };
+
+ public void generateData(
+ Path outFile,
+ Configuration configuration,
+ ParquetProperties.WriterVersion version,
+ int blockSize,
+ int pageSize,
+ int fixedLenByteArraySize,
+ CompressionCodecName codec,
+ int nRows)
+ throws IOException {
+
+ outFile = outFile.suffix(protoClass.getName());
+
+ if (exists(configuration, outFile)) {
+ System.out.println("File already exists " + outFile);
+ return;
+ }
+
+ System.out.println("Generating data @ " + outFile + " with codegenMode " + codegenMode);
+
+ ProtoWriteSupport.setCodegenMode(configuration, codegenMode);
+ ProtoWriteSupport.setSchema(configuration, protoClass);
+
+ ParquetWriter writer = ProtoParquetWriter.builder(outFile)
+ .withMessage(protoClass)
+ .withConf(configuration)
+ .withCompressionCodec(codec)
+ .withRowGroupSize((long) blockSize)
+ .withPageSize(pageSize)
+ .enableDictionaryEncoding()
+ .withDictionaryPageSize(DICT_PAGE_SIZE)
+ .withValidation(false)
+ .withWriterVersion(version)
+ .build();
+
+ RecordGenerator recordGenerator = recordGeneratorFactory.newRecordGenerator(fixedLenByteArraySize);
+
+ for (int i = 0; i < nRows; i++) {
+ writer.write(recordGenerator.apply(i));
+ }
+ writer.close();
+ }
+}
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ProtoReadBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ProtoReadBenchmarks.java
new file mode 100644
index 0000000000..7143bfe130
--- /dev/null
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ProtoReadBenchmarks.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.parquet.benchmarks;
+
+import static org.apache.parquet.benchmarks.BenchmarkFiles.configuration;
+import static org.openjdk.jmh.annotations.Scope.Thread;
+
+import com.google.protobuf.Message;
+import java.io.IOException;
+import org.apache.hadoop.fs.Path;
+import org.apache.parquet.hadoop.ParquetReader;
+import org.apache.parquet.proto.ProtoParquetReader;
+import org.apache.parquet.proto.ProtoReadSupport;
+import org.apache.parquet.proto.ProtoWriteSupport;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.infra.Blackhole;
+
+@State(Thread)
+public class ProtoReadBenchmarks extends ReadBenchmarks {
+
+ @Param({"OFF", "REQUIRED"})
+ public ProtoReadSupport.CodegenMode codegenMode;
+
+ @Param({"Test30Int32", "Test100Int32", "Test30String", "Test1"})
+ public String protoClass;
+
+ private Class messageClass;
+ private ProtoDataGenerator protoDataGenerator;
+
+ @Setup(Level.Trial)
+ public void generateFilesForRead() {
+ try {
+ messageClass = (Class) Class.forName("org.apache.parquet.benchmarks.Messages$" + protoClass);
+ } catch (ClassNotFoundException e) {
+ throw new RuntimeException(e);
+ }
+ protoDataGenerator = new ProtoDataGenerator<>(messageClass, ProtoWriteSupport.CodegenMode.OFF);
+ protoDataGenerator.generateAll();
+ }
+
+ protected void read(Path parquetFile, int nRows, Blackhole blackhole) throws IOException {
+ ProtoReadSupport.setCodegenMode(configuration, codegenMode);
+ ParquetReader reader = ProtoParquetReader.builder(parquetFile.suffix(messageClass.getName()))
+ .withConf(configuration)
+ .build();
+ for (int i = 0; i < nRows; i++) {
+ Message.Builder builder = (Message.Builder) reader.read();
+ Message message = builder.build();
+ blackhole.consume(message);
+ }
+ reader.close();
+ }
+}
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ProtoWriteBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ProtoWriteBenchmarks.java
new file mode 100644
index 0000000000..0e29c11cff
--- /dev/null
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ProtoWriteBenchmarks.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.benchmarks;
+
+import static org.openjdk.jmh.annotations.Scope.Thread;
+
+import com.google.protobuf.Message;
+import org.apache.parquet.proto.ProtoWriteSupport;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+
+@State(Thread)
+public class ProtoWriteBenchmarks extends WriteBenchmarks {
+ @Param({"OFF", "REQUIRED_ALL"})
+ public ProtoWriteSupport.CodegenMode codegenMode;
+
+ @Param({"Test30Int32", "Test100Int32", "Test30String", "Test1"})
+ public String protoClass;
+
+ @Setup(Level.Iteration)
+ public void setup() {
+ Class messageClass;
+ try {
+ messageClass = (Class) Class.forName("org.apache.parquet.benchmarks.Messages$" + protoClass);
+ } catch (ClassNotFoundException e) {
+ throw new RuntimeException(e);
+ }
+ dataGenerator = new ProtoDataGenerator<>(messageClass, codegenMode);
+ // clean existing test data at the beginning of each iteration
+ dataGenerator.cleanup();
+ }
+}
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java
index 2d6e3a52e3..484d12f2cf 100644
--- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java
@@ -45,7 +45,7 @@
@State(Scope.Benchmark)
public class ReadBenchmarks {
- private void read(Path parquetFile, int nRows, Blackhole blackhole) throws IOException {
+ protected void read(Path parquetFile, int nRows, Blackhole blackhole) throws IOException {
ParquetReader reader = ParquetReader.builder(new GroupReadSupport(), parquetFile)
.withConf(configuration)
.build();
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
index 41f961de44..ff53ff45d2 100644
--- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
@@ -50,10 +50,11 @@
@State(Thread)
public class WriteBenchmarks {
- private DataGenerator dataGenerator = new DataGenerator();
+ protected DataGenerator dataGenerator;
@Setup(Level.Iteration)
public void setup() {
+ dataGenerator = new DataGenerator();
// clean existing test data at the beginning of each iteration
dataGenerator.cleanup();
}
diff --git a/parquet-benchmarks/src/main/protobuf/messages.proto b/parquet-benchmarks/src/main/protobuf/messages.proto
new file mode 100644
index 0000000000..fb9ad19bd4
--- /dev/null
+++ b/parquet-benchmarks/src/main/protobuf/messages.proto
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+syntax = "proto3";
+
+package Benchmarks;
+
+option java_package = "org.apache.parquet.benchmarks";
+
+// more or less mimics the data structure defined in the original DataGenerator
+message Test1 {
+ bytes binary_field = 1;
+ int32 int32_field = 2;
+ int64 int64_field = 3;
+ bool boolean_field = 4;
+ float float_field = 5;
+ double double_field = 6;
+ string string_field = 7;
+}
+
+message Test30Int32 {
+ int32 field1 = 1;
+ int32 field2 = 2;
+ int32 field3 = 3;
+ int32 field4 = 4;
+ int32 field5 = 5;
+ int32 field6 = 6;
+ int32 field7 = 7;
+ int32 field8 = 8;
+ int32 field9 = 9;
+ int32 field10 = 10;
+
+ int32 field11 = 11;
+ int32 field12 = 12;
+ int32 field13 = 13;
+ int32 field14 = 14;
+ int32 field15 = 15;
+ int32 field16 = 16;
+ int32 field17 = 17;
+ int32 field18 = 18;
+ int32 field19 = 19;
+ int32 field20 = 20;
+
+ int32 field21 = 21;
+ int32 field22 = 22;
+ int32 field23 = 23;
+ int32 field24 = 24;
+ int32 field25 = 25;
+ int32 field26 = 26;
+ int32 field27 = 27;
+ int32 field28 = 28;
+ int32 field29 = 29;
+ int32 field30 = 30;
+}
+
+message Test30String {
+ string field1 = 1;
+ string field2 = 2;
+ string field3 = 3;
+ string field4 = 4;
+ string field5 = 5;
+ string field6 = 6;
+ string field7 = 7;
+ string field8 = 8;
+ string field9 = 9;
+ string field10 = 10;
+
+ string field11 = 11;
+ string field12 = 12;
+ string field13 = 13;
+ string field14 = 14;
+ string field15 = 15;
+ string field16 = 16;
+ string field17 = 17;
+ string field18 = 18;
+ string field19 = 19;
+ string field20 = 20;
+
+ string field21 = 21;
+ string field22 = 22;
+ string field23 = 23;
+ string field24 = 24;
+ string field25 = 25;
+ string field26 = 26;
+ string field27 = 27;
+ string field28 = 28;
+ string field29 = 29;
+ string field30 = 30;
+}
+
+message Test100Int32 {
+ int32 f1 = 1;
+ int32 f2 = 2;
+ int32 f3 = 3;
+ int32 f4 = 4;
+ int32 f5 = 5;
+ int32 f6 = 6;
+ int32 f7 = 7;
+ int32 f8 = 8;
+ int32 f9 = 9;
+ int32 f10 = 10;
+
+ int32 f11 = 11;
+ int32 f12 = 12;
+ int32 f13 = 13;
+ int32 f14 = 14;
+ int32 f15 = 15;
+ int32 f16 = 16;
+ int32 f17 = 17;
+ int32 f18 = 18;
+ int32 f19 = 19;
+ int32 f20 = 20;
+
+ int32 f21 = 21;
+ int32 f22 = 22;
+ int32 f23 = 23;
+ int32 f24 = 24;
+ int32 f25 = 25;
+ int32 f26 = 26;
+ int32 f27 = 27;
+ int32 f28 = 28;
+ int32 f29 = 29;
+ int32 f30 = 30;
+
+ int32 f31 = 31;
+ int32 f32 = 32;
+ int32 f33 = 33;
+ int32 f34 = 34;
+ int32 f35 = 35;
+ int32 f36 = 36;
+ int32 f37 = 37;
+ int32 f38 = 38;
+ int32 f39 = 39;
+ int32 f40 = 40;
+
+ int32 f41 = 41;
+ int32 f42 = 42;
+ int32 f43 = 43;
+ int32 f44 = 44;
+ int32 f45 = 45;
+ int32 f46 = 46;
+ int32 f47 = 47;
+ int32 f48 = 48;
+ int32 f49 = 49;
+ int32 f50 = 50;
+
+ int32 f51 = 51;
+ int32 f52 = 52;
+ int32 f53 = 53;
+ int32 f54 = 54;
+ int32 f55 = 55;
+ int32 f56 = 56;
+ int32 f57 = 57;
+ int32 f58 = 58;
+ int32 f59 = 59;
+ int32 f60 = 60;
+
+ int32 f61 = 61;
+ int32 f62 = 62;
+ int32 f63 = 63;
+ int32 f64 = 64;
+ int32 f65 = 65;
+ int32 f66 = 66;
+ int32 f67 = 67;
+ int32 f68 = 68;
+ int32 f69 = 69;
+ int32 f70 = 70;
+
+ int32 f71 = 71;
+ int32 f72 = 72;
+ int32 f73 = 73;
+ int32 f74 = 74;
+ int32 f75 = 75;
+ int32 f76 = 76;
+ int32 f77 = 77;
+ int32 f78 = 78;
+ int32 f79 = 79;
+ int32 f80 = 80;
+
+ int32 f81 = 81;
+ int32 f82 = 82;
+ int32 f83 = 83;
+ int32 f84 = 84;
+ int32 f85 = 85;
+ int32 f86 = 86;
+ int32 f87 = 87;
+ int32 f88 = 88;
+ int32 f89 = 89;
+ int32 f90 = 90;
+
+ int32 f91 = 91;
+ int32 f92 = 92;
+ int32 f93 = 93;
+ int32 f94 = 94;
+ int32 f95 = 95;
+ int32 f96 = 96;
+ int32 f97 = 97;
+ int32 f98 = 98;
+ int32 f99 = 99;
+ int32 f100 = 100;
+}
diff --git a/parquet-protobuf/pom.xml b/parquet-protobuf/pom.xml
index f704295eff..509925cebe 100644
--- a/parquet-protobuf/pom.xml
+++ b/parquet-protobuf/pom.xml
@@ -34,6 +34,8 @@
3.25.5
2.50.0
1.4.3
+ 1.14.18
+
@@ -41,6 +43,13 @@
https://parquet.apache.org
+
+ net.bytebuddy
+ byte-buddy
+ ${byte-buddy.version}
+ compile
+ true
+
org.mockito
mockito-core
diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ByteBuddyCodeGen.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ByteBuddyCodeGen.java
new file mode 100644
index 0000000000..165bd2e7cf
--- /dev/null
+++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ByteBuddyCodeGen.java
@@ -0,0 +1,3965 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.proto;
+
+import static org.apache.parquet.proto.ByteBuddyCodeGen.CodeGenUtils.Reflection;
+
+import com.google.common.collect.MapMaker;
+import com.google.protobuf.BoolValue;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.BytesValue;
+import com.google.protobuf.Descriptors;
+import com.google.protobuf.DoubleValue;
+import com.google.protobuf.FloatValue;
+import com.google.protobuf.Int32Value;
+import com.google.protobuf.Int64Value;
+import com.google.protobuf.MapEntry;
+import com.google.protobuf.Message;
+import com.google.protobuf.MessageOrBuilder;
+import com.google.protobuf.StringValue;
+import com.google.protobuf.Timestamp;
+import com.google.protobuf.UInt32Value;
+import com.google.protobuf.UInt64Value;
+import com.google.protobuf.util.Timestamps;
+import com.google.type.Date;
+import com.google.type.TimeOfDay;
+import java.lang.invoke.LambdaMetafactory;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.MethodType;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.lang.reflect.Modifier;
+import java.lang.reflect.Type;
+import java.time.LocalDate;
+import java.time.LocalTime;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Queue;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.BiConsumer;
+import java.util.function.BiFunction;
+import java.util.function.Consumer;
+import java.util.function.Supplier;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import net.bytebuddy.ByteBuddy;
+import net.bytebuddy.description.field.FieldDescription;
+import net.bytebuddy.description.method.MethodDescription;
+import net.bytebuddy.description.method.MethodList;
+import net.bytebuddy.description.modifier.Visibility;
+import net.bytebuddy.description.type.TypeDescription;
+import net.bytebuddy.dynamic.DynamicType;
+import net.bytebuddy.dynamic.loading.ClassLoadingStrategy;
+import net.bytebuddy.dynamic.scaffold.InstrumentedType;
+import net.bytebuddy.implementation.Implementation;
+import net.bytebuddy.implementation.MethodCall;
+import net.bytebuddy.implementation.SuperMethodCall;
+import net.bytebuddy.implementation.bytecode.ByteCodeAppender;
+import net.bytebuddy.implementation.bytecode.Removal;
+import net.bytebuddy.implementation.bytecode.StackManipulation;
+import net.bytebuddy.implementation.bytecode.StackSize;
+import net.bytebuddy.implementation.bytecode.assign.TypeCasting;
+import net.bytebuddy.implementation.bytecode.collection.ArrayAccess;
+import net.bytebuddy.implementation.bytecode.collection.ArrayFactory;
+import net.bytebuddy.implementation.bytecode.constant.DoubleConstant;
+import net.bytebuddy.implementation.bytecode.constant.FloatConstant;
+import net.bytebuddy.implementation.bytecode.constant.IntegerConstant;
+import net.bytebuddy.implementation.bytecode.constant.JavaConstantValue;
+import net.bytebuddy.implementation.bytecode.constant.LongConstant;
+import net.bytebuddy.implementation.bytecode.constant.TextConstant;
+import net.bytebuddy.implementation.bytecode.member.FieldAccess;
+import net.bytebuddy.implementation.bytecode.member.MethodInvocation;
+import net.bytebuddy.implementation.bytecode.member.MethodReturn;
+import net.bytebuddy.implementation.bytecode.member.MethodVariableAccess;
+import net.bytebuddy.jar.asm.Handle;
+import net.bytebuddy.jar.asm.Label;
+import net.bytebuddy.jar.asm.MethodVisitor;
+import net.bytebuddy.jar.asm.Opcodes;
+import net.bytebuddy.matcher.ElementMatcher;
+import net.bytebuddy.matcher.ElementMatchers;
+import net.bytebuddy.utility.JavaConstant;
+import org.apache.parquet.conf.ParquetConfiguration;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.io.api.Converter;
+import org.apache.parquet.io.api.GroupConverter;
+import org.apache.parquet.io.api.RecordConsumer;
+import org.apache.parquet.io.api.RecordMaterializer;
+import org.apache.parquet.proto.ByteBuddyCodeGen.CodeGenUtils.Codegen;
+import org.apache.parquet.proto.ByteBuddyCodeGen.CodeGenUtils.Implementations;
+import org.apache.parquet.proto.ByteBuddyCodeGen.CodeGenUtils.LocalVar;
+import org.apache.parquet.proto.ProtoMessageConverter.AddRepeatedFieldParentValueContainer;
+import org.apache.parquet.proto.ProtoMessageConverter.ListConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.MapConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.ParentValueContainer;
+import org.apache.parquet.proto.ProtoMessageConverter.ProtoBinaryConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.ProtoBoolValueConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.ProtoBooleanConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.ProtoBytesValueConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.ProtoDateConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.ProtoDoubleConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.ProtoDoubleValueConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.ProtoEnumConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.ProtoFloatConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.ProtoFloatValueConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.ProtoInt32ValueConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.ProtoInt64ValueConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.ProtoIntConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.ProtoLongConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.ProtoStringConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.ProtoStringValueConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.ProtoTimeConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.ProtoTimestampConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.ProtoUInt32ValueConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.ProtoUInt64ValueConverter;
+import org.apache.parquet.proto.ProtoMessageConverter.SetFieldParentValueContainer;
+import org.apache.parquet.schema.MessageType;
+
+public class ByteBuddyCodeGen {
+ private static final AtomicLong BYTE_BUDDY_CLASS_SEQUENCE = new AtomicLong();
+
+ private static final GenerateMessageClasses GeneratedMessageV3 =
+ GenerateMessageClasses.resolve("com.google.protobuf.GeneratedMessageV3");
+ private static final GenerateMessageClasses GeneratedMessage =
+ GenerateMessageClasses.resolve("com.google.protobuf.GeneratedMessage");
+
+ static class CodeGenException extends RuntimeException {
+ public CodeGenException() {
+ super();
+ }
+
+ public CodeGenException(String message) {
+ super(message);
+ }
+
+ public CodeGenException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ public CodeGenException(Throwable cause) {
+ super(cause);
+ }
+ }
+
+ static boolean isGeneratedMessage(Class> protoMessage) {
+ return protoMessage != null
+ && (GeneratedMessage.isGeneratedMessage(protoMessage)
+ || GeneratedMessageV3.isGeneratedMessage(protoMessage));
+ }
+
+ static boolean isExtendableMessage(Class> protoMessage) {
+ return protoMessage != null
+ && (GeneratedMessage.isExtendableMessage(protoMessage)
+ || GeneratedMessageV3.isExtendableMessage(protoMessage));
+ }
+
+ static class GenerateMessageClasses {
+ private final Class> classGeneratedMessage;
+ private final Class> classExtendableMessage;
+
+ private GenerateMessageClasses(Class> classGeneratedMessage, Class> classExtendableMessage) {
+ this.classGeneratedMessage = classGeneratedMessage;
+ this.classExtendableMessage = classExtendableMessage;
+ }
+
+ static GenerateMessageClasses resolve(String generatedMessageClassName) {
+ Optional> generatedMessage = ReflectionUtil.classForName(generatedMessageClassName);
+ Optional> extendableMessage =
+ ReflectionUtil.classForName(generatedMessageClassName + "$ExtendableMessage");
+ if (generatedMessage.isPresent() && extendableMessage.isPresent()) {
+ return new GenerateMessageClasses(generatedMessage.get(), extendableMessage.get());
+ } else {
+ return new GenerateMessageClasses(null, null);
+ }
+ }
+
+ public boolean isGeneratedMessage(Class> clazz) {
+ return classGeneratedMessage != null && clazz != null && classGeneratedMessage.isAssignableFrom(clazz);
+ }
+
+ public boolean isExtendableMessage(Class> clazz) {
+ return classExtendableMessage != null && clazz != null && classExtendableMessage.isAssignableFrom(clazz);
+ }
+ }
+
+ static boolean isByteBuddyAvailable(boolean failIfNot) {
+ try {
+ Class.forName("net.bytebuddy.ByteBuddy", false, ByteBuddyCodeGen.class.getClassLoader());
+ return true;
+ } catch (ClassNotFoundException e) {
+ if (failIfNot) {
+ throw new UnsupportedOperationException("ByteBuddy is not available", e);
+ }
+ return false;
+ }
+ }
+
+ static class CodeGenUtils {
+ // resolve reflection methods early, so tests would fail fast should anything is changed in interfaces/classes
+ static final ResolvedReflection Reflection = new ResolvedReflection();
+
+ static class ResolvedReflection {
+ final Method MethodHandles_lookup = ReflectionUtil.getDeclaredMethod(MethodHandles.class, "lookup");
+
+ final RecordConsumerMethods RecordConsumer = new RecordConsumerMethods();
+ final ByteBuddyMessageWritersMethods ByteBuddyProto3FastMessageWriter =
+ new ByteBuddyMessageWritersMethods();
+ final FieldWriterMethods FieldWriter = new FieldWriterMethods();
+
+ static class RecordConsumerMethods {
+ final Method startField =
+ ReflectionUtil.getDeclaredMethod(RecordConsumer.class, "startField", String.class, int.class);
+ final Method endField =
+ ReflectionUtil.getDeclaredMethod(RecordConsumer.class, "endField", String.class, int.class);
+ final Method startGroup = ReflectionUtil.getDeclaredMethod(RecordConsumer.class, "startGroup");
+ final Method endGroup = ReflectionUtil.getDeclaredMethod(RecordConsumer.class, "endGroup");
+ final Method addInteger =
+ ReflectionUtil.getDeclaredMethod(RecordConsumer.class, "addInteger", int.class);
+ final Method addLong = ReflectionUtil.getDeclaredMethod(RecordConsumer.class, "addLong", long.class);
+ final Method addBoolean =
+ ReflectionUtil.getDeclaredMethod(RecordConsumer.class, "addBoolean", boolean.class);
+ final Method addBinary =
+ ReflectionUtil.getDeclaredMethod(RecordConsumer.class, "addBinary", Binary.class);
+ final Method addFloat = ReflectionUtil.getDeclaredMethod(RecordConsumer.class, "addFloat", float.class);
+ final Method addDouble =
+ ReflectionUtil.getDeclaredMethod(RecordConsumer.class, "addDouble", double.class);
+
+ final Map, Method> PRIMITIVES = initPrimitives();
+
+ private Map, Method> initPrimitives() {
+ Map, Method> m = new HashMap<>();
+ m.put(int.class, addInteger);
+ m.put(long.class, addLong);
+ m.put(boolean.class, addBoolean);
+ m.put(float.class, addFloat);
+ m.put(double.class, addDouble);
+ return Collections.unmodifiableMap(m);
+ }
+
+ private RecordConsumerMethods() {}
+ }
+
+ static class ByteBuddyMessageWritersMethods {
+ final Method getRecordConsumer = ReflectionUtil.getDeclaredMethod(
+ WriteSupport.ByteBuddyMessageWriters.class, "getRecordConsumer");
+ final Method enumNameNumberPairs = ReflectionUtil.getDeclaredMethod(
+ WriteSupport.ByteBuddyMessageWriters.class, "enumNameNumberPairs", String.class);
+
+ private ByteBuddyMessageWritersMethods() {}
+ }
+
+ static class FieldWriterMethods {
+ final Method writeRawValue = ReflectionUtil.getDeclaredMethod(
+ ProtoWriteSupport.FieldWriter.class, "writeRawValue", Object.class);
+ }
+
+ private ResolvedReflection() {}
+ }
+
+ static class Codegen {
+ public static StackManipulation incIntVar(LocalVar var, int inc) {
+ int offset = var.offset();
+ return new StackManipulation.AbstractBase() {
+ @Override
+ public Size apply(MethodVisitor methodVisitor, Implementation.Context implementationContext) {
+ methodVisitor.visitIincInsn(offset, inc);
+ return Size.ZERO;
+ }
+ };
+ }
+
+ private static StackManipulation jumpTo(int jumpInst, Label label) {
+ return new StackManipulation.AbstractBase() {
+ @Override
+ public Size apply(MethodVisitor methodVisitor, Implementation.Context implementationContext) {
+ methodVisitor.visitJumpInsn(jumpInst, label);
+ return Size.ZERO;
+ }
+ };
+ }
+
+ private static StackManipulation visitLabel(Label label) {
+ return new StackManipulation.AbstractBase() {
+ @Override
+ public Size apply(MethodVisitor methodVisitor, Implementation.Context implementationContext) {
+ methodVisitor.visitLabel(label);
+ return Size.ZERO;
+ }
+ };
+ }
+
+ private static Implementation returnVoid() {
+ return new Implementations(MethodReturn.VOID);
+ }
+
+ public static StackManipulation castLongToInt() {
+ return castPrimitive(Opcodes.L2I);
+ }
+
+ public static StackManipulation castIntToLong() {
+ return castPrimitive(Opcodes.I2L);
+ }
+
+ public static StackManipulation castPrimitive(int opcode) {
+ return new StackManipulation.AbstractBase() {
+ @Override
+ public Size apply(MethodVisitor methodVisitor, Implementation.Context implementationContext) {
+ methodVisitor.visitInsn(opcode);
+ return Size.ZERO;
+ }
+ };
+ }
+
+ public static StackManipulation invokeMethod(Method method) {
+ return MethodInvocation.invoke(new MethodDescription.ForLoadedMethod(method));
+ }
+
+ public static StackManipulation invokeProtoMethod(
+ Class> proto3MessageOrBuilderInterface,
+ String name,
+ Descriptors.FieldDescriptor fieldDescriptor,
+ Class>... parameters) {
+ return invokeMethod(ReflectionUtil.getDeclaredMethod(
+ proto3MessageOrBuilderInterface, fieldDescriptor, name, parameters));
+ }
+
+ public static StackManipulation storeRecordConsumer(LocalVar recordConsumerVar) {
+ return new StackManipulation.Compound(
+ MethodVariableAccess.loadThis(),
+ invokeMethod(Reflection.ByteBuddyProto3FastMessageWriter.getRecordConsumer),
+ recordConsumerVar.store());
+ }
+ }
+
+ static class Implementations implements Implementation {
+ private final List implementations = new ArrayList<>();
+ private final List ongoing = new ArrayList<>();
+
+ private Implementation compound;
+
+ public Implementations() {}
+
+ public Implementations(StackManipulation... stackManipulations) {
+ add(stackManipulations);
+ }
+
+ @Override
+ public ByteCodeAppender appender(Target implementationTarget) {
+ return compound.appender(implementationTarget);
+ }
+
+ @Override
+ public InstrumentedType prepare(InstrumentedType instrumentedType) {
+ if (compound != null) {
+ throw new IllegalStateException();
+ }
+ flushOngoing();
+ compound = new Compound(implementations);
+ return compound.prepare(instrumentedType);
+ }
+
+ public Implementations add(Implementation... implementations) {
+ flushOngoing();
+ this.implementations.addAll(Arrays.asList(implementations));
+ return this;
+ }
+
+ public Implementations add(ByteCodeAppender... appenders) {
+ return add(new Simple(appenders));
+ }
+
+ public Implementations add(StackManipulation... stackManipulations) {
+ ongoing.addAll(Arrays.asList(stackManipulations));
+ return this;
+ }
+
+ private void flushOngoing() {
+ if (!ongoing.isEmpty()) {
+ implementations.add(new Simple(ongoing.toArray(new StackManipulation[0])));
+ ongoing.clear();
+ }
+ }
+ }
+
+ static class LocalVar implements AutoCloseable {
+ private final LocalVars vars;
+ private final TypeDescription typeDescription;
+ private final Class> clazz;
+ private final int stackSize;
+
+ private int refCount;
+
+ private int offset;
+
+ public LocalVar(Class> clazz, TypeDescription typeDescription, LocalVars vars) {
+ this.clazz = clazz;
+ this.typeDescription = typeDescription;
+ this.vars = vars;
+ this.stackSize = StackSize.of(typeDescription);
+ }
+
+ public LocalVars vars() {
+ return vars;
+ }
+
+ public int offset() {
+ assertRegistered();
+ return offset;
+ }
+
+ public TypeDescription typeDescription() {
+ return typeDescription;
+ }
+
+ public StackManipulation load() {
+ return MethodVariableAccess.of(typeDescription()).loadFrom(offset());
+ }
+
+ public StackManipulation store() {
+ return MethodVariableAccess.of(typeDescription()).storeAt(offset());
+ }
+
+ public Class> clazz() {
+ if (clazz == null) {
+ throw new IllegalStateException();
+ }
+ return clazz;
+ }
+
+ private int stackSize() {
+ return stackSize;
+ }
+
+ public LocalVar register() {
+ vars.register(this);
+ return this;
+ }
+
+ public LocalVar alias() {
+ assertRegistered();
+ refCount += 1;
+ return this;
+ }
+
+ public LocalVar unregister() {
+ int index = assertRegistered();
+ refCount -= 1;
+ if (refCount == 0) {
+ if (index != vars.vars.size() - 1) {
+ throw new IllegalStateException("cannot deregister var " + this + " from " + vars.vars);
+ }
+ vars.vars.remove(this);
+ }
+ return this;
+ }
+
+ private int assertRegistered() {
+ int index = getIndex();
+ if (index < 0) {
+ throw new IllegalStateException("not registered");
+ }
+ return index;
+ }
+
+ private int getIndex() {
+ return vars.vars.indexOf(this);
+ }
+
+ @Override
+ public void close() {
+ unregister();
+ }
+
+ @Override
+ public String toString() {
+ return "LocalVar{" + "vars="
+ + vars + ", typeDescription="
+ + typeDescription + ", stackSize="
+ + stackSize + ", offset="
+ + offset + '}';
+ }
+ }
+
+ static class LocalVars {
+ private final List frame = new ArrayList<>();
+ private final List vars = new ArrayList<>();
+ private int maxSize;
+
+ public LocalVar register(LocalVar var) {
+ if (vars.contains(var)) {
+ throw new IllegalStateException("cannot register var twice: " + var + ", " + vars);
+ }
+ int offset =
+ vars.isEmpty() ? 0 : vars.get(vars.size() - 1).offset + vars.get(vars.size() - 1).stackSize;
+ vars.add(var);
+ var.offset = offset;
+ var.refCount = 1;
+
+ maxSize = Math.max(maxSize, getSize());
+ return var;
+ }
+
+ public StackManipulation frameSame1(Class> varOnStack) {
+ List currTypes = types();
+ try {
+ return new StackManipulation.AbstractBase() {
+ @Override
+ public Size apply(MethodVisitor methodVisitor, Implementation.Context implementationContext) {
+ implementationContext
+ .getFrameGeneration()
+ .same1(methodVisitor, TypeDescription.ForLoadedType.of(varOnStack), currTypes);
+ return Size.ZERO;
+ }
+ };
+ } finally {
+ this.frame.clear();
+ this.frame.addAll(currTypes);
+ }
+ }
+
+ public StackManipulation frameEmptyStack() {
+ List currTypes = types();
+ List frame = new ArrayList<>(this.frame);
+ try {
+ if (currTypes.size() < frame.size()) {
+ int commonLength = commonTypesLength(currTypes, frame);
+ if (commonLength < currTypes.size() || frame.size() - currTypes.size() > 3) {
+ return new StackManipulation.AbstractBase() {
+ @Override
+ public Size apply(
+ MethodVisitor methodVisitor, Implementation.Context implementationContext) {
+ implementationContext
+ .getFrameGeneration()
+ .full(methodVisitor, Collections.emptyList(), currTypes);
+ return Size.ZERO;
+ }
+ };
+ } else {
+ return new StackManipulation.AbstractBase() {
+ @Override
+ public Size apply(
+ MethodVisitor methodVisitor, Implementation.Context implementationContext) {
+ implementationContext
+ .getFrameGeneration()
+ .chop(methodVisitor, frame.size() - currTypes.size(), currTypes);
+ return Size.ZERO;
+ }
+ };
+ }
+ } else if (currTypes.size() == frame.size()) {
+ int commonLength = commonTypesLength(currTypes, frame);
+ if (commonLength != currTypes.size()) {
+ return new StackManipulation.AbstractBase() {
+ @Override
+ public Size apply(
+ MethodVisitor methodVisitor, Implementation.Context implementationContext) {
+ implementationContext
+ .getFrameGeneration()
+ .full(methodVisitor, Collections.emptyList(), currTypes);
+ return Size.ZERO;
+ }
+ };
+ } else {
+ return new StackManipulation.AbstractBase() {
+ @Override
+ public Size apply(
+ MethodVisitor methodVisitor, Implementation.Context implementationContext) {
+ implementationContext.getFrameGeneration().same(methodVisitor, currTypes);
+ return Size.ZERO;
+ }
+ };
+ }
+ } else {
+ int commonLength = commonTypesLength(currTypes, frame);
+ if (commonLength < frame.size() || currTypes.size() - frame.size() > 3) {
+ return new StackManipulation.AbstractBase() {
+ @Override
+ public Size apply(
+ MethodVisitor methodVisitor, Implementation.Context implementationContext) {
+ implementationContext
+ .getFrameGeneration()
+ .full(methodVisitor, Collections.emptyList(), currTypes);
+ return Size.ZERO;
+ }
+ };
+ } else {
+ return new StackManipulation.AbstractBase() {
+ @Override
+ public Size apply(
+ MethodVisitor methodVisitor, Implementation.Context implementationContext) {
+ implementationContext
+ .getFrameGeneration()
+ .append(
+ methodVisitor,
+ currTypes.subList(frame.size(), currTypes.size()),
+ frame);
+ return Size.ZERO;
+ }
+ };
+ }
+ }
+ } finally {
+ this.frame.clear();
+ this.frame.addAll(currTypes);
+ }
+ }
+
+ private int commonTypesLength(List a, List b) {
+ int len = Math.min(a.size(), b.size());
+ for (int i = 0; i < len; i++) {
+ if (!Objects.equals(a.get(i), b.get(i))) {
+ return i;
+ }
+ }
+ return len;
+ }
+
+ public LocalVar register(TypeDescription typeDescription) {
+ LocalVar var = new LocalVar(null, typeDescription, this);
+ return register(var);
+ }
+
+ public LocalVar register(Class> clazz) {
+ LocalVar var = new LocalVar(clazz, TypeDescription.ForLoadedType.of(clazz), this);
+ return register(var);
+ }
+
+ public Implementation asImplementation() {
+ return new Implementation.Simple(new ByteCodeAppender() {
+ @Override
+ public Size apply(
+ MethodVisitor methodVisitor,
+ Implementation.Context implementationContext,
+ MethodDescription instrumentedMethod) {
+ return new Size(0, maxSize);
+ }
+ });
+ }
+
+ private int getSize() {
+ int size = 0;
+ for (LocalVar var : vars) {
+ size += var.stackSize();
+ }
+ return size;
+ }
+
+ private List types() {
+ List types = new ArrayList<>();
+ for (LocalVar var : vars) {
+ types.add(var.typeDescription);
+ }
+ return types;
+ }
+ }
+ }
+
+ static class ReflectionUtil {
+
+ static Optional extends Class extends MessageOrBuilder>> getMessageOrBuilderInterface(
+ Class extends Message> messageClass) {
+ return Stream.of(messageClass)
+ .filter(Objects::nonNull)
+ .filter(ByteBuddyCodeGen::isGeneratedMessage)
+ .flatMap(x -> Arrays.stream(x.getInterfaces()))
+ .filter(MessageOrBuilder.class::isAssignableFrom)
+ .map(x -> (Class extends MessageOrBuilder>) x)
+ .findFirst();
+ }
+
+ static Method getDeclaredMethod(Class> clazz, String name, Class>... parameterTypes) {
+ try {
+ return clazz.getDeclaredMethod(name, parameterTypes);
+ } catch (NoSuchMethodException e) {
+ throw new CodeGenException(e);
+ }
+ }
+
+ static Method getDeclaredMethod(
+ Class> protoClazz, Descriptors.FieldDescriptor fieldDescriptor, String name, Class>... parameters) {
+ return getDeclaredMethod(
+ protoClazz, name.replace("{}", getFieldNameForMethod(fieldDescriptor)), parameters);
+ }
+
+ static Method getDeclaredMethodByName(Class> clazz, String name) {
+ for (Method method : clazz.getDeclaredMethods()) {
+ if (name.equals(method.getName())) {
+ return method;
+ }
+ }
+ throw new CodeGenException("no such method on class " + clazz + ": " + name);
+ }
+
+ static Method getDeclaredMethodByName(
+ Class> clazz, Descriptors.FieldDescriptor fieldDescriptor, String name) {
+ return getDeclaredMethodByName(clazz, name.replace("{}", getFieldNameForMethod(fieldDescriptor)));
+ }
+
+ // almost the same as com.google.protobuf.Descriptors.FieldDescriptor#fieldNameToJsonName
+ // but capitalizing the first letter after each last digit
+ static String getFieldNameForMethod(Descriptors.FieldDescriptor fieldDescriptor) {
+ String name = fieldDescriptor.getType() == Descriptors.FieldDescriptor.Type.GROUP
+ ? fieldDescriptor.getMessageType().getName()
+ : fieldDescriptor.getName();
+ final int length = name.length();
+ StringBuilder result = new StringBuilder(length);
+ boolean isNextUpperCase = false;
+ for (int i = 0; i < length; i++) {
+ char ch = name.charAt(i);
+ if (ch == '_') {
+ isNextUpperCase = true;
+ } else if ('0' <= ch && ch <= '9') {
+ isNextUpperCase = true;
+ result.append(ch);
+ } else if (isNextUpperCase || i == 0) {
+ // This closely matches the logic for ASCII characters in:
+ // http://google3/google/protobuf/descriptor.cc?l=249-251&rcl=228891689
+ if ('a' <= ch && ch <= 'z') {
+ ch = (char) (ch - 'a' + 'A');
+ }
+ result.append(ch);
+ isNextUpperCase = false;
+ } else {
+ result.append(ch);
+ }
+ }
+ return result.toString();
+ }
+
+ static Constructor getConstructor(Class clazz, Class>... parameterTypes) {
+ try {
+ return clazz.getConstructor(parameterTypes);
+ } catch (NoSuchMethodException e) {
+ throw new CodeGenException(e);
+ }
+ }
+
+ static T newInstance(Constructor constructor, Object... initParams) {
+ try {
+ return constructor.newInstance(initParams);
+ } catch (InstantiationException | IllegalAccessException e) {
+ throw new CodeGenException(e);
+ } catch (InvocationTargetException e) {
+ if (e.getCause() instanceof CodeGenException) {
+ throw (CodeGenException) e.getCause();
+ }
+ throw new CodeGenException(e.getCause());
+ }
+ }
+
+ static Optional> classForName(String className) {
+ try {
+ return Optional.of(Class.forName(className, false, ByteBuddyCodeGen.class.getClassLoader()));
+ } catch (ClassNotFoundException e) {
+ return Optional.empty();
+ }
+ }
+ }
+
+ public static class WriteSupport {
+ // in order to avoid class generation for the same proto descriptors, cache implementations.
+ private static final Map.MessageWriter>>
+ WRITERS_CACHE = new MapMaker().weakValues().makeMap();
+
+ private static final Consumer.MessageWriter> NOOP_WRITER_PATCHER = messageWriter -> {};
+ private static final Consumer.MessageWriter> REVERT_WRITER_PATCHER = messageWriter -> {
+ Queue.FieldWriter> queue = new ArrayDeque<>();
+ queue.add(messageWriter);
+
+ while (!queue.isEmpty()) {
+ ProtoWriteSupport>.FieldWriter fw = queue.poll();
+ if (fw instanceof ProtoWriteSupport>.MessageWriter) {
+ ((ProtoWriteSupport>.MessageWriter) fw)
+ .setAlternativeMessageWriter(ProtoWriteSupport.MessageFieldsWriter.NOOP);
+ queue.addAll(Arrays.asList(((ProtoWriteSupport>.MessageWriter) fw).fieldWriters));
+ } else if (fw instanceof ProtoWriteSupport>.ArrayWriter) {
+ queue.add(((ProtoWriteSupport>.ArrayWriter) fw).fieldWriter);
+ } else if (fw instanceof ProtoWriteSupport>.RepeatedWriter) {
+ queue.add(((ProtoWriteSupport>.RepeatedWriter) fw).fieldWriter);
+ } else if (fw instanceof ProtoWriteSupport>.MapWriter) {
+ queue.add(((ProtoWriteSupport>.MapWriter) fw).keyWriter);
+ queue.add(((ProtoWriteSupport>.MapWriter) fw).valueWriter);
+ }
+ }
+ };
+
+ static class MessageFieldsWritersCacheKey {
+ private final MessageType rootSchema;
+ private final Class extends Message> protoMessage;
+ private final boolean writeSpecsCompliant;
+ private final boolean protoReflectionForExtendable;
+
+ MessageFieldsWritersCacheKey(
+ MessageType rootSchema,
+ Class extends Message> protoMessage,
+ boolean writeSpecsCompliant,
+ boolean protoReflectionForExtendable) {
+ this.rootSchema = rootSchema;
+ this.protoMessage = protoMessage;
+ this.writeSpecsCompliant = writeSpecsCompliant;
+ this.protoReflectionForExtendable = protoReflectionForExtendable;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o == null || getClass() != o.getClass()) return false;
+ MessageFieldsWritersCacheKey that = (MessageFieldsWritersCacheKey) o;
+ return writeSpecsCompliant == that.writeSpecsCompliant
+ && protoReflectionForExtendable == that.protoReflectionForExtendable
+ && Objects.equals(rootSchema, that.rootSchema)
+ && Objects.equals(protoMessage, that.protoMessage);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(rootSchema, protoMessage, writeSpecsCompliant, protoReflectionForExtendable);
+ }
+ }
+
+ static void tryApplyAlternativeMessageFieldsWriters(
+ ProtoWriteSupport.MessageWriter rootMessageWriter,
+ MessageType rootSchema,
+ Class extends Message> protoMessage,
+ Descriptors.Descriptor descriptor,
+ ProtoWriteSupport.CodegenMode codegenMode) {
+
+ if (!codegenMode.tryCodeGen(protoMessage)) {
+ return;
+ }
+
+ MessageFieldsWritersCacheKey cacheKey = new MessageFieldsWritersCacheKey(
+ rootSchema,
+ protoMessage,
+ rootMessageWriter.getProtoWriteSupport().isWriteSpecsCompliant(),
+ codegenMode.protobufReflectionForExtensions());
+
+ try {
+ Consumer.MessageWriter> messageFieldsWriterPatcher = WRITERS_CACHE.computeIfAbsent(
+ cacheKey,
+ unused -> createMessageFieldsWriterPatcher(
+ rootMessageWriter, protoMessage, descriptor, codegenMode));
+ messageFieldsWriterPatcher.accept(rootMessageWriter);
+ } catch (Throwable t) {
+ if (!codegenMode.ignoreCodeGenException()) {
+ throw t;
+ }
+ REVERT_WRITER_PATCHER.accept(rootMessageWriter);
+ }
+ }
+
+ private static Consumer.MessageWriter> createMessageFieldsWriterPatcher(
+ ProtoWriteSupport>.MessageWriter rootMessageWriter,
+ Class extends Message> protoMessage,
+ Descriptors.Descriptor descriptor,
+ ProtoWriteSupport.CodegenMode codegenMode) {
+ return new ByteBuddyMessageWritersCodeGen(rootMessageWriter, protoMessage, descriptor, codegenMode)
+ .getPatcher();
+ }
+
+ static class Field {
+ private final FieldScanner fieldScanner;
+ private final Field parent;
+ private final ProtoWriteSupport>.FieldWriter fieldWriter;
+
+ private final Descriptors.FieldDescriptor fieldDescriptor; // can be null for root MessageWriter
+ private final Descriptors.Descriptor messageType; // filled for Message fields (incl. Map)
+
+ private final String parquetFieldName;
+ private final int parquetFieldIndex;
+
+ private Type reflectionType;
+ private Object codeGenerationBasicType;
+ private Object codeGenerationKey;
+
+ private List children;
+ private Field mapKey;
+ private Field mapValue;
+
+ private Field(
+ FieldScanner fieldScanner,
+ Field parent,
+ ProtoWriteSupport>.FieldWriter fieldWriter,
+ Descriptors.FieldDescriptor fieldDescriptor,
+ String parquetFieldName,
+ int parquetFieldIndex) {
+ this.fieldScanner = fieldScanner;
+ this.parent = parent;
+ this.fieldWriter = fieldWriter;
+ this.fieldDescriptor = fieldDescriptor;
+ this.messageType = fieldDescriptor.getJavaType() == Descriptors.FieldDescriptor.JavaType.MESSAGE
+ ? fieldDescriptor.getMessageType()
+ : null;
+ this.parquetFieldName = parquetFieldName;
+ this.parquetFieldIndex = parquetFieldIndex;
+ }
+
+ private Field(
+ FieldScanner fieldScanner,
+ ProtoWriteSupport>.MessageWriter messageWriter,
+ Class extends Message> protoMessage,
+ Descriptors.Descriptor messageType) {
+ this.fieldScanner = fieldScanner;
+ this.parent = null;
+ this.fieldWriter = messageWriter;
+ this.fieldDescriptor = null;
+ this.messageType = messageType;
+ this.reflectionType = protoMessage;
+ this.parquetFieldName = null;
+ this.parquetFieldIndex = -1;
+ }
+
+ public String getParquetFieldName() {
+ return parquetFieldName;
+ }
+
+ public int getParquetFieldIndex() {
+ return parquetFieldIndex;
+ }
+
+ public Field getParent() {
+ return parent;
+ }
+
+ @Override
+ public String toString() {
+ List path = new ArrayList<>();
+ Field p = this;
+ while (p != null) {
+ path.add(p.getParquetFieldName());
+ p = p.getParent();
+ }
+ Collections.reverse(path);
+ return String.valueOf(path);
+ }
+
+ public Descriptors.Descriptor getMessageType() {
+ return messageType;
+ }
+
+ // helps codegen to deal with particular java getter for a proto field
+ public Type getReflectionType() {
+ if (reflectionType == null) {
+ reflectionType = initReflectionType();
+ }
+ return reflectionType;
+ }
+
+ public Class extends MessageOrBuilder> getMessageOrBuilderInterface() {
+ if (!isProtoMessage()) {
+ throw new CodeGenException();
+ }
+ return ReflectionUtil.getMessageOrBuilderInterface((Class extends Message>) getReflectionType())
+ .get();
+ }
+
+ public boolean isList() {
+ return !isMap() && fieldDescriptor != null && fieldDescriptor.isRepeated();
+ }
+
+ private Type initReflectionType() {
+ // parent is always not null here
+ if (isMap()) {
+ return initMapReflectionType();
+ } else if (parent.isMap()) {
+ MapReflectionType mapReflectionType = (MapReflectionType) parent.getReflectionType();
+ return fieldDescriptor.getIndex() == 0 ? mapReflectionType.key() : mapReflectionType.value();
+ } else {
+ return initRegularFieldReflectionType();
+ }
+ }
+
+ private Type initRegularFieldReflectionType() {
+ Class> clazz;
+ Class> parentProtoMessage = (Class>) parent.getReflectionType();
+ if (fieldDescriptor.isRepeated()) {
+ clazz = ReflectionUtil.getDeclaredMethod(parentProtoMessage, fieldDescriptor, "get{}", int.class)
+ .getReturnType();
+ } else {
+ clazz = ReflectionUtil.getDeclaredMethod(parentProtoMessage, fieldDescriptor, "get{}")
+ .getReturnType();
+ }
+ if (fieldDescriptor.getJavaType() == Descriptors.FieldDescriptor.JavaType.ENUM) {
+ return new EnumReflectionType(clazz, fieldDescriptor);
+ }
+ return clazz;
+ }
+
+ private Type initMapReflectionType() {
+ Class> parentProtoMessage = (Class>) parent.getReflectionType();
+ Method method =
+ ReflectionUtil.getDeclaredMethodByName(parentProtoMessage, fieldDescriptor, "get{}OrThrow");
+ Descriptors.FieldDescriptor valueFieldDescriptor =
+ fieldDescriptor.getMessageType().getFields().get(1);
+ Type valueType;
+ if (valueFieldDescriptor.getJavaType() == Descriptors.FieldDescriptor.JavaType.ENUM) {
+ valueType = new EnumReflectionType(method.getReturnType(), valueFieldDescriptor);
+ } else {
+ valueType = method.getReturnType();
+ }
+ return new MapReflectionType(method.getParameterTypes()[0], valueType);
+ }
+
+ // helps codegen to identify unique methods and supporting fields to write messages, map entries and enums
+ public Object getCodeGenerationElementKey() {
+ if (codeGenerationKey == null) {
+ codeGenerationKey = initCodeGenerationKey();
+ }
+ return codeGenerationKey;
+ }
+
+ private Object initCodeGenerationKey() {
+ if (isMessage() || (isMap() && mapValue().isMessage())) {
+ List