From e798ff6e9d1d40587ee8ec395ef132b2a2776580 Mon Sep 17 00:00:00 2001 From: David Schlosnagle Date: Mon, 11 Aug 2025 13:27:24 -0400 Subject: [PATCH 1/3] Presize JsonStringArrayList vector results --- .../apache/arrow/adapter/avro/AvroToArrowUtils.java | 4 ++-- .../arrow/adapter/avro/AvroToArrowVectorIterator.java | 10 ++++++---- .../apache/arrow/vector/complex/LargeListVector.java | 3 ++- .../arrow/vector/complex/LargeListViewVector.java | 2 +- .../org/apache/arrow/vector/complex/ListVector.java | 2 +- .../apache/arrow/vector/complex/ListViewVector.java | 2 +- 6 files changed, 13 insertions(+), 10 deletions(-) diff --git a/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowUtils.java b/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowUtils.java index aedef7732e..a6e77e4050 100644 --- a/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowUtils.java +++ b/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowUtils.java @@ -1071,8 +1071,8 @@ private static FieldType createFieldType( } private static String convertAliases(Set aliases) { - JsonStringArrayList jsonList = new JsonStringArrayList(); - aliases.stream().forEach(a -> jsonList.add(a)); + JsonStringArrayList jsonList = new JsonStringArrayList(aliases.size()); + jsonList.addAll(aliases); return jsonList.toString(); } } diff --git a/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java b/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java index 4123370061..9b9a7eac2f 100644 --- a/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java +++ b/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java @@ -17,13 +17,14 @@ package org.apache.arrow.adapter.avro; import java.io.EOFException; -import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.stream.Collectors; import org.apache.arrow.adapter.avro.consumers.CompositeAvroConsumer; +import org.apache.arrow.adapter.avro.consumers.Consumer; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.util.ValueVectorUtility; @@ -75,9 +76,10 @@ public static AvroToArrowVectorIterator create( private void initialize() { // create consumers compositeConsumer = AvroToArrowUtils.createCompositeConsumer(schema, config); - List vectors = new ArrayList<>(); - compositeConsumer.getConsumers().forEach(c -> vectors.add(c.getVector())); - List fields = vectors.stream().map(t -> t.getField()).collect(Collectors.toList()); + List vectors = compositeConsumer.getConsumers().stream() + .map(Consumer::getVector) + .collect(Collectors.toList()); + List fields = vectors.stream().map(ValueVector::getField).collect(Collectors.toList()); VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, 0); rootSchema = root.getSchema(); diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index 835d3468f3..b1b7b5b569 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -31,6 +31,7 @@ import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.memory.util.ByteFunctionHelpers; import org.apache.arrow.memory.util.CommonUtil; +import org.apache.arrow.memory.util.LargeMemoryUtil; import org.apache.arrow.memory.util.hash.ArrowBufHasher; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.AddOrGetResult; @@ -861,10 +862,10 @@ public List getObject(int index) { if (isSet(index) == 0) { return null; } - final List vals = new JsonStringArrayList<>(); final long start = offsetBuffer.getLong((long) index * OFFSET_WIDTH); final long end = offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH); final ValueVector vv = getDataVector(); + final List vals = new JsonStringArrayList<>(LargeMemoryUtil.checkedCastToInt(end - start)); for (long i = start; i < end; i++) { vals.add(vv.getObject(checkedCastToInt(i))); } diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java index 394c3c67bb..2da7eb057e 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java @@ -672,10 +672,10 @@ public List getObject(int index) { if (isSet(index) == 0) { return null; } - final List vals = new JsonStringArrayList<>(); final int start = offsetBuffer.getInt(index * OFFSET_WIDTH); final int end = start + sizeBuffer.getInt((index) * SIZE_WIDTH); final ValueVector vv = getDataVector(); + final List vals = new JsonStringArrayList<>(end - start); for (int i = start; i < end; i++) { vals.add(vv.getObject(checkedCastToInt(i))); } diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 2b2817515f..93a313ef4f 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -719,10 +719,10 @@ public List getObject(int index) { if (isSet(index) == 0) { return null; } - final List vals = new JsonStringArrayList<>(); final int start = offsetBuffer.getInt(index * OFFSET_WIDTH); final int end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH); final ValueVector vv = getDataVector(); + final List vals = new JsonStringArrayList<>(end - start); for (int i = start; i < end; i++) { vals.add(vv.getObject(i)); } diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 2b80101926..8711db5e0f 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -678,10 +678,10 @@ public List getObject(int index) { if (isSet(index) == 0) { return null; } - final List vals = new JsonStringArrayList<>(); final int start = offsetBuffer.getInt(index * OFFSET_WIDTH); final int end = start + sizeBuffer.getInt((index) * SIZE_WIDTH); final ValueVector vv = getDataVector(); + final List vals = new JsonStringArrayList<>(end - start); for (int i = start; i < end; i++) { vals.add(vv.getObject(i)); } From c0ae8216cf95656a463b895d5a5c827e53169773 Mon Sep 17 00:00:00 2001 From: David Schlosnagle Date: Tue, 12 Aug 2025 00:05:01 -0400 Subject: [PATCH 2/3] Format LargeListVector.java --- .../java/org/apache/arrow/vector/complex/LargeListVector.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index b1b7b5b569..997b5a8b78 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -865,7 +865,8 @@ public List getObject(int index) { final long start = offsetBuffer.getLong((long) index * OFFSET_WIDTH); final long end = offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH); final ValueVector vv = getDataVector(); - final List vals = new JsonStringArrayList<>(LargeMemoryUtil.checkedCastToInt(end - start)); + final List vals = + new JsonStringArrayList<>(LargeMemoryUtil.checkedCastToInt(end - start)); for (long i = start; i < end; i++) { vals.add(vv.getObject(checkedCastToInt(i))); } From 1955e119188ecfea664c3c10ee66c60c8cdbd0ed Mon Sep 17 00:00:00 2001 From: David Schlosnagle Date: Tue, 12 Aug 2025 12:13:05 -0400 Subject: [PATCH 3/3] mvn spotless:apply --- .../apache/arrow/adapter/avro/AvroToArrowVectorIterator.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java b/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java index 9b9a7eac2f..e82fdc36fb 100644 --- a/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java +++ b/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java @@ -76,7 +76,8 @@ public static AvroToArrowVectorIterator create( private void initialize() { // create consumers compositeConsumer = AvroToArrowUtils.createCompositeConsumer(schema, config); - List vectors = compositeConsumer.getConsumers().stream() + List vectors = + compositeConsumer.getConsumers().stream() .map(Consumer::getVector) .collect(Collectors.toList()); List fields = vectors.stream().map(ValueVector::getField).collect(Collectors.toList());