From 15070d31c45acfec32a9b57459561517333cd1ba Mon Sep 17 00:00:00 2001
From: "chenweiguo.vc"
Date: Thu, 17 Oct 2024 14:27:19 +0800
Subject: [PATCH 1/2] GH-44332: [Java] Implement VectorAppender for BaseVariableWidthViewVector

---
 .../arrow/vector/util/VectorAppender.java     | 61 ++++++++++++++++++-
 .../arrow/vector/util/TestVectorAppender.java | 27 ++++++++
 2 files changed, 86 insertions(+), 2 deletions(-)

diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java
index e703571b374..01a95f78a90 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java
@@ -19,6 +19,8 @@
 import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
 
 import java.util.HashSet;
+import java.util.List;
+import org.apache.arrow.memory.ArrowBuf;
 import org.apache.arrow.memory.util.MemoryUtil;
 import org.apache.arrow.util.Preconditions;
 import org.apache.arrow.vector.BaseFixedWidthVector;
@@ -247,8 +249,63 @@ public ValueVector visit(BaseLargeVariableWidthVector deltaVector, Void value) {
   }
 
   @Override
-  public ValueVector visit(BaseVariableWidthViewVector left, Void value) {
-    throw new UnsupportedOperationException("View vectors are not supported.");
+  public ValueVector visit(BaseVariableWidthViewVector deltaVector, Void value) {
+    Preconditions.checkArgument(
+        typeVisitor.equals(deltaVector),
+        "The targetVector to append must have the same type as the targetVector being appended");
+
+    if (deltaVector.getValueCount() == 0) {
+      return targetVector; // nothing to append, return
+    }
+
+    int oldTargetValueCount = targetVector.getValueCount();
+    int newValueCount = oldTargetValueCount + deltaVector.getValueCount();
+
+    // make sure there is enough capacity
+    while (targetVector.getValueCapacity() < newValueCount) {
+      targetVector.reAlloc();
+    }
+
+    // append validity buffer
+    BitVectorHelper.concatBits(
+        targetVector.getValidityBuffer(),
+        oldTargetValueCount,
+        deltaVector.getValidityBuffer(),
+        deltaVector.getValueCount(),
+        targetVector.getValidityBuffer());
+
+    // append data buffers: retain each delta buffer, then add it to target's buffer list
+    BaseVariableWidthViewVector targetViewVector = (BaseVariableWidthViewVector) targetVector;
+    List<ArrowBuf> targetDataBuffers = targetViewVector.getDataBuffers();
+    final int oldTargetDataBufferCount = targetDataBuffers.size();
+    List<ArrowBuf> deltaVectorDataBuffers = deltaVector.getDataBuffers();
+    deltaVectorDataBuffers.forEach(buf -> buf.getReferenceManager().retain());
+    targetDataBuffers.addAll(deltaVectorDataBuffers);
+
+    // append view buffer
+    ArrowBuf targetViewBuffer = targetVector.getDataBuffer();
+    int ELEMENT_SIZE = BaseVariableWidthViewVector.ELEMENT_SIZE;
+    MemoryUtil.copyMemory(
+        deltaVector.getDataBuffer().memoryAddress(),
+        targetViewBuffer.memoryAddress() + (long) ELEMENT_SIZE * oldTargetValueCount,
+        (long) ELEMENT_SIZE * deltaVector.getValueCount());
+
+    // update view buffer
+    for (int i = oldTargetValueCount; i < newValueCount; i++) {
+      if (targetViewVector.isSet(i) > 0
+          && targetViewVector.getValueLength(i) > BaseVariableWidthViewVector.INLINE_SIZE) {
+        long start =
+            (long) i * ELEMENT_SIZE
+                + BaseVariableWidthViewVector.LENGTH_WIDTH
+                + BaseVariableWidthViewVector.PREFIX_WIDTH;
+        // shift buf id
+        int bufferId = targetViewBuffer.getInt(start);
+        targetViewBuffer.setInt(start, bufferId + oldTargetDataBufferCount);
+      }
+    }
+
+    targetVector.setValueCount(newValueCount);
+    return targetVector;
   }
 
   @Override
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java
index 19eafd1b201..aef3493861c 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java
@@ -34,6 +34,7 @@
 import org.apache.arrow.vector.LargeVarCharVector;
 import org.apache.arrow.vector.ValueVector;
 import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.ViewVarCharVector;
 import org.apache.arrow.vector.compare.Range;
 import org.apache.arrow.vector.compare.RangeEqualsVisitor;
 import org.apache.arrow.vector.compare.TypeEqualsVisitor;
@@ -171,6 +172,32 @@ public void testAppendVariableWidthVector() {
     }
   }
 
+  @Test
+  public void testAppendVariableWidthViewVector() {
+    final int length1 = 10;
+    final int length2 = 5;
+    try (ViewVarCharVector target = new ViewVarCharVector("", allocator);
+        ViewVarCharVector delta = new ViewVarCharVector("", allocator)) {
+
+      target.setValueCount(length1);
+      delta.setValueCount(length2);
+
+      for (int i = 0; i < length1; i++) {
+        target.setSafe(i, (i + "xxxxxxxxxxxx").getBytes());
+      }
+
+      for (int i = 0; i < length2; i++) {
+        delta.setSafe(i, (i + "xxxxxxxxxxxx").getBytes());
+      }
+
+      VectorAppender appender = new VectorAppender(target);
+      delta.accept(appender, null);
+
+      assertEquals(15, target.getValueCount());
+      assertTrue(target.accept(new RangeEqualsVisitor(target, delta), new Range(10, 0, 5)));
+    }
+  }
+
   @Test
   public void testAppendEmptyVariableWidthVector() {
     try (VarCharVector target = new VarCharVector("", allocator);

From cf491f1d0ee3b9b09ed275a01069c8734f4140a3 Mon Sep 17 00:00:00 2001
From: "chenweiguo.vc"
Date: Thu, 17 Oct 2024 14:37:03 +0800
Subject: [PATCH 2/2] format

---
 .../java/org/apache/arrow/vector/util/VectorAppender.java | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java
index 01a95f78a90..37f717bacf0 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java
@@ -284,18 +284,18 @@ public ValueVector visit(BaseVariableWidthViewVector deltaVector, Void value) {
 
     // append view buffer
     ArrowBuf targetViewBuffer = targetVector.getDataBuffer();
-    int ELEMENT_SIZE = BaseVariableWidthViewVector.ELEMENT_SIZE;
     MemoryUtil.copyMemory(
         deltaVector.getDataBuffer().memoryAddress(),
-        targetViewBuffer.memoryAddress() + (long) ELEMENT_SIZE * oldTargetValueCount,
-        (long) ELEMENT_SIZE * deltaVector.getValueCount());
+        targetViewBuffer.memoryAddress()
+            + (long) BaseVariableWidthViewVector.ELEMENT_SIZE * oldTargetValueCount,
+        (long) BaseVariableWidthViewVector.ELEMENT_SIZE * deltaVector.getValueCount());
 
     // update view buffer
     for (int i = oldTargetValueCount; i < newValueCount; i++) {
       if (targetViewVector.isSet(i) > 0
           && targetViewVector.getValueLength(i) > BaseVariableWidthViewVector.INLINE_SIZE) {
         long start =
-            (long) i * ELEMENT_SIZE
+            (long) i * BaseVariableWidthViewVector.ELEMENT_SIZE
                 + BaseVariableWidthViewVector.LENGTH_WIDTH
                 + BaseVariableWidthViewVector.PREFIX_WIDTH;
         // shift buf id