diff --git a/pom.xml b/pom.xml index cb189dc8f9..ab988c7d05 100644 --- a/pom.xml +++ b/pom.xml @@ -327,8 +327,8 @@ under the License. true UTC - 1048576 + which in turn can cause OOM. Using 2MB - 1 byte to simulate the default limit of 2^31 - 1 bytes. --> + 2097151 false diff --git a/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java b/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java index 7b8d2cdfda..1609e64ca5 100644 --- a/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java @@ -571,10 +571,13 @@ public void reallocDataBuffer(long desiredAllocSize) { return; } - final long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize); + final long newAllocationSize = + Math.min(CommonUtil.nextPowerOfTwo(desiredAllocSize), MAX_BUFFER_SIZE); assert newAllocationSize >= 1; - checkDataBufferSize(newAllocationSize); + if (newAllocationSize < desiredAllocSize) { + checkDataBufferSize(desiredAllocSize); + } final ArrowBuf newBuf = allocator.buffer(newAllocationSize); newBuf.setBytes(0, valueBuffer, 0, valueBuffer.capacity()); diff --git a/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java b/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java index e0e16762f2..beda91dc3f 100644 --- a/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java @@ -550,15 +550,18 @@ public void reallocViewBuffer(long desiredAllocSize) { if (desiredAllocSize == 0) { return; } - long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize); + long newAllocationSize = Math.min(CommonUtil.nextPowerOfTwo(desiredAllocSize), MAX_BUFFER_SIZE); assert newAllocationSize >= 1; - checkDataBufferSize(newAllocationSize); // for each set operation, we have to allocate 16 bytes // here we are 
adjusting the desired allocation-based allocation size // to align with the 16bytes requirement. newAllocationSize = roundUpToMultipleOf16(newAllocationSize); + if (newAllocationSize < desiredAllocSize) { + checkDataBufferSize(desiredAllocSize); + } + final ArrowBuf newBuf = allocator.buffer(newAllocationSize); newBuf.setBytes(0, viewBuffer, 0, viewBuffer.capacity()); @@ -587,10 +590,13 @@ public void reallocViewDataBuffer(long desiredAllocSize) { return; } - final long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize); + final long newAllocationSize = + Math.min(CommonUtil.nextPowerOfTwo(desiredAllocSize), MAX_BUFFER_SIZE); assert newAllocationSize >= 1; - checkDataBufferSize(newAllocationSize); + if (newAllocationSize < desiredAllocSize) { + checkDataBufferSize(desiredAllocSize); + } final ArrowBuf newBuf = allocator.buffer(newAllocationSize); dataBuffers.add(newBuf); diff --git a/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 83e470ae25..daec331831 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -95,7 +95,7 @@ public void init() { private static final byte[] STR5 = "EEE5".getBytes(utf8Charset); private static final byte[] STR6 = "FFFFF6".getBytes(utf8Charset); private static final int MAX_VALUE_COUNT = - (int) (Integer.getInteger("arrow.vector.max_allocation_bytes", Integer.MAX_VALUE) / 7); + (int) (Integer.getInteger("arrow.vector.max_allocation_bytes", Integer.MAX_VALUE) / 9); private static final int MAX_VALUE_COUNT_8BYTE = (int) (MAX_VALUE_COUNT / 2); @AfterEach diff --git a/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java b/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java index f5ec42c71c..bc47150376 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java +++ 
b/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java @@ -24,6 +24,7 @@ import java.nio.charset.StandardCharsets; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.memory.util.CommonUtil; import org.apache.arrow.vector.complex.DenseUnionVector; import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.ListVector; @@ -222,6 +223,17 @@ public void testVariableAllocateAfterReAlloc() throws Exception { } } + @Test + public void testVariableReAllocAbove1GB() throws Exception { + try (final VarCharVector vector = new VarCharVector("", allocator)) { + long desiredSizeAboveLastPowerOf2 = + CommonUtil.nextPowerOfTwo(BaseVariableWidthVector.MAX_ALLOCATION_SIZE) / 2 + 1; + vector.reallocDataBuffer(desiredSizeAboveLastPowerOf2); + + assertTrue(vector.getDataBuffer().capacity() >= desiredSizeAboveLastPowerOf2); + } + } + @Test public void testLargeVariableAllocateAfterReAlloc() throws Exception { try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) { diff --git a/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java b/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java index e1b3889d85..4ee9630a4d 100644 --- a/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java +++ b/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java @@ -28,6 +28,7 @@ import java.util.stream.Stream; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.memory.util.CommonUtil; import org.apache.arrow.vector.BaseValueVector; import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.BigIntVector; @@ -309,7 +310,15 @@ public void testAppendEmptyVariableWidthVector() { @Test public void testAppendLargeAndSmallVariableVectorsWithinLimit() { - int sixteenthOfMaxAllocation = 
Math.toIntExact(BaseValueVector.MAX_ALLOCATION_SIZE / 16); + // Using the max power of 2 allocation size to avoid hitting the max limit at round ups + long maxPowerOfTwoAllocationSize = + CommonUtil.nextPowerOfTwo(BaseValueVector.MAX_ALLOCATION_SIZE); + if (maxPowerOfTwoAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) { + maxPowerOfTwoAllocationSize = + CommonUtil.nextPowerOfTwo(BaseValueVector.MAX_ALLOCATION_SIZE / 2); + } + + int sixteenthOfMaxAllocation = Math.toIntExact(maxPowerOfTwoAllocationSize / 16); try (VarCharVector target = makeVarCharVec(1, sixteenthOfMaxAllocation); VarCharVector delta = makeVarCharVec(sixteenthOfMaxAllocation, 1)) { new VectorAppender(delta).visit(target, null);