diff --git a/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java b/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java index d126266cf5..f6e2a3b225 100644 --- a/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java @@ -49,9 +49,7 @@ public abstract class BaseFixedWidthVector extends BaseValueVector protected final Field field; private int allocationMonitor; - protected ArrowBuf validityBuffer; protected ArrowBuf valueBuffer; - protected int valueCount; /** * Constructs a new instance. @@ -87,7 +85,7 @@ public String getName() { /* TODO: * Once the entire hierarchy has been refactored, move common functions - * like getNullCount(), splitAndTransferValidityBuffer to top level + * like getNullCount() to top level * base class BaseValueVector. * * Along with this, some class members (validityBuffer) can also be @@ -342,9 +340,9 @@ private void allocateBytes(int valueCount) { * slice the source buffer so we have to explicitly allocate the validityBuffer of the target * vector. This is unlike the databuffer which we can always slice for the target vector. */ - private void allocateValidityBuffer(final int validityBufferSize) { - validityBuffer = allocator.buffer(validityBufferSize); - validityBuffer.readerIndex(0); + @Override + protected void allocateValidityBuffer(final long validityBufferSize) { + super.allocateValidityBuffer(validityBufferSize); refreshValueCapacity(); } @@ -656,72 +654,18 @@ private void splitAndTransferValueBuffer( target.refreshValueCapacity(); } - /** - * Validity buffer has multiple cases of split and transfer depending on the starting position of - * the source index. 
- */ - private void splitAndTransferValidityBuffer( - int startIndex, int length, BaseFixedWidthVector target) { - int firstByteSource = BitVectorHelper.byteIndex(startIndex); - int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); - int byteSizeTarget = BitVectorHelper.getValidityBufferSizeFromCount(length); - int offset = startIndex % 8; - - if (length > 0) { - if (offset == 0) { - /* slice */ - if (target.validityBuffer != null) { - target.validityBuffer.getReferenceManager().release(); - } - ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator); - target.refreshValueCapacity(); - } else { - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. - * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. - */ - target.allocateValidityBuffer(byteSizeTarget); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - this.validityBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - this.validityBuffer, firstByteSource + i + 1, offset); - - target.validityBuffer.setByte(i, (b1 + b2)); - } - - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. 
- */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - this.validityBuffer, firstByteSource + byteSizeTarget, offset); - - target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - target.validityBuffer.setByte(byteSizeTarget - 1, b1); - } - } + @Override + protected void sliceAndTransferValidityBuffer( + int startIndex, int length, BaseValueVector target) { + final int firstByteSource = BitVectorHelper.byteIndex(startIndex); + final int byteSizeTarget = BitVectorHelper.getValidityBufferSizeFromCount(length); + + if (target.validityBuffer != null) { + target.validityBuffer.getReferenceManager().release(); } + ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); + target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator); + ((BaseFixedWidthVector) target).refreshValueCapacity(); } /*----------------------------------------------------------------* diff --git a/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java b/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java index 4245e0053b..6c451f10a7 100644 --- a/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java @@ -52,10 +52,8 @@ public abstract class BaseLargeVariableWidthVector extends BaseValueVector /* protected members */ public static final int OFFSET_WIDTH = 8; /* 8 byte unsigned int to track offsets */ protected static final byte[] emptyByteArray = new byte[] {}; - protected ArrowBuf validityBuffer; protected ArrowBuf valueBuffer; protected ArrowBuf offsetBuffer; - 
protected int valueCount; protected int lastSet; protected final Field field; @@ -501,10 +499,9 @@ private ArrowBuf allocateOffsetBuffer(final long size) { } /* allocate validity buffer */ - private void allocateValidityBuffer(final long size) { - validityBuffer = allocator.buffer(size); - validityBuffer.readerIndex(0); - initValidityBuffer(); + @Override + protected void allocateValidityBuffer(final long size) { + super.allocateValidityBuffer(size); } /** @@ -809,69 +806,17 @@ private void splitAndTransferOffsetBuffer( target.valueBuffer = transferBuffer(slicedBuffer, target.allocator); } - /* - * Transfer the validity. - */ - private void splitAndTransferValidityBuffer( - int startIndex, int length, BaseLargeVariableWidthVector target) { - int firstByteSource = BitVectorHelper.byteIndex(startIndex); - int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); - int byteSizeTarget = BitVectorHelper.getValidityBufferSizeFromCount(length); - int offset = startIndex % 8; + @Override + protected void sliceAndTransferValidityBuffer( + int startIndex, int length, BaseValueVector target) { + final int firstByteSource = BitVectorHelper.byteIndex(startIndex); + final int byteSizeTarget = BitVectorHelper.getValidityBufferSizeFromCount(length); - if (length > 0) { - if (offset == 0) { - // slice - if (target.validityBuffer != null) { - target.validityBuffer.getReferenceManager().release(); - } - target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer.getReferenceManager().retain(); - } else { - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. - * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. 
- */ - target.allocateValidityBuffer(byteSizeTarget); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - this.validityBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - this.validityBuffer, firstByteSource + i + 1, offset); - - target.validityBuffer.setByte(i, (b1 + b2)); - } - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. - */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - this.validityBuffer, firstByteSource + byteSizeTarget, offset); - - target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - target.validityBuffer.setByte(byteSizeTarget - 1, b1); - } - } + if (target.validityBuffer != null) { + target.validityBuffer.getReferenceManager().release(); } + target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); + target.validityBuffer.getReferenceManager().retain(); } /*----------------------------------------------------------------* diff --git a/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java b/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java index 7bff431e40..37dfa20616 100644 --- a/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java @@ -48,6 +48,10 @@ public 
abstract class BaseValueVector implements ValueVector { protected volatile FieldReader fieldReader; + protected ArrowBuf validityBuffer; + + protected int valueCount; + protected BaseValueVector(BufferAllocator allocator) { this.allocator = Preconditions.checkNotNull(allocator, "allocator cannot be null"); } @@ -255,4 +259,116 @@ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { throw new UnsupportedOperationException(); } + + /** + * Transfer the validity buffer from `validityBuffer` to the target vector's `validityBuffer`. + * Start at `startIndex` and copy `length` number of elements. If the starting index is byte + * aligned (a multiple of 8), then the buffer is sliced from that index and ownership is + * transferred. If not, individual bytes are copied. + * + * @param startIndex starting index + * @param length number of elements to be copied + * @param target target vector + */ + protected void splitAndTransferValidityBuffer( + int startIndex, int length, BaseValueVector target) { + int offset = startIndex % 8; + + if (length <= 0) { + return; + } + if (offset == 0) { + sliceAndTransferValidityBuffer(startIndex, length, target); + } else { + copyValidityBuffer(startIndex, length, target); + } + } + + /** + * If the start index is byte aligned (a multiple of 8), slice `validityBuffer` and transfer + * ownership to `target`'s `validityBuffer`. 
+ * + * @param startIndex starting index + * @param length number of elements to be copied + * @param target target vector + */ + protected void sliceAndTransferValidityBuffer( + int startIndex, int length, BaseValueVector target) { + final int firstByteSource = BitVectorHelper.byteIndex(startIndex); + final int byteSizeTarget = getValidityBufferSizeFromCount(length); + + if (target.validityBuffer != null) { + target.validityBuffer.getReferenceManager().release(); + } + target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); + target.validityBuffer.getReferenceManager().retain(1); + } + + /** + * Allocate new validity buffer for `target` and copy bytes from `validityBuffer`. Precise details + * in the comments below. + * + * @param startIndex starting index + * @param length number of elements to be copied + * @param target target vector + */ + protected void copyValidityBuffer(int startIndex, int length, BaseValueVector target) { + final int firstByteSource = BitVectorHelper.byteIndex(startIndex); + final int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); + final int byteSizeTarget = getValidityBufferSizeFromCount(length); + final int offset = startIndex % 8; + + /* Copy data + * When the first bit starts from the middle of a byte (offset != 0), + * copy data from src BitVector. + * Each byte in the target is composed by a part in i-th byte, + * another part in (i+1)-th byte. 
+ */ + target.allocateValidityBuffer(byteSizeTarget); + + for (int i = 0; i < byteSizeTarget - 1; i++) { + byte b1 = + BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, firstByteSource + i, offset); + byte b2 = + BitVectorHelper.getBitsFromNextByte(this.validityBuffer, firstByteSource + i + 1, offset); + + target.validityBuffer.setByte(i, (b1 + b2)); + } + + /* Copying the last piece is done in the following manner: + * if the source vector has 1 or more bytes remaining, we copy + * the last piece as a byte formed by shifting data + * from the current byte and the next byte. + * + * if the source vector has no more bytes remaining + * (we are at the last byte), we copy the last piece as a byte + * by shifting data from the current byte. + */ + if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { + byte b1 = + BitVectorHelper.getBitsFromCurrentByte( + this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); + byte b2 = + BitVectorHelper.getBitsFromNextByte( + this.validityBuffer, firstByteSource + byteSizeTarget, offset); + + target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); + } else { + byte b1 = + BitVectorHelper.getBitsFromCurrentByte( + this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); + target.validityBuffer.setByte(byteSizeTarget - 1, b1); + } + } + + /** + * Allocate new validity buffer for when the bytes need to be copied over. 
+ * + * @param byteSizeTarget desired size of the buffer + */ + protected void allocateValidityBuffer(long byteSizeTarget) { + validityBuffer = allocator.buffer(byteSizeTarget); + validityBuffer.readerIndex(0); + validityBuffer.setZero(0, validityBuffer.capacity()); + } } diff --git a/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java b/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java index 4f681311ed..96e2afbd29 100644 --- a/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java @@ -50,10 +50,8 @@ public abstract class BaseVariableWidthVector extends BaseValueVector /* protected members */ public static final int OFFSET_WIDTH = 4; /* 4 byte unsigned int to track offsets */ protected static final byte[] emptyByteArray = new byte[] {}; - protected ArrowBuf validityBuffer; protected ArrowBuf valueBuffer; protected ArrowBuf offsetBuffer; - protected int valueCount; protected int lastSet; protected final Field field; @@ -87,7 +85,7 @@ public String getName() { /* TODO: * Once the entire hierarchy has been refactored, move common functions - * like getNullCount(), splitAndTransferValidityBuffer to top level + * like getNullCount() to top level * base class BaseValueVector. * * Along with this, some class members (validityBuffer) can also be @@ -519,11 +517,9 @@ private ArrowBuf allocateOffsetBuffer(final long size) { } /* allocate validity buffer */ - private void allocateValidityBuffer(final long size) { - final int curSize = (int) size; - validityBuffer = allocator.buffer(curSize); - validityBuffer.readerIndex(0); - initValidityBuffer(); + @Override + protected void allocateValidityBuffer(final long size) { + super.allocateValidityBuffer(size); } /** @@ -856,70 +852,17 @@ private void splitAndTransferOffsetBuffer( target.valueBuffer = transferBuffer(slicedBuffer, target.allocator); } - /* - * Transfer the validity. 
- */ - private void splitAndTransferValidityBuffer( - int startIndex, int length, BaseVariableWidthVector target) { - if (length <= 0) { - return; - } - + @Override + protected void sliceAndTransferValidityBuffer( + int startIndex, int length, BaseValueVector target) { final int firstByteSource = BitVectorHelper.byteIndex(startIndex); - final int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); final int byteSizeTarget = BitVectorHelper.getValidityBufferSizeFromCount(length); - final int offset = startIndex % 8; - if (offset == 0) { - // slice - if (target.validityBuffer != null) { - target.validityBuffer.getReferenceManager().release(); - } - final ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator); - return; - } - - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. - * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. - */ - target.allocateValidityBuffer(byteSizeTarget); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte(this.validityBuffer, firstByteSource + i + 1, offset); - - target.validityBuffer.setByte(i, (b1 + b2)); - } - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. 
- */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - this.validityBuffer, firstByteSource + byteSizeTarget, offset); - - target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - target.validityBuffer.setByte(byteSizeTarget - 1, b1); + if (target.validityBuffer != null) { + target.validityBuffer.getReferenceManager().release(); } + final ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); + target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator); } /*----------------------------------------------------------------* diff --git a/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java b/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java index 5e25ffa568..ea9de8320e 100644 --- a/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java @@ -78,13 +78,11 @@ public abstract class BaseVariableWidthViewVector extends BaseValueVector // The third 4 bytes of view are allocated for buffer index public static final int BUF_INDEX_WIDTH = 4; public static final byte[] EMPTY_BYTE_ARRAY = new byte[] {}; - protected ArrowBuf validityBuffer; // The view buffer is used to store the variable width view elements protected ArrowBuf viewBuffer; // The external buffer which stores the long strings protected List dataBuffers; protected int initialDataBufferSize; - protected int valueCount; protected int lastSet; protected final Field field; @@ -117,7 +115,7 @@ public String getName() { /* TODO: * Once the entire hierarchy has been refactored, move common 
functions - * like getNullCount(), splitAndTransferValidityBuffer to top level + * like getNullCount() to top level * base class BaseValueVector. * * Along with this, some class members (validityBuffer) can also be @@ -129,12 +127,6 @@ public String getName() { * the top class as of now is not a good idea. */ - /* TODO: - * Implement TransferPair functionality - * https://github.com/apache/arrow/issues/40932 - * - */ - /** * Get buffer that manages the validity (NULL or NON-NULL nature) of elements in the vector. * Consider it as a buffer for internal bit vector data structure. @@ -854,77 +846,22 @@ public void splitAndTransferTo(int startIndex, int length, BaseVariableWidthView } /* allocate validity buffer */ - private void allocateValidityBuffer(final long size) { - final int curSize = (int) size; - validityBuffer = allocator.buffer(curSize); - validityBuffer.readerIndex(0); - initValidityBuffer(); + @Override + protected void allocateValidityBuffer(final long size) { + super.allocateValidityBuffer(size); } - /* - * Transfer the validity. 
- */ - private void splitAndTransferValidityBuffer( - int startIndex, int length, BaseVariableWidthViewVector target) { - if (length <= 0) { - return; - } - + @Override + protected void sliceAndTransferValidityBuffer( + int startIndex, int length, BaseValueVector target) { final int firstByteSource = BitVectorHelper.byteIndex(startIndex); - final int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); final int byteSizeTarget = BitVectorHelper.getValidityBufferSizeFromCount(length); - final int offset = startIndex % 8; - - if (offset == 0) { - // slice - if (target.validityBuffer != null) { - target.validityBuffer.getReferenceManager().release(); - } - final ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator); - return; - } - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. - * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. - */ - target.allocateValidityBuffer(byteSizeTarget); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte(this.validityBuffer, firstByteSource + i + 1, offset); - - target.validityBuffer.setByte(i, (b1 + b2)); - } - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. 
- */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - this.validityBuffer, firstByteSource + byteSizeTarget, offset); - - target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - target.validityBuffer.setByte(byteSizeTarget - 1, b1); + if (target.validityBuffer != null) { + target.validityBuffer.getReferenceManager().release(); } + final ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); + target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator); } /** diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/BaseLargeRepeatedValueViewVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/BaseLargeRepeatedValueViewVector.java index 12edd6557b..fac3f86bba 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/BaseLargeRepeatedValueViewVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/BaseLargeRepeatedValueViewVector.java @@ -52,7 +52,6 @@ public abstract class BaseLargeRepeatedValueViewVector extends BaseValueVector protected ArrowBuf sizeBuffer; protected FieldVector vector; protected final CallBack repeatedCallBack; - protected int valueCount; protected long offsetAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH; protected long sizeAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * SIZE_WIDTH; private final String name; diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java index fbe83bad52..ee1d65d3e3 100644 --- 
a/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java @@ -54,7 +54,6 @@ public abstract class BaseRepeatedValueVector extends BaseValueVector protected ArrowBuf offsetBuffer; protected FieldVector vector; protected final CallBack repeatedCallBack; - protected int valueCount; protected long offsetAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH; private final String name; diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java index e6213316b5..fd7a4ff2c6 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java @@ -52,7 +52,6 @@ public abstract class BaseRepeatedValueViewVector extends BaseValueVector protected ArrowBuf sizeBuffer; protected FieldVector vector; protected final CallBack repeatedCallBack; - protected int valueCount; protected long offsetAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH; protected long sizeAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * SIZE_WIDTH; private final String name; diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java index 36d9ff40ed..e3b4ab477f 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java @@ -69,12 +69,10 @@ public static FixedSizeListVector empty(String name, int size, BufferAllocator a } private FieldVector vector; - private ArrowBuf validityBuffer; private final int listSize; private Field field; private UnionFixedSizeListReader reader; - private int valueCount; private 
int validityAllocationSizeInBytes; /** @@ -248,12 +246,10 @@ public boolean allocateNewSafe() { return success; } - private void allocateValidityBuffer(final long size) { - final int curSize = (int) size; - validityBuffer = allocator.buffer(curSize); - validityBuffer.readerIndex(0); - validityAllocationSizeInBytes = curSize; - validityBuffer.setZero(0, validityBuffer.capacity()); + @Override + protected void allocateValidityBuffer(final long size) { + super.allocateValidityBuffer(size); + validityAllocationSizeInBytes = (int) size; } @Override @@ -649,71 +645,6 @@ public void splitAndTransfer(int startIndex, int length) { to.setValueCount(length); } - /* - * transfer the validity. - */ - private void splitAndTransferValidityBuffer( - int startIndex, int length, FixedSizeListVector target) { - int firstByteSource = BitVectorHelper.byteIndex(startIndex); - int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); - int byteSizeTarget = BitVectorHelper.getValidityBufferSizeFromCount(length); - int offset = startIndex % 8; - - if (length > 0) { - if (offset == 0) { - // slice - if (target.validityBuffer != null) { - target.validityBuffer.getReferenceManager().release(); - } - target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer.getReferenceManager().retain(1); - } else { - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. - * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. 
- */ - target.allocateValidityBuffer(byteSizeTarget); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + i + 1, offset); - - target.validityBuffer.setByte(i, (b1 + b2)); - } - - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. - */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + byteSizeTarget, offset); - - target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - target.validityBuffer.setByte(byteSizeTarget - 1, b1); - } - } - } - } - @Override public ValueVector getTo() { return to; diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index 71633441cb..835d3468f3 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -94,11 +94,9 @@ public static LargeListVector empty(String name, BufferAllocator allocator) { protected ArrowBuf offsetBuffer; protected FieldVector vector; protected final CallBack callBack; - protected int valueCount; protected long offsetAllocationSizeInBytes = 
INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH; protected String defaultDataVectorName = DATA_VECTOR_NAME; - protected ArrowBuf validityBuffer; protected UnionLargeListReader reader; private Field field; private int validityAllocationSizeInBytes; @@ -375,12 +373,10 @@ public boolean allocateNewSafe() { return success; } - private void allocateValidityBuffer(final long size) { - final int curSize = (int) size; - validityBuffer = allocator.buffer(curSize); - validityBuffer.readerIndex(0); - validityAllocationSizeInBytes = curSize; - validityBuffer.setZero(0, validityBuffer.capacity()); + @Override + protected void allocateValidityBuffer(final long size) { + super.allocateValidityBuffer(size); + validityAllocationSizeInBytes = (int) size; } protected ArrowBuf allocateOffsetBuffer(final long size) { @@ -694,71 +690,6 @@ public void splitAndTransfer(int startIndex, int length) { to.setValueCount(length); } - /* - * transfer the validity. - */ - private void splitAndTransferValidityBuffer( - int startIndex, int length, LargeListVector target) { - int firstByteSource = BitVectorHelper.byteIndex(startIndex); - int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); - int byteSizeTarget = BitVectorHelper.getValidityBufferSizeFromCount(length); - int offset = startIndex % 8; - - if (length > 0) { - if (offset == 0) { - // slice - if (target.validityBuffer != null) { - target.validityBuffer.getReferenceManager().release(); - } - target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer.getReferenceManager().retain(1); - } else { - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. - * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. 
- */ - target.allocateValidityBuffer(byteSizeTarget); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + i + 1, offset); - - target.validityBuffer.setByte(i, (b1 + b2)); - } - - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. - */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + byteSizeTarget, offset); - - target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - target.validityBuffer.setByte(byteSizeTarget - 1, b1); - } - } - } - } - @Override public ValueVector getTo() { return to; diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java index 1b7e6b2280..394c3c67bb 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java @@ -77,7 +77,6 @@ public class LargeListViewVector extends BaseLargeRepeatedValueViewVector implements PromotableVector, ValueIterableVector> { - protected ArrowBuf validityBuffer; protected UnionLargeListViewReader reader; private CallBack callBack; 
protected Field field; @@ -285,12 +284,10 @@ public boolean allocateNewSafe() { return success; } + @Override protected void allocateValidityBuffer(final long size) { - final int curSize = (int) size; - validityBuffer = allocator.buffer(curSize); - validityBuffer.readerIndex(0); - validityAllocationSizeInBytes = curSize; - validityBuffer.setZero(0, validityBuffer.capacity()); + super.allocateValidityBuffer(size); + validityAllocationSizeInBytes = (int) size; } @Override @@ -531,71 +528,6 @@ public void splitAndTransfer(int startIndex, int length) { } } - /* - * transfer the validity. - */ - private void splitAndTransferValidityBuffer( - int startIndex, int length, LargeListViewVector target) { - int firstByteSource = BitVectorHelper.byteIndex(startIndex); - int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); - int byteSizeTarget = BitVectorHelper.getValidityBufferSizeFromCount(length); - int offset = startIndex % 8; - - if (length > 0) { - if (offset == 0) { - // slice - if (target.validityBuffer != null) { - target.validityBuffer.getReferenceManager().release(); - } - target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer.getReferenceManager().retain(1); - } else { - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. - * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. 
- */ - target.allocateValidityBuffer(byteSizeTarget); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + i + 1, offset); - - target.validityBuffer.setByte(i, (b1 + b2)); - } - - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. - */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + byteSizeTarget, offset); - - target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - target.validityBuffer.setByte(byteSizeTarget - 1, b1); - } - } - } - } - @Override public ValueVector getTo() { return to; diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index a8e8dcc436..2b2817515f 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -74,7 +74,6 @@ public static ListVector empty(String name, BufferAllocator allocator) { return new ListVector(name, allocator, FieldType.nullable(ArrowType.List.INSTANCE), null); } - protected ArrowBuf validityBuffer; protected UnionListReader reader; private CallBack callBack; 
protected Field field; @@ -324,12 +323,10 @@ public boolean allocateNewSafe() { return success; } + @Override protected void allocateValidityBuffer(final long size) { - final int curSize = (int) size; - validityBuffer = allocator.buffer(curSize); - validityBuffer.readerIndex(0); - validityAllocationSizeInBytes = curSize; - validityBuffer.setZero(0, validityBuffer.capacity()); + super.allocateValidityBuffer(size); + validityAllocationSizeInBytes = (int) size; } /** @@ -575,70 +572,6 @@ public void splitAndTransfer(int startIndex, int length) { } } - /* - * transfer the validity. - */ - private void splitAndTransferValidityBuffer(int startIndex, int length, ListVector target) { - int firstByteSource = BitVectorHelper.byteIndex(startIndex); - int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); - int byteSizeTarget = BitVectorHelper.getValidityBufferSizeFromCount(length); - int offset = startIndex % 8; - - if (length > 0) { - if (offset == 0) { - // slice - if (target.validityBuffer != null) { - target.validityBuffer.getReferenceManager().release(); - } - target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer.getReferenceManager().retain(1); - } else { - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. - * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. 
- */ - target.allocateValidityBuffer(byteSizeTarget); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + i + 1, offset); - - target.validityBuffer.setByte(i, (b1 + b2)); - } - - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. - */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + byteSizeTarget, offset); - - target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - target.validityBuffer.setByte(byteSizeTarget - 1, b1); - } - } - } - } - @Override public ValueVector getTo() { return to; diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index ada25bbaf5..2b80101926 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -76,7 +76,6 @@ public class ListViewVector extends BaseRepeatedValueViewVector implements PromotableVector, ValueIterableVector> { - protected ArrowBuf validityBuffer; protected UnionListViewReader reader; private CallBack callBack; protected Field field; @@ -284,12 
+283,10 @@ public boolean allocateNewSafe() { return success; } + @Override protected void allocateValidityBuffer(final long size) { - final int curSize = (int) size; - validityBuffer = allocator.buffer(curSize); - validityBuffer.readerIndex(0); - validityAllocationSizeInBytes = curSize; - validityBuffer.setZero(0, validityBuffer.capacity()); + super.allocateValidityBuffer(size); + validityAllocationSizeInBytes = (int) size; } @Override @@ -538,70 +535,6 @@ public void splitAndTransfer(int startIndex, int length) { } } - /* - * transfer the validity. - */ - private void splitAndTransferValidityBuffer(int startIndex, int length, ListViewVector target) { - int firstByteSource = BitVectorHelper.byteIndex(startIndex); - int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); - int byteSizeTarget = BitVectorHelper.getValidityBufferSizeFromCount(length); - int offset = startIndex % 8; - - if (length > 0) { - if (offset == 0) { - // slice - if (target.validityBuffer != null) { - target.validityBuffer.getReferenceManager().release(); - } - target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer.getReferenceManager().retain(1); - } else { - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. - * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. 
- */ - target.allocateValidityBuffer(byteSizeTarget); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + i + 1, offset); - - target.validityBuffer.setByte(i, (b1 + b2)); - } - - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. - */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + byteSizeTarget, offset); - - target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - target.validityBuffer.setByte(byteSizeTarget - 1, b1); - } - } - } - } - @Override public ValueVector getTo() { return to; diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index 5eb857ab94..3f98322ba9 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -22,7 +22,6 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.AddOrGetResult; -import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; 
import org.apache.arrow.vector.ZeroVector; @@ -232,70 +231,6 @@ public void splitAndTransfer(int startIndex, int length) { } } - /* - * transfer the validity. - */ - private void splitAndTransferValidityBuffer(int startIndex, int length, MapVector target) { - int firstByteSource = BitVectorHelper.byteIndex(startIndex); - int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); - int byteSizeTarget = BitVectorHelper.getValidityBufferSizeFromCount(length); - int offset = startIndex % 8; - - if (length > 0) { - if (offset == 0) { - // slice - if (target.validityBuffer != null) { - target.validityBuffer.getReferenceManager().release(); - } - target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer.getReferenceManager().retain(1); - } else { - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. - * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. - */ - target.allocateValidityBuffer(byteSizeTarget); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + i + 1, offset); - - target.validityBuffer.setByte(i, (b1 + b2)); - } - - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. 
- */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + byteSizeTarget, offset); - - target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - target.validityBuffer.setByte(byteSizeTarget - 1, b1); - } - } - } - } - @Override public ValueVector getTo() { return to;