Skip to content

Commit 1521cf2

Browse files
committed
GH-79: Move splitAndTransferValidityBuffer to BaseValueVector
1 parent abef7af commit 1521cf2

14 files changed

+166
-667
lines changed

vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java

Lines changed: 14 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,7 @@ public abstract class BaseFixedWidthVector extends BaseValueVector
4949

5050
protected final Field field;
5151
private int allocationMonitor;
52-
protected ArrowBuf validityBuffer;
5352
protected ArrowBuf valueBuffer;
54-
protected int valueCount;
5553

5654
/**
5755
* Constructs a new instance.
@@ -87,7 +85,7 @@ public String getName() {
8785

8886
/* TODO:
8987
* Once the entire hierarchy has been refactored, move common functions
90-
* like getNullCount(), splitAndTransferValidityBuffer to top level
88+
* like getNullCount() to top level
9189
* base class BaseValueVector.
9290
*
9391
* Along with this, some class members (validityBuffer) can also be
@@ -342,7 +340,8 @@ private void allocateBytes(int valueCount) {
342340
* slice the source buffer so we have to explicitly allocate the validityBuffer of the target
343341
* vector. This is unlike the databuffer which we can always slice for the target vector.
344342
*/
345-
private void allocateValidityBuffer(final int validityBufferSize) {
343+
@Override
344+
protected void allocateValidityBuffer(final long validityBufferSize) {
346345
validityBuffer = allocator.buffer(validityBufferSize);
347346
validityBuffer.readerIndex(0);
348347
refreshValueCapacity();
@@ -656,72 +655,18 @@ private void splitAndTransferValueBuffer(
656655
target.refreshValueCapacity();
657656
}
658657

659-
/**
660-
* Validity buffer has multiple cases of split and transfer depending on the starting position of
661-
* the source index.
662-
*/
663-
private void splitAndTransferValidityBuffer(
664-
int startIndex, int length, BaseFixedWidthVector target) {
665-
int firstByteSource = BitVectorHelper.byteIndex(startIndex);
666-
int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
667-
int byteSizeTarget = getValidityBufferSizeFromCount(length);
668-
int offset = startIndex % 8;
669-
670-
if (length > 0) {
671-
if (offset == 0) {
672-
/* slice */
673-
if (target.validityBuffer != null) {
674-
target.validityBuffer.getReferenceManager().release();
675-
}
676-
ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
677-
target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator);
678-
target.refreshValueCapacity();
679-
} else {
680-
/* Copy data
681-
* When the first bit starts from the middle of a byte (offset != 0),
682-
* copy data from src BitVector.
683-
* Each byte in the target is composed by a part in i-th byte,
684-
* another part in (i+1)-th byte.
685-
*/
686-
target.allocateValidityBuffer(byteSizeTarget);
687-
688-
for (int i = 0; i < byteSizeTarget - 1; i++) {
689-
byte b1 =
690-
BitVectorHelper.getBitsFromCurrentByte(
691-
this.validityBuffer, firstByteSource + i, offset);
692-
byte b2 =
693-
BitVectorHelper.getBitsFromNextByte(
694-
this.validityBuffer, firstByteSource + i + 1, offset);
695-
696-
target.validityBuffer.setByte(i, (b1 + b2));
697-
}
698-
699-
/* Copying the last piece is done in the following manner:
700-
* if the source vector has 1 or more bytes remaining, we copy
701-
* the last piece as a byte formed by shifting data
702-
* from the current byte and the next byte.
703-
*
704-
* if the source vector has no more bytes remaining
705-
* (we are at the last byte), we copy the last piece as a byte
706-
* by shifting data from the current byte.
707-
*/
708-
if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
709-
byte b1 =
710-
BitVectorHelper.getBitsFromCurrentByte(
711-
this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset);
712-
byte b2 =
713-
BitVectorHelper.getBitsFromNextByte(
714-
this.validityBuffer, firstByteSource + byteSizeTarget, offset);
715-
716-
target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
717-
} else {
718-
byte b1 =
719-
BitVectorHelper.getBitsFromCurrentByte(
720-
this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset);
721-
target.validityBuffer.setByte(byteSizeTarget - 1, b1);
722-
}
723-
}
658+
@Override
659+
protected void sliceAndTransferValidityBuffer(
660+
int startIndex, int length, BaseValueVector target) {
661+
final int firstByteSource = BitVectorHelper.byteIndex(startIndex);
662+
final int byteSizeTarget = getValidityBufferSizeFromCount(length);
663+
664+
if (target.validityBuffer != null) {
665+
target.validityBuffer.getReferenceManager().release();
724666
}
667+
ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
668+
target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator);
669+
((BaseFixedWidthVector) target).refreshValueCapacity();
725670
}
726671

727672
/*----------------------------------------------------------------*

vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java

Lines changed: 11 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,8 @@ public abstract class BaseLargeVariableWidthVector extends BaseValueVector
5252
/* protected members */
5353
public static final int OFFSET_WIDTH = 8; /* 8 byte unsigned int to track offsets */
5454
protected static final byte[] emptyByteArray = new byte[] {};
55-
protected ArrowBuf validityBuffer;
5655
protected ArrowBuf valueBuffer;
5756
protected ArrowBuf offsetBuffer;
58-
protected int valueCount;
5957
protected int lastSet;
6058
protected final Field field;
6159

@@ -501,7 +499,8 @@ private ArrowBuf allocateOffsetBuffer(final long size) {
501499
}
502500

503501
/* allocate validity buffer */
504-
private void allocateValidityBuffer(final long size) {
502+
@Override
503+
protected void allocateValidityBuffer(final long size) {
505504
validityBuffer = allocator.buffer(size);
506505
validityBuffer.readerIndex(0);
507506
initValidityBuffer();
@@ -809,69 +808,17 @@ private void splitAndTransferOffsetBuffer(
809808
target.valueBuffer = transferBuffer(slicedBuffer, target.allocator);
810809
}
811810

812-
/*
813-
* Transfer the validity.
814-
*/
815-
private void splitAndTransferValidityBuffer(
816-
int startIndex, int length, BaseLargeVariableWidthVector target) {
817-
int firstByteSource = BitVectorHelper.byteIndex(startIndex);
818-
int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
819-
int byteSizeTarget = getValidityBufferSizeFromCount(length);
820-
int offset = startIndex % 8;
811+
@Override
812+
protected void sliceAndTransferValidityBuffer(
813+
int startIndex, int length, BaseValueVector target) {
814+
final int firstByteSource = BitVectorHelper.byteIndex(startIndex);
815+
final int byteSizeTarget = getValidityBufferSizeFromCount(length);
821816

822-
if (length > 0) {
823-
if (offset == 0) {
824-
// slice
825-
if (target.validityBuffer != null) {
826-
target.validityBuffer.getReferenceManager().release();
827-
}
828-
target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
829-
target.validityBuffer.getReferenceManager().retain();
830-
} else {
831-
/* Copy data
832-
* When the first bit starts from the middle of a byte (offset != 0),
833-
* copy data from src BitVector.
834-
* Each byte in the target is composed by a part in i-th byte,
835-
* another part in (i+1)-th byte.
836-
*/
837-
target.allocateValidityBuffer(byteSizeTarget);
838-
839-
for (int i = 0; i < byteSizeTarget - 1; i++) {
840-
byte b1 =
841-
BitVectorHelper.getBitsFromCurrentByte(
842-
this.validityBuffer, firstByteSource + i, offset);
843-
byte b2 =
844-
BitVectorHelper.getBitsFromNextByte(
845-
this.validityBuffer, firstByteSource + i + 1, offset);
846-
847-
target.validityBuffer.setByte(i, (b1 + b2));
848-
}
849-
/* Copying the last piece is done in the following manner:
850-
* if the source vector has 1 or more bytes remaining, we copy
851-
* the last piece as a byte formed by shifting data
852-
* from the current byte and the next byte.
853-
*
854-
* if the source vector has no more bytes remaining
855-
* (we are at the last byte), we copy the last piece as a byte
856-
* by shifting data from the current byte.
857-
*/
858-
if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
859-
byte b1 =
860-
BitVectorHelper.getBitsFromCurrentByte(
861-
this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset);
862-
byte b2 =
863-
BitVectorHelper.getBitsFromNextByte(
864-
this.validityBuffer, firstByteSource + byteSizeTarget, offset);
865-
866-
target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
867-
} else {
868-
byte b1 =
869-
BitVectorHelper.getBitsFromCurrentByte(
870-
this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset);
871-
target.validityBuffer.setByte(byteSizeTarget - 1, b1);
872-
}
873-
}
817+
if (target.validityBuffer != null) {
818+
target.validityBuffer.getReferenceManager().release();
874819
}
820+
target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
821+
target.validityBuffer.getReferenceManager().retain();
875822
}
876823

877824
/*----------------------------------------------------------------*

vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ public abstract class BaseValueVector implements ValueVector {
4848

4949
protected volatile FieldReader fieldReader;
5050

51+
protected ArrowBuf validityBuffer;
52+
53+
protected int valueCount;
54+
5155
protected BaseValueVector(BufferAllocator allocator) {
5256
this.allocator = Preconditions.checkNotNull(allocator, "allocator cannot be null");
5357
}
@@ -248,4 +252,114 @@ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
248252
public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) {
249253
throw new UnsupportedOperationException();
250254
}
255+
256+
/**
257+
* Transfer the validity buffer from `validityBuffer` to the target vector's `validityBuffer`.
258+
* Start at `startIndex` and copy `length` elements. If `startIndex % 8 == 0`, i.e. it is byte
259+
* aligned, then the buffer is sliced from that index and ownership is transferred. If not,
260+
* individual bytes are copied.
261+
*
262+
* @param startIndex starting index
263+
* @param length number of elements to be copied
264+
* @param target target vector
265+
*/
266+
protected void splitAndTransferValidityBuffer(
267+
int startIndex, int length, BaseValueVector target) {
268+
int offset = startIndex % 8;
269+
270+
if (length <= 0) {
271+
return;
272+
}
273+
if (offset == 0) {
274+
sliceAndTransferValidityBuffer(startIndex, length, target);
275+
} else {
276+
copyValidityBuffer(startIndex, length, target);
277+
}
278+
}
279+
280+
/**
281+
* If the start index is byte aligned (a multiple of 8), slice `validityBuffer` and transfer ownership to
282+
* `target`'s `validityBuffer`.
283+
*
284+
* @param startIndex starting index
285+
* @param length number of elements to be copied
286+
* @param target target vector
287+
*/
288+
protected void sliceAndTransferValidityBuffer(
289+
int startIndex, int length, BaseValueVector target) {
290+
final int firstByteSource = BitVectorHelper.byteIndex(startIndex);
291+
final int byteSizeTarget = getValidityBufferSizeFromCount(length);
292+
293+
if (target.validityBuffer != null) {
294+
target.validityBuffer.getReferenceManager().release();
295+
}
296+
target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
297+
target.validityBuffer.getReferenceManager().retain(1);
298+
}
299+
300+
/**
301+
* Allocate new validity buffer for `target` and copy bytes from `validityBuffer`. Precise details
302+
* in the comments below.
303+
*
304+
* @param startIndex starting index
305+
* @param length number of elements to be copied
306+
* @param target target vector
307+
*/
308+
protected void copyValidityBuffer(int startIndex, int length, BaseValueVector target) {
309+
final int firstByteSource = BitVectorHelper.byteIndex(startIndex);
310+
final int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
311+
final int byteSizeTarget = getValidityBufferSizeFromCount(length);
312+
final int offset = startIndex % 8;
313+
314+
/* Copy data
315+
* When the first bit starts from the middle of a byte (offset != 0),
316+
* copy data from src BitVector.
317+
* Each byte in the target is composed by a part in i-th byte,
318+
* another part in (i+1)-th byte.
319+
*/
320+
target.allocateValidityBuffer(byteSizeTarget);
321+
322+
for (int i = 0; i < byteSizeTarget - 1; i++) {
323+
byte b1 =
324+
BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, firstByteSource + i, offset);
325+
byte b2 =
326+
BitVectorHelper.getBitsFromNextByte(this.validityBuffer, firstByteSource + i + 1, offset);
327+
328+
target.validityBuffer.setByte(i, (b1 + b2));
329+
}
330+
331+
/* Copying the last piece is done in the following manner:
332+
* if the source vector has 1 or more bytes remaining, we copy
333+
* the last piece as a byte formed by shifting data
334+
* from the current byte and the next byte.
335+
*
336+
* if the source vector has no more bytes remaining
337+
* (we are at the last byte), we copy the last piece as a byte
338+
* by shifting data from the current byte.
339+
*/
340+
if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
341+
byte b1 =
342+
BitVectorHelper.getBitsFromCurrentByte(
343+
this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset);
344+
byte b2 =
345+
BitVectorHelper.getBitsFromNextByte(
346+
this.validityBuffer, firstByteSource + byteSizeTarget, offset);
347+
348+
target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
349+
} else {
350+
byte b1 =
351+
BitVectorHelper.getBitsFromCurrentByte(
352+
this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset);
353+
target.validityBuffer.setByte(byteSizeTarget - 1, b1);
354+
}
355+
}
356+
357+
/**
358+
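* Allocate a new validity buffer for when the bytes need to be copied over. This base implementation throws UnsupportedOperationException; subclasses that support copying override it.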
359+
*
360+
* @param byteSizeTarget desired size of the buffer
361+
*/
362+
protected void allocateValidityBuffer(final long byteSizeTarget) {
363+
throw new UnsupportedOperationException();
364+
}
251365
}

0 commit comments

Comments
 (0)