Skip to content

Commit aec2975

Browse files
committed
GH-470: [Vector] Fix bug of usage sizeBuffer with OFFSET_WIDTH (hashCode) and offsetBuffer with SIZE_WIDTH (setSize) plus refactoring to avoid code duplication in ListViewVector
1 parent a758cad commit aec2975

File tree

1 file changed

+38
-30
lines changed

1 file changed

+38
-30
lines changed

vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java

Lines changed: 38 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -226,8 +226,8 @@ private void setReaderAndWriterIndex() {
226226
sizeBuffer.writerIndex(0);
227227
} else {
228228
validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount));
229-
offsetBuffer.writerIndex(valueCount * OFFSET_WIDTH);
230-
sizeBuffer.writerIndex(valueCount * SIZE_WIDTH);
229+
offsetBuffer.writerIndex((long) valueCount * OFFSET_WIDTH);
230+
sizeBuffer.writerIndex((long) valueCount * SIZE_WIDTH);
231231
}
232232
}
233233

@@ -445,14 +445,22 @@ public int hashCode(int index, ArrowBufHasher hasher) {
445445
return ArrowBufPointer.NULL_HASH_CODE;
446446
}
447447
int hash = 0;
448-
final int start = offsetBuffer.getInt(index * OFFSET_WIDTH);
449-
final int end = sizeBuffer.getInt(index * OFFSET_WIDTH);
448+
final int start = getElementStartIndex(index);
449+
final int end = getElementEndIndex(index);
450450
for (int i = start; i < end; i++) {
451451
hash = ByteFunctionHelpers.combineHash(hash, vector.hashCode(i, hasher));
452452
}
453453
return hash;
454454
}
455455

456+
private void setElementOffsetBuffer(int index, int value) {
457+
offsetBuffer.setInt((long) index * OFFSET_WIDTH, value);
458+
}
459+
460+
private void setElementSizeBuffer(int index, int value) {
461+
sizeBuffer.setInt((long) index * SIZE_WIDTH, value);
462+
}
463+
456464
private class TransferImpl implements TransferPair {
457465

458466
ListViewVector to;
@@ -498,7 +506,6 @@ public void splitAndTransfer(int startIndex, int length) {
498506
valueCount);
499507
to.clear();
500508
if (length > 0) {
501-
final int startPoint = offsetBuffer.getInt((long) startIndex * OFFSET_WIDTH);
502509
// we have to scan by index since there are out-of-order offsets
503510
to.offsetBuffer = to.allocateBuffers((long) length * OFFSET_WIDTH);
504511
to.sizeBuffer = to.allocateBuffers((long) length * SIZE_WIDTH);
@@ -507,9 +514,9 @@ public void splitAndTransfer(int startIndex, int length) {
507514
int maxOffsetAndSizeSum = -1;
508515
int minOffsetValue = -1;
509516
for (int i = 0; i < length; i++) {
510-
final int offsetValue = offsetBuffer.getInt((long) (startIndex + i) * OFFSET_WIDTH);
511-
final int sizeValue = sizeBuffer.getInt((long) (startIndex + i) * SIZE_WIDTH);
512-
to.sizeBuffer.setInt((long) i * SIZE_WIDTH, sizeValue);
517+
final int offsetValue = getElementStartIndex(startIndex + i);
518+
final int sizeValue = getElementSize(startIndex + i);
519+
to.setElementSizeBuffer(i, sizeValue);
513520
if (maxOffsetAndSizeSum < offsetValue + sizeValue) {
514521
maxOffsetAndSizeSum = offsetValue + sizeValue;
515522
}
@@ -520,9 +527,9 @@ public void splitAndTransfer(int startIndex, int length) {
520527

521528
/* splitAndTransfer the offset buffer */
522529
for (int i = 0; i < length; i++) {
523-
final int offsetValue = offsetBuffer.getInt((long) (startIndex + i) * OFFSET_WIDTH);
530+
final int offsetValue = getElementStartIndex(startIndex + i);
524531
final int relativeOffset = offsetValue - minOffsetValue;
525-
to.offsetBuffer.setInt((long) i * OFFSET_WIDTH, relativeOffset);
532+
to.setElementOffsetBuffer(i, relativeOffset);
526533
}
527534

528535
/* splitAndTransfer the validity buffer */
@@ -678,8 +685,8 @@ public List<?> getObject(int index) {
678685
if (isSet(index) == 0) {
679686
return null;
680687
}
681-
final int start = offsetBuffer.getInt(index * OFFSET_WIDTH);
682-
final int end = start + sizeBuffer.getInt((index) * SIZE_WIDTH);
688+
final int start = getElementStartIndex(index);
689+
final int end = getElementEndIndex(index);
683690
final ValueVector vv = getDataVector();
684691
final List<Object> vals = new JsonStringArrayList<>(end - start);
685692
for (int i = start; i < end; i++) {
@@ -711,7 +718,7 @@ public boolean isEmpty(int index) {
711718
if (isNull(index)) {
712719
return true;
713720
} else {
714-
return sizeBuffer.getInt(index * SIZE_WIDTH) == 0;
721+
return getElementSize(index) == 0;
715722
}
716723
}
717724

@@ -722,10 +729,7 @@ public boolean isEmpty(int index) {
722729
* @return 1 if element at given index is not null, 0 otherwise
723730
*/
724731
public int isSet(int index) {
725-
final int byteIndex = index >> 3;
726-
final byte b = validityBuffer.getByte(byteIndex);
727-
final int bitIndex = index & 7;
728-
return (b >> bitIndex) & 0x01;
732+
return BitVectorHelper.get(validityBuffer, index);
729733
}
730734

731735
/**
@@ -775,8 +779,8 @@ public void setNull(int index) {
775779
reallocValidityAndSizeAndOffsetBuffers();
776780
}
777781

778-
offsetBuffer.setInt(index * OFFSET_WIDTH, 0);
779-
sizeBuffer.setInt(index * SIZE_WIDTH, 0);
782+
setElementOffsetBuffer(index, 0);
783+
setElementSizeBuffer(index, 0);
780784
BitVectorHelper.unsetBit(validityBuffer, index);
781785
}
782786

@@ -794,11 +798,11 @@ public int startNewValue(int index) {
794798

795799
if (index > 0) {
796800
final int prevOffset = getMaxViewEndChildVectorByIndex(index);
797-
offsetBuffer.setInt(index * OFFSET_WIDTH, prevOffset);
801+
setElementOffsetBuffer(index, prevOffset);
798802
}
799803

800804
BitVectorHelper.setBit(validityBuffer, index);
801-
return offsetBuffer.getInt(index * OFFSET_WIDTH);
805+
return getElementStartIndex(index);
802806
}
803807

804808
/**
@@ -836,9 +840,9 @@ private void validateInvariants(int offset, int size) {
836840
* @param value value to set
837841
*/
838842
public void setOffset(int index, int value) {
839-
validateInvariants(value, sizeBuffer.getInt(index * SIZE_WIDTH));
843+
validateInvariants(value, getElementSize(index));
840844

841-
offsetBuffer.setInt(index * OFFSET_WIDTH, value);
845+
setElementOffsetBuffer(index, value);
842846
}
843847

844848
/**
@@ -848,9 +852,9 @@ public void setOffset(int index, int value) {
848852
* @param value value to set
849853
*/
850854
public void setSize(int index, int value) {
851-
validateInvariants(offsetBuffer.getInt(index * SIZE_WIDTH), value);
855+
validateInvariants(getElementStartIndex(index), value);
852856

853-
sizeBuffer.setInt(index * SIZE_WIDTH, value);
857+
setElementSizeBuffer(index, value);
854858
}
855859

856860
/**
@@ -886,12 +890,16 @@ public void setValueCount(int valueCount) {
886890

887891
@Override
888892
public int getElementStartIndex(int index) {
889-
return offsetBuffer.getInt(index * OFFSET_WIDTH);
893+
return offsetBuffer.getInt((long) index * OFFSET_WIDTH);
894+
}
895+
896+
private int getElementSize(int index) {
897+
return sizeBuffer.getInt((long) index * SIZE_WIDTH);
890898
}
891899

892900
@Override
893901
public int getElementEndIndex(int index) {
894-
return offsetBuffer.getInt(index * OFFSET_WIDTH) + sizeBuffer.getInt(index * SIZE_WIDTH);
902+
return getElementStartIndex(index) + getElementSize(index);
895903
}
896904

897905
@Override
@@ -948,8 +956,8 @@ public double getDensity() {
948956
@Override
949957
public void validate() {
950958
for (int i = 0; i < valueCount; i++) {
951-
final int offset = offsetBuffer.getInt(i * OFFSET_WIDTH);
952-
final int size = sizeBuffer.getInt(i * SIZE_WIDTH);
959+
final int offset = getElementStartIndex(i);
960+
final int size = getElementSize(i);
953961
validateInvariants(offset, size);
954962
}
955963
}
@@ -961,6 +969,6 @@ public void validate() {
961969
* @param size number of elements in the list that was written
962970
*/
963971
public void endValue(int index, int size) {
964-
sizeBuffer.setInt(index * SIZE_WIDTH, size);
972+
setElementSizeBuffer(index, size);
965973
}
966974
}

0 commit comments

Comments
 (0)