Skip to content

Commit ea2b103

Browse files
committed
feat: Implement VectorAppender for RunEndEncodedVector
1 parent aee8a10 commit ea2b103

File tree

2 files changed

+132
-0
lines changed

2 files changed

+132
-0
lines changed

vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,18 @@
2424
import org.apache.arrow.memory.util.MemoryUtil;
2525
import org.apache.arrow.util.Preconditions;
2626
import org.apache.arrow.vector.BaseFixedWidthVector;
27+
import org.apache.arrow.vector.BaseIntVector;
2728
import org.apache.arrow.vector.BaseLargeVariableWidthVector;
2829
import org.apache.arrow.vector.BaseVariableWidthVector;
2930
import org.apache.arrow.vector.BaseVariableWidthViewVector;
31+
import org.apache.arrow.vector.BigIntVector;
3032
import org.apache.arrow.vector.BitVector;
3133
import org.apache.arrow.vector.BitVectorHelper;
3234
import org.apache.arrow.vector.ExtensionTypeVector;
35+
import org.apache.arrow.vector.FieldVector;
36+
import org.apache.arrow.vector.IntVector;
3337
import org.apache.arrow.vector.NullVector;
38+
import org.apache.arrow.vector.SmallIntVector;
3439
import org.apache.arrow.vector.ValueVector;
3540
import org.apache.arrow.vector.compare.TypeEqualsVisitor;
3641
import org.apache.arrow.vector.compare.VectorVisitor;
@@ -39,6 +44,7 @@
3944
import org.apache.arrow.vector.complex.LargeListVector;
4045
import org.apache.arrow.vector.complex.ListVector;
4146
import org.apache.arrow.vector.complex.NonNullableStructVector;
47+
import org.apache.arrow.vector.complex.RunEndEncodedVector;
4248
import org.apache.arrow.vector.complex.UnionVector;
4349

4450
/** Utility to append two vectors together. */
@@ -698,4 +704,76 @@ public ValueVector visit(ExtensionTypeVector<?> deltaVector, Void value) {
698704
deltaVector.getUnderlyingVector().accept(underlyingAppender, null);
699705
return targetVector;
700706
}
707+
708+
@Override
709+
public ValueVector visit(RunEndEncodedVector deltaVector, Void value) {
710+
Preconditions.checkArgument(
711+
typeVisitor.equals(deltaVector),
712+
"The vector to append must have the same type as the targetVector being appended");
713+
714+
if (deltaVector.getValueCount() == 0) {
715+
return targetVector; // optimization, nothing to append, return
716+
}
717+
718+
RunEndEncodedVector targetEncodedVector = (RunEndEncodedVector) targetVector;
719+
720+
final int targetLogicalValueCount = targetEncodedVector.getValueCount();
721+
722+
// Append the values vector first.
723+
VectorAppender valueAppender = new VectorAppender(targetEncodedVector.getValuesVector());
724+
deltaVector.getValuesVector().accept(valueAppender, null);
725+
726+
// Then append the run-ends vector.
727+
BaseIntVector targetRunEndsVector = (BaseIntVector) targetEncodedVector.getRunEndsVector();
728+
BaseIntVector deltaRunEndsVector = (BaseIntVector) deltaVector.getRunEndsVector();
729+
730+
// Shift the delta run-ends vector in-place before appending.
731+
shiftRunEndsVector(
732+
deltaRunEndsVector,
733+
deltaRunEndsVector.getDataBuffer(),
734+
targetLogicalValueCount,
735+
deltaRunEndsVector.getValueCount());
736+
737+
// Append the now-shifted delta run-ends vector to the target.
738+
new VectorAppender(targetRunEndsVector).visit((BaseFixedWidthVector) deltaRunEndsVector, null);
739+
740+
targetEncodedVector.setValueCount(targetLogicalValueCount + deltaVector.getValueCount());
741+
742+
return targetVector;
743+
}
744+
745+
private void shiftRunEndsVector(
746+
ValueVector toRunEndVector,
747+
ArrowBuf fromRunEndBuffer,
748+
int offset,
749+
int physicalLength) {
750+
ArrowBuf toRunEndBuffer = toRunEndVector.getDataBuffer();
751+
if (toRunEndVector instanceof SmallIntVector) {
752+
byte typeWidth = SmallIntVector.TYPE_WIDTH;
753+
for (int i = 0; i < physicalLength; i++) {
754+
toRunEndBuffer.setShort(
755+
(long) i * typeWidth,
756+
fromRunEndBuffer.getShort((long) (i) * typeWidth) + offset);
757+
}
758+
759+
} else if (toRunEndVector instanceof IntVector) {
760+
byte typeWidth = IntVector.TYPE_WIDTH;
761+
for (int i = 0; i < physicalLength; i++) {
762+
toRunEndBuffer.setInt(
763+
(long) i * typeWidth,
764+
fromRunEndBuffer.getInt((long) (i) * typeWidth) + offset);
765+
}
766+
767+
} else if (toRunEndVector instanceof BigIntVector) {
768+
byte typeWidth = BigIntVector.TYPE_WIDTH;
769+
for (int i = 0; i < physicalLength; i++) {
770+
toRunEndBuffer.setLong(
771+
(long) i * typeWidth,
772+
fromRunEndBuffer.getLong((long) (i) * typeWidth) + offset);
773+
}
774+
} else {
775+
throw new IllegalArgumentException(
776+
"Run-end vector and must be of type int with size 16, 32, or 64 bits.");
777+
}
778+
}
701779
}

vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.apache.arrow.vector.Float4Vector;
3737
import org.apache.arrow.vector.IntVector;
3838
import org.apache.arrow.vector.LargeVarCharVector;
39+
import org.apache.arrow.vector.complex.RunEndEncodedVector;
3940
import org.apache.arrow.vector.TestUtils;
4041
import org.apache.arrow.vector.ValueVector;
4142
import org.apache.arrow.vector.VarCharVector;
@@ -1025,6 +1026,59 @@ public void testAppendDenseUnionVectorMismatch() {
10251026
}
10261027
}
10271028

1029+
@Test
1030+
public void testAppendRunEndEncodedVector() {
1031+
final FieldType reeFieldType = FieldType.notNullable(ArrowType.RunEndEncoded.INSTANCE);
1032+
final Field runEndsField =
1033+
new Field("runEnds", FieldType.notNullable(Types.MinorType.INT.getType()), null);
1034+
final Field valuesField = Field.nullable("values", Types.MinorType.INT.getType());
1035+
final List<Field> children = Arrays.asList(runEndsField, valuesField);
1036+
1037+
final Field targetField = new Field("target", reeFieldType, children);
1038+
final Field deltaField = new Field("delta", reeFieldType, children);
1039+
try (RunEndEncodedVector target = new RunEndEncodedVector(targetField, allocator, null);
1040+
RunEndEncodedVector delta = new RunEndEncodedVector(deltaField, allocator, null)) {
1041+
1042+
// populate target
1043+
target.allocateNew();
1044+
// data: [1, 1, 2, null, 3, 3, 3] (7 values)
1045+
// values: [1, 2, null, 3]
1046+
// runEnds: [2, 3, 4, 7]
1047+
ValueVectorDataPopulator.setVector((IntVector) target.getValuesVector(), 1, 2, null, 3);
1048+
ValueVectorDataPopulator.setVector((IntVector) target.getRunEndsVector(), 2, 3, 4, 7);
1049+
target.setValueCount(7);
1050+
1051+
// populate delta
1052+
delta.allocateNew();
1053+
// data: [3, 4, 4, 5, null, null] (6 values)
1054+
// values: [3, 4, 5, null]
1055+
// runEnds: [1, 3, 4, 6]
1056+
ValueVectorDataPopulator.setVector((IntVector) delta.getValuesVector(), 3, 4, 5, null);
1057+
ValueVectorDataPopulator.setVector((IntVector) delta.getRunEndsVector(), 1, 3, 4, 6);
1058+
delta.setValueCount(6);
1059+
1060+
VectorAppender appender = new VectorAppender(target);
1061+
delta.accept(appender, null);
1062+
1063+
assertEquals(13, target.getValueCount());
1064+
1065+
final Field expectedField = new Field("expected", reeFieldType, children);
1066+
try (RunEndEncodedVector expected = new RunEndEncodedVector(expectedField, allocator, null)) {
1067+
expected.allocateNew();
1068+
// expected data: [1, 1, 2, null, 3, 3, 3, 3, 4, 4, 5, null, null] (13 values)
1069+
// expected values: [1, 2, null, 3, 3, 4, 5, null]
1070+
// expected runEnds: [2, 3, 4, 7, 8, 10, 11, 13]
1071+
ValueVectorDataPopulator.setVector(
1072+
(IntVector) expected.getValuesVector(), 1, 2, null, 3, 3, 4, 5, null);
1073+
ValueVectorDataPopulator.setVector(
1074+
(IntVector) expected.getRunEndsVector(), 2, 3, 4, 7, 8, 10, 11, 13);
1075+
expected.setValueCount(13);
1076+
1077+
assertVectorsEqual(expected, target);
1078+
}
1079+
}
1080+
}
1081+
10281082
@Test
10291083
public void testAppendVectorNegative() {
10301084
final int vectorLength = 10;

0 commit comments

Comments
 (0)