Skip to content

Commit 4d5ba12

Browse files
committed
GH-52: Make RangeEqualsVisitor of RunEndEncodedVector more efficient
1 parent 6be33bb commit 4d5ba12

File tree

2 files changed

+59
-19
lines changed

2 files changed

+59
-19
lines changed

vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java

Lines changed: 9 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@
4343
import org.apache.arrow.vector.complex.ListViewVector;
4444
import org.apache.arrow.vector.complex.NonNullableStructVector;
4545
import org.apache.arrow.vector.complex.RunEndEncodedVector;
46+
import org.apache.arrow.vector.complex.RunEndEncodedVector.RangeIterator;
4647
import org.apache.arrow.vector.complex.StructVector;
4748
import org.apache.arrow.vector.complex.UnionVector;
4849

@@ -270,35 +271,24 @@ protected boolean compareRunEndEncodedVectors(Range range) {
270271
RunEndEncodedVector leftVector = (RunEndEncodedVector) left;
271272
RunEndEncodedVector rightVector = (RunEndEncodedVector) right;
272273

273-
final int leftRangeEnd = range.getLeftStart() + range.getLength();
274-
final int rightRangeEnd = range.getRightStart() + range.getLength();
274+
final RunEndEncodedVector.RangeIterator leftIterator =
275+
new RangeIterator(leftVector, range.getLeftStart(), range.getLength());
276+
final RunEndEncodedVector.RangeIterator rightIterator =
277+
new RangeIterator(rightVector, range.getRightStart(), range.getLength());
275278

276279
FieldVector leftValuesVector = leftVector.getValuesVector();
277280
FieldVector rightValuesVector = rightVector.getValuesVector();
278281

279282
RangeEqualsVisitor innerVisitor = createInnerVisitor(leftValuesVector, rightValuesVector, null);
280283

281-
int leftLogicalIndex = range.getLeftStart();
282-
int rightLogicalIndex = range.getRightStart();
284+
while (leftIterator.nextRun() | rightIterator.nextRun()) {
285+
int leftPhysicalIndex = leftIterator.getRunIndex();
286+
int rightPhysicalIndex = rightIterator.getRunIndex();
283287

284-
while (leftLogicalIndex < leftRangeEnd) {
285-
// TODO: implement it more efficient
286-
// https://github.com/apache/arrow/issues/44157
287-
int leftPhysicalIndex = leftVector.getPhysicalIndex(leftLogicalIndex);
288-
int rightPhysicalIndex = rightVector.getPhysicalIndex(rightLogicalIndex);
289288
if (leftValuesVector.accept(
290289
innerVisitor, new Range(leftPhysicalIndex, rightPhysicalIndex, 1))) {
291-
int leftRunEnd = leftVector.getRunEnd(leftLogicalIndex);
292-
int rightRunEnd = rightVector.getRunEnd(rightLogicalIndex);
293-
294-
int leftRunLength = Math.min(leftRunEnd, leftRangeEnd) - leftLogicalIndex;
295-
int rightRunLength = Math.min(rightRunEnd, rightRangeEnd) - rightLogicalIndex;
296-
297-
if (leftRunLength != rightRunLength) {
290+
if (leftIterator.getRunLength() != rightIterator.getRunLength()) {
298291
return false;
299-
} else {
300-
leftLogicalIndex = leftRunEnd;
301-
rightLogicalIndex = rightRunEnd;
302292
}
303293
} else {
304294
return false;

vector/src/main/java/org/apache/arrow/vector/complex/RunEndEncodedVector.java

Lines changed: 50 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -820,4 +820,54 @@ static int getPhysicalIndex(FieldVector runEndVector, int logicalIndex) {
820820

821821
return result;
822822
}
823+
824+
public static class RangeIterator {
825+
826+
private final RunEndEncodedVector runEndEncodedVector;
827+
private final int rangeEnd;
828+
private int runIndex;
829+
private int runEnd;
830+
private int logicalPos;
831+
832+
public RangeIterator(RunEndEncodedVector runEndEncodedVector, int startIndex, int length) {
833+
this.runEndEncodedVector = runEndEncodedVector;
834+
this.rangeEnd = startIndex + length;
835+
this.runIndex = runEndEncodedVector.getPhysicalIndex(startIndex) - 1;
836+
this.runEnd = startIndex;
837+
this.logicalPos = -1;
838+
}
839+
840+
public boolean nextRun() {
841+
logicalPos = runEnd;
842+
if (logicalPos >= rangeEnd) {
843+
return false;
844+
}
845+
updateRun();
846+
return true;
847+
}
848+
849+
private void updateRun() {
850+
runIndex++;
851+
runEnd = (int) ((BaseIntVector) runEndEncodedVector.runEndsVector).getValueAsLong(runIndex);
852+
}
853+
854+
public boolean nextValue() {
855+
logicalPos++;
856+
if (logicalPos >= rangeEnd) {
857+
return false;
858+
}
859+
if (logicalPos == runEnd) {
860+
updateRun();
861+
}
862+
return true;
863+
}
864+
865+
public int getRunIndex() {
866+
return runIndex;
867+
}
868+
869+
public int getRunLength() {
870+
return Math.min(runEnd, rangeEnd) - logicalPos;
871+
}
872+
}
823873
}

0 commit comments

Comments
 (0)