From 2f34ff65a0af7fdad42348a23db5c43642de6985 Mon Sep 17 00:00:00 2001 From: An Qi Date: Mon, 6 Jan 2025 23:24:08 +0800 Subject: [PATCH 1/4] unify `ValueVector.getObject` and `VariableWidthFieldVector.get` behavior about null value --- .../apache/arrow/vector/FixedSizeBinaryVector.java | 5 ++++- .../apache/arrow/vector/LargeVarBinaryVector.java | 13 +++++++++++-- .../org/apache/arrow/vector/LargeVarCharVector.java | 4 ++-- .../java/org/apache/arrow/vector/ValueVector.java | 2 +- .../org/apache/arrow/vector/VarBinaryVector.java | 10 +++++++++- .../java/org/apache/arrow/vector/VarCharVector.java | 2 +- .../apache/arrow/vector/ViewVarBinaryVector.java | 5 ++++- .../org/apache/arrow/vector/ViewVarCharVector.java | 2 +- 8 files changed, 33 insertions(+), 10 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java b/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java index 4add729358..2005036c78 100644 --- a/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java @@ -149,7 +149,10 @@ public void get(int index, NullableFixedSizeBinaryHolder holder) { */ @Override public byte[] getObject(int index) { - return get(index); + if (isSet(index) == 0) { + return null; + } + return get(valueBuffer, index, byteWidth); } public int getByteWidth() { diff --git a/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java b/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java index f38627b933..fe798494c9 100644 --- a/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java @@ -16,6 +16,8 @@ */ package org.apache.arrow.vector; +import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; + import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.ReusableBuffer; import org.apache.arrow.vector.complex.impl.LargeVarBinaryReaderImpl; @@ -95,7 +97,7 @@ public MinorType getMinorType() { */ public byte[] get(int index) { assert index >= 0; - if (isSet(index) == 0) { + if (NULL_CHECKING_ENABLED && isSet(index) == 0) { return null; } final long startOffset = getStartOffset(index); @@ -127,7 +129,14 @@ public void read(int index, ReusableBuffer buffer) { */ @Override public byte[] getObject(int index) { - return get(index); + if (isSet(index) == 0) { + return null; + } + final long startOffset = getStartOffset(index); + final long dataLength = getEndOffset(index) - startOffset; + final byte[] result = new byte[(int) dataLength]; + valueBuffer.getBytes(startOffset, result, 0, (int) dataLength); + return result; } /** diff --git a/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java b/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java index 07a9a172f0..b7a765f310 100644 --- a/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java @@ -101,7 +101,7 @@ public Types.MinorType getMinorType() { @Override public byte[] get(int index) { assert index >= 0; - if (isSet(index) == 0) { + if (NULL_CHECKING_ENABLED && isSet(index) == 0) { return null; } final long startOffset = getStartOffset(index); @@ -120,7 +120,7 @@ public byte[] get(int index) { @Override public Text getObject(int index) { assert index >= 0; - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { + if (isSet(index) == 0) { return null; } diff --git a/vector/src/main/java/org/apache/arrow/vector/ValueVector.java b/vector/src/main/java/org/apache/arrow/vector/ValueVector.java index 0a45409eb9..3a5058256c 100644 --- a/vector/src/main/java/org/apache/arrow/vector/ValueVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/ValueVector.java @@ -264,7 +264,7 @@ public interface ValueVector extends Closeable, Iterable { * Get friendly type object from the vector. * * @param index index of object to get - * @return friendly type object + * @return friendly type object, null if value is unset */ Object getObject(int index); diff --git a/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java b/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java index 7196e9c910..ad76504f0f 100644 --- a/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java @@ -128,7 +128,15 @@ public void read(int index, ReusableBuffer buffer) { */ @Override public byte[] getObject(int index) { - return get(index); + if (isSet(index) == 0) { + return null; + } + + final int startOffset = getStartOffset(index); + final int dataLength = getEndOffset(index) - startOffset; + final byte[] result = new byte[dataLength]; + valueBuffer.getBytes(startOffset, result, 0, dataLength); + return result; } /** diff --git a/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java b/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java index c81e34558c..5ddc8b84d2 100644 --- a/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java @@ -117,7 +117,7 @@ public byte[] get(int index) { @Override public Text getObject(int index) { assert index >= 0; - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { + if (isSet(index) == 0) { return null; } diff --git a/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java b/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java index 80d6952e00..f605fde458 100644 --- a/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java @@ -122,7 +122,10 @@ public void read(int index, ReusableBuffer buffer) { */ @Override public byte[] getObject(int index) { - return get(index); + if(isSet(index) == 0) { + return null; + } + return getData(index); } /** diff --git a/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java b/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java index dc474b68e3..9ce7f85ef6 100644 --- a/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java @@ -115,7 +115,7 @@ public byte[] get(int index) { @Override public Text getObject(int index) { assert index >= 0; - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { + if (isSet(index) == 0) { return null; } From b9bffb417c43c7e54e2d1e4ff870b6cec1909c8a Mon Sep 17 00:00:00 2001 From: An Qi Date: Mon, 6 Jan 2025 23:29:59 +0800 Subject: [PATCH 2/4] format --- .../main/java/org/apache/arrow/vector/ViewVarBinaryVector.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java b/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java index f605fde458..c41854bb5f 100644 --- a/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java @@ -122,7 +122,7 @@ public void read(int index, ReusableBuffer buffer) { */ @Override public byte[] getObject(int index) { - if(isSet(index) == 0) { + if (isSet(index) == 0) { return null; } return getData(index); From d42d14cc9a78a102b6d43f2da5175e73b8b9cc8d Mon Sep 17 00:00:00 2001 From: An Qi Date: Mon, 6 Jan 2025 23:35:48 +0800 Subject: [PATCH 3/4] throw exception rather 'return null' --- .../main/java/org/apache/arrow/vector/LargeVarBinaryVector.java | 2 +- .../main/java/org/apache/arrow/vector/LargeVarCharVector.java | 2 +- .../src/main/java/org/apache/arrow/vector/VarBinaryVector.java | 2 +- vector/src/main/java/org/apache/arrow/vector/VarCharVector.java | 2 +- .../main/java/org/apache/arrow/vector/ViewVarBinaryVector.java | 2 +- .../main/java/org/apache/arrow/vector/ViewVarCharVector.java | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java b/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java index fe798494c9..30a86f7869 100644 --- a/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java @@ -98,7 +98,7 @@ public MinorType getMinorType() { public byte[] get(int index) { assert index >= 0; if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - return null; + throw new IllegalStateException("Value at index is null"); } final long startOffset = getStartOffset(index); final long dataLength = getEndOffset(index) - startOffset; diff --git a/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java b/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java index b7a765f310..003d3fb419 100644 --- a/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java @@ -102,7 +102,7 @@ public Types.MinorType getMinorType() { public byte[] get(int index) { assert index >= 0; if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - return null; + throw new IllegalStateException("Value at index is null"); } final long startOffset = getStartOffset(index); final long dataLength = getEndOffset(index) - startOffset; diff --git a/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java b/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java index ad76504f0f..0fe845fcba 100644 --- a/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java @@ -97,7 +97,7 @@ public MinorType getMinorType() { public byte[] get(int index) { assert index >= 0; if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - return null; + throw new IllegalStateException("Value at index is null"); } final int startOffset = getStartOffset(index); final int dataLength = getEndOffset(index) - startOffset; diff --git a/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java b/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java index 5ddc8b84d2..03ea7e8821 100644 --- a/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java @@ -99,7 +99,7 @@ public MinorType getMinorType() { public byte[] get(int index) { assert index >= 0; if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - return null; + throw new IllegalStateException("Value at index is null"); } final int startOffset = getStartOffset(index); final int dataLength = getEndOffset(index) - startOffset; diff --git a/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java b/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java index c41854bb5f..0f89236d2f 100644 --- a/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java @@ -97,7 +97,7 @@ public MinorType getMinorType() { public byte[] get(int index) { assert index >= 0; if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - return null; + throw new IllegalStateException("Value at index is null"); } return getData(index); } diff --git a/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java b/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java index 9ce7f85ef6..26fdb6b82a 100644 --- a/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java @@ -101,7 +101,7 @@ public MinorType getMinorType() { public byte[] get(int index) { assert index >= 0; if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - return null; + throw new IllegalStateException("Value at index is null"); } return getData(index); } From 5a2b461474de6ad63af06692cc7e2b709c3bd8a0 Mon Sep 17 00:00:00 2001 From: An Qi Date: Tue, 7 Jan 2025 00:14:35 +0800 Subject: [PATCH 4/4] Revert "throw exception rather 'return null'" This reverts commit 4ef7160087f9481bd1a54f332110430fd27f89e6. --- .../main/java/org/apache/arrow/vector/LargeVarBinaryVector.java | 2 +- .../main/java/org/apache/arrow/vector/LargeVarCharVector.java | 2 +- .../src/main/java/org/apache/arrow/vector/VarBinaryVector.java | 2 +- vector/src/main/java/org/apache/arrow/vector/VarCharVector.java | 2 +- .../main/java/org/apache/arrow/vector/ViewVarBinaryVector.java | 2 +- .../main/java/org/apache/arrow/vector/ViewVarCharVector.java | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java b/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java index 30a86f7869..fe798494c9 100644 --- a/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java @@ -98,7 +98,7 @@ public MinorType getMinorType() { public byte[] get(int index) { assert index >= 0; if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); + return null; } final long startOffset = getStartOffset(index); final long dataLength = getEndOffset(index) - startOffset; diff --git a/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java b/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java index 003d3fb419..b7a765f310 100644 --- a/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java @@ -102,7 +102,7 @@ public Types.MinorType getMinorType() { public byte[] get(int index) { assert index >= 0; if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); + return null; } final long startOffset = getStartOffset(index); final long dataLength = getEndOffset(index) - startOffset; diff --git a/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java b/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java index 0fe845fcba..ad76504f0f 100644 --- a/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java @@ -97,7 +97,7 @@ public MinorType getMinorType() { public byte[] get(int index) { assert index >= 0; if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); + return null; } final int startOffset = getStartOffset(index); final int dataLength = getEndOffset(index) - startOffset; diff --git a/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java b/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java index 03ea7e8821..5ddc8b84d2 100644 --- a/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java @@ -99,7 +99,7 @@ public MinorType getMinorType() { public byte[] get(int index) { assert index >= 0; if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); + return null; } final int startOffset = getStartOffset(index); final int dataLength = getEndOffset(index) - startOffset; diff --git a/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java b/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java index 0f89236d2f..c41854bb5f 100644 --- a/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java @@ -97,7 +97,7 @@ public MinorType getMinorType() { public byte[] get(int index) { assert index >= 0; if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); + return null; } return getData(index); } diff --git a/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java b/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java index 26fdb6b82a..9ce7f85ef6 100644 --- a/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java @@ -101,7 +101,7 @@ public MinorType getMinorType() { public byte[] get(int index) { assert index >= 0; if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); + return null; } return getData(index); }