Skip to content

Commit e6da71e

Browse files
authored
GH-725: Added ExtensionReader (#726)
## What's Changed ExtensionReader was added to support reading extension types from a complex vector. It provides a **read(ExtensionHolder)** method for reading the value into a holder, and a **readObject** method for reading the value directly as an object. Closes #725.
1 parent 7618274 commit e6da71e

File tree

13 files changed

+315
-6
lines changed

13 files changed

+315
-6
lines changed

vector/src/main/codegen/templates/AbstractFieldReader.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,23 @@ public void copyAsField(String name, ${name}Writer writer) {
108108
}
109109

110110
</#list></#list>
111+
112+
/**
 * Default extension-holder read for this abstract reader: it does not fill the holder and
 * instead calls {@code fail("Extension")}.
 *
 * <p>NOTE(review): {@code fail} is defined outside this hunk; presumably it rejects the call as
 * unsupported - confirm.
 *
 * @param holder the extension holder; not touched by this default implementation
 */
public void read(ExtensionHolder holder) {
113+
fail("Extension");
114+
}
115+
116+
/**
 * Default indexed extension-holder read for this abstract reader: it does not fill the holder
 * and instead calls {@code fail("RepeatedExtension")}.
 *
 * <p>NOTE(review): {@code fail} is defined outside this hunk; presumably it rejects the call as
 * unsupported - confirm.
 *
 * @param arrayIndex the element index; ignored by this default implementation
 * @param holder the extension holder; not touched by this default implementation
 */
public void read(int arrayIndex, ExtensionHolder holder) {
117+
fail("RepeatedExtension");
118+
}
119+
120+
/**
 * Default copy-as-value for extension writers: nothing is copied and
 * {@code fail("CopyAsValueExtension")} is called instead.
 *
 * <p>NOTE(review): {@code fail} is defined outside this hunk; presumably it rejects the call as
 * unsupported - confirm.
 *
 * @param writer the extension type writer; not used by this default implementation
 */
public void copyAsValue(AbstractExtensionTypeWriter writer) {
121+
fail("CopyAsValueExtension");
122+
}
123+
124+
/**
 * Default copy-as-field for extension writers: nothing is copied and
 * {@code fail("CopyAsFieldExtension")} is called instead.
 *
 * <p>NOTE(review): {@code fail} is defined outside this hunk; presumably it rejects the call as
 * unsupported - confirm.
 *
 * @param name the field name; ignored by this default implementation
 * @param writer the extension type writer; not used by this default implementation
 */
public void copyAsField(String name, AbstractExtensionTypeWriter writer) {
125+
fail("CopyAsFieldExtension");
126+
}
127+
111128
public FieldReader reader(String name) {
112129
fail("reader(String name)");
113130
return null;

vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ public MapWriter map(boolean keysSorted) {
295295
296296
@Override
297297
public ExtensionWriter extension(ArrowType arrowType) {
298-
return getWriter(MinorType.EXTENSIONTYPE).extension(arrowType);
298+
return getWriter(MinorType.LIST).extension(arrowType);
299299
}
300300
301301
@Override
@@ -325,7 +325,7 @@ public MapWriter map(String name, boolean keysSorted) {
325325
326326
@Override
327327
public ExtensionWriter extension(String name, ArrowType arrowType) {
328-
return getWriter(MinorType.EXTENSIONTYPE).extension(name, arrowType);
328+
return getWriter(MinorType.STRUCT).extension(name, arrowType);
329329
}
330330
331331
<#list vv.types as type><#list type.minor as minor>

vector/src/main/codegen/templates/BaseReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ public interface RepeatedMapReader extends MapReader{
7373

7474
public interface ScalarReader extends
7575
<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> ${name}Reader, </#list></#list>
76-
BaseReader {}
76+
ExtensionReader, BaseReader {}
7777

7878
interface ComplexReader{
7979
StructReader rootAsStruct();

vector/src/main/codegen/templates/NullReader.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@ public void read(int arrayIndex, Nullable${name}Holder holder){
8686
}
8787
</#list></#list>
8888

89+
/**
 * Marks the supplied extension holder as unset by assigning {@code isSet = 0}; no value is
 * written into the holder.
 *
 * @param holder the extension holder to mark as unset
 */
public void read(ExtensionHolder holder) {
90+
holder.isSet = 0;
91+
}
92+
8993
/**
 * Reports the size of this reader, which is always zero.
 *
 * @return {@code 0}, unconditionally
 */
public int size(){
9094
return 0;
9195
}

vector/src/main/codegen/templates/PromotableWriter.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -550,6 +550,10 @@ public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory) {
550550
getWriter(MinorType.EXTENSIONTYPE).addExtensionTypeWriterFactory(factory);
551551
}
552552

553+
/**
 * Forwards the factory to the writer returned by
 * {@code getWriter(MinorType.EXTENSIONTYPE, arrowType)}.
 *
 * <p>The single-argument overload shown just above in this hunk calls
 * {@code getWriter(MinorType.EXTENSIONTYPE)} without an Arrow type; this overload passes the
 * type along.
 *
 * @param factory the extension type writer factory to register on the resolved writer
 * @param arrowType the Arrow type handed to {@code getWriter} when resolving the writer
 */
public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory, ArrowType arrowType) {
554+
getWriter(MinorType.EXTENSIONTYPE, arrowType).addExtensionTypeWriterFactory(factory);
555+
}
556+
553557
@Override
554558
public void allocate() {
555559
getWriter().allocate();

vector/src/main/codegen/templates/UnionListWriter.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ public class Union${listName}Writer extends AbstractFieldWriter {
5353
private boolean inStruct = false;
5454
private boolean listStarted = false;
5555
private String structName;
56+
private ArrowType extensionType;
5657
<#if listName == "LargeList" || listName == "LargeListView">
5758
private static final long OFFSET_WIDTH = 8;
5859
<#else>
@@ -203,8 +204,8 @@ public MapWriter map(String name, boolean keysSorted) {
203204
204205
@Override
205206
public ExtensionWriter extension(ArrowType arrowType) {
206-
writer.extension(arrowType);
207-
return writer;
207+
this.extensionType = arrowType;
208+
return this;
208209
}
209210
@Override
210211
public ExtensionWriter extension(String name, ArrowType arrowType) {
@@ -337,13 +338,17 @@ public void writeNull() {
337338
/**
 * Writes an extension value through the delegate {@code writer} and then moves the delegate's
 * position to one past its current index.
 *
 * @param value the value forwarded unchanged to {@code writer.writeExtension}
 */
@Override
338339
public void writeExtension(Object value) {
339340
writer.writeExtension(value);
341+
// Advance the delegate to idx + 1 after the write; presumably so the next element written to
// this list lands in the following slot - confirm against the other write methods.
writer.setPosition(writer.idx() + 1);
340342
}
343+
341344
@Override
342345
public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) {
343-
writer.addExtensionTypeWriterFactory(var1);
346+
writer.addExtensionTypeWriterFactory(var1, extensionType);
344347
}
348+
345349
/**
 * Writes the given extension holder through the delegate {@code writer} and then moves the
 * delegate's position to one past its current index.
 *
 * @param var1 the extension holder forwarded unchanged to {@code writer.write}
 */
public void write(ExtensionHolder var1) {
346350
writer.write(var1);
351+
// Advance the delegate to idx + 1 after the write; presumably so the next element written to
// this list lands in the following slot - confirm against the other write methods.
writer.setPosition(writer.idx() + 1);
347352
}
348353
349354
<#list vv.types as type>
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.arrow.vector.complex.reader;
18+
19+
import org.apache.arrow.vector.holders.ExtensionHolder;
20+
21+
/** Reader interface for extension-type values; adds extension-specific reads to {@link BaseReader}. */
22+
public interface ExtensionReader extends BaseReader {
23+
24+
/**
25+
* Reads the current value into the given extension holder.
26+
*
27+
* @param holder the {@link ExtensionHolder} to read into
28+
*/
29+
void read(ExtensionHolder holder);
30+
31+
/**
32+
* Reads the current value and returns it as an object.
33+
*
34+
* @return the object representation of the extension value
35+
*/
36+
Object readObject();
37+
38+
/**
39+
* Checks whether the current value is set.
40+
*
41+
* @return true if the value is set, false otherwise
42+
*/
43+
boolean isSet();
44+
}

vector/src/test/java/org/apache/arrow/vector/TestListVector.java

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,22 @@
2424
import static org.junit.jupiter.api.Assertions.assertThrows;
2525
import static org.junit.jupiter.api.Assertions.assertTrue;
2626

27+
import java.nio.ByteBuffer;
2728
import java.util.ArrayList;
2829
import java.util.Arrays;
2930
import java.util.List;
31+
import java.util.UUID;
3032
import org.apache.arrow.memory.ArrowBuf;
3133
import org.apache.arrow.memory.BufferAllocator;
3234
import org.apache.arrow.util.AutoCloseables;
3335
import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
3436
import org.apache.arrow.vector.complex.ListVector;
37+
import org.apache.arrow.vector.complex.impl.UnionListReader;
3538
import org.apache.arrow.vector.complex.impl.UnionListWriter;
39+
import org.apache.arrow.vector.complex.impl.UuidWriterFactory;
3640
import org.apache.arrow.vector.complex.reader.FieldReader;
41+
import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter;
42+
import org.apache.arrow.vector.holder.UuidHolder;
3743
import org.apache.arrow.vector.holders.DurationHolder;
3844
import org.apache.arrow.vector.holders.FixedSizeBinaryHolder;
3945
import org.apache.arrow.vector.holders.TimeStampMilliTZHolder;
@@ -42,6 +48,7 @@
4248
import org.apache.arrow.vector.types.pojo.ArrowType;
4349
import org.apache.arrow.vector.types.pojo.Field;
4450
import org.apache.arrow.vector.types.pojo.FieldType;
51+
import org.apache.arrow.vector.types.pojo.UuidType;
4552
import org.apache.arrow.vector.util.TransferPair;
4653
import org.junit.jupiter.api.AfterEach;
4754
import org.junit.jupiter.api.BeforeEach;
@@ -1199,6 +1206,71 @@ public void testGetTransferPairWithField() {
11991206
}
12001207
}
12011208

1209+
/**
 * Writes two random UUIDs as a single list entry of a {@code ListVector} whose element type is
 * {@code UuidType}, then checks that {@code getObject(0)} returns both UUIDs in write order.
 */
@Test
1210+
public void testListVectorWithExtensionType() throws Exception {
1211+
final FieldType type = FieldType.nullable(new UuidType());
1212+
try (final ListVector inVector = new ListVector("list", allocator, type, null)) {
1213+
UnionListWriter writer = inVector.getWriter();
1214+
writer.allocate();
1215+
writer.setPosition(0);
1216+
UUID u1 = UUID.randomUUID();
1217+
UUID u2 = UUID.randomUUID();
1218+
// Write both UUIDs between startList() and endList(), i.e. into the same list entry.
writer.startList();
1219+
ExtensionWriter extensionWriter = writer.extension(new UuidType());
1220+
// The factory is registered before the first writeExtension call.
extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory());
1221+
extensionWriter.writeExtension(u1);
1222+
extensionWriter.writeExtension(u2);
1223+
writer.endList();
1224+
1225+
writer.setValueCount(1);
1226+
1227+
FieldReader reader = inVector.getReader();
1228+
assertTrue(reader.isSet(), "shouldn't be null");
1229+
// Entry 0 is read back as a whole and compared element by element.
Object result = inVector.getObject(0);
1230+
ArrayList<UUID> resultSet = (ArrayList<UUID>) result;
1231+
assertEquals(2, resultSet.size());
1232+
assertEquals(u1, resultSet.get(0));
1233+
assertEquals(u2, resultSet.get(1));
1234+
}
1235+
}
1236+
1237+
/**
 * Writes two random UUIDs as a single list entry, then reads them back one element at a time
 * through {@code UnionListReader}, filling a {@code UuidHolder} and rebuilding each UUID from
 * the holder's bytes.
 */
@Test
1238+
public void testListVectorReaderForExtensionType() throws Exception {
1239+
final FieldType type = FieldType.nullable(new UuidType());
1240+
try (final ListVector inVector = new ListVector("list", allocator, type, null)) {
1241+
UnionListWriter writer = inVector.getWriter();
1242+
writer.allocate();
1243+
writer.setPosition(0);
1244+
UUID u1 = UUID.randomUUID();
1245+
UUID u2 = UUID.randomUUID();
1246+
// Write both UUIDs between startList() and endList(), i.e. into the same list entry.
writer.startList();
1247+
ExtensionWriter extensionWriter = writer.extension(new UuidType());
1248+
extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory());
1249+
extensionWriter.writeExtension(u1);
1250+
extensionWriter.writeExtension(u2);
1251+
writer.endList();
1252+
1253+
writer.setValueCount(1);
1254+
1255+
UnionListReader reader = inVector.getReader();
1256+
assertTrue(reader.isSet(), "shouldn't be null");
1257+
reader.setPosition(0);
1258+
// First next() call, after which reader.reader() is used to read the first element.
reader.next();
1259+
FieldReader uuidReader = reader.reader();
1260+
UuidHolder holder = new UuidHolder();
1261+
uuidReader.read(holder);
1262+
// Rebuild the UUID from the holder bytes: the first long read becomes the most significant
// bits and the second the least significant bits (ByteBuffer reads big-endian by default).
ByteBuffer bb = ByteBuffer.wrap(holder.value);
1263+
UUID actualUuid = new UUID(bb.getLong(), bb.getLong());
1264+
assertEquals(u1, actualUuid);
1265+
// Second next() call; the same holder instance is reused for the second element.
reader.next();
1266+
uuidReader = reader.reader();
1267+
uuidReader.read(holder);
1268+
bb = ByteBuffer.wrap(holder.value);
1269+
actualUuid = new UUID(bb.getLong(), bb.getLong());
1270+
assertEquals(u2, actualUuid);
1271+
}
1272+
}
1273+
12021274
private void writeIntValues(UnionListWriter writer, int[] values) {
12031275
writer.startList();
12041276
for (int v : values) {

vector/src/test/java/org/apache/arrow/vector/UuidVector.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020
import java.util.UUID;
2121
import org.apache.arrow.memory.BufferAllocator;
2222
import org.apache.arrow.memory.util.hash.ArrowBufHasher;
23+
import org.apache.arrow.vector.complex.impl.UuidReaderImpl;
24+
import org.apache.arrow.vector.complex.reader.FieldReader;
25+
import org.apache.arrow.vector.holder.UuidHolder;
2326
import org.apache.arrow.vector.types.pojo.Field;
2427
import org.apache.arrow.vector.types.pojo.FieldType;
2528
import org.apache.arrow.vector.types.pojo.UuidType;
@@ -79,11 +82,21 @@ public TransferPair makeTransferPair(ValueVector to) {
7982
return new TransferImpl((UuidVector) to);
8083
}
8184

85+
/**
 * Supplies the reader for this vector: a new {@code UuidReaderImpl} constructed around this
 * vector instance.
 *
 * @return a newly created {@code UuidReaderImpl} for this vector
 */
@Override
86+
protected FieldReader getReaderImpl() {
87+
return new UuidReaderImpl(this);
88+
}
89+
8290
/**
 * Stores raw bytes at the given index of the underlying vector, marking the index as defined
 * first.
 *
 * @param index the slot to write
 * @param value the bytes to store; presumably the 16-byte UUID encoding - TODO confirm
 */
public void setSafe(int index, byte[] value) {
8391
getUnderlyingVector().setIndexDefined(index);
8492
getUnderlyingVector().setSafe(index, value);
8593
}
8694

95+
/**
 * Copies the underlying vector's value at {@code index} into the holder and marks the holder as
 * set.
 *
 * <p>NOTE(review): {@code isSet} is assigned 1 unconditionally; this method does not check
 * whether the slot at {@code index} is defined, so a null slot is not reported as unset here -
 * confirm that callers only use it for defined slots or that the underlying {@code get} handles
 * nulls.
 *
 * @param index the slot to read
 * @param holder the holder that receives the value and the set flag
 */
public void get(int index, UuidHolder holder) {
96+
holder.value = getUnderlyingVector().get(index);
97+
holder.isSet = 1;
98+
}
99+
87100
public class TransferImpl implements TransferPair {
88101
UuidVector to;
89102
ValueVector targetUnderlyingVector;

vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -805,4 +805,29 @@ public void testExtensionType() throws Exception {
805805
assertEquals(u2, uuidVector.getObject(1));
806806
}
807807
}
808+
809+
/**
 * Builds a {@code PromotableWriter} over the {@code UuidVector} data vector of an empty
 * {@code ListVector}, writes two random UUIDs at positions 0 and 1, and checks that the list's
 * data vector holds them at those indices.
 */
@Test
810+
public void testExtensionTypeForList() throws Exception {
811+
try (final ListVector container = ListVector.empty(EMPTY_SCHEMA_PATH, allocator);
812+
final UuidVector v =
813+
(UuidVector) container.addOrGetVector(FieldType.nullable(new UuidType())).getVector();
814+
final PromotableWriter writer = new PromotableWriter(v, container)) {
815+
UUID u1 = UUID.randomUUID();
816+
UUID u2 = UUID.randomUUID();
817+
container.allocateNew();
818+
container.setValueCount(1);
819+
// The factory is registered before the first writeExtension call.
writer.addExtensionTypeWriterFactory(new UuidWriterFactory());
820+
821+
// Positions are set explicitly on the PromotableWriter before each write.
writer.setPosition(0);
822+
writer.writeExtension(u1);
823+
writer.setPosition(1);
824+
writer.writeExtension(u2);
825+
826+
container.setValueCount(2);
827+
828+
// Assertions read the list's data vector directly rather than going through list entries.
UuidVector uuidVector = (UuidVector) container.getDataVector();
829+
assertEquals(u1, uuidVector.getObject(0));
830+
assertEquals(u2, uuidVector.getObject(1));
831+
}
832+
}
808833
}

0 commit comments

Comments
 (0)