From fc7d55d5541f28405c8164d3b0d1cbbbbb725110 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Tue, 1 Apr 2025 11:08:51 +0300 Subject: [PATCH 1/8] GH-87: [Java][Vector] Add ExtensionWriter for List/Struct. Based on changes from https://github.com/apache/arrow/pull/41731. Added an ExtensionWriter with 3 methods: - write method for writing values from Extension holders; - writeExtensionType method for writing values (the argument is an Object because the exact type is not known); - addExtensionTypeFactory method - because the exact vector and value types are unknown, users should create their own extension type vector, a writer for it, and an ExtensionTypeWriterFactory that maps the vector to its writer. --- .../templates/AbstractFieldWriter.java | 22 +++++ .../AbstractPromotableFieldWriter.java | 10 ++ .../main/codegen/templates/BaseWriter.java | 9 ++ .../codegen/templates/PromotableWriter.java | 14 +++ .../main/codegen/templates/StructWriters.java | 26 +++++ .../codegen/templates/UnionListWriter.java | 23 +++++ .../codegen/templates/UnionMapWriter.java | 12 +++ .../main/codegen/templates/UnionWriter.java | 20 ++++ .../impl/ExtensionTypeWriterFactory.java | 23 +++++ .../complex/impl/UnionExtensionWriter.java | 78 +++++++++++++++ .../vector/complex/writer/FieldWriter.java | 2 +- .../arrow/vector/holders/ExtensionHolder.java | 21 ++++ .../apache/arrow/vector/TestStructVector.java | 36 +++++++ .../complex/impl/TestPromotableWriter.java | 95 +++++++++++++++++++ .../vector/types/pojo/TestExtensionType.java | 55 ++++++++++- 15 files changed, 443 insertions(+), 3 deletions(-) create mode 100644 vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java create mode 100644 vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java create mode 100644 vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java diff --git a/vector/src/main/codegen/templates/AbstractFieldWriter.java 
b/vector/src/main/codegen/templates/AbstractFieldWriter.java index cc2cc618d8..d1617c3936 100644 --- a/vector/src/main/codegen/templates/AbstractFieldWriter.java +++ b/vector/src/main/codegen/templates/AbstractFieldWriter.java @@ -107,6 +107,16 @@ public void endEntry() { throw new IllegalStateException(String.format("You tried to end a map entry when you are using a ValueWriter of type %s.", this.getClass().getSimpleName())); } + public void write(T var1) { + this.fail("ExtensionType"); + } + public void writeExtensionType(Object var1) { + this.fail("ExtensionType"); + } + public void addExtensionTypeFactory(T var1) { + this.fail("ExtensionType"); + } + <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> <#assign fields = minor.fields!type.fields /> <#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) /> @@ -241,6 +251,18 @@ public MapWriter map(String name, boolean keysSorted) { fail("Map"); return null; } + + @Override + public ExtensionWriter extension(String name, ArrowType arrowType) { + fail("Extension"); + return null; + } + + @Override + public ExtensionWriter extension(ArrowType arrowType) { + fail("Extension"); + return null; + } <#list vv.types as type><#list type.minor as minor> <#assign lowerName = minor.class?uncap_first /> <#if lowerName == "int" ><#assign lowerName = "integer" /> diff --git a/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java b/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java index 06cb235f7d..951edd5eee 100644 --- a/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java +++ b/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java @@ -293,6 +293,11 @@ public MapWriter map(boolean keysSorted) { return getWriter(MinorType.MAP, new ArrowType.Map(keysSorted)); } + @Override + public ExtensionWriter extension(ArrowType arrowType) { + return getWriter(MinorType.EXTENSIONTYPE).extension(arrowType); + } + 
@Override public StructWriter struct(String name) { return getWriter(MinorType.STRUCT).struct(name); @@ -318,6 +323,11 @@ public MapWriter map(String name, boolean keysSorted) { return getWriter(MinorType.STRUCT).map(name, keysSorted); } + @Override + public ExtensionWriter extension(String name, ArrowType arrowType) { + return getWriter(MinorType.EXTENSIONTYPE).extension(name, arrowType); + } + <#list vv.types as type><#list type.minor as minor> <#assign lowerName = minor.class?uncap_first /> <#if lowerName == "int" ><#assign lowerName = "integer" /> diff --git a/vector/src/main/codegen/templates/BaseWriter.java b/vector/src/main/codegen/templates/BaseWriter.java index e952d46f1f..627381478f 100644 --- a/vector/src/main/codegen/templates/BaseWriter.java +++ b/vector/src/main/codegen/templates/BaseWriter.java @@ -61,6 +61,7 @@ public interface StructWriter extends BaseWriter { void copyReaderToField(String name, FieldReader reader); StructWriter struct(String name); + ExtensionWriter extension(String name, ArrowType arrowType); ListWriter list(String name); ListWriter listView(String name); MapWriter map(String name); @@ -79,6 +80,7 @@ public interface ListWriter extends BaseWriter { ListWriter listView(); MapWriter map(); MapWriter map(boolean keysSorted); + ExtensionWriter extension(ArrowType arrowType); void copyReader(FieldReader reader); <#list vv.types as type><#list type.minor as minor> @@ -101,6 +103,13 @@ public interface MapWriter extends ListWriter { MapWriter value(); } + public interface ExtensionWriter extends BaseWriter { + void writeNull(); + void write(T var1); + void writeExtensionType(Object var1); + void addExtensionTypeFactory(T var1); + } + public interface ScalarWriter extends <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> ${name}Writer, BaseWriter {} diff --git a/vector/src/main/codegen/templates/PromotableWriter.java b/vector/src/main/codegen/templates/PromotableWriter.java index 
c0e686f317..3bed594b3f 100644 --- a/vector/src/main/codegen/templates/PromotableWriter.java +++ b/vector/src/main/codegen/templates/PromotableWriter.java @@ -285,6 +285,9 @@ protected void setWriter(ValueVector v) { case UNION: writer = new UnionWriter((UnionVector) vector, nullableStructWriterFactory); break; + case EXTENSIONTYPE: + writer = new UnionExtensionWriter((ExtensionTypeVector) vector); + break; default: writer = type.getNewFieldWriter(vector); break; @@ -316,6 +319,7 @@ protected boolean requiresArrowType(MinorType type) { || type == MinorType.MAP || type == MinorType.DURATION || type == MinorType.FIXEDSIZEBINARY + || type == MinorType.EXTENSIONTYPE || (type.name().startsWith("TIMESTAMP") && type.name().endsWith("TZ")); } @@ -536,6 +540,16 @@ public void writeLargeVarChar(String value) { getWriter(MinorType.LARGEVARCHAR).writeLargeVarChar(value); } + @Override + public void writeExtensionType(Object value) { + getWriter(MinorType.EXTENSIONTYPE).writeExtensionType(value); + } + + @Override + public void addExtensionTypeFactory(T var1) { + getWriter(MinorType.EXTENSIONTYPE).addExtensionTypeFactory(var1); + } + @Override public void allocate() { getWriter().allocate(); diff --git a/vector/src/main/codegen/templates/StructWriters.java b/vector/src/main/codegen/templates/StructWriters.java index 3e6b9fd773..413f707c70 100644 --- a/vector/src/main/codegen/templates/StructWriters.java +++ b/vector/src/main/codegen/templates/StructWriters.java @@ -83,6 +83,9 @@ public class ${mode}StructWriter extends AbstractFieldWriter { fields.put(handleCase(child.getName()), writer); break; } + case EXTENSIONTYPE: + extension(child.getName(), child.getType()); + break; case UNION: FieldType fieldType = new FieldType(addVectorAsNullable, MinorType.UNION.getType(), null, null); UnionWriter writer = new UnionWriter(container.addOrGet(child.getName(), fieldType, UnionVector.class), getNullableStructWriterFactory()); @@ -159,6 +162,29 @@ public StructWriter struct(String name) { 
return writer; } + @Override + public ExtensionWriter extension(String name, ArrowType arrowType) { + String finalName = handleCase(name); + FieldWriter writer = fields.get(finalName); + if(writer == null){ + int vectorCount=container.size(); + FieldType fieldType = new FieldType(addVectorAsNullable, arrowType, null, null); + ExtensionTypeVector vector = container.addOrGet(name, fieldType, ExtensionTypeVector.class); + writer = new PromotableWriter(vector, container, getNullableStructWriterFactory()); + if(vectorCount != container.size()) { + writer.allocate(); + } + writer.setPosition(idx()); + fields.put(finalName, writer); + } else { + if (writer instanceof PromotableWriter) { + // ensure writers are initialized + ((PromotableWriter)writer).getWriter(MinorType.EXTENSIONTYPE, arrowType); + } + } + return (ExtensionWriter) writer; + } + @Override public void close() throws Exception { clear(); diff --git a/vector/src/main/codegen/templates/UnionListWriter.java b/vector/src/main/codegen/templates/UnionListWriter.java index 3962e1d073..037ab818d4 100644 --- a/vector/src/main/codegen/templates/UnionListWriter.java +++ b/vector/src/main/codegen/templates/UnionListWriter.java @@ -201,6 +201,17 @@ public MapWriter map(String name, boolean keysSorted) { return mapWriter; } + @Override + public ExtensionWriter extension(ArrowType arrowType) { + writer.extension(arrowType); + return writer; + } + @Override + public ExtensionWriter extension(String name, ArrowType arrowType) { + ExtensionWriter extensionWriter = writer.extension(name, arrowType); + return extensionWriter; + } + <#if listName == "LargeList"> @Override public void startList() { @@ -323,6 +334,18 @@ public void writeNull() { } } + @Override + public void writeExtensionType(Object value) { + writer.writeExtensionType(value); + } + @Override + public void addExtensionTypeFactory(T var1) { + writer.addExtensionTypeFactory(var1); + } + public void write(T var1) { + writer.write(var1); + } + <#list vv.types as 
type> <#list type.minor as minor> <#assign name = minor.class?cap_first /> diff --git a/vector/src/main/codegen/templates/UnionMapWriter.java b/vector/src/main/codegen/templates/UnionMapWriter.java index 90b55cb65e..8b2f091215 100644 --- a/vector/src/main/codegen/templates/UnionMapWriter.java +++ b/vector/src/main/codegen/templates/UnionMapWriter.java @@ -231,4 +231,16 @@ public MapWriter map() { return super.map(); } } + + @Override + public ExtensionWriter extension(ArrowType type) { + switch (mode) { + case KEY: + return entryWriter.extension(MapVector.KEY_NAME, type); + case VALUE: + return entryWriter.extension(MapVector.VALUE_NAME, type); + default: + return super.extension(type); + } + } } diff --git a/vector/src/main/codegen/templates/UnionWriter.java b/vector/src/main/codegen/templates/UnionWriter.java index bfe97e2770..272edab17c 100644 --- a/vector/src/main/codegen/templates/UnionWriter.java +++ b/vector/src/main/codegen/templates/UnionWriter.java @@ -213,6 +213,10 @@ public MapWriter asMap(ArrowType arrowType) { return getMapWriter(arrowType); } + private ExtensionWriter getExtensionWriter(ArrowType arrowType) { + throw new UnsupportedOperationException("ExtensionTypes are not supported yet."); + } + BaseWriter getWriter(MinorType minorType) { return getWriter(minorType, null); } @@ -227,6 +231,8 @@ BaseWriter getWriter(MinorType minorType, ArrowType arrowType) { return getListViewWriter(); case MAP: return getMapWriter(arrowType); + case EXTENSIONTYPE: + return getExtensionWriter(arrowType); <#list vv.types as type> <#list type.minor as minor> <#assign name = minor.class?cap_first /> @@ -460,6 +466,20 @@ public MapWriter map(String name, boolean keysSorted) { return getStructWriter().map(name, keysSorted); } + @Override + public ExtensionWriter extension(ArrowType arrowType) { + data.setType(idx(), MinorType.EXTENSIONTYPE); + getListWriter().setPosition(idx()); + return getListWriter().extension(arrowType); + } + + @Override + public ExtensionWriter 
extension(String name, ArrowType arrowType) { + data.setType(idx(), MinorType.EXTENSIONTYPE); + getStructWriter().setPosition(idx()); + return getStructWriter().extension(name, arrowType); + } + <#list vv.types as type><#list type.minor as minor> <#assign lowerName = minor.class?uncap_first /> <#if lowerName == "int" ><#assign lowerName = "integer" /> diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java new file mode 100644 index 0000000000..6c77af88f3 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.ExtensionTypeVector; + +public interface ExtensionTypeWriterFactory { + T getWriterImpl(ExtensionTypeVector vector); +} \ No newline at end of file diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java new file mode 100644 index 0000000000..9c7035b00c --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.ExtensionTypeVector; +import org.apache.arrow.vector.holders.ExtensionHolder; +import org.apache.arrow.vector.types.pojo.Field; + +public class UnionExtensionWriter extends AbstractFieldWriter { + protected ExtensionTypeVector vector; + protected AbstractFieldWriter writer; + + public UnionExtensionWriter(ExtensionTypeVector vector) { + this.vector = vector; + } + + @Override + public void allocate() { + vector.allocateNew(); + } + + @Override + public void clear() { + vector.clear(); + } + + @Override + public int getValueCapacity() { + return vector.getValueCapacity(); + } + + @Override + public Field getField() { + return vector.getField(); + } + + @Override + public void close() throws Exception { + vector.close(); + } + + @Override + public void writeExtensionType(Object var1) { + this.writer.writeExtensionType(var1); + } + + @Override + public void addExtensionTypeFactory(S var1) { + this.writer = var1.getWriterImpl(vector); + this.writer.setPosition(idx()); + } + + public void write(T var1) { + this.writer.write(var1); + } + + @Override + public void setPosition(int index) { + super.setPosition(index); + if (this.writer != null) { + this.writer.setPosition(index); + } + } +} \ No newline at end of file diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java b/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java index 949eb35d8e..d4246176fd 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java @@ -25,7 +25,7 @@ * Composite of all writer types. Writers are convenience classes for incrementally adding values to * {@linkplain org.apache.arrow.vector.ValueVector}s. 
*/ -public interface FieldWriter extends StructWriter, ListWriter, MapWriter, ScalarWriter { +public interface FieldWriter extends StructWriter, ListWriter, MapWriter, ScalarWriter, ExtensionWriter { void allocate(); void clear(); diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java new file mode 100644 index 0000000000..ead880130d --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.vector.holders; + +public abstract class ExtensionHolder implements ValueHolder { + public int isSet; +} \ No newline at end of file diff --git a/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java b/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java index 4ef0fbe2d9..ef60bfc7f4 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java @@ -37,9 +37,11 @@ import org.apache.arrow.vector.holders.ComplexHolder; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.Struct; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.TestExtensionType; import org.apache.arrow.vector.util.TransferPair; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -336,6 +338,40 @@ public void testGetTransferPairWithFieldAndCallBack() { } } + @Test + public void testStructVectorWithExtensionTypes() { + TestExtensionType.UuidType uuidType = new TestExtensionType.UuidType(); + Field uuidField = new Field("struct_child", FieldType.nullable(uuidType), null); + Field structField = + new Field("struct", FieldType.nullable(new ArrowType.Struct()), List.of(uuidField)); + StructVector s1 = new StructVector(structField, allocator, null); + StructVector s2 = (StructVector) structField.createVector(allocator); + s1.close(); + s2.close(); + } + + @Test + public void testStructVectorTransferPairWithExtensionType() { + TestExtensionType.UuidType uuidType = new TestExtensionType.UuidType(); + Field uuidField = new Field("uuid_child", FieldType.nullable(uuidType), null); + Field structField = + new Field("struct", FieldType.nullable(new ArrowType.Struct()), List.of(uuidField)); + + 
StructVector s1 = (StructVector) structField.createVector(allocator); + TestExtensionType.UuidVector uuidVector = + s1.addOrGet("uuid_child", FieldType.nullable(uuidType), TestExtensionType.UuidVector.class); + s1.setValueCount(1); + uuidVector.set(0, new UUID(1, 2)); + s1.setIndexDefined(0); + + TransferPair tp = s1.getTransferPair(structField, allocator); + final StructVector toVector = (StructVector) tp.getTo(); + assertEquals(s1.getField(), toVector.getField()); + + s1.close(); + toVector.close(); + } + private StructVector simpleStructVector(String name, BufferAllocator allocator) { final String INT_COL = "struct_int_child"; final String FLT_COL = "struct_flt_child"; diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java b/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java index a791e55135..36ba75b9d9 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java @@ -26,10 +26,12 @@ import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; import java.util.Objects; +import java.util.UUID; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.DecimalVector; import org.apache.arrow.vector.DirtyRootAllocator; +import org.apache.arrow.vector.ExtensionTypeVector; import org.apache.arrow.vector.LargeVarBinaryVector; import org.apache.arrow.vector.LargeVarCharVector; import org.apache.arrow.vector.VarBinaryVector; @@ -52,6 +54,8 @@ import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.TestExtensionType.UuidType; +import org.apache.arrow.vector.types.pojo.TestExtensionType.UuidVector; import org.apache.arrow.vector.util.DecimalUtility; import 
org.apache.arrow.vector.util.Text; import org.junit.jupiter.api.AfterEach; @@ -776,4 +780,95 @@ public void testPromoteToUnionFromDecimal() throws Exception { assertEquals(1, intHolder.value); } } + + @Test + public void testExtensionType() throws Exception { + try (final NonNullableStructVector container = + NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); + final UuidVector v = + container.addOrGet("uuid", FieldType.nullable(new UuidType()), UuidVector.class); + final PromotableWriter writer = new PromotableWriter(v, container)) { + UUID u1 = UUID.randomUUID(); + UUID u2 = UUID.randomUUID(); + container.allocateNew(); + container.setValueCount(1); + writer.addExtensionTypeFactory(new UuidWriterFactory()); + + writer.setPosition(0); + writer.writeExtensionType(u1); + writer.setPosition(1); + writer.writeExtensionType(u2); + + container.setValueCount(2); + + UuidVector uuidVector = (UuidVector) container.getChild("uuid"); + assertEquals(u1, uuidVector.getObject(0)); + assertEquals(u2, uuidVector.getObject(1)); + } + } + + public class UuidWriterFactory implements ExtensionTypeWriterFactory { + + @Override + public AbstractFieldWriter getWriterImpl(ExtensionTypeVector extensionTypeVector) { + if (extensionTypeVector instanceof UuidVector) { + return new UuidWriterImpl((UuidVector) extensionTypeVector); + } + return null; + } + } + + public class UuidWriterImpl extends AbstractFieldWriter { + private final UuidVector vector; + + public UuidWriterImpl(UuidVector vector) { + this.vector = vector; + } + + @Override + public Field getField() { + return this.vector.getField(); + } + + @Override + public int getValueCapacity() { + return this.vector.getValueCapacity(); + } + + @Override + public void allocate() { + this.vector.allocateNew(); + } + + @Override + public void close() { + this.vector.close(); + } + + @Override + public void clear() { + this.vector.clear(); + } + + @Override + protected int idx() { + return super.idx(); + } + + @Override + public 
void writeExtensionType(Object var1) { + UUID uuid = (UUID) var1; + ByteBuffer bb = ByteBuffer.allocate(16); + bb.putLong(uuid.getMostSignificantBits()); + bb.putLong(uuid.getLeastSignificantBits()); + this.vector.setSafe(this.idx(), bb.array()); + this.vector.setValueCount(this.idx() + 1); + } + + @Override + public void writeNull() { + this.vector.setNull(this.idx()); + this.vector.setValueCount(this.idx() + 1); + } + } } diff --git a/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java b/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java index 8f54a6e5d7..9ab681789a 100644 --- a/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java +++ b/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java @@ -295,7 +295,7 @@ public void testVectorCompare() { } } - static class UuidType extends ExtensionType { + public static class UuidType extends ExtensionType { @Override public ArrowType storageType() { @@ -332,12 +332,14 @@ public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocato } } - static class UuidVector extends ExtensionTypeVector + public static class UuidVector extends ExtensionTypeVector implements ValueIterableVector { + private final Field field; public UuidVector( String name, BufferAllocator allocator, FixedSizeBinaryVector underlyingVector) { super(name, allocator, underlyingVector); + this.field = new Field(name, FieldType.nullable(new UuidType()), null); } @Override @@ -362,6 +364,55 @@ public void set(int index, UUID uuid) { bb.putLong(uuid.getLeastSignificantBits()); getUnderlyingVector().set(index, bb.array()); } + + @Override + public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { + getUnderlyingVector() + .copyFromSafe(fromIndex, thisIndex, ((UuidVector) from).getUnderlyingVector()); + } + + @Override + public Field getField() { + return field; + } + + @Override + public TransferPair 
makeTransferPair(ValueVector to) { + return new TransferImpl((UuidVector) to); + } + + public void setSafe(int index, byte[] value) { + getUnderlyingVector().setIndexDefined(index); + getUnderlyingVector().setSafe(index, value); + } + + public class TransferImpl implements TransferPair { + UuidVector to; + ValueVector targetUnderlyingVector; + TransferPair tp; + + public TransferImpl(UuidVector to) { + this.to = to; + targetUnderlyingVector = this.to.getUnderlyingVector(); + tp = getUnderlyingVector().makeTransferPair(targetUnderlyingVector); + } + + public UuidVector getTo() { + return this.to; + } + + public void transfer() { + tp.transfer(); + } + + public void splitAndTransfer(int startIndex, int length) { + tp.splitAndTransfer(startIndex, length); + } + + public void copyValueSafe(int fromIndex, int toIndex) { + tp.copyValueSafe(fromIndex, toIndex); + } + } } static class LocationType extends ExtensionType { From 9f96325cc3369b029736d0203d19a98499e4ea64 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Thu, 3 Apr 2025 19:36:25 +0300 Subject: [PATCH 2/8] GH-87: fixed build --- .../arrow/vector/complex/impl/ExtensionTypeWriterFactory.java | 2 +- .../apache/arrow/vector/complex/impl/UnionExtensionWriter.java | 2 +- .../java/org/apache/arrow/vector/holders/ExtensionHolder.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java index 6c77af88f3..19f2c296ae 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java @@ -20,4 +20,4 @@ public interface ExtensionTypeWriterFactory { T getWriterImpl(ExtensionTypeVector vector); -} \ No newline at end of file +} diff --git 
a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java index 9c7035b00c..1e7bbd9ca8 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java @@ -75,4 +75,4 @@ public void setPosition(int index) { this.writer.setPosition(index); } } -} \ No newline at end of file +} diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java index ead880130d..aff139a13d 100644 --- a/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java +++ b/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java @@ -18,4 +18,4 @@ public abstract class ExtensionHolder implements ValueHolder { public int isSet; -} \ No newline at end of file +} From 867c797822e5dd696f12d786527d08dd271b4a1a Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Fri, 4 Apr 2025 09:57:05 +0300 Subject: [PATCH 3/8] GH-87: fixed build --- .../impl/AbstractExtensionTypeWriter.java | 71 ++++++++ .../impl/ExtensionTypeWriterFactory.java | 6 + .../vector/complex/writer/FieldWriter.java | 4 +- .../arrow/vector/holders/ExtensionHolder.java | 1 + .../apache/arrow/vector/TestStructVector.java | 11 +- .../complex/impl/TestPromotableWriter.java | 49 +----- .../vector/types/pojo/TestExtensionType.java | 122 +------------- .../vector/types/pojo/TestUuidVector.java | 152 ++++++++++++++++++ 8 files changed, 247 insertions(+), 169 deletions(-) create mode 100644 vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractExtensionTypeWriter.java create mode 100644 vector/src/test/java/org/apache/arrow/vector/types/pojo/TestUuidVector.java diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractExtensionTypeWriter.java 
b/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractExtensionTypeWriter.java new file mode 100644 index 0000000000..6c6c29dcf5 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractExtensionTypeWriter.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.ExtensionTypeVector; +import org.apache.arrow.vector.types.pojo.Field; + +/** + * Base {@link AbstractFieldWriter} class for an {@link + * org.apache.arrow.vector.ExtensionTypeVector}. + * + * @param <T> a specific {@link ExtensionTypeVector}. 
+ */ +public class AbstractExtensionTypeWriter + extends AbstractFieldWriter { + protected final T vector; + + public AbstractExtensionTypeWriter(T vector) { + this.vector = vector; + } + + @Override + public Field getField() { + return this.vector.getField(); + } + + @Override + public int getValueCapacity() { + return this.vector.getValueCapacity(); + } + + @Override + public void allocate() { + this.vector.allocateNew(); + } + + @Override + public void close() { + this.vector.close(); + } + + @Override + public void clear() { + this.vector.clear(); + } + + @Override + protected int idx() { + return super.idx(); + } + + @Override + public void writeNull() { + this.vector.setNull(this.idx()); + this.vector.setValueCount(this.idx() + 1); + } +} diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java index 19f2c296ae..ad6f29eb5c 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java @@ -18,6 +18,12 @@ import org.apache.arrow.vector.ExtensionTypeVector; +/** + * A factory for {@link ExtensionTypeWriter} instances. The factory allows configuring the writer + * implementation for a specific ExtensionTypeVector. + * + * @param <T> the writer implementation for a specific {@link ExtensionTypeVector}. 
+ */ public interface ExtensionTypeWriterFactory { T getWriterImpl(ExtensionTypeVector vector); } diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java b/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java index d4246176fd..51bf106685 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java @@ -16,6 +16,7 @@ */ package org.apache.arrow.vector.complex.writer; +import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.ScalarWriter; @@ -25,7 +26,8 @@ * Composite of all writer types. Writers are convenience classes for incrementally adding values to * {@linkplain org.apache.arrow.vector.ValueVector}s. */ -public interface FieldWriter extends StructWriter, ListWriter, MapWriter, ScalarWriter, ExtensionWriter { +public interface FieldWriter + extends StructWriter, ListWriter, MapWriter, ScalarWriter, ExtensionWriter { void allocate(); void clear(); diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java index aff139a13d..fc7ed85878 100644 --- a/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java +++ b/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java @@ -16,6 +16,7 @@ */ package org.apache.arrow.vector.holders; +/** Base {@link ValueHolder} class for a {@link org.apache.arrow.vector.ExtensionTypeVector}. 
*/ public abstract class ExtensionHolder implements ValueHolder { public int isSet; } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java b/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java index ef60bfc7f4..b17f0a9530 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java @@ -26,6 +26,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.UUID; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.AbstractStructVector; import org.apache.arrow.vector.complex.ListVector; @@ -41,7 +42,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Struct; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.TestExtensionType; +import org.apache.arrow.vector.types.pojo.TestUuidVector; import org.apache.arrow.vector.util.TransferPair; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -340,7 +341,7 @@ public void testGetTransferPairWithFieldAndCallBack() { @Test public void testStructVectorWithExtensionTypes() { - TestExtensionType.UuidType uuidType = new TestExtensionType.UuidType(); + TestUuidVector.UuidType uuidType = new TestUuidVector.UuidType(); Field uuidField = new Field("struct_child", FieldType.nullable(uuidType), null); Field structField = new Field("struct", FieldType.nullable(new ArrowType.Struct()), List.of(uuidField)); @@ -352,14 +353,14 @@ public void testStructVectorWithExtensionTypes() { @Test public void testStructVectorTransferPairWithExtensionType() { - TestExtensionType.UuidType uuidType = new TestExtensionType.UuidType(); + TestUuidVector.UuidType uuidType = new TestUuidVector.UuidType(); Field uuidField = new Field("uuid_child", FieldType.nullable(uuidType), null); Field structField = new Field("struct", 
FieldType.nullable(new ArrowType.Struct()), List.of(uuidField)); StructVector s1 = (StructVector) structField.createVector(allocator); - TestExtensionType.UuidVector uuidVector = - s1.addOrGet("uuid_child", FieldType.nullable(uuidType), TestExtensionType.UuidVector.class); + TestUuidVector.UuidVector uuidVector = + s1.addOrGet("uuid_child", FieldType.nullable(uuidType), TestUuidVector.UuidVector.class); s1.setValueCount(1); uuidVector.set(0, new UUID(1, 2)); s1.setIndexDefined(0); diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java b/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java index 36ba75b9d9..f7cd506ef5 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java @@ -54,8 +54,8 @@ import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.TestExtensionType.UuidType; -import org.apache.arrow.vector.types.pojo.TestExtensionType.UuidVector; +import org.apache.arrow.vector.types.pojo.TestUuidVector.UuidType; +import org.apache.arrow.vector.types.pojo.TestUuidVector.UuidVector; import org.apache.arrow.vector.util.DecimalUtility; import org.apache.arrow.vector.util.Text; import org.junit.jupiter.api.AfterEach; @@ -784,7 +784,7 @@ public void testPromoteToUnionFromDecimal() throws Exception { @Test public void testExtensionType() throws Exception { try (final NonNullableStructVector container = - NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); + NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); final UuidVector v = container.addOrGet("uuid", FieldType.nullable(new UuidType()), UuidVector.class); final PromotableWriter writer = new PromotableWriter(v, container)) { @@ -818,41 +818,10 @@ 
public AbstractFieldWriter getWriterImpl(ExtensionTypeVector extensionTypeVector } } - public class UuidWriterImpl extends AbstractFieldWriter { - private final UuidVector vector; + public class UuidWriterImpl extends AbstractExtensionTypeWriter { public UuidWriterImpl(UuidVector vector) { - this.vector = vector; - } - - @Override - public Field getField() { - return this.vector.getField(); - } - - @Override - public int getValueCapacity() { - return this.vector.getValueCapacity(); - } - - @Override - public void allocate() { - this.vector.allocateNew(); - } - - @Override - public void close() { - this.vector.close(); - } - - @Override - public void clear() { - this.vector.clear(); - } - - @Override - protected int idx() { - return super.idx(); + super(vector); } @Override @@ -861,13 +830,7 @@ public void writeExtensionType(Object var1) { ByteBuffer bb = ByteBuffer.allocate(16); bb.putLong(uuid.getMostSignificantBits()); bb.putLong(uuid.getLeastSignificantBits()); - this.vector.setSafe(this.idx(), bb.array()); - this.vector.setValueCount(this.idx() + 1); - } - - @Override - public void writeNull() { - this.vector.setNull(this.idx()); + ((UuidVector) this.vector).setSafe(this.idx(), bb.array()); this.vector.setValueCount(this.idx() + 1); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java b/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java index 9ab681789a..0c4feb5fb1 100644 --- a/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java +++ b/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java @@ -50,6 +50,8 @@ import org.apache.arrow.vector.ipc.ArrowFileWriter; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; +import org.apache.arrow.vector.types.pojo.TestUuidVector.UuidType; +import org.apache.arrow.vector.types.pojo.TestUuidVector.UuidVector; import 
org.apache.arrow.vector.util.VectorBatchAppender; import org.apache.arrow.vector.validate.ValidateVectorVisitor; import org.junit.jupiter.api.Test; @@ -295,126 +297,6 @@ public void testVectorCompare() { } } - public static class UuidType extends ExtensionType { - - @Override - public ArrowType storageType() { - return new ArrowType.FixedSizeBinary(16); - } - - @Override - public String extensionName() { - return "uuid"; - } - - @Override - public boolean extensionEquals(ExtensionType other) { - return other instanceof UuidType; - } - - @Override - public ArrowType deserialize(ArrowType storageType, String serializedData) { - if (!storageType.equals(storageType())) { - throw new UnsupportedOperationException( - "Cannot construct UuidType from underlying type " + storageType); - } - return new UuidType(); - } - - @Override - public String serialize() { - return ""; - } - - @Override - public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) { - return new UuidVector(name, allocator, new FixedSizeBinaryVector(name, allocator, 16)); - } - } - - public static class UuidVector extends ExtensionTypeVector - implements ValueIterableVector { - private final Field field; - - public UuidVector( - String name, BufferAllocator allocator, FixedSizeBinaryVector underlyingVector) { - super(name, allocator, underlyingVector); - this.field = new Field(name, FieldType.nullable(new UuidType()), null); - } - - @Override - public UUID getObject(int index) { - final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index)); - return new UUID(bb.getLong(), bb.getLong()); - } - - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - return getUnderlyingVector().hashCode(index, hasher); - } - - public void set(int index, UUID uuid) { - ByteBuffer bb = ByteBuffer.allocate(16); - bb.putLong(uuid.getMostSignificantBits()); - 
bb.putLong(uuid.getLeastSignificantBits()); - getUnderlyingVector().set(index, bb.array()); - } - - @Override - public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { - getUnderlyingVector() - .copyFromSafe(fromIndex, thisIndex, ((UuidVector) from).getUnderlyingVector()); - } - - @Override - public Field getField() { - return field; - } - - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((UuidVector) to); - } - - public void setSafe(int index, byte[] value) { - getUnderlyingVector().setIndexDefined(index); - getUnderlyingVector().setSafe(index, value); - } - - public class TransferImpl implements TransferPair { - UuidVector to; - ValueVector targetUnderlyingVector; - TransferPair tp; - - public TransferImpl(UuidVector to) { - this.to = to; - targetUnderlyingVector = this.to.getUnderlyingVector(); - tp = getUnderlyingVector().makeTransferPair(targetUnderlyingVector); - } - - public UuidVector getTo() { - return this.to; - } - - public void transfer() { - tp.transfer(); - } - - public void splitAndTransfer(int startIndex, int length) { - tp.splitAndTransfer(startIndex, length); - } - - public void copyValueSafe(int fromIndex, int toIndex) { - tp.copyValueSafe(fromIndex, toIndex); - } - } - } - static class LocationType extends ExtensionType { @Override diff --git a/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestUuidVector.java b/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestUuidVector.java new file mode 100644 index 0000000000..feb6cf8765 --- /dev/null +++ b/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestUuidVector.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.types.pojo; + +import java.nio.ByteBuffer; +import java.util.UUID; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.vector.ExtensionTypeVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.FixedSizeBinaryVector; +import org.apache.arrow.vector.ValueIterableVector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; +import org.apache.arrow.vector.util.TransferPair; + +public class TestUuidVector { + + public static class UuidType extends ExtensionType { + + @Override + public ArrowType storageType() { + return new ArrowType.FixedSizeBinary(16); + } + + @Override + public String extensionName() { + return "uuid"; + } + + @Override + public boolean extensionEquals(ExtensionType other) { + return other instanceof UuidType; + } + + @Override + public ArrowType deserialize(ArrowType storageType, String serializedData) { + if (!storageType.equals(storageType())) { + throw new UnsupportedOperationException( + "Cannot construct UuidType from underlying type " + storageType); + } + return new UuidType(); + } + + @Override + public String serialize() { + return ""; + } + + @Override + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) { + return 
new UuidVector(name, allocator, new FixedSizeBinaryVector(name, allocator, 16)); + } + } + + public static class UuidVector extends ExtensionTypeVector + implements ValueIterableVector { + private final Field field; + + public UuidVector( + String name, BufferAllocator allocator, FixedSizeBinaryVector underlyingVector) { + super(name, allocator, underlyingVector); + this.field = new Field(name, FieldType.nullable(new UuidType()), null); + } + + @Override + public UUID getObject(int index) { + final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index)); + return new UUID(bb.getLong(), bb.getLong()); + } + + @Override + public int hashCode(int index) { + return hashCode(index, null); + } + + @Override + public int hashCode(int index, ArrowBufHasher hasher) { + return getUnderlyingVector().hashCode(index, hasher); + } + + public void set(int index, UUID uuid) { + ByteBuffer bb = ByteBuffer.allocate(16); + bb.putLong(uuid.getMostSignificantBits()); + bb.putLong(uuid.getLeastSignificantBits()); + getUnderlyingVector().set(index, bb.array()); + } + + @Override + public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { + getUnderlyingVector() + .copyFromSafe(fromIndex, thisIndex, ((UuidVector) from).getUnderlyingVector()); + } + + @Override + public Field getField() { + return field; + } + + @Override + public TransferPair makeTransferPair(ValueVector to) { + return new TransferImpl((UuidVector) to); + } + + public void setSafe(int index, byte[] value) { + getUnderlyingVector().setIndexDefined(index); + getUnderlyingVector().setSafe(index, value); + } + + public class TransferImpl implements TransferPair { + UuidVector to; + ValueVector targetUnderlyingVector; + TransferPair tp; + + public TransferImpl(UuidVector to) { + this.to = to; + targetUnderlyingVector = this.to.getUnderlyingVector(); + tp = getUnderlyingVector().makeTransferPair(targetUnderlyingVector); + } + + public UuidVector getTo() { + return this.to; + } + + public void 
transfer() { + tp.transfer(); + } + + public void splitAndTransfer(int startIndex, int length) { + tp.splitAndTransfer(startIndex, length); + } + + public void copyValueSafe(int fromIndex, int toIndex) { + tp.copyValueSafe(fromIndex, toIndex); + } + } + } +} From 4ab857cbe01a14789bd154e5f837a24874a0a83e Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Fri, 4 Apr 2025 15:45:08 +0300 Subject: [PATCH 4/8] GH-87: fixed comments --- .../templates/AbstractFieldWriter.java | 4 +- .../main/codegen/templates/BaseWriter.java | 28 +++- .../codegen/templates/PromotableWriter.java | 4 +- .../codegen/templates/UnionListWriter.java | 4 +- .../impl/ExtensionTypeWriterFactory.java | 17 +- .../complex/impl/UnionExtensionWriter.java | 11 +- .../apache/arrow/vector/TestStructVector.java | 10 +- .../org/apache/arrow/vector/UuidVector.java | 114 +++++++++++++ .../complex/impl/TestPromotableWriter.java | 33 +--- .../complex/impl/UuidWriterFactory.java | 31 ++++ .../vector/complex/impl/UuidWriterImpl.java | 49 ++++++ .../complex/writer/TestSimpleWriter.java | 20 +++ .../arrow/vector/holder/UuidHolder.java | 23 +++ .../vector/types/pojo/TestExtensionType.java | 3 +- .../vector/types/pojo/TestUuidVector.java | 152 ------------------ .../arrow/vector/types/pojo/UuidType.java | 60 +++++++ 16 files changed, 355 insertions(+), 208 deletions(-) create mode 100644 vector/src/test/java/org/apache/arrow/vector/UuidVector.java create mode 100644 vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterFactory.java create mode 100644 vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java create mode 100644 vector/src/test/java/org/apache/arrow/vector/holder/UuidHolder.java delete mode 100644 vector/src/test/java/org/apache/arrow/vector/types/pojo/TestUuidVector.java create mode 100644 vector/src/test/java/org/apache/arrow/vector/types/pojo/UuidType.java diff --git a/vector/src/main/codegen/templates/AbstractFieldWriter.java 
b/vector/src/main/codegen/templates/AbstractFieldWriter.java index d1617c3936..cd5af5994a 100644 --- a/vector/src/main/codegen/templates/AbstractFieldWriter.java +++ b/vector/src/main/codegen/templates/AbstractFieldWriter.java @@ -107,13 +107,13 @@ public void endEntry() { throw new IllegalStateException(String.format("You tried to end a map entry when you are using a ValueWriter of type %s.", this.getClass().getSimpleName())); } - public void write(T var1) { + public void write(ExtensionHolder var1) { this.fail("ExtensionType"); } public void writeExtensionType(Object var1) { this.fail("ExtensionType"); } - public void addExtensionTypeFactory(T var1) { + public void addExtensionTypeFactory(ExtensionTypeWriterFactory var1) { this.fail("ExtensionType"); } diff --git a/vector/src/main/codegen/templates/BaseWriter.java b/vector/src/main/codegen/templates/BaseWriter.java index 627381478f..45aeac4665 100644 --- a/vector/src/main/codegen/templates/BaseWriter.java +++ b/vector/src/main/codegen/templates/BaseWriter.java @@ -104,10 +104,32 @@ public interface MapWriter extends ListWriter { } public interface ExtensionWriter extends BaseWriter { + + /** + * Writes a null value. + */ void writeNull(); - void write(T var1); - void writeExtensionType(Object var1); - void addExtensionTypeFactory(T var1); + + /** + * Writes the value from the given extension holder. + * + * @param holder the extension holder to write + */ + void write(ExtensionHolder holder); + + /** + * Writes the given extension type value. + * + * @param value the extension type value to write + */ + void writeExtensionType(Object value); + + /** + * Adds the given extension type factory. This factory allows configuring writer implementations for specific ExtensionTypeVector.
+ * + * @param factory the extension type factory to add + */ + void addExtensionTypeFactory(ExtensionTypeWriterFactory factory); } public interface ScalarWriter extends diff --git a/vector/src/main/codegen/templates/PromotableWriter.java b/vector/src/main/codegen/templates/PromotableWriter.java index 3bed594b3f..9a11d248f6 100644 --- a/vector/src/main/codegen/templates/PromotableWriter.java +++ b/vector/src/main/codegen/templates/PromotableWriter.java @@ -546,8 +546,8 @@ public void writeExtensionType(Object value) { } @Override - public void addExtensionTypeFactory(T var1) { - getWriter(MinorType.EXTENSIONTYPE).addExtensionTypeFactory(var1); + public void addExtensionTypeFactory(ExtensionTypeWriterFactory factory) { + getWriter(MinorType.EXTENSIONTYPE).addExtensionTypeFactory(factory); } @Override diff --git a/vector/src/main/codegen/templates/UnionListWriter.java b/vector/src/main/codegen/templates/UnionListWriter.java index 037ab818d4..b350fce41c 100644 --- a/vector/src/main/codegen/templates/UnionListWriter.java +++ b/vector/src/main/codegen/templates/UnionListWriter.java @@ -339,10 +339,10 @@ public void writeExtensionType(Object value) { writer.writeExtensionType(value); } @Override - public void addExtensionTypeFactory(T var1) { + public void addExtensionTypeFactory(ExtensionTypeWriterFactory var1) { writer.addExtensionTypeFactory(var1); } - public void write(T var1) { + public void write(ExtensionHolder var1) { writer.write(var1); } diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java index ad6f29eb5c..09f0314c5f 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java @@ -17,13 +17,22 @@ package org.apache.arrow.vector.complex.impl; import 
org.apache.arrow.vector.ExtensionTypeVector; +import org.apache.arrow.vector.complex.writer.FieldWriter; /** - * A factory for {@link ExtensionTypeWriter} instances. The factory allow to configure writer - * implementation for specific ExtensionTypeVector. + * A factory interface for creating instances of {@link ExtensionTypeWriter}. This factory allows + * configuring writer implementations for specific {@link ExtensionTypeVector}. * - * @param writer implementation for specific {@link ExtensionTypeVector}. + * @param the type of writer implementation for a specific {@link ExtensionTypeVector}. */ -public interface ExtensionTypeWriterFactory { +public interface ExtensionTypeWriterFactory { + + /** + * Returns an instance of the writer implementation for the given {@link ExtensionTypeVector}. + * + * @param vector the {@link ExtensionTypeVector} for which the writer implementation is to be + * returned. + * @return an instance of the writer implementation for the given {@link ExtensionTypeVector}. 
+ */ T getWriterImpl(ExtensionTypeVector vector); } diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java index 1e7bbd9ca8..b9df3addc7 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java @@ -17,12 +17,13 @@ package org.apache.arrow.vector.complex.impl; import org.apache.arrow.vector.ExtensionTypeVector; +import org.apache.arrow.vector.complex.writer.FieldWriter; import org.apache.arrow.vector.holders.ExtensionHolder; import org.apache.arrow.vector.types.pojo.Field; public class UnionExtensionWriter extends AbstractFieldWriter { protected ExtensionTypeVector vector; - protected AbstractFieldWriter writer; + protected FieldWriter writer; public UnionExtensionWriter(ExtensionTypeVector vector) { this.vector = vector; @@ -59,13 +60,13 @@ public void writeExtensionType(Object var1) { } @Override - public void addExtensionTypeFactory(S var1) { - this.writer = var1.getWriterImpl(vector); + public void addExtensionTypeFactory(ExtensionTypeWriterFactory factory) { + this.writer = factory.getWriterImpl(vector); this.writer.setPosition(idx()); } - public void write(T var1) { - this.writer.write(var1); + public void write(ExtensionHolder holder) { + this.writer.write(holder); } @Override diff --git a/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java b/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java index b17f0a9530..d40af9ae89 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java @@ -42,7 +42,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Struct; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import 
org.apache.arrow.vector.types.pojo.TestUuidVector; +import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.TransferPair; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -341,7 +341,7 @@ public void testGetTransferPairWithFieldAndCallBack() { @Test public void testStructVectorWithExtensionTypes() { - TestUuidVector.UuidType uuidType = new TestUuidVector.UuidType(); + UuidType uuidType = new UuidType(); Field uuidField = new Field("struct_child", FieldType.nullable(uuidType), null); Field structField = new Field("struct", FieldType.nullable(new ArrowType.Struct()), List.of(uuidField)); @@ -353,14 +353,14 @@ public void testStructVectorWithExtensionTypes() { @Test public void testStructVectorTransferPairWithExtensionType() { - TestUuidVector.UuidType uuidType = new TestUuidVector.UuidType(); + UuidType uuidType = new UuidType(); Field uuidField = new Field("uuid_child", FieldType.nullable(uuidType), null); Field structField = new Field("struct", FieldType.nullable(new ArrowType.Struct()), List.of(uuidField)); StructVector s1 = (StructVector) structField.createVector(allocator); - TestUuidVector.UuidVector uuidVector = - s1.addOrGet("uuid_child", FieldType.nullable(uuidType), TestUuidVector.UuidVector.class); + UuidVector uuidVector = + s1.addOrGet("uuid_child", FieldType.nullable(uuidType), UuidVector.class); s1.setValueCount(1); uuidVector.set(0, new UUID(1, 2)); s1.setIndexDefined(0); diff --git a/vector/src/test/java/org/apache/arrow/vector/UuidVector.java b/vector/src/test/java/org/apache/arrow/vector/UuidVector.java new file mode 100644 index 0000000000..954d0fb2ea --- /dev/null +++ b/vector/src/test/java/org/apache/arrow/vector/UuidVector.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector; + +import java.nio.ByteBuffer; +import java.util.UUID; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; +import org.apache.arrow.vector.util.TransferPair; + +public class UuidVector extends ExtensionTypeVector + implements ValueIterableVector { + private final Field field; + + public UuidVector( + String name, BufferAllocator allocator, FixedSizeBinaryVector underlyingVector) { + super(name, allocator, underlyingVector); + this.field = new Field(name, FieldType.nullable(new UuidType()), null); + } + + public UuidVector(String name, BufferAllocator allocator) { + super(name, allocator, new FixedSizeBinaryVector(name, allocator, 16)); + this.field = new Field(name, FieldType.nullable(new UuidType()), null); + } + + @Override + public UUID getObject(int index) { + final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index)); + return new UUID(bb.getLong(), bb.getLong()); + } + + @Override + public int hashCode(int index) { + return hashCode(index, null); + } + + @Override + public int hashCode(int index, ArrowBufHasher hasher) { + return getUnderlyingVector().hashCode(index, hasher); + } + + public void set(int index, UUID 
uuid) { + ByteBuffer bb = ByteBuffer.allocate(16); + bb.putLong(uuid.getMostSignificantBits()); + bb.putLong(uuid.getLeastSignificantBits()); + getUnderlyingVector().set(index, bb.array()); + } + + @Override + public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { + getUnderlyingVector() + .copyFromSafe(fromIndex, thisIndex, ((UuidVector) from).getUnderlyingVector()); + } + + @Override + public Field getField() { + return field; + } + + @Override + public TransferPair makeTransferPair(ValueVector to) { + return new TransferImpl((UuidVector) to); + } + + public void setSafe(int index, byte[] value) { + getUnderlyingVector().setIndexDefined(index); + getUnderlyingVector().setSafe(index, value); + } + + public class TransferImpl implements TransferPair { + UuidVector to; + ValueVector targetUnderlyingVector; + TransferPair tp; + + public TransferImpl(UuidVector to) { + this.to = to; + targetUnderlyingVector = this.to.getUnderlyingVector(); + tp = getUnderlyingVector().makeTransferPair(targetUnderlyingVector); + } + + public UuidVector getTo() { + return this.to; + } + + public void transfer() { + tp.transfer(); + } + + public void splitAndTransfer(int startIndex, int length) { + tp.splitAndTransfer(startIndex, length); + } + + public void copyValueSafe(int fromIndex, int toIndex) { + tp.copyValueSafe(fromIndex, toIndex); + } + } +} diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java b/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java index f7cd506ef5..5dc6de8bfd 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java @@ -31,9 +31,9 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.DecimalVector; import org.apache.arrow.vector.DirtyRootAllocator; -import org.apache.arrow.vector.ExtensionTypeVector; import 
org.apache.arrow.vector.LargeVarBinaryVector; import org.apache.arrow.vector.LargeVarCharVector; +import org.apache.arrow.vector.UuidVector; import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.complex.ListVector; @@ -54,8 +54,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.TestUuidVector.UuidType; -import org.apache.arrow.vector.types.pojo.TestUuidVector.UuidVector; +import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.DecimalUtility; import org.apache.arrow.vector.util.Text; import org.junit.jupiter.api.AfterEach; @@ -806,32 +805,4 @@ public void testExtensionType() throws Exception { assertEquals(u2, uuidVector.getObject(1)); } } - - public class UuidWriterFactory implements ExtensionTypeWriterFactory { - - @Override - public AbstractFieldWriter getWriterImpl(ExtensionTypeVector extensionTypeVector) { - if (extensionTypeVector instanceof UuidVector) { - return new UuidWriterImpl((UuidVector) extensionTypeVector); - } - return null; - } - } - - public class UuidWriterImpl extends AbstractExtensionTypeWriter { - - public UuidWriterImpl(UuidVector vector) { - super(vector); - } - - @Override - public void writeExtensionType(Object var1) { - UUID uuid = (UUID) var1; - ByteBuffer bb = ByteBuffer.allocate(16); - bb.putLong(uuid.getMostSignificantBits()); - bb.putLong(uuid.getLeastSignificantBits()); - ((UuidVector) this.vector).setSafe(this.idx(), bb.array()); - this.vector.setValueCount(this.idx() + 1); - } - } } diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterFactory.java b/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterFactory.java new file mode 100644 index 0000000000..1b1bf4e6e4 --- /dev/null +++ 
b/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterFactory.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.ExtensionTypeVector; +import org.apache.arrow.vector.UuidVector; + +public class UuidWriterFactory implements ExtensionTypeWriterFactory { + + @Override + public AbstractFieldWriter getWriterImpl(ExtensionTypeVector extensionTypeVector) { + if (extensionTypeVector instanceof UuidVector) { + return new UuidWriterImpl((UuidVector) extensionTypeVector); + } + return null; + } +} diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java b/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java new file mode 100644 index 0000000000..0f8960139d --- /dev/null +++ b/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import java.nio.ByteBuffer; +import java.util.UUID; +import org.apache.arrow.vector.UuidVector; +import org.apache.arrow.vector.holder.UuidHolder; +import org.apache.arrow.vector.holders.ExtensionHolder; + +public class UuidWriterImpl extends AbstractExtensionTypeWriter { + + public UuidWriterImpl(UuidVector vector) { + super(vector); + } + + @Override + public void writeExtensionType(Object value) { + UUID uuid = (UUID) value; + ByteBuffer bb = ByteBuffer.allocate(16); + bb.putLong(uuid.getMostSignificantBits()); + bb.putLong(uuid.getLeastSignificantBits()); + ((UuidVector) this.vector).setSafe(this.idx(), bb.array()); + this.vector.setValueCount(this.idx() + 1); + } + + @Override + public void write(ExtensionHolder holder) { + if (holder instanceof UuidHolder) { + UuidHolder uuidHolder = (UuidHolder) holder; + ((UuidVector) this.vector).setSafe(this.idx(), uuidHolder.value); + this.vector.setValueCount(this.idx() + 1); + } + } +} diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java index 5bb5962704..bf1b9b0dfa 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java @@ -20,16 +20,20 @@ 
import static org.junit.jupiter.api.Assertions.assertEquals; import java.nio.ByteBuffer; +import java.util.UUID; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.LargeVarBinaryVector; import org.apache.arrow.vector.LargeVarCharVector; +import org.apache.arrow.vector.UuidVector; import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.complex.impl.LargeVarBinaryWriterImpl; import org.apache.arrow.vector.complex.impl.LargeVarCharWriterImpl; +import org.apache.arrow.vector.complex.impl.UuidWriterImpl; import org.apache.arrow.vector.complex.impl.VarBinaryWriterImpl; import org.apache.arrow.vector.complex.impl.VarCharWriterImpl; +import org.apache.arrow.vector.holder.UuidHolder; import org.apache.arrow.vector.util.Text; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -184,4 +188,20 @@ public void testWriteTextToLargeVarChar() throws Exception { assertEquals(input, result); } } + + @Test + public void testWriteToExtensionVector() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator); + UuidWriterImpl writer = new UuidWriterImpl(vector)) { + UUID uuid = UUID.randomUUID(); + ByteBuffer bb = ByteBuffer.allocate(16); + bb.putLong(uuid.getMostSignificantBits()); + bb.putLong(uuid.getLeastSignificantBits()); + UuidHolder holder = new UuidHolder(); + holder.value = bb.array(); + writer.write(holder); + UUID result = vector.getObject(0); + assertEquals(uuid, result); + } + } } diff --git a/vector/src/test/java/org/apache/arrow/vector/holder/UuidHolder.java b/vector/src/test/java/org/apache/arrow/vector/holder/UuidHolder.java new file mode 100644 index 0000000000..207b0951a7 --- /dev/null +++ b/vector/src/test/java/org/apache/arrow/vector/holder/UuidHolder.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.holder; + +import org.apache.arrow.vector.holders.ExtensionHolder; + +public class UuidHolder extends ExtensionHolder { + public byte[] value; +} diff --git a/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java b/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java index 0c4feb5fb1..d24708d66c 100644 --- a/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java +++ b/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java @@ -41,6 +41,7 @@ import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.FixedSizeBinaryVector; import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.UuidVector; import org.apache.arrow.vector.ValueIterableVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.compare.Range; @@ -50,8 +51,6 @@ import org.apache.arrow.vector.ipc.ArrowFileWriter; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; -import org.apache.arrow.vector.types.pojo.TestUuidVector.UuidType; -import org.apache.arrow.vector.types.pojo.TestUuidVector.UuidVector; import 
org.apache.arrow.vector.util.VectorBatchAppender; import org.apache.arrow.vector.validate.ValidateVectorVisitor; import org.junit.jupiter.api.Test; diff --git a/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestUuidVector.java b/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestUuidVector.java deleted file mode 100644 index feb6cf8765..0000000000 --- a/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestUuidVector.java +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.types.pojo; - -import java.nio.ByteBuffer; -import java.util.UUID; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.vector.ExtensionTypeVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.ValueIterableVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; -import org.apache.arrow.vector.util.TransferPair; - -public class TestUuidVector { - - public static class UuidType extends ExtensionType { - - @Override - public ArrowType storageType() { - return new ArrowType.FixedSizeBinary(16); - } - - @Override - public String extensionName() { - return "uuid"; - } - - @Override - public boolean extensionEquals(ExtensionType other) { - return other instanceof UuidType; - } - - @Override - public ArrowType deserialize(ArrowType storageType, String serializedData) { - if (!storageType.equals(storageType())) { - throw new UnsupportedOperationException( - "Cannot construct UuidType from underlying type " + storageType); - } - return new UuidType(); - } - - @Override - public String serialize() { - return ""; - } - - @Override - public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) { - return new UuidVector(name, allocator, new FixedSizeBinaryVector(name, allocator, 16)); - } - } - - public static class UuidVector extends ExtensionTypeVector - implements ValueIterableVector { - private final Field field; - - public UuidVector( - String name, BufferAllocator allocator, FixedSizeBinaryVector underlyingVector) { - super(name, allocator, underlyingVector); - this.field = new Field(name, FieldType.nullable(new UuidType()), null); - } - - @Override - public UUID getObject(int index) { - final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index)); - return new 
UUID(bb.getLong(), bb.getLong()); - } - - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - return getUnderlyingVector().hashCode(index, hasher); - } - - public void set(int index, UUID uuid) { - ByteBuffer bb = ByteBuffer.allocate(16); - bb.putLong(uuid.getMostSignificantBits()); - bb.putLong(uuid.getLeastSignificantBits()); - getUnderlyingVector().set(index, bb.array()); - } - - @Override - public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { - getUnderlyingVector() - .copyFromSafe(fromIndex, thisIndex, ((UuidVector) from).getUnderlyingVector()); - } - - @Override - public Field getField() { - return field; - } - - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((UuidVector) to); - } - - public void setSafe(int index, byte[] value) { - getUnderlyingVector().setIndexDefined(index); - getUnderlyingVector().setSafe(index, value); - } - - public class TransferImpl implements TransferPair { - UuidVector to; - ValueVector targetUnderlyingVector; - TransferPair tp; - - public TransferImpl(UuidVector to) { - this.to = to; - targetUnderlyingVector = this.to.getUnderlyingVector(); - tp = getUnderlyingVector().makeTransferPair(targetUnderlyingVector); - } - - public UuidVector getTo() { - return this.to; - } - - public void transfer() { - tp.transfer(); - } - - public void splitAndTransfer(int startIndex, int length) { - tp.splitAndTransfer(startIndex, length); - } - - public void copyValueSafe(int fromIndex, int toIndex) { - tp.copyValueSafe(fromIndex, toIndex); - } - } - } -} diff --git a/vector/src/test/java/org/apache/arrow/vector/types/pojo/UuidType.java b/vector/src/test/java/org/apache/arrow/vector/types/pojo/UuidType.java new file mode 100644 index 0000000000..5e2bd8881b --- /dev/null +++ b/vector/src/test/java/org/apache/arrow/vector/types/pojo/UuidType.java @@ -0,0 +1,60 @@ +/* + * Licensed to 
the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.types.pojo; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.FixedSizeBinaryVector; +import org.apache.arrow.vector.UuidVector; +import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; + +public class UuidType extends ExtensionType { + + @Override + public ArrowType storageType() { + return new ArrowType.FixedSizeBinary(16); + } + + @Override + public String extensionName() { + return "uuid"; + } + + @Override + public boolean extensionEquals(ExtensionType other) { + return other instanceof UuidType; + } + + @Override + public ArrowType deserialize(ArrowType storageType, String serializedData) { + if (!storageType.equals(storageType())) { + throw new UnsupportedOperationException( + "Cannot construct UuidType from underlying type " + storageType); + } + return new UuidType(); + } + + @Override + public String serialize() { + return ""; + } + + @Override + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) { + return new UuidVector(name, allocator, new FixedSizeBinaryVector(name, allocator, 16)); + } +} From 
3e88e768819babc90c0809b75b342733cfa861ea Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Wed, 9 Apr 2025 09:26:35 +0300 Subject: [PATCH 5/8] GH-87: fixed comments --- .../codegen/templates/AbstractFieldWriter.java | 4 ++-- .../src/main/codegen/templates/BaseWriter.java | 6 +++--- .../main/codegen/templates/PromotableWriter.java | 8 ++++---- .../main/codegen/templates/UnionListWriter.java | 8 ++++---- .../complex/impl/UnionExtensionWriter.java | 6 +++--- .../java/org/apache/arrow/vector/UuidVector.java | 2 +- .../complex/impl/TestPromotableWriter.java | 6 +++--- .../vector/complex/impl/UuidWriterImpl.java | 16 +++++++--------- 8 files changed, 27 insertions(+), 29 deletions(-) diff --git a/vector/src/main/codegen/templates/AbstractFieldWriter.java b/vector/src/main/codegen/templates/AbstractFieldWriter.java index cd5af5994a..952b7590d0 100644 --- a/vector/src/main/codegen/templates/AbstractFieldWriter.java +++ b/vector/src/main/codegen/templates/AbstractFieldWriter.java @@ -110,10 +110,10 @@ public void endEntry() { public void write(ExtensionHolder var1) { this.fail("ExtensionType"); } - public void writeExtensionType(Object var1) { + public void writeExtension(Object var1) { this.fail("ExtensionType"); } - public void addExtensionTypeFactory(ExtensionTypeWriterFactory var1) { + public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) { this.fail("ExtensionType"); } diff --git a/vector/src/main/codegen/templates/BaseWriter.java b/vector/src/main/codegen/templates/BaseWriter.java index 45aeac4665..c0146fc382 100644 --- a/vector/src/main/codegen/templates/BaseWriter.java +++ b/vector/src/main/codegen/templates/BaseWriter.java @@ -111,7 +111,7 @@ public interface ExtensionWriter extends BaseWriter { void writeNull(); /** - * Writes vlaue from the given extension holder. + * Writes value from the given extension holder. 
* * @param holder the extension holder to write */ @@ -122,14 +122,14 @@ public interface ExtensionWriter extends BaseWriter { * * @param value the extension type value to write */ - void writeExtensionType(Object value); + void writeExtension(Object value); /** * Adds the given extension type factory. This factory allows configuring writer implementations for specific ExtensionTypeVector. * * @param factory the extension type factory to add */ - void addExtensionTypeFactory(ExtensionTypeWriterFactory factory); + void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory); } public interface ScalarWriter extends diff --git a/vector/src/main/codegen/templates/PromotableWriter.java b/vector/src/main/codegen/templates/PromotableWriter.java index 9a11d248f6..8d7d57bb9d 100644 --- a/vector/src/main/codegen/templates/PromotableWriter.java +++ b/vector/src/main/codegen/templates/PromotableWriter.java @@ -541,13 +541,13 @@ public void writeLargeVarChar(String value) { } @Override - public void writeExtensionType(Object value) { - getWriter(MinorType.EXTENSIONTYPE).writeExtensionType(value); + public void writeExtension(Object value) { + getWriter(MinorType.EXTENSIONTYPE).writeExtension(value); } @Override - public void addExtensionTypeFactory(ExtensionTypeWriterFactory factory) { - getWriter(MinorType.EXTENSIONTYPE).addExtensionTypeFactory(factory); + public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory) { + getWriter(MinorType.EXTENSIONTYPE).addExtensionTypeWriterFactory(factory); } @Override diff --git a/vector/src/main/codegen/templates/UnionListWriter.java b/vector/src/main/codegen/templates/UnionListWriter.java index b350fce41c..9424533f29 100644 --- a/vector/src/main/codegen/templates/UnionListWriter.java +++ b/vector/src/main/codegen/templates/UnionListWriter.java @@ -335,12 +335,12 @@ public void writeNull() { } @Override - public void writeExtensionType(Object value) { - writer.writeExtensionType(value); + public void 
writeExtension(Object value) { + writer.writeExtension(value); } @Override - public void addExtensionTypeFactory(ExtensionTypeWriterFactory var1) { - writer.addExtensionTypeFactory(var1); + public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) { + writer.addExtensionTypeWriterFactory(var1); } public void write(ExtensionHolder var1) { writer.write(var1); diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java index b9df3addc7..d341384bd9 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java @@ -55,12 +55,12 @@ public void close() throws Exception { } @Override - public void writeExtensionType(Object var1) { - this.writer.writeExtensionType(var1); + public void writeExtension(Object var1) { + this.writer.writeExtension(var1); } @Override - public void addExtensionTypeFactory(ExtensionTypeWriterFactory factory) { + public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory) { this.writer = factory.getWriterImpl(vector); this.writer.setPosition(idx()); } diff --git a/vector/src/test/java/org/apache/arrow/vector/UuidVector.java b/vector/src/test/java/org/apache/arrow/vector/UuidVector.java index 954d0fb2ea..5c90d45f60 100644 --- a/vector/src/test/java/org/apache/arrow/vector/UuidVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/UuidVector.java @@ -34,7 +34,7 @@ public UuidVector( super(name, allocator, underlyingVector); this.field = new Field(name, FieldType.nullable(new UuidType()), null); } - + public UuidVector(String name, BufferAllocator allocator) { super(name, allocator, new FixedSizeBinaryVector(name, allocator, 16)); this.field = new Field(name, FieldType.nullable(new UuidType()), null); diff --git 
a/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java b/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java index 5dc6de8bfd..1556852c5a 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java @@ -791,12 +791,12 @@ public void testExtensionType() throws Exception { UUID u2 = UUID.randomUUID(); container.allocateNew(); container.setValueCount(1); - writer.addExtensionTypeFactory(new UuidWriterFactory()); + writer.addExtensionTypeWriterFactory(new UuidWriterFactory()); writer.setPosition(0); - writer.writeExtensionType(u1); + writer.writeExtension(u1); writer.setPosition(1); - writer.writeExtensionType(u2); + writer.writeExtension(u2); container.setValueCount(2); diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java b/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java index 0f8960139d..227a3b5aae 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java @@ -22,28 +22,26 @@ import org.apache.arrow.vector.holder.UuidHolder; import org.apache.arrow.vector.holders.ExtensionHolder; -public class UuidWriterImpl extends AbstractExtensionTypeWriter { +public class UuidWriterImpl extends AbstractExtensionTypeWriter { public UuidWriterImpl(UuidVector vector) { super(vector); } @Override - public void writeExtensionType(Object value) { + public void writeExtension(Object value) { UUID uuid = (UUID) value; ByteBuffer bb = ByteBuffer.allocate(16); bb.putLong(uuid.getMostSignificantBits()); bb.putLong(uuid.getLeastSignificantBits()); - ((UuidVector) this.vector).setSafe(this.idx(), bb.array()); - this.vector.setValueCount(this.idx() + 1); + vector.setSafe(idx(), bb.array()); + vector.setValueCount(this.idx() + 
1); } @Override public void write(ExtensionHolder holder) { - if (holder instanceof UuidHolder) { - UuidHolder uuidHolder = (UuidHolder) holder; - ((UuidVector) this.vector).setSafe(this.idx(), uuidHolder.value); - this.vector.setValueCount(this.idx() + 1); - } + UuidHolder uuidHolder = (UuidHolder) holder; + vector.setSafe(this.idx(), uuidHolder.value); + vector.setValueCount(this.idx() + 1); } } From 4df283a246e05e74ccce002f397b874917576068 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Wed, 9 Apr 2025 09:43:32 +0300 Subject: [PATCH 6/8] GH-87: removed unneeded override --- .../vector/complex/impl/AbstractExtensionTypeWriter.java | 9 ++------- .../apache/arrow/vector/complex/impl/UuidWriterImpl.java | 8 ++++---- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractExtensionTypeWriter.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractExtensionTypeWriter.java index 6c6c29dcf5..fccff6c21f 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractExtensionTypeWriter.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractExtensionTypeWriter.java @@ -58,14 +58,9 @@ public void clear() { this.vector.clear(); } - @Override - protected int idx() { - return super.idx(); - } - @Override public void writeNull() { - this.vector.setNull(this.idx()); - this.vector.setValueCount(this.idx() + 1); + this.vector.setNull(getPosition()); + this.vector.setValueCount(getPosition() + 1); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java b/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java index 227a3b5aae..68029b1df5 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java @@ -34,14 +34,14 @@ public void writeExtension(Object value) { ByteBuffer bb = 
ByteBuffer.allocate(16); bb.putLong(uuid.getMostSignificantBits()); bb.putLong(uuid.getLeastSignificantBits()); - vector.setSafe(idx(), bb.array()); - vector.setValueCount(this.idx() + 1); + vector.setSafe(getPosition(), bb.array()); + vector.setValueCount(getPosition() + 1); } @Override public void write(ExtensionHolder holder) { UuidHolder uuidHolder = (UuidHolder) holder; - vector.setSafe(this.idx(), uuidHolder.value); - vector.setValueCount(this.idx() + 1); + vector.setSafe(getPosition(), uuidHolder.value); + vector.setValueCount(getPosition() + 1); } } From 9c8a022b6ed4d92d2927d4da9c973f40c58660bf Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Wed, 9 Apr 2025 14:24:50 +0300 Subject: [PATCH 7/8] GH-87: fixed build --- vector/src/main/codegen/templates/BaseWriter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vector/src/main/codegen/templates/BaseWriter.java b/vector/src/main/codegen/templates/BaseWriter.java index c0146fc382..78da7fddc3 100644 --- a/vector/src/main/codegen/templates/BaseWriter.java +++ b/vector/src/main/codegen/templates/BaseWriter.java @@ -104,7 +104,7 @@ public interface MapWriter extends ListWriter { } public interface ExtensionWriter extends BaseWriter { - + /** * Writes a null value. */ @@ -123,7 +123,7 @@ public interface ExtensionWriter extends BaseWriter { * @param value the extension type value to write */ void writeExtension(Object value); - + /** * Adds the given extension type factory. This factory allows configuring writer implementations for specific ExtensionTypeVector. 
* From f8799eaabd0fc5389ac55d6417a8369d69d1dcfd Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Wed, 9 Apr 2025 14:35:28 +0300 Subject: [PATCH 8/8] GH-87: fixed build --- vector/src/main/codegen/templates/AbstractFieldWriter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vector/src/main/codegen/templates/AbstractFieldWriter.java b/vector/src/main/codegen/templates/AbstractFieldWriter.java index 952b7590d0..ae5b97faef 100644 --- a/vector/src/main/codegen/templates/AbstractFieldWriter.java +++ b/vector/src/main/codegen/templates/AbstractFieldWriter.java @@ -251,7 +251,7 @@ public MapWriter map(String name, boolean keysSorted) { fail("Map"); return null; } - + @Override public ExtensionWriter extension(String name, ArrowType arrowType) { fail("Extension");