Skip to content

Commit 0eb69a3

Browse files
Tidy up union handling (will need revising along with union vector itself)
1 parent e0550da commit 0eb69a3

File tree

1 file changed

+12
-11
lines changed

1 file changed

+12
-11
lines changed

adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowUtils.java

Lines changed: 12 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -181,15 +181,18 @@ private static Consumer createConsumer(
181181
switch (type) {
182182
case UNION:
183183
boolean nullableUnion = schema.getTypes().stream().anyMatch(t -> t.getType() == Schema.Type.NULL);
184-
if (config.isHandleNullable() && schema.getTypes().size() == 2 && nullableUnion) {
184+
if (schema.getTypes().size() == 2 && nullableUnion && config.isHandleNullable()) {
185+
// For a simple nullable (null | type), interpret the union as a single nullable field.
186+
// Requires setting handleNullable in the config, otherwise fall back on the literal interpretation.
185187
int nullIndex = schema.getTypes().get(0).getType() == Schema.Type.NULL ? 0 : 1;
186188
int childIndex = nullIndex == 0 ? 1 : 0;
187189
Schema childSchema = schema.getTypes().get(childIndex);
188190
Consumer<?> childConsumer = createConsumer(childSchema, name, true, config, consumerVector);
189191
consumer = new AvroNullableConsumer<>(childConsumer, nullIndex);
190192
}
191193
else {
192-
consumer = createUnionConsumer(schema, name, config, consumerVector);
194+
// Literal interpretation of a union, which may or may not include a null element.
195+
consumer = createUnionConsumer(schema, name, nullableUnion, config, consumerVector);
193196
}
194197
break;
195198
case ARRAY:
@@ -548,19 +551,20 @@ private static Field avroSchemaToField(
548551
switch (type) {
549552
case UNION:
550553
boolean nullableUnion = schema.getTypes().stream().anyMatch(t -> t.getType() == Schema.Type.NULL);
551-
// For a simple nullable (null | type), just call avroSchemaToField on the child with nullable = true
552-
if (config.isHandleNullable() && schema.getTypes().size() == 2 && nullableUnion) {
554+
if (nullableUnion && schema.getTypes().size() == 2 && config.isHandleNullable()) {
555+
// For a simple nullable (null | type), interpret the union as a single nullable field.
556+
// Requires setting handleNullable in the config, otherwise fall back on the literal interpretation.
553557
Schema childSchema = schema.getTypes().get(0).getType() == Schema.Type.NULL
554558
? schema.getTypes().get(1)
555559
: schema.getTypes().get(0);
556560
return avroSchemaToField(childSchema, name, true, config, externalProps);
557561
}
558562
else {
559-
// TODO: Add null type to union if any children are nullable
563+
// Literal interpretation of a union, which may or may not include a null element.
560564
for (int i = 0; i < schema.getTypes().size(); i++) {
561565
Schema childSchema = schema.getTypes().get(i);
562566
// Union child vector should use default name
563-
children.add(avroSchemaToField(childSchema, null, config));
567+
children.add(avroSchemaToField(childSchema, null, nullableUnion, config, null));
564568
}
565569
fieldType =
566570
createFieldType(new ArrowType.Union(UnionMode.Sparse, null), schema, externalProps);
@@ -814,12 +818,9 @@ private static Consumer createMapConsumer(
814818
}
815819

816820
private static Consumer createUnionConsumer(
817-
Schema schema, String name, AvroToArrowConfig config, FieldVector consumerVector) {
821+
Schema schema, String name, boolean nullableUnion, AvroToArrowConfig config, FieldVector consumerVector) {
818822
final int size = schema.getTypes().size();
819823

820-
final boolean nullable =
821-
schema.getTypes().stream().anyMatch(t -> t.getType() == Schema.Type.NULL);
822-
823824
UnionVector unionVector;
824825
if (consumerVector == null) {
825826
final Field field = avroSchemaToField(schema, name, config);
@@ -836,7 +837,7 @@ private static Consumer createUnionConsumer(
836837
for (int i = 0; i < size; i++) {
837838
FieldVector child = childVectors.get(i);
838839
Schema subSchema = schema.getTypes().get(i);
839-
Consumer delegate = createConsumer(subSchema, subSchema.getName(), nullable, config, child);
840+
Consumer delegate = createConsumer(subSchema, subSchema.getName(), nullableUnion, config, child);
840841
delegates[i] = delegate;
841842
types[i] = child.getMinorType();
842843
}

0 commit comments

Comments
 (0)