Skip to content

Commit d80c284

Browse files
committed
Set field-id when needed
Fixes #1798
1 parent 1a5e32a commit d80c284

File tree

2 files changed

+36
-1
lines changed

2 files changed

+36
-1
lines changed

pyiceberg/io/pyarrow.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1777,7 +1777,7 @@ def struct(
17771777
field_arrays.append(array)
17781778
fields.append(self._construct_field(field, array.type))
17791779
elif field.optional:
1780-
arrow_type = schema_to_pyarrow(field.field_type, include_field_ids=False)
1780+
arrow_type = schema_to_pyarrow(field.field_type, include_field_ids=self._include_field_ids)
17811781
field_arrays.append(pa.nulls(len(struct_array), type=arrow_type))
17821782
fields.append(self._construct_field(field, arrow_type))
17831783
else:

tests/integration/test_writes/test_writes.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
DateType,
5353
DoubleType,
5454
IntegerType,
55+
ListType,
5556
LongType,
5657
NestedField,
5758
StringType,
@@ -1647,3 +1648,37 @@ def test_abort_table_transaction_on_exception(
16471648

16481649
# Validate the transaction is aborted and no partial update is applied
16491650
assert len(tbl.scan().to_pandas()) == table_size # type: ignore
1651+
1652+
1653+
def test_write_optional_list(session_catalog: Catalog) -> None:
1654+
identifier = "default.test_write_optional_list"
1655+
schema = Schema(
1656+
NestedField(field_id=1, name="name", field_type=StringType(), required=False),
1657+
NestedField(
1658+
field_id=3,
1659+
name="my_list",
1660+
field_type=ListType(element_id=45, element=StringType(), element_required=False),
1661+
required=False,
1662+
),
1663+
)
1664+
session_catalog.create_table_if_not_exists(identifier, schema)
1665+
1666+
df_1 = pa.Table.from_pylist(
1667+
[
1668+
{"name": "one", "my_list": ["test"]},
1669+
{"name": "another", "my_list": ["test"]},
1670+
]
1671+
)
1672+
session_catalog.load_table(identifier).append(df_1)
1673+
1674+
assert len(session_catalog.load_table(identifier).scan().to_arrow()) == 2
1675+
1676+
df_2 = pa.Table.from_pylist(
1677+
[
1678+
{"name": "one"},
1679+
{"name": "another"},
1680+
]
1681+
)
1682+
session_catalog.load_table(identifier).append(df_2)
1683+
1684+
assert len(session_catalog.load_table(identifier).scan().to_arrow()) == 4

0 commit comments

Comments
 (0)