allow field names with dots and add test

geruh · geruh · commit 665d543816a6 · 2025-07-15T12:27:00.000-07:00
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
@@ -2770,19 +2770,29 @@ def _determine_partitions(spec: PartitionSpec, schema: Schema, arrow_table: pa.T
 
 
 def _get_field_from_arrow_table(arrow_table: pa.Table, field_path: str) -> pa.Array:
-    """Get a nested field from an Arrow table struct type field using dot notation.
+    """Return a column, or a field nested inside a struct column, from an Arrow table.
+
+    The path is resolved in two steps, in this order:
+    1. An exact match against the table's column names, so literal dots work (e.g., "some.id")
+    2. Otherwise a dot-separated path into a struct column (e.g., "bar.baz" reads "baz" from "bar")
 
     Args:
         arrow_table: The Arrow table containing the field
-        field_path: Dot-separated field path (e.g., "name" or "bar.baz.timestamp")
+        field_path: Exact column name, or dot-separated path whose first part is a struct column
 
     Returns:
-        The unnested field as a PyArrow Array
+        The matched column, or the nested struct field, as a PyArrow Array
+
+    Raises:
+        KeyError: If the first path part is not a column (nested misses surface from pc.struct_field; TODO confirm its error type)
     """
-    if "." not in field_path:
+    # Exact column-name match comes first so a top-level name with literal dots wins over nested lookup
+    if field_path in arrow_table.column_names:
         return arrow_table[field_path]
 
+    # No exact match: split on every dot, so a dotted name is only supported as a whole top-level column
     path_parts = field_path.split(".")
+    # The first part names the top-level struct column (e.g., "bar" in "bar.baz")
     field_array = arrow_table[path_parts[0]]
-    field_array = pc.struct_field(field_array, path_parts[1:])
-    return field_array
+    # The remaining parts are passed to struct_field as the path inside that struct
+    return pc.struct_field(field_array, path_parts[1:])
diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py
@@ -2416,6 +2416,36 @@ def test_partition_for_deep_nested_field() -> None:
     assert partition_values == {"data-1", "data-2"}
 
 
+def test_inspect_partition_for_nested_field(catalog: InMemoryCatalog) -> None:
+    schema = Schema(
+        NestedField(id=1, name="foo", field_type=StringType(), required=True),
+        NestedField(
+            id=2,
+            name="bar",
+            field_type=StructType(
+                NestedField(id=3, name="baz", field_type=StringType(), required=False),  # partition source (source_id=3)
+                NestedField(id=4, name="qux", field_type=IntegerType(), required=False),
+            ),
+            required=True,
+        ),
+    )
+    spec = PartitionSpec(PartitionField(source_id=3, field_id=1000, transform=IdentityTransform(), name="part"))
+    catalog.create_namespace("default")
+    table = catalog.create_table("default.test_partition_in_struct", schema=schema, partition_spec=spec)
+    test_data = [  # two rows with distinct bar.baz values
+        {"foo": "a", "bar": {"baz": "data-a", "qux": 1}},
+        {"foo": "b", "bar": {"baz": "data-b", "qux": 2}},
+    ]
+
+    arrow_table = pa.Table.from_pylist(test_data, schema=table.schema().as_arrow())
+    table.append(arrow_table)
+    partitions_table = table.inspect.partitions()  # read back through the inspect API rather than the writer internals
+    partitions = partitions_table["partition"].to_pylist()
+
+    assert len(partitions) == 2  # one entry per distinct bar.baz value
+    assert {part["part"] for part in partitions} == {"data-a", "data-b"}  # keyed by the partition field name "part"
+
+
 def test_identity_partition_on_multi_columns() -> None:
     test_pa_schema = pa.schema([("born_year", pa.int64()), ("n_legs", pa.int64()), ("animal", pa.string())])
     test_schema = Schema(