apache · kevinjqliu · Jul 22, 2025 · Jul 9, 2025 · Jul 10, 2025 · Jul 19, 2025
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
@@ -1982,11 +1982,13 @@ class StatsAggregator:
     current_min: Any
     current_max: Any
     trunc_length: Optional[int]
+    column_name: Optional[str]
 
-    def __init__(self, iceberg_type: PrimitiveType, physical_type_string: str, trunc_length: Optional[int] = None) -> None:
+    def __init__(self, iceberg_type: PrimitiveType, physical_type_string: str, trunc_length: Optional[int] = None, column_name: Optional[str] = None) -> None:
         self.current_min = None
         self.current_max = None
         self.trunc_length = trunc_length
+        self.column_name = column_name
 
         expected_physical_type = _primitive_to_physical(iceberg_type)
         if expected_physical_type != physical_type_string:
@@ -1998,7 +2000,7 @@ def __init__(self, iceberg_type: PrimitiveType, physical_type_string: str, trunc
                 pass
             else:
                 raise ValueError(
-                    f"Unexpected physical type {physical_type_string} for {iceberg_type}, expected {expected_physical_type}"
+                    f"Unexpected physical type {physical_type_string} for {self.column_name or '<unknown column>'} with iceberg type {iceberg_type}, expected {expected_physical_type}"
                 )
 
         self.primitive_type = iceberg_type
@@ -2405,7 +2407,7 @@ def data_file_statistics_from_parquet_metadata(
 
                     if field_id not in col_aggs:
                         col_aggs[field_id] = StatsAggregator(
-                            stats_col.iceberg_type, statistics.physical_type, stats_col.mode.length
+                            stats_col.iceberg_type, statistics.physical_type, stats_col.mode.length, stats_col.column_name
                         )
 
                     if isinstance(stats_col.iceberg_type, DecimalType) and statistics.physical_type != "FIXED_LEN_BYTE_ARRAY":

diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py
@@ -2054,7 +2054,7 @@ def test_make_compatible_name() -> None:
     ],
 )
 def test_stats_aggregator_update_min(vals: List[Any], primitive_type: PrimitiveType, expected_result: Any) -> None:
-    stats = StatsAggregator(primitive_type, _primitive_to_physical(primitive_type))
+    stats = StatsAggregator(primitive_type, _primitive_to_physical(primitive_type), column_name="test_col")
 
     for val in vals:
         stats.update_min(val)
@@ -2074,7 +2074,7 @@ def test_stats_aggregator_update_min(vals: List[Any], primitive_type: PrimitiveT
     ],
 )
 def test_stats_aggregator_update_max(vals: List[Any], primitive_type: PrimitiveType, expected_result: Any) -> None:
-    stats = StatsAggregator(primitive_type, _primitive_to_physical(primitive_type))
+    stats = StatsAggregator(primitive_type, _primitive_to_physical(primitive_type), column_name="test_col")
 
     for val in vals:
         stats.update_max(val)