Skip to content

Commit 02bf9fb

Browse files
committed
add column name to the error message in StatsAggregator
1 parent 9abec7e commit 02bf9fb

File tree

2 files changed: 7 additions (+7) and 5 deletions (-5).

2 files changed: 7 additions (+7) and 5 deletions (-5).

pyiceberg/io/pyarrow.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1982,11 +1982,13 @@ class StatsAggregator:
19821982
current_min: Any
19831983
current_max: Any
19841984
trunc_length: Optional[int]
1985+
column_name: Optional[str]
19851986

1986-
def __init__(self, iceberg_type: PrimitiveType, physical_type_string: str, trunc_length: Optional[int] = None) -> None:
1987+
def __init__(self, iceberg_type: PrimitiveType, physical_type_string: str, trunc_length: Optional[int] = None, column_name: Optional[str] = None) -> None:
19871988
self.current_min = None
19881989
self.current_max = None
19891990
self.trunc_length = trunc_length
1991+
self.column_name = column_name
19901992

19911993
expected_physical_type = _primitive_to_physical(iceberg_type)
19921994
if expected_physical_type != physical_type_string:
@@ -1998,7 +2000,7 @@ def __init__(self, iceberg_type: PrimitiveType, physical_type_string: str, trunc
19982000
pass
19992001
else:
20002002
raise ValueError(
2001-
f"Unexpected physical type {physical_type_string} for {iceberg_type}, expected {expected_physical_type}"
2003+
f"Unexpected physical type {physical_type_string} for {self.column_name or '<unknown column>'} with iceberg type {iceberg_type}, expected {expected_physical_type}"
20022004
)
20032005

20042006
self.primitive_type = iceberg_type
@@ -2405,7 +2407,7 @@ def data_file_statistics_from_parquet_metadata(
24052407

24062408
if field_id not in col_aggs:
24072409
col_aggs[field_id] = StatsAggregator(
2408-
stats_col.iceberg_type, statistics.physical_type, stats_col.mode.length
2410+
stats_col.iceberg_type, statistics.physical_type, stats_col.mode.length, stats_col.column_name
24092411
)
24102412

24112413
if isinstance(stats_col.iceberg_type, DecimalType) and statistics.physical_type != "FIXED_LEN_BYTE_ARRAY":

tests/io/test_pyarrow.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2054,7 +2054,7 @@ def test_make_compatible_name() -> None:
20542054
],
20552055
)
20562056
def test_stats_aggregator_update_min(vals: List[Any], primitive_type: PrimitiveType, expected_result: Any) -> None:
2057-
stats = StatsAggregator(primitive_type, _primitive_to_physical(primitive_type))
2057+
stats = StatsAggregator(primitive_type, _primitive_to_physical(primitive_type), column_name="test_col")
20582058

20592059
for val in vals:
20602060
stats.update_min(val)
@@ -2074,7 +2074,7 @@ def test_stats_aggregator_update_min(vals: List[Any], primitive_type: PrimitiveT
20742074
],
20752075
)
20762076
def test_stats_aggregator_update_max(vals: List[Any], primitive_type: PrimitiveType, expected_result: Any) -> None:
2077-
stats = StatsAggregator(primitive_type, _primitive_to_physical(primitive_type))
2077+
stats = StatsAggregator(primitive_type, _primitive_to_physical(primitive_type), column_name="test_col")
20782078

20792079
for val in vals:
20802080
stats.update_max(val)

0 commit comments

Comments (0)