Skip to content

Commit 7dca26a

Browse files
author
redpheonixx
committed
trim whitespace
1 parent bc01e57 commit 7dca26a

File tree

2 files changed

+18
-17
lines changed

2 files changed

+18
-17
lines changed

pyiceberg/io/pyarrow.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,7 @@
3939
from concurrent.futures import Future
4040
from copy import copy
4141
from dataclasses import dataclass
42+
from decimal import Decimal
4243
from enum import Enum
4344
from functools import lru_cache, singledispatch
4445
from typing import (
@@ -177,7 +178,6 @@
177178
from pyiceberg.utils.properties import get_first_property_value, property_as_bool, property_as_int
178179
from pyiceberg.utils.singleton import Singleton
179180
from pyiceberg.utils.truncate import truncate_upper_bound_binary_string, truncate_upper_bound_text_string
180-
from decimal import Decimal
181181

182182
if TYPE_CHECKING:
183183
from pyiceberg.table import FileScanTask, WriteTask
@@ -1877,11 +1877,7 @@ def visit_fixed(self, fixed_type: FixedType) -> str:
18771877
return "FIXED_LEN_BYTE_ARRAY"
18781878

18791879
def visit_decimal(self, decimal_type: DecimalType) -> str:
1880-
return (
1881-
"INT32" if decimal_type.precision <= 9
1882-
else "INT64" if decimal_type.precision <= 18
1883-
else "FIXED_LEN_BYTE_ARRAY"
1884-
)
1880+
return "INT32" if decimal_type.precision <= 9 else "INT64" if decimal_type.precision <= 18 else "FIXED_LEN_BYTE_ARRAY"
18851881

18861882
def visit_boolean(self, boolean_type: BooleanType) -> str:
18871883
return "BOOLEAN"
@@ -2356,11 +2352,15 @@ def data_file_statistics_from_parquet_metadata(
23562352
)
23572353

23582354
if isinstance(stats_col.iceberg_type, DecimalType) and statistics.physical_type != "FIXED_LEN_BYTE_ARRAY":
2359-
precision= stats_col.iceberg_type.precision
2355+
precision = stats_col.iceberg_type.precision
23602356
scale = stats_col.iceberg_type.scale
23612357
decimal_type = pa.decimal128(precision, scale)
2362-
col_aggs[field_id].update_min(pa.array([Decimal(statistics.min_raw)/ (10 ** scale)], decimal_type)[0].as_py())
2363-
col_aggs[field_id].update_max(pa.array([Decimal(statistics.max_raw)/ (10 ** scale)], decimal_type)[0].as_py())
2358+
col_aggs[field_id].update_min(
2359+
pa.array([Decimal(statistics.min_raw) / (10**scale)], decimal_type)[0].as_py()
2360+
)
2361+
col_aggs[field_id].update_max(
2362+
pa.array([Decimal(statistics.max_raw) / (10**scale)], decimal_type)[0].as_py()
2363+
)
23642364
else:
23652365
col_aggs[field_id].update_min(statistics.min)
23662366
col_aggs[field_id].update_max(statistics.max)

tests/io/test_pyarrow_stats.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
timedelta,
2828
timezone,
2929
)
30+
from decimal import Decimal
3031
from typing import (
3132
Any,
3233
Dict,
@@ -72,7 +73,7 @@
7273
StringType,
7374
)
7475
from pyiceberg.utils.datetime import date_to_days, datetime_to_micros, time_to_micros
75-
from decimal import Decimal
76+
7677

7778
@dataclass(frozen=True)
7879
class TestStruct:
@@ -473,7 +474,7 @@ def construct_test_table_primitive_types() -> Tuple[pq.FileMetaData, Union[Table
473474
strings = ["hello", "world"]
474475
uuids = [uuid.uuid3(uuid.NAMESPACE_DNS, "foo").bytes, uuid.uuid3(uuid.NAMESPACE_DNS, "bar").bytes]
475476
binaries = [b"hello", b"world"]
476-
decimal8 = pa.array([Decimal('123.45'), Decimal('678.91')], pa.decimal128(8, 2))
477+
decimal8 = pa.array([Decimal("123.45"), Decimal("678.91")], pa.decimal128(8, 2))
477478
decimal16 = pa.array([Decimal("12345679.123456"), Decimal("67891234.678912")], pa.decimal128(16, 6))
478479
decimal32 = pa.array([Decimal("1234567890123.123456"), Decimal("9876543210703.654321")], pa.decimal128(19, 6))
479480

@@ -538,9 +539,9 @@ def test_metrics_primitive_types() -> None:
538539
assert datafile.lower_bounds[10] == b"he"
539540
assert datafile.lower_bounds[11] == uuid.uuid3(uuid.NAMESPACE_DNS, "foo").bytes
540541
assert datafile.lower_bounds[12] == b"he"
541-
assert datafile.lower_bounds[13][::-1].ljust(4, b'\x00') == STRUCT_INT32.pack(12345)
542-
assert datafile.lower_bounds[14][::-1].ljust(8, b'\x00') == STRUCT_INT64.pack(12345679123456)
543-
assert str(int.from_bytes(datafile.lower_bounds[15], byteorder='big', signed=True)).encode('utf-8')== b"1234567890123123456"
542+
assert datafile.lower_bounds[13][::-1].ljust(4, b"\x00") == STRUCT_INT32.pack(12345)
543+
assert datafile.lower_bounds[14][::-1].ljust(8, b"\x00") == STRUCT_INT64.pack(12345679123456)
544+
assert str(int.from_bytes(datafile.lower_bounds[15], byteorder="big", signed=True)).encode("utf-8") == b"1234567890123123456"
544545

545546
assert len(datafile.upper_bounds) == 15
546547
assert datafile.upper_bounds[1] == STRUCT_BOOL.pack(True)
@@ -555,9 +556,9 @@ def test_metrics_primitive_types() -> None:
555556
assert datafile.upper_bounds[10] == b"wp"
556557
assert datafile.upper_bounds[11] == uuid.uuid3(uuid.NAMESPACE_DNS, "bar").bytes
557558
assert datafile.upper_bounds[12] == b"wp"
558-
assert datafile.upper_bounds[13][::-1].ljust(4, b'\x00')== STRUCT_INT32.pack(67891)
559-
assert datafile.upper_bounds[14][::-1].ljust(8, b'\x00')== STRUCT_INT64.pack(67891234678912)
560-
assert str(int.from_bytes(datafile.upper_bounds[15], byteorder='big', signed=True)).encode('utf-8')== b"9876543210703654321"
559+
assert datafile.upper_bounds[13][::-1].ljust(4, b"\x00") == STRUCT_INT32.pack(67891)
560+
assert datafile.upper_bounds[14][::-1].ljust(8, b"\x00") == STRUCT_INT64.pack(67891234678912)
561+
assert str(int.from_bytes(datafile.upper_bounds[15], byteorder="big", signed=True)).encode("utf-8") == b"9876543210703654321"
561562

562563

563564
def construct_test_table_invalid_upper_bound() -> Tuple[pq.FileMetaData, Union[TableMetadataV1, TableMetadataV2]]:

0 commit comments

Comments
 (0)