Skip to content

Commit 5d52c95

Browse files
committed
add ns parsing support
1 parent e3fb632 commit 5d52c95

File tree

5 files changed

+59
-24
lines changed

5 files changed

+59
-24
lines changed

pyiceberg/transforms.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,8 @@ def hash_func(v: Any) -> int:
330330
elif isinstance(source, (TimestampNanoType, TimestamptzNanoType)):
331331

332332
def hash_func(v: Any) -> int:
333+
# In order to bucket TimestampNano the same as Timestamp
334+
# convert to micros before hashing.
333335
if isinstance(v, py_datetime.datetime):
334336
v = datetime.datetime_to_micros(v)
335337
else:

pyiceberg/types.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from __future__ import annotations
3434

3535
import re
36+
from enum import IntEnum
3637
from functools import cached_property
3738
from typing import (
3839
Any,
@@ -62,6 +63,12 @@
6263
FIXED_PARSER = ParseNumberFromBrackets(FIXED)
6364

6465

66+
class TableVersion(IntEnum):
67+
ONE = 1
68+
TWO = 2
69+
THREE = 3
70+
71+
6572
def transform_dict_value_to_str(dict: Dict[str, Any]) -> Dict[str, str]:
6673
"""Transform all values in the dictionary to string. Raise an error if any value is None."""
6774
for key, value in dict.items():
@@ -181,9 +188,9 @@ def is_primitive(self) -> bool:
181188
def is_struct(self) -> bool:
182189
return isinstance(self, StructType)
183190

184-
def minimum_format_version(self) -> int:
191+
def minimum_format_version(self) -> TableVersion:
185192
"""Minimum Iceberg format version after which this type is supported."""
186-
return 1
193+
return TableVersion.ONE
187194

188195

189196
class PrimitiveType(Singleton, IcebergRootModel[str], IcebergType):
@@ -726,8 +733,8 @@ class TimestampNanoType(PrimitiveType):
726733

727734
root: Literal["timestamp_ns"] = Field(default="timestamp_ns")
728735

729-
def minimum_format_version(self) -> int:
730-
return 3
736+
def minimum_format_version(self) -> TableVersion:
737+
return TableVersion.THREE
731738

732739

733740
class TimestamptzNanoType(PrimitiveType):
@@ -745,8 +752,8 @@ class TimestamptzNanoType(PrimitiveType):
745752

746753
root: Literal["timestamptz_ns"] = Field(default="timestamptz_ns")
747754

748-
def minimum_format_version(self) -> int:
749-
return 3
755+
def minimum_format_version(self) -> TableVersion:
756+
return TableVersion.THREE
750757

751758

752759
class StringType(PrimitiveType):

pyiceberg/utils/datetime.py

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,10 @@
2929
EPOCH_DATE = date.fromisoformat("1970-01-01")
3030
EPOCH_TIMESTAMP = datetime.fromisoformat("1970-01-01T00:00:00.000000")
3131
ISO_TIMESTAMP = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(.\d{1,6})?")
32+
ISO_TIMESTAMP_NANO = re.compile(r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(.\d{1,6})?(\d{1,3})?")
3233
EPOCH_TIMESTAMPTZ = datetime.fromisoformat("1970-01-01T00:00:00.000000+00:00")
3334
ISO_TIMESTAMPTZ = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(.\d{1,6})?[-+]\d{2}:\d{2}")
35+
ISO_TIMESTAMPTZ_NANO = re.compile(r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(.\d{1,6})?(\d{1,3})?([-+]\d{2}:\d{2})")
3436

3537

3638
def micros_to_days(timestamp: int) -> int:
@@ -115,28 +117,30 @@ def datetime_to_nanos(dt: datetime) -> int:
115117

116118

117119
def timestamp_to_nanos(timestamp_str: str) -> int:
118-
"""Convert an ISO-9601 formatted timestamp without zone to microseconds from 1970-01-01T00:00:00.000000.
119-
120-
Currently only microsecond precision timestamp_str is supported as python datetime does not have
121-
nanoseconds support.
122-
"""
123-
if ISO_TIMESTAMP.fullmatch(timestamp_str):
124-
return datetime_to_nanos(datetime.fromisoformat(timestamp_str))
125-
if ISO_TIMESTAMPTZ.fullmatch(timestamp_str):
120+
"""Convert an ISO-8601 formatted timestamp without zone to nanoseconds from 1970-01-01T00:00:00.000000000."""
121+
if match := ISO_TIMESTAMP_NANO.fullmatch(timestamp_str):
122+
# Python datetime does not have native nanoseconds support
123+
# Hence we need to extract nanoseconds timestamp manually
124+
ns_str = match.group(3) or "0"
125+
ms_str = match.group(2) if match.group(2) else ""
126+
timestamp_str_without_ns_str = match.group(1) + ms_str
127+
return datetime_to_nanos(datetime.fromisoformat(timestamp_str_without_ns_str)) + int(ns_str)
128+
if ISO_TIMESTAMPTZ_NANO.fullmatch(timestamp_str):
126129
# When we can match a timestamp with a zone, we can give a more specific error
127130
raise ValueError(f"Zone offset provided, but not expected: {timestamp_str}")
128131
raise ValueError(f"Invalid timestamp without zone: {timestamp_str} (must be ISO-8601)")
129132

130133

131134
def timestamptz_to_nanos(timestamptz_str: str) -> int:
132-
"""Convert an ISO-8601 formatted timestamp with zone to microseconds from 1970-01-01T00:00:00.000000+00:00.
133-
134-
Currently only microsecond precision timestamp_str is supported as python datetime does not have
135-
nanoseconds support.
136-
"""
137-
if ISO_TIMESTAMPTZ.fullmatch(timestamptz_str):
138-
return datetime_to_nanos(datetime.fromisoformat(timestamptz_str))
139-
if ISO_TIMESTAMP.fullmatch(timestamptz_str):
135+
"""Convert an ISO-8601 formatted timestamp with zone to nanoseconds from 1970-01-01T00:00:00.000000000+00:00."""
136+
if match := ISO_TIMESTAMPTZ_NANO.fullmatch(timestamptz_str):
137+
# Python datetime does not have native nanoseconds support
138+
# Hence we need to extract nanoseconds timestamp manually
139+
ns_str = match.group(3) or "0"
140+
ms_str = match.group(2) if match.group(2) else ""
141+
timestamptz_str_without_ns_str = match.group(1) + ms_str + match.group(4)
142+
return datetime_to_nanos(datetime.fromisoformat(timestamptz_str_without_ns_str)) + int(ns_str)
143+
if ISO_TIMESTAMPTZ_NANO.fullmatch(timestamptz_str):
140144
# When we can match a timestamp without a zone, we can give a more specific error
141145
raise ValueError(f"Missing zone offset: {timestamptz_str} (must be ISO-8601)")
142146
raise ValueError(f"Invalid timestamp with zone: {timestamptz_str} (must be ISO-8601)")

tests/test_transforms.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,11 +152,21 @@
152152
TimestampNanoType(),
153153
-1207196810,
154154
),
155+
(
156+
timestamp_to_nanos("2017-11-16T22:31:08.000001001"),
157+
TimestampNanoType(),
158+
-1207196810,
159+
),
155160
(
156161
timestamptz_to_nanos("2017-11-16T14:31:08.000001-08:00"),
157162
TimestamptzNanoType(),
158163
-1207196810,
159164
),
165+
(
166+
timestamptz_to_nanos("2017-11-16T14:31:08.000001001-08:00"),
167+
TimestamptzNanoType(),
168+
-1207196810,
169+
),
160170
],
161171
)
162172
def test_bucket_hash_values(test_input: Any, test_type: PrimitiveType, expected: Any) -> None:

tests/utils/test_datetime.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,13 +105,25 @@ def test_datetime_to_nanos(datetime_: datetime, nanos: int) -> None:
105105
assert nanos == datetime_to_nanos(datetime_)
106106

107107

108-
@pytest.mark.parametrize("timestamp, nanos", [("1970-01-01T00:00:00", 0), ("2025-02-23T20:21:44.375612", 1740342104375612000)])
108+
@pytest.mark.parametrize(
109+
"timestamp, nanos",
110+
[
111+
("1970-01-01T00:00:00", 0),
112+
("2025-02-23T20:21:44.375612", 1740342104375612000),
113+
("2025-02-23T20:21:44.375612001", 1740342104375612001),
114+
],
115+
)
109116
def test_timestamp_to_nanos(timestamp: str, nanos: int) -> None:
110117
assert nanos == timestamp_to_nanos(timestamp)
111118

112119

113120
@pytest.mark.parametrize(
114-
"timestamp, nanos", [("1970-01-01T00:00:00+00:00", 0), ("2025-02-23T16:21:44.375612-04:00", 1740342104375612000)]
121+
"timestamp, nanos",
122+
[
123+
("1970-01-01T00:00:00+00:00", 0),
124+
("2025-02-23T16:21:44.375612-04:00", 1740342104375612000),
125+
("2025-02-23T16:21:44.375612001-04:00", 1740342104375612001),
126+
],
115127
)
116128
def test_timestamptz_to_nanos(timestamp: str, nanos: int) -> None:
117129
assert nanos == timestamptz_to_nanos(timestamp)

0 commit comments

Comments
 (0)