|
19 | 19 | import os |
20 | 20 | import random |
21 | 21 | import time |
| 22 | +import uuid |
22 | 23 | from datetime import date, datetime, timedelta |
23 | 24 | from decimal import Decimal |
24 | 25 | from pathlib import Path |
|
48 | 49 | from pyiceberg.schema import Schema |
49 | 50 | from pyiceberg.table import TableProperties |
50 | 51 | from pyiceberg.table.sorting import SortDirection, SortField, SortOrder |
51 | | -from pyiceberg.transforms import DayTransform, HourTransform, IdentityTransform |
| 52 | +from pyiceberg.transforms import BucketTransform, DayTransform, HourTransform, IdentityTransform |
52 | 53 | from pyiceberg.types import ( |
53 | 54 | DateType, |
54 | 55 | DecimalType, |
|
58 | 59 | LongType, |
59 | 60 | NestedField, |
60 | 61 | StringType, |
| 62 | + UUIDType, |
61 | 63 | ) |
62 | 64 | from utils import _create_table |
63 | 65 |
|
@@ -1841,3 +1843,56 @@ def test_read_write_decimals(session_catalog: Catalog) -> None: |
1841 | 1843 | tbl.append(arrow_table) |
1842 | 1844 |
|
1843 | 1845 | assert tbl.scan().to_arrow() == arrow_table |
| 1846 | + |
| 1847 | + |
@pytest.mark.integration
def test_read_write_uuids_partitioned(session_catalog: Catalog) -> None:
    """Test reading and writing partitioned UUID columns with the supported transforms.

    Covered transforms:
    - BucketTransform
    - IdentityTransform
    """
    # Keep the table identifier aligned with the test name so it cannot collide
    # with a separate, non-partitioned UUID round-trip test.
    identifier = "default.test_read_write_uuids_partitioned"

    # UUIDs are represented as 16-byte fixed-width binary in Arrow; a trailing
    # None exercises partitioning of NULL values.
    uuids = [
        uuid.UUID("ec9b663b-062f-4200-a130-8de19c21b800").bytes,
        uuid.UUID("5f473c64-dbeb-449b-bdfa-b6b4185b1bde").bytes,
        None,
    ]

    arrow_table = pa.Table.from_pydict(
        {
            "uuid_1": pa.array(uuids, type=pa.binary(16)),
            "uuid_2": pa.array(uuids, type=pa.binary(16)),
        }
    )

    tbl = _create_table(
        session_catalog,
        identifier,
        properties={"format-version": 2},
        schema=Schema(
            NestedField(field_id=1, name="uuid_1", field_type=UUIDType(), required=False),
            NestedField(field_id=2, name="uuid_2", field_type=UUIDType(), required=False),
        ),
        partition_spec=PartitionSpec(
            PartitionField(source_id=1, field_id=1001, transform=BucketTransform(2), name="uuid_bucket"),
            PartitionField(source_id=2, field_id=1002, transform=IdentityTransform(), name="uuid_identity"),
        ),
    )

    tbl.append(arrow_table)
    # Full round-trip: everything written must come back unchanged.
    assert tbl.scan().to_arrow() == arrow_table

    # Check BucketTransform partition filtering (uuid_1).
    assert tbl.scan(row_filter=f"uuid_1 == '{uuid.UUID(bytes=uuids[0])}'").to_arrow() == pa.Table.from_pydict(
        {
            "uuid_1": pa.array([uuids[0]], type=pa.binary(16)),
            "uuid_2": pa.array([uuids[0]], type=pa.binary(16)),
        }
    )
    # Check IdentityTransform partition filtering (uuid_2).
    assert tbl.scan(row_filter=f"uuid_2 == '{uuid.UUID(bytes=uuids[1])}'").to_arrow() == pa.Table.from_pydict(
        {
            "uuid_1": pa.array([uuids[1]], type=pa.binary(16)),
            "uuid_2": pa.array([uuids[1]], type=pa.binary(16)),
        }
    )
0 commit comments