Skip to content

Commit 4288bdb

Browse files
authored
Merge branch 'main' into dependabot/pip/mkdocstrings-0.30.1
2 parents 178dc0e + a8df020 commit 4288bdb

File tree

8 files changed

+316
-257
lines changed

8 files changed

+316
-257
lines changed

.github/workflows/pypi-build-artifacts.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ jobs:
6262
if: startsWith(matrix.os, 'ubuntu')
6363

6464
- name: Build wheels
65-
uses: pypa/cibuildwheel@v3.1.4
65+
uses: pypa/cibuildwheel@v3.2.0
6666
with:
6767
output-dir: wheelhouse
6868
config-file: "pyproject.toml"

.github/workflows/svn-build-artifacts.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ jobs:
5757
if: startsWith(matrix.os, 'ubuntu')
5858

5959
- name: Build wheels
60-
uses: pypa/cibuildwheel@v3.1.4
60+
uses: pypa/cibuildwheel@v3.2.0
6161
with:
6262
output-dir: wheelhouse
6363
config-file: "pyproject.toml"

dev/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ WORKDIR ${SPARK_HOME}
3939
ENV SPARK_VERSION=3.5.6
4040
ENV SCALA_VERSION=2.12
4141
ENV ICEBERG_SPARK_RUNTIME_VERSION=3.5_${SCALA_VERSION}
42-
ENV ICEBERG_VERSION=1.9.2
42+
ENV ICEBERG_VERSION=1.10.0
4343
ENV PYICEBERG_VERSION=0.10.0
4444
ENV HADOOP_VERSION=3.3.4
4545
ENV AWS_SDK_VERSION=1.12.753

poetry.lock

Lines changed: 262 additions & 247 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyiceberg/table/__init__.py

Lines changed: 16 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -859,7 +859,11 @@ def upsert(
859859
return UpsertResult(rows_updated=update_row_cnt, rows_inserted=insert_row_cnt)
860860

861861
def add_files(
862-
self, file_paths: List[str], snapshot_properties: Dict[str, str] = EMPTY_DICT, check_duplicate_files: bool = True
862+
self,
863+
file_paths: List[str],
864+
snapshot_properties: Dict[str, str] = EMPTY_DICT,
865+
check_duplicate_files: bool = True,
866+
branch: Optional[str] = MAIN_BRANCH,
863867
) -> None:
864868
"""
865869
Shorthand API for adding files as data files to the table transaction.
@@ -888,12 +892,12 @@ def add_files(
888892
self.set_properties(
889893
**{TableProperties.DEFAULT_NAME_MAPPING: self.table_metadata.schema().name_mapping.model_dump_json()}
890894
)
891-
with self.update_snapshot(snapshot_properties=snapshot_properties).fast_append() as update_snapshot:
895+
with self._append_snapshot_producer(snapshot_properties, branch=branch) as append_files:
892896
data_files = _parquet_files_to_data_files(
893897
table_metadata=self.table_metadata, file_paths=file_paths, io=self._table.io
894898
)
895899
for data_file in data_files:
896-
update_snapshot.append_data_file(data_file)
900+
append_files.append_data_file(data_file)
897901

898902
def update_spec(self) -> UpdateSpec:
899903
"""Create a new UpdateSpec to update the partitioning of the table.
@@ -1431,7 +1435,11 @@ def delete(
14311435
)
14321436

14331437
def add_files(
1434-
self, file_paths: List[str], snapshot_properties: Dict[str, str] = EMPTY_DICT, check_duplicate_files: bool = True
1438+
self,
1439+
file_paths: List[str],
1440+
snapshot_properties: Dict[str, str] = EMPTY_DICT,
1441+
check_duplicate_files: bool = True,
1442+
branch: Optional[str] = MAIN_BRANCH,
14351443
) -> None:
14361444
"""
14371445
Shorthand API for adding files as data files to the table.
@@ -1444,7 +1452,10 @@ def add_files(
14441452
"""
14451453
with self.transaction() as tx:
14461454
tx.add_files(
1447-
file_paths=file_paths, snapshot_properties=snapshot_properties, check_duplicate_files=check_duplicate_files
1455+
file_paths=file_paths,
1456+
snapshot_properties=snapshot_properties,
1457+
check_duplicate_files=check_duplicate_files,
1458+
branch=branch,
14481459
)
14491460

14501461
def update_spec(self, case_sensitive: bool = True) -> UpdateSpec:

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,7 @@ moto = { version = "^5.0.2", extras = ["server"] }
102102
typing-extensions = "4.15.0"
103103
pytest-mock = "3.15.0"
104104
pyspark = { version = "3.5.6", extras = ["connect"] }
105-
cython = "3.1.3"
105+
cython = "3.1.4"
106106
deptry = ">=0.14,<0.24"
107107
docutils = "!=0.21.post1" # https://github.com/python-poetry/poetry/issues/9248#issuecomment-2026240520
108108
mypy-boto3-glue = ">=1.28.18"

tests/integration/test_add_files.py

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -926,3 +926,36 @@ def test_add_files_hour_transform(session_catalog: Catalog) -> None:
926926
writer.write_table(arrow_table)
927927

928928
tbl.add_files(file_paths=[file_path])
929+
930+
931+
@pytest.mark.integration
932+
def test_add_files_to_branch(spark: SparkSession, session_catalog: Catalog, format_version: int) -> None:
933+
identifier = f"default.test_add_files_branch_v{format_version}"
934+
branch = "branch1"
935+
936+
tbl = _create_table(session_catalog, identifier, format_version)
937+
938+
file_paths = [f"s3://warehouse/default/addfile/v{format_version}/test-{i}.parquet" for i in range(5)]
939+
# write parquet files
940+
for file_path in file_paths:
941+
fo = tbl.io.new_output(file_path)
942+
with fo.create(overwrite=True) as fos:
943+
with pq.ParquetWriter(fos, schema=ARROW_SCHEMA) as writer:
944+
writer.write_table(ARROW_TABLE)
945+
946+
# Dummy write so the table has a snapshot; creating a branch on an empty table fails
947+
tbl.append(ARROW_TABLE)
948+
assert tbl.metadata.current_snapshot_id is not None
949+
tbl.manage_snapshots().create_branch(snapshot_id=tbl.metadata.current_snapshot_id, branch_name=branch).commit()
950+
951+
# add the parquet files as data files
952+
tbl.add_files(file_paths=file_paths, branch=branch)
953+
954+
df = spark.table(identifier)
955+
assert df.count() == 1, "Expected 1 row in Main table"
956+
957+
branch_df = spark.table(f"{identifier}.branch_{branch}")
958+
assert branch_df.count() == 6, "Expected 6 rows in branch"
959+
960+
for col in branch_df.columns:
961+
assert branch_df.filter(branch_df[col].isNotNull()).count() == 6, "Expected all 6 rows to be non-null"

tests/integration/test_reads.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -332,7 +332,7 @@ def test_daft_nan(catalog: Catalog) -> None:
332332
def test_daft_nan_rewritten(catalog: Catalog) -> None:
333333
table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten")
334334
df = table_test_null_nan_rewritten.to_daft()
335-
df = df.where(df["col_numeric"].float.is_nan())
335+
df = df.where(df["col_numeric"].is_nan())
336336
df = df.select("idx", "col_numeric")
337337
assert df.count_rows() == 1
338338
assert df.to_pydict()["idx"][0] == 1

0 commit comments

Comments
 (0)