Skip to content

Commit a9d8a1a

Browse files
committed
Adding add_files_overwrite method
1 parent ee7e9f0 commit a9d8a1a

File tree

2 files changed

+440
-1
lines changed

2 files changed

+440
-1
lines changed

pyiceberg/table/__init__.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -664,6 +664,32 @@ def add_files(self, file_paths: List[str], snapshot_properties: Dict[str, str] =
664664
for data_file in data_files:
665665
update_snapshot.append_data_file(data_file)
666666

667+
def add_files_overwrite(
    self,
    file_paths: List[str],
    overwrite_filter: Union[BooleanExpression, str] = ALWAYS_TRUE,
    snapshot_properties: Dict[str, str] = EMPTY_DICT,
) -> None:
    """Shorthand API for adding files as data files and overwriting the table.

    If the table has no name mapping, the schema's name mapping is first stored
    as the default name mapping table property. The given files are then resolved
    to data files, data matching ``overwrite_filter`` is removed through
    ``self.delete``, and the resolved files are appended with a fast-append.
    ``snapshot_properties`` is passed to both the delete and the append.

    Args:
        file_paths: The list of full file paths to be added as data files to the table
        overwrite_filter: ALWAYS_TRUE when you overwrite all the data,
                          or a boolean expression in case of a partial overwrite
        snapshot_properties: Custom properties to be added to the snapshot summary

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    if self._table.name_mapping() is None:
        self.set_properties(**{TableProperties.DEFAULT_NAME_MAPPING: self._table.schema().name_mapping.model_dump_json()})

    # Resolve every input file *before* staging the delete. The helper may
    # produce its results lazily (NOTE(review): confirm), in which case a bad
    # path would otherwise only surface after the destructive delete has
    # already been applied to this transaction. Materializing the list up
    # front makes the operation fail before anything is removed.
    data_files = list(
        _parquet_files_to_data_files(table_metadata=self._table.metadata, file_paths=file_paths, io=self._table.io)
    )

    self.delete(delete_filter=overwrite_filter, snapshot_properties=snapshot_properties)
    with self.update_snapshot(snapshot_properties=snapshot_properties).fast_append() as update_snapshot:
        for data_file in data_files:
            update_snapshot.append_data_file(data_file)
692+
667693
def update_spec(self) -> UpdateSpec:
668694
"""Create a new UpdateSpec to update the partitioning of the table.
669695
@@ -1613,6 +1639,29 @@ def add_files(self, file_paths: List[str], snapshot_properties: Dict[str, str] =
16131639
with self.transaction() as tx:
16141640
tx.add_files(file_paths=file_paths, snapshot_properties=snapshot_properties)
16151641

1642+
def add_files_overwrite(
    self,
    file_paths: List[str],
    overwrite_filter: Union[BooleanExpression, str] = ALWAYS_TRUE,
    snapshot_properties: Dict[str, str] = EMPTY_DICT,
) -> None:
    """
    Add existing files as data files while overwriting table data, in one transaction.

    Opens a transaction on this table and forwards all arguments unchanged to
    the transaction's ``add_files_overwrite``.

    Args:
        file_paths: The list of full file paths to be added as data files to the table
        overwrite_filter: ALWAYS_TRUE when you overwrite all the data,
                          or a boolean expression in case of a partial overwrite
        snapshot_properties: Custom properties to be added to the snapshot summary

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    with self.transaction() as txn:
        txn.add_files_overwrite(
            file_paths=file_paths,
            overwrite_filter=overwrite_filter,
            snapshot_properties=snapshot_properties,
        )
1664+
16161665
def update_spec(self, case_sensitive: bool = True) -> UpdateSpec:
    """Return an UpdateSpec backed by a new auto-committing Transaction on this table.

    Args:
        case_sensitive: Forwarded unchanged to the UpdateSpec constructor.
    """
    autocommit_txn = Transaction(self, autocommit=True)
    return UpdateSpec(autocommit_txn, case_sensitive=case_sensitive)
16181667

0 commit comments

Comments
 (0)