1919import logging
2020from datetime import datetime , timedelta , timezone
2121from functools import reduce
22- from typing import TYPE_CHECKING , Set
22+ from typing import TYPE_CHECKING , Optional , Set
2323
2424from pyiceberg .utils .concurrent import ExecutorFactory
2525
@@ -41,7 +41,7 @@ def __init__(self, tbl: Table) -> None:
4141 except ModuleNotFoundError as e :
4242 raise ModuleNotFoundError ("For metadata operations PyArrow needs to be installed" ) from e
4343
44- def _orphaned_files (self , location : str , older_than : timedelta = timedelta ( days = 3 ) ) -> Set [str ]:
44+ def _orphaned_files (self , location : str , older_than : Optional [ timedelta ] = None ) -> Set [str ]:
4545 """Get all files which are not referenced in any metadata files of an Iceberg table and can thus be considered "orphaned".
4646
4747 Args:
@@ -69,15 +69,18 @@ def _orphaned_files(self, location: str, older_than: timedelta = timedelta(days=
6969
7070 _ , _ , path = pyarrow_io .parse_location (location )
7171 selector = FileSelector (path , recursive = True )
72+
7273 # filter to just files as it may return directories, and filter on time
74+ if older_than is None :
75+ older_than = timedelta (0 )
7376 as_of = datetime .now (timezone .utc ) - older_than
7477 all_files = [f .path for f in fs .get_file_info (selector ) if f .type == FileType .File and f .mtime < as_of ]
7578
7679 orphaned_files = set (all_files ).difference (flat_known_files )
7780
7881 return orphaned_files
7982
80- def remove_orphaned_files (self , older_than : timedelta = timedelta ( days = 3 ) , dry_run : bool = False ) -> None :
83+ def remove_orphaned_files (self , older_than : Optional [ timedelta ] = None , dry_run : bool = False ) -> None :
8184 """Remove files which are not referenced in any metadata files of an Iceberg table and can thus be considered "orphaned".
8285
8386 Args:
0 commit comments