Skip to content

Commit f4d98d2

Browse files
committed
allow older_than to be None
1 parent b4c14fc commit f4d98d2

File tree

2 files changed

+6
-7
lines changed

2 files changed

+6
-7
lines changed

pyiceberg/table/maintenance.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import logging
2020
from datetime import datetime, timedelta, timezone
2121
from functools import reduce
22-
from typing import TYPE_CHECKING, Set
22+
from typing import TYPE_CHECKING, Optional, Set
2323

2424
from pyiceberg.utils.concurrent import ExecutorFactory
2525

@@ -41,7 +41,7 @@ def __init__(self, tbl: Table) -> None:
4141
except ModuleNotFoundError as e:
4242
raise ModuleNotFoundError("For metadata operations PyArrow needs to be installed") from e
4343

44-
def _orphaned_files(self, location: str, older_than: timedelta = timedelta(days=3)) -> Set[str]:
44+
def _orphaned_files(self, location: str, older_than: Optional[timedelta] = None) -> Set[str]:
4545
"""Get all files which are not referenced in any metadata files of an Iceberg table and can thus be considered "orphaned".
4646
4747
Args:
@@ -69,15 +69,18 @@ def _orphaned_files(self, location: str, older_than: timedelta = timedelta(days=
6969

7070
_, _, path = pyarrow_io.parse_location(location)
7171
selector = FileSelector(path, recursive=True)
72+
7273
# filter to just files as it may return directories, and filter on time
74+
if older_than is None:
75+
older_than = timedelta(0)
7376
as_of = datetime.now(timezone.utc) - older_than
7477
all_files = [f.path for f in fs.get_file_info(selector) if f.type == FileType.File and f.mtime < as_of]
7578

7679
orphaned_files = set(all_files).difference(flat_known_files)
7780

7881
return orphaned_files
7982

80-
def remove_orphaned_files(self, older_than: timedelta = timedelta(days=3), dry_run: bool = False) -> None:
83+
def remove_orphaned_files(self, older_than: Optional[timedelta] = None, dry_run: bool = False) -> None:
8184
"""Remove files which are not referenced in any metadata files of an Iceberg table and can thus be considered "orphaned".
8285
8386
Args:

tests/table/test_remove_orphans.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,6 @@ def test_remove_orphaned_files(catalog: Catalog) -> None:
7272
tbl.maintenance.remove_orphaned_files(dry_run=True)
7373
assert orphaned_file.exists()
7474

75-
# should not delete because it was just created...
76-
tbl.maintenance.remove_orphaned_files()
77-
assert orphaned_file.exists()
78-
7975
# backdate the file's access and modification times (os.utime does not change the creation time) so it is older than 3 days
8076
five_days_ago = (datetime.now() - timedelta(days=5)).timestamp()
8177
os.utime(orphaned_file, (five_days_ago, five_days_ago))

0 commit comments

Comments
 (0)