Skip to content

Commit 2ed605a

Browse files
committed
enable hf filesystem
1 parent 284fba4 commit 2ed605a

File tree

2 files changed

+15
-0
lines changed

2 files changed

+15
-0
lines changed

pyiceberg/io/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@
9292
GCS_SERVICE_HOST = "gcs.service.host"
9393
GCS_DEFAULT_LOCATION = "gcs.default-bucket-location"
9494
GCS_VERSION_AWARE = "gcs.version-aware"
95+
HF_ENDPOINT = "hf.endpoint"
96+
HF_TOKEN = "hf.token"
9597
PYARROW_USE_LARGE_TYPES_ON_READ = "pyarrow.use-large-types-on-read"
9698

9799

@@ -306,6 +308,7 @@ def delete(self, location: Union[str, InputFile, OutputFile]) -> None:
306308
"viewfs": [ARROW_FILE_IO],
307309
"abfs": [FSSPEC_FILE_IO],
308310
"abfss": [FSSPEC_FILE_IO],
311+
"hf": [FSSPEC_FILE_IO],
309312
}
310313

311314

pyiceberg/io/fsspec.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@
5959
GCS_SESSION_KWARGS,
6060
GCS_TOKEN,
6161
GCS_VERSION_AWARE,
62+
HF_ENDPOINT,
63+
HF_TOKEN,
6264
S3_ACCESS_KEY_ID,
6365
S3_CONNECT_TIMEOUT,
6466
S3_ENDPOINT,
@@ -208,6 +210,15 @@ def _adls(properties: Properties) -> AbstractFileSystem:
208210
)
209211

210212

213+
def _hf(properties: Properties) -> AbstractFileSystem:
    """Create the fsspec filesystem used for the ``hf`` scheme.

    Args:
        properties: FileIO configuration; the ``HF_ENDPOINT`` and ``HF_TOKEN``
            keys are looked up with ``properties.get`` and the results are
            passed through unchanged.

    Returns:
        An ``HfFileSystem`` constructed with the looked-up endpoint and token.
    """
    # Imported inside the function so huggingface_hub is only loaded when this
    # factory is actually called.
    from huggingface_hub import HfFileSystem

    hub_endpoint = properties.get(HF_ENDPOINT)
    access_token = properties.get(HF_TOKEN)
    return HfFileSystem(endpoint=hub_endpoint, token=access_token)
220+
221+
211222
SCHEME_TO_FS = {
212223
"": _file,
213224
"file": _file,
@@ -218,6 +229,7 @@ def _adls(properties: Properties) -> AbstractFileSystem:
218229
"abfss": _adls,
219230
"gs": _gs,
220231
"gcs": _gs,
232+
"hf": _hf,
221233
}
222234

223235

0 commit comments

Comments
 (0)