Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 1 addition & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,11 +148,4 @@ poetry install
poetry run mkdocs serve
```

This will start a local server at `http://127.0.0.1:8000/dapi/` where you can view the documentation.

### API docs
To generate API docs:

```
pdoc --html --output-dir api-docs dapi --force
```
This will start a local server at `http://127.0.0.1:8000/dapi/` where you can view the documentation.
67 changes: 51 additions & 16 deletions dapi/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,36 @@
from typing import List


def _safe_quote(path: str) -> str:
    """URL-encode a path without double-encoding existing percent-escapes.

    A path with no valid ``%XX`` escape is encoded with
    ``urllib.parse.quote`` (``/`` is kept, a bare ``%`` becomes ``%25``).
    A path that already contains at least one valid escape is assumed to be
    (at least partially) encoded: its escapes are preserved verbatim, and any
    remaining unsafe characters, such as raw spaces, are still encoded.

    Note:
        A literal file name that happens to contain text such as ``%20``
        cannot be told apart from an encoded path; it is treated as
        already encoded.

    Args:
        path (str): The path to encode

    Returns:
        str: URL-encoded path

    Example:
        >>> _safe_quote("folder with spaces")
        'folder%20with%20spaces'
        >>> _safe_quote("folder%20with%20spaces")  # Already encoded
        'folder%20with%20spaces'
        >>> _safe_quote("my dir/file%20name.txt")  # Partially encoded
        'my%20dir/file%20name.txt'
    """
    # unquote() never raises for str input; it leaves malformed escapes
    # untouched, so an unchanged result means no valid escape was present.
    if urllib.parse.unquote(path) == path:
        return urllib.parse.quote(path)

    # Valid escapes are present: keep "%" safe so they are not re-encoded
    # into "%25..", while raw unsafe characters elsewhere still get encoded.
    return urllib.parse.quote(path, safe="/%")


# Helper that splits a Tapis URI into its system ID and path components.
def _parse_tapis_uri(tapis_uri: str) -> (str, str):
"""Parse a Tapis URI into system ID and path components.
Expand All @@ -19,7 +49,7 @@ def _parse_tapis_uri(tapis_uri: str) -> (str, str):
tapis_uri (str): URI in the format 'tapis://system_id/path'.

Returns:
tuple: A tuple containing (system_id, path) where path is URL-decoded.
tuple: A tuple containing (system_id, path).

Raises:
ValueError: If the URI format is invalid or missing required components.
Expand Down Expand Up @@ -190,8 +220,7 @@ def get_ds_path_uri(t: Tapis, path: str, verify_exists: bool = False) -> str:
)
else:
tapis_path = path_remainder
encoded_path = urllib.parse.quote(tapis_path)
input_uri = f"tapis://{storage_system_id}/{encoded_path}"
input_uri = f"tapis://{storage_system_id}/{tapis_path}"
print(f"Translated '{path}' to '{input_uri}' using t.username")
break # Found match, exit loop

Expand All @@ -206,8 +235,7 @@ def get_ds_path_uri(t: Tapis, path: str, verify_exists: bool = False) -> str:
if pattern in path:
path_remainder = path.split(pattern, 1)[1].lstrip("/")
tapis_path = path_remainder
encoded_path = urllib.parse.quote(tapis_path)
input_uri = f"tapis://{storage_system_id}/{encoded_path}"
input_uri = f"tapis://{storage_system_id}/{tapis_path}"
print(f"Translated '{path}' to '{input_uri}'")
break # Found match, exit loop

Expand Down Expand Up @@ -295,8 +323,7 @@ def get_ds_path_uri(t: Tapis, path: str, verify_exists: bool = False) -> str:
f"Could not resolve project ID '{project_id_part}' to a Tapis system ID."
)

encoded_path_within_project = urllib.parse.quote(path_within_project)
input_uri = f"tapis://{found_system_id}/{encoded_path_within_project}"
input_uri = f"tapis://{found_system_id}/{path_within_project}"
print(f"Translated '{path}' to '{input_uri}' using Tapis v3 lookup")
break # Found match, exit loop

Expand All @@ -316,26 +343,26 @@ def get_ds_path_uri(t: Tapis, path: str, verify_exists: bool = False) -> str:
print(f"Verifying existence of translated path: {input_uri}")
try:
system_id, remote_path = _parse_tapis_uri(input_uri)
# Decode the path part for the listFiles call, as it expects unencoded paths
decoded_remote_path = urllib.parse.unquote(remote_path)
print(f"Checking system '{system_id}' for path '{decoded_remote_path}'...")
# The Tapis API expects URL-encoded paths when they contain spaces or special characters
encoded_remote_path = _safe_quote(remote_path)
print(f"Checking system '{system_id}' for path '{remote_path}'...")
# Use limit=1 for efficiency, we only care if it *exists*
# Note: listFiles might return successfully for the *parent* directory
# if the final component doesn't exist. A more robust check might
# involve checking the result count or specific item name, but this
# basic check catches non-existent parent directories.
t.files.listFiles(systemId=system_id, path=decoded_remote_path, limit=1)
t.files.listFiles(systemId=system_id, path=encoded_remote_path, limit=1)
print(f"Verification successful: Path exists.")
except BaseTapyException as e:
# Specifically check for 404 on the listFiles call
if hasattr(e, "response") and e.response and e.response.status_code == 404:
raise FileOperationError(
f"Verification failed: Path '{decoded_remote_path}' does not exist on system '{system_id}'. Translated URI: {input_uri}"
f"Verification failed: Path '{remote_path}' does not exist on system '{system_id}'. Translated URI: {input_uri}"
) from e
else:
# Re-raise other Tapis errors encountered during verification
raise FileOperationError(
f"Verification error for path '{decoded_remote_path}' on system '{system_id}': {e}"
f"Verification error for path '{remote_path}' on system '{system_id}': {e}"
) from e
except (
ValueError
Expand Down Expand Up @@ -379,8 +406,12 @@ def upload_file(t: Tapis, local_path: str, remote_uri: str):
print(
f"Uploading '{local_path}' to system '{system_id}' at path '{dest_path}'..."
)
# URL-encode the destination path for API call
encoded_dest_path = _safe_quote(dest_path)
t.upload(
system_id=system_id, source_file_path=local_path, dest_file_path=dest_path
system_id=system_id,
source_file_path=local_path,
dest_file_path=encoded_dest_path,
)
print("Upload complete.")
except BaseTapyException as e:
Expand Down Expand Up @@ -424,8 +455,10 @@ def download_file(t: Tapis, remote_uri: str, local_path: str):
os.makedirs(local_dir, exist_ok=True)
# Use getContents which returns the raw bytes
# Set stream=True for potentially large files
# URL-encode the source path for API call
encoded_source_path = _safe_quote(source_path)
response = t.files.getContents(
systemId=system_id, path=source_path, stream=True
systemId=system_id, path=encoded_source_path, stream=True
)

# Write the streamed content to the local file
Expand Down Expand Up @@ -477,8 +510,10 @@ def list_files(
try:
system_id, path = _parse_tapis_uri(remote_uri)
print(f"Listing files in system '{system_id}' at path '{path}'...")
# URL-encode the path for API call
encoded_path = _safe_quote(path)
results = t.files.listFiles(
systemId=system_id, path=path, limit=limit, offset=offset
systemId=system_id, path=encoded_path, limit=limit, offset=offset
)
print(f"Found {len(results)} items.")
return results
Expand Down
10 changes: 3 additions & 7 deletions dapi/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1007,9 +1007,7 @@ def archive_uri(self) -> Optional[str]:
details = self._get_details()
if details.archiveSystemId and details.archiveSystemDir:
archive_path = details.archiveSystemDir.lstrip("/")
return (
f"tapis://{details.archiveSystemId}/{urllib.parse.quote(archive_path)}"
)
return f"tapis://{details.archiveSystemId}/{archive_path}"
return None

def list_outputs(
Expand Down Expand Up @@ -1048,7 +1046,7 @@ def list_outputs(
full_archive_path = os.path.join(details.archiveSystemDir, path.lstrip("/"))
full_archive_path = os.path.normpath(full_archive_path).lstrip("/")
try:
archive_base_uri = f"tapis://{details.archiveSystemId}/{urllib.parse.quote(full_archive_path)}"
archive_base_uri = f"tapis://{details.archiveSystemId}/{full_archive_path}"
from .files import list_files

return list_files(self._tapis, archive_base_uri, limit=limit, offset=offset)
Expand Down Expand Up @@ -1084,9 +1082,7 @@ def download_output(self, remote_path: str, local_target: str):
details.archiveSystemDir, remote_path.lstrip("/")
)
full_archive_path = os.path.normpath(full_archive_path).lstrip("/")
remote_uri = (
f"tapis://{details.archiveSystemId}/{urllib.parse.quote(full_archive_path)}"
)
remote_uri = f"tapis://{details.archiveSystemId}/{full_archive_path}"
try:
from .files import download_file

Expand Down
Loading