Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 30 additions & 5 deletions datajoint/external.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,11 +300,36 @@ def upload_filepath(self, local_filepath):
)
else:
# upload the file and create its tracking entry
self._upload_file(
local_filepath,
self._make_external_filepath(relative_filepath),
metadata={"contents_hash": str(contents_hash) if contents_hash else ""},
)
external_path = self._make_external_filepath(relative_filepath)
already_uploaded = False
if self.spec["protocol"] == "s3":
stat = self.s3.stat(str(external_path))
if stat is not None and stat.size == file_size:
# Verify contents_hash from S3 metadata when available
if skip_checksum:
already_uploaded = True
else:
remote_meta = {
k.lower().lstrip("x-amz-meta-"): v
for k, v in (stat.metadata or {}).items()
}
remote_hash = remote_meta.get("contents_hash", "")
if remote_hash == str(contents_hash):
already_uploaded = True
if already_uploaded:
logger.info(
f"File already exists on S3 with matching size"
f"{'' if skip_checksum else ' and checksum'}"
f", skipping upload: '{relative_filepath}'"
)
if not already_uploaded:
self._upload_file(
local_filepath,
external_path,
metadata={
"contents_hash": str(contents_hash) if contents_hash else ""
},
)
self.connection.query(
"INSERT INTO {tab} (hash, size, filepath, contents_hash) VALUES (%s, {size}, '{filepath}', %s)".format(
tab=self.full_table_name,
Expand Down
17 changes: 10 additions & 7 deletions datajoint/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,16 +95,19 @@ def fget(self, name, local_filepath):
if "contents_hash" in meta:
return uuid.UUID(meta["contents_hash"])

def stat(self, name):
    """
    Return the stat result for an object, or None if it does not exist.

    :param name: object key inside ``self.bucket``; passed through ``str()``
        before being handed to the client.
    :return: the value returned by ``self.client.stat_object`` for the object,
        or ``None`` when the store answers with the ``NoSuchKey`` error code.
    :raises minio.error.S3Error: any S3 error whose code is not ``NoSuchKey``
        is re-raised unchanged.
    """
    logger.debug("stat: {}:{}".format(self.bucket, name))
    try:
        return self.client.stat_object(self.bucket, str(name))
    except minio.error.S3Error as e:
        if e.code == "NoSuchKey":
            # A missing key is an expected outcome for callers probing for
            # existence, so report it as None instead of propagating.
            return None
        # Bare raise keeps the original exception and traceback intact.
        raise

def exists(self, name):
    """Return True when ``self.stat(name)`` finds an object, False otherwise."""
    logger.debug("exists: {}:{}".format(self.bucket, name))
    stat_result = self.stat(name)
    return stat_result is not None

def get_size(self, name):
logger.debug("get_size: {}:{}".format(self.bucket, name))
Expand Down
Loading