Skip to content

Commit 446277e

Browse files
fix: Skip filepath checksum on insert when file exceeds size limit
When `filepath_checksum_size_limit` is set, skip checksum computation for files larger than the limit during upload/insert. This prevents transaction timeouts when inserting large files with filepath attributes. The same config option was already used for download checksum skipping; now it applies to uploads as well.

When the checksum is skipped:
- A warning is logged
- `contents_hash` is stored as NULL
- Existing-file verification is bypassed

Fixes #1386

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 5d644c9 commit 446277e

File tree

1 file changed: +17 additions, −5 deletions

datajoint/external.py

Lines changed: 17 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -276,13 +276,25 @@ def upload_filepath(self, local_filepath):
276276
uuid = uuid_from_buffer(
277277
init_string=relative_filepath
278278
) # hash relative path, not contents
279-
contents_hash = uuid_from_file(local_filepath)
279+
280+
# Check if checksum should be skipped based on file size limit
281+
file_size = Path(local_filepath).stat().st_size
282+
size_limit = config.get("filepath_checksum_size_limit")
283+
skip_checksum = size_limit is not None and file_size > size_limit
284+
285+
if skip_checksum:
286+
contents_hash = None
287+
logger.warning(
288+
f"Skipping checksum for '{relative_filepath}' ({file_size} bytes > {size_limit} byte limit)"
289+
)
290+
else:
291+
contents_hash = uuid_from_file(local_filepath)
280292

281293
# check if the remote file already exists and verify that it matches
282294
check_hash = (self & {"hash": uuid}).fetch("contents_hash")
283295
if check_hash.size:
284296
# the tracking entry exists, check that it's the same file as before
285-
if contents_hash != check_hash[0]:
297+
if not skip_checksum and contents_hash != check_hash[0]:
286298
raise DataJointError(
287299
f"A different version of '{relative_filepath}' has already been placed."
288300
)
@@ -291,15 +303,15 @@ def upload_filepath(self, local_filepath):
291303
self._upload_file(
292304
local_filepath,
293305
self._make_external_filepath(relative_filepath),
294-
metadata={"contents_hash": str(contents_hash)},
306+
metadata={"contents_hash": str(contents_hash) if contents_hash else ""},
295307
)
296308
self.connection.query(
297309
"INSERT INTO {tab} (hash, size, filepath, contents_hash) VALUES (%s, {size}, '{filepath}', %s)".format(
298310
tab=self.full_table_name,
299-
size=Path(local_filepath).stat().st_size,
311+
size=file_size,
300312
filepath=relative_filepath,
301313
),
302-
args=(uuid.bytes, contents_hash.bytes),
314+
args=(uuid.bytes, contents_hash.bytes if contents_hash else None),
303315
)
304316
return uuid
305317

0 commit comments

Comments (0)