Skip to content

Commit 236cd06

Browse files
committed
Optimize _calc_initial_entry_offset by introducing cache
1 parent a9e85c6 commit 236cd06

File tree

2 files changed

+17
-2
lines changed

2 files changed

+17
-2
lines changed

Lib/test/test_zipfile/test_core.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1787,6 +1787,8 @@ def test_repack_file_entry_before_first_file(self):
17871787
with open(TESTFN, 'wb') as fh:
17881788
with zipfile.ZipFile(fh, 'w') as zh:
17891789
zh.writestr('file.txt', b'dummy')
1790+
zh.writestr('file2.txt', b'dummy')
1791+
zh.writestr('file3.txt', b'dummy')
17901792
fh.write(b' ')
17911793
expected_zinfos = self._prepare_zip_from_test_files(fh, test_files)
17921794
expected_size = os.path.getsize(TESTFN)
@@ -1795,6 +1797,8 @@ def test_repack_file_entry_before_first_file(self):
17951797
with open(TESTFN, 'wb') as fh:
17961798
with zipfile.ZipFile(fh, 'w') as zh:
17971799
zh.writestr('file.txt', b'dummy')
1800+
zh.writestr('file2.txt', b'dummy')
1801+
zh.writestr('file3.txt', b'dummy')
17981802
fh.write(b' ')
17991803
zinfos = self._prepare_zip_from_test_files(fh, self.test_files)
18001804
with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh:

Lib/zipfile/__init__.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1495,11 +1495,12 @@ def _repack(self, zfile, *, chunk_size=2**20):
14951495
zfile._didModify = True
14961496

14971497
def _calc_initial_entry_offset(self, fp, data_offset):
1498+
checked_offsets = set()
14981499
if data_offset > 0:
14991500
if self.debug > 2:
15001501
print('scanning file signatures before:', data_offset)
15011502
for pos in self._iter_scan_signature(fp, stringFileHeader, 0, data_offset):
1502-
if self._starts_consecutive_file_entries(fp, pos, data_offset):
1503+
if self._starts_consecutive_file_entries(fp, pos, data_offset, checked_offsets):
15031504
return data_offset - pos
15041505
return 0
15051506

@@ -1528,13 +1529,23 @@ def _iter_scan_signature(self, fp, signature, start_offset, end_offset, chunk_si
15281529
remainder = chunk[-(sig_len - 1):]
15291530
pos += read_size
15301531

1531-
def _starts_consecutive_file_entries(self, fp, start_offset, end_offset):
1532+
def _starts_consecutive_file_entries(self, fp, start_offset, end_offset, checked_offsets):
15321533
offset = start_offset
15331534

15341535
while offset < end_offset:
15351536
if self.debug > 2:
15361537
print('checking local file entry:', offset)
15371538

1539+
# Cache checked offsets to improve performance by failing
1540+
# subsequent (possible) file entry offsets early. An offset is
1541+
# revisited only if the earlier scan through it eventually failed.
1542+
if offset in checked_offsets:
1543+
if self.debug > 2:
1544+
print('skipping checked:', offset)
1545+
return False
1546+
else:
1547+
checked_offsets.add(offset)
1548+
15381549
fp.seek(offset)
15391550
try:
15401551
fheader = self._read_local_file_header(fp)

0 commit comments

Comments
 (0)