Skip to content

Commit bdc58c7

Browse files
committed
Introduce _validate_local_file_entry and refactor
1 parent 236cd06 commit bdc58c7

File tree

1 file changed

+58
-53
lines changed

1 file changed

+58
-53
lines changed

Lib/zipfile/__init__.py

Lines changed: 58 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1500,7 +1500,9 @@ def _calc_initial_entry_offset(self, fp, data_offset):
15001500
if self.debug > 2:
15011501
print('scanning file signatures before:', data_offset)
15021502
for pos in self._iter_scan_signature(fp, stringFileHeader, 0, data_offset):
1503-
if self._starts_consecutive_file_entries(fp, pos, data_offset, checked_offsets):
1503+
if self.debug > 2:
1504+
print('checking file signature at:', pos)
1505+
if self._validate_local_file_entry_sequence(fp, pos, data_offset, checked_offsets):
15041506
return data_offset - pos
15051507
return 0
15061508

@@ -1529,12 +1531,12 @@ def _iter_scan_signature(self, fp, signature, start_offset, end_offset, chunk_si
15291531
remainder = chunk[-(sig_len - 1):]
15301532
pos += read_size
15311533

1532-
def _starts_consecutive_file_entries(self, fp, start_offset, end_offset, checked_offsets):
1534+
def _validate_local_file_entry_sequence(self, fp, start_offset, end_offset, checked_offsets):
15331535
offset = start_offset
15341536

15351537
while offset < end_offset:
15361538
if self.debug > 2:
1537-
print('checking local file entry:', offset)
1539+
print('checking local file entry at:', offset)
15381540

15391541
# Cache checked offsets to improve performance by failing
15401542
# subsequent (possible) file entry offsets early. They are
@@ -1546,69 +1548,72 @@ def _starts_consecutive_file_entries(self, fp, start_offset, end_offset, checked
15461548
else:
15471549
checked_offsets.add(offset)
15481550

1549-
fp.seek(offset)
1550-
try:
1551-
fheader = self._read_local_file_header(fp)
1552-
except BadZipFile:
1551+
entry_size = self._validate_local_file_entry(fp, offset, end_offset)
1552+
if entry_size is None:
15531553
return False
1554+
offset += entry_size
15541555

1555-
# Create a dummy ZipInfo to utilize parsing.
1556-
# Flush only the required information.
1557-
zinfo = ZipInfo()
1558-
zinfo.header_offset = offset
1559-
zinfo.flag_bits = fheader[_FH_GENERAL_PURPOSE_FLAG_BITS]
1560-
zinfo.compress_size = fheader[_FH_COMPRESSED_SIZE]
1561-
zinfo.file_size = fheader[_FH_UNCOMPRESSED_SIZE]
1562-
zinfo.CRC = fheader[_FH_CRC]
1563-
1564-
filename = fp.read(fheader[_FH_FILENAME_LENGTH])
1565-
zinfo.extra = fp.read(fheader[_FH_EXTRA_FIELD_LENGTH])
1566-
pos = fp.tell()
1556+
return offset == end_offset
15671557

1568-
if pos > end_offset:
1569-
return False
1558+
def _validate_local_file_entry(self, fp, offset, end_offset):
1559+
fp.seek(offset)
1560+
try:
1561+
fheader = self._read_local_file_header(fp)
1562+
except BadZipFile:
1563+
return None
15701564

1571-
try:
1572-
zinfo._decodeExtra(crc32(filename)) # parse zip64
1573-
except BadZipFile:
1574-
return False
1565+
# Create a dummy ZipInfo to utilize parsing.
1566+
# Flush only the required information.
1567+
zinfo = ZipInfo()
1568+
zinfo.header_offset = offset
1569+
zinfo.flag_bits = fheader[_FH_GENERAL_PURPOSE_FLAG_BITS]
1570+
zinfo.compress_size = fheader[_FH_COMPRESSED_SIZE]
1571+
zinfo.file_size = fheader[_FH_UNCOMPRESSED_SIZE]
1572+
zinfo.CRC = fheader[_FH_CRC]
15751573

1576-
data_descriptor_size = 0
1574+
filename = fp.read(fheader[_FH_FILENAME_LENGTH])
1575+
zinfo.extra = fp.read(fheader[_FH_EXTRA_FIELD_LENGTH])
1576+
pos = fp.tell()
15771577

1578-
if zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
1579-
# According to the spec, these fields should be zero when data
1580-
# descriptor is used. Otherwise treat as a false positive on
1581-
# random bytes to return early, as scanning for data descriptor
1582-
# is rather intensive.
1583-
if not (zinfo.CRC == zinfo.compress_size == zinfo.file_size == 0):
1584-
return False
1578+
if pos > end_offset:
1579+
return None
15851580

1586-
zip64 = (
1587-
fheader[_FH_UNCOMPRESSED_SIZE] == 0xffffffff or
1588-
fheader[_FH_COMPRESSED_SIZE] == 0xffffffff
1589-
)
1581+
try:
1582+
zinfo._decodeExtra(crc32(filename)) # parse zip64
1583+
except BadZipFile:
1584+
return None
15901585

1591-
dd = self._scan_data_descriptor(fp, pos, end_offset, zip64)
1586+
data_descriptor_size = 0
15921587

1593-
if dd is None:
1594-
return False
1588+
if zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
1589+
# According to the spec, these fields should be zero when data
1590+
# descriptor is used. Otherwise treat as a false positive on
1591+
# random bytes to return early, as scanning for data descriptor
1592+
# is rather intensive.
1593+
if not (zinfo.CRC == zinfo.compress_size == zinfo.file_size == 0):
1594+
return None
1595+
1596+
zip64 = (
1597+
fheader[_FH_UNCOMPRESSED_SIZE] == 0xffffffff or
1598+
fheader[_FH_COMPRESSED_SIZE] == 0xffffffff
1599+
)
15951600

1596-
crc, compress_size, file_size, data_descriptor_size = dd
1597-
zinfo.CRC = crc
1598-
zinfo.compress_size = compress_size
1599-
zinfo.file_size = file_size
1601+
dd = self._scan_data_descriptor(fp, pos, end_offset, zip64)
16001602

1601-
offset += (
1602-
sizeFileHeader +
1603-
fheader[_FH_FILENAME_LENGTH] + fheader[_FH_EXTRA_FIELD_LENGTH] +
1604-
zinfo.compress_size +
1605-
data_descriptor_size
1606-
)
1603+
if dd is None:
1604+
return None
16071605

1608-
if self.debug > 2:
1609-
print('next', offset)
1606+
crc, compress_size, file_size, data_descriptor_size = dd
1607+
zinfo.CRC = crc
1608+
zinfo.compress_size = compress_size
1609+
zinfo.file_size = file_size
16101610

1611-
return offset == end_offset
1611+
return (
1612+
sizeFileHeader +
1613+
fheader[_FH_FILENAME_LENGTH] + fheader[_FH_EXTRA_FIELD_LENGTH] +
1614+
zinfo.compress_size +
1615+
data_descriptor_size
1616+
)
16121617

16131618
def _read_local_file_header(self, fp):
16141619
fheader = fp.read(sizeFileHeader)

0 commit comments

Comments
 (0)