@@ -1500,7 +1500,9 @@ def _calc_initial_entry_offset(self, fp, data_offset):
15001500 if self .debug > 2 :
15011501 print ('scanning file signatures before:' , data_offset )
15021502 for pos in self ._iter_scan_signature (fp , stringFileHeader , 0 , data_offset ):
1503- if self ._starts_consecutive_file_entries (fp , pos , data_offset , checked_offsets ):
1503+ if self .debug > 2 :
1504+ print ('checking file signature at:' , pos )
1505+ if self ._validate_local_file_entry_sequence (fp , pos , data_offset , checked_offsets ):
15041506 return data_offset - pos
15051507 return 0
15061508
@@ -1529,12 +1531,12 @@ def _iter_scan_signature(self, fp, signature, start_offset, end_offset, chunk_si
15291531 remainder = chunk [- (sig_len - 1 ):]
15301532 pos += read_size
15311533
1532- def _starts_consecutive_file_entries (self , fp , start_offset , end_offset , checked_offsets ):
1534+ def _validate_local_file_entry_sequence (self , fp , start_offset , end_offset , checked_offsets ):
15331535 offset = start_offset
15341536
15351537 while offset < end_offset :
15361538 if self .debug > 2 :
1537- print ('checking local file entry:' , offset )
1539+ print ('checking local file entry at :' , offset )
15381540
15391541 # Cache checked offsets to improve performance by failing
15401542 # subsequent (possible) file entry offsets early. They are
@@ -1546,69 +1548,72 @@ def _starts_consecutive_file_entries(self, fp, start_offset, end_offset, checked
15461548 else :
15471549 checked_offsets .add (offset )
15481550
1549- fp .seek (offset )
1550- try :
1551- fheader = self ._read_local_file_header (fp )
1552- except BadZipFile :
1551+ entry_size = self ._validate_local_file_entry (fp , offset , end_offset )
1552+ if entry_size is None :
15531553 return False
1554+ offset += entry_size
15541555
1555- # Create a dummy ZipInfo to utilize parsing.
1556- # Flush only the required information.
1557- zinfo = ZipInfo ()
1558- zinfo .header_offset = offset
1559- zinfo .flag_bits = fheader [_FH_GENERAL_PURPOSE_FLAG_BITS ]
1560- zinfo .compress_size = fheader [_FH_COMPRESSED_SIZE ]
1561- zinfo .file_size = fheader [_FH_UNCOMPRESSED_SIZE ]
1562- zinfo .CRC = fheader [_FH_CRC ]
1563-
1564- filename = fp .read (fheader [_FH_FILENAME_LENGTH ])
1565- zinfo .extra = fp .read (fheader [_FH_EXTRA_FIELD_LENGTH ])
1566- pos = fp .tell ()
1556+ return offset == end_offset
15671557
1568- if pos > end_offset :
1569- return False
def _validate_local_file_entry(self, fp, offset, end_offset):
    """Check for a plausible local file entry at *offset* and measure it.

    Seeks *fp* to *offset*, parses a local file header there, and adds
    up the bytes the entry spans: the fixed-size header, the filename,
    the extra field, the compressed data and, when the data descriptor
    flag is set, the data descriptor reported by
    ``_scan_data_descriptor``.

    Returns the total entry size in bytes, or None when the bytes at
    *offset* are rejected: the header cannot be read, the filename and
    extra field run past *end_offset*, the extra field cannot be
    decoded, the CRC/size fields are non-zero although the data
    descriptor flag is set, or ``_scan_data_descriptor`` returns None.
    """
    fp.seek(offset)
    try:
        fheader = self._read_local_file_header(fp)
    except BadZipFile:
        return None

    name_length = fheader[_FH_FILENAME_LENGTH]
    extra_length = fheader[_FH_EXTRA_FIELD_LENGTH]

    # A throwaway ZipInfo lets us reuse its extra-field parsing; only
    # the attributes needed for that are filled in.
    probe = ZipInfo()
    probe.header_offset = offset
    probe.flag_bits = fheader[_FH_GENERAL_PURPOSE_FLAG_BITS]
    probe.compress_size = fheader[_FH_COMPRESSED_SIZE]
    probe.file_size = fheader[_FH_UNCOMPRESSED_SIZE]
    probe.CRC = fheader[_FH_CRC]

    raw_name = fp.read(name_length)
    probe.extra = fp.read(extra_length)
    data_start = fp.tell()
    if data_start > end_offset:
        # The variable-length header fields overrun the scanned region.
        return None

    try:
        probe._decodeExtra(crc32(raw_name))  # parse zip64
    except BadZipFile:
        return None

    header_size = sizeFileHeader + name_length + extra_length

    if not (probe.flag_bits & _MASK_USE_DATA_DESCRIPTOR):
        # Sizes in the header (or its zip64 extra) are authoritative.
        return header_size + probe.compress_size

    # According to the spec, these fields should be zero when a data
    # descriptor is used. Anything else is treated as a false positive
    # on random bytes so we can bail out before the rather expensive
    # data descriptor scan.
    if probe.CRC != 0 or probe.compress_size != 0 or probe.file_size != 0:
        return None

    uses_zip64 = 0xffffffff in (
        fheader[_FH_UNCOMPRESSED_SIZE],
        fheader[_FH_COMPRESSED_SIZE],
    )
    descriptor = self._scan_data_descriptor(
        fp, data_start, end_offset, uses_zip64)
    if descriptor is None:
        return None

    (probe.CRC, probe.compress_size, probe.file_size,
     descriptor_size) = descriptor
    return header_size + probe.compress_size + descriptor_size
16121617
16131618 def _read_local_file_header (self , fp ):
16141619 fheader = fp .read (sizeFileHeader )
0 commit comments