From 522fa8a8dcba1bf0db4e2e1ad6757819ea478bca Mon Sep 17 00:00:00 2001 From: Konstantinos Koukopoulos Date: Mon, 10 Mar 2014 15:30:01 +0200 Subject: [PATCH 01/13] add unit tests for taking input from file-like objects --- tests/test_zipstream.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/test_zipstream.py b/tests/test_zipstream.py index 56e1ef2..d81b69a 100644 --- a/tests/test_zipstream.py +++ b/tests/test_zipstream.py @@ -6,6 +6,7 @@ import unittest import zipstream import zipfile +import socket class ZipInfoTestCase(unittest.TestCase): @@ -54,6 +55,42 @@ def test_write_file(self): os.remove(f.name) + def test_write_fp(self): + z = zipstream.ZipFile(mode='w') + for fileobj in self.fileobjs: + z.write(fileobj) + + f = tempfile.NamedTemporaryFile(suffix='zip', delete=False) + for chunk in z: + f.write(chunk) + f.close() + + z2 = zipfile.ZipFile(f.name, 'r') + z2.testzip() + + os.remove(f.name) + + @unittest.skipUnless(os.name == "posix", "requires POSIX") + def test_write_socket(self): + z = zipstream.ZipFile(mode='w') + s, c = socket.socketpair(socket.AF_UNIX, socket.SOCK_STREAM) + try: + s.send("FILE CONTENTS") + z.write(c.makefile()) + s.close() + + f = tempfile.NamedTemporaryFile(suffix='zip', delete=False) + for chunk in z: + f.write(chunk) + f.close() + + z2 = zipfile.ZipFile(f.name, 'r') + z2.testzip() + + os.remove(f.name) + finally: + c.close() + if __name__ == '__main__': unittest.main() \ No newline at end of file From 27ea172688ec7e0841ec3a0bb29da4395ed1d7d8 Mon Sep 17 00:00:00 2001 From: Konstantinos Koukopoulos Date: Mon, 10 Mar 2014 15:30:31 +0200 Subject: [PATCH 02/13] support file-like objects as well as pathnames --- zipstream/__init__.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/zipstream/__init__.py b/zipstream/__init__.py index 811e2cf..a0d8e9f 100644 --- a/zipstream/__init__.py +++ b/zipstream/__init__.py @@ -215,7 +215,16 @@ def __write(self, filename, 
arcname=None, compress_type=None): raise RuntimeError( "Attempt to write to ZIP archive that was already closed") - st = os.stat(filename) + fp = None + if hasattr(filename, 'fileno'): + fp = filename + st = os.fstat(filename.fileno()) + if arcname is None: + arcname = '' + else: + fp = open(filename, 'rb') + st = os.stat(filename) + isdir = stat.S_ISDIR(st.st_mode) mtime = time.localtime(st.st_mtime) date_time = mtime[0:6] @@ -255,7 +264,7 @@ def __write(self, filename, arcname=None, compress_type=None): return cmpr = _get_compressor(zinfo.compress_type) - with open(filename, 'rb') as fp: + with fp: # Must overwrite CRC and sizes with correct data later zinfo.CRC = CRC = 0 zinfo.compress_size = compress_size = 0 From cd9f4618875b1d3e2350602811c9b3351af12361 Mon Sep 17 00:00:00 2001 From: Konstantinos Koukopoulos Date: Mon, 10 Mar 2014 15:30:55 +0200 Subject: [PATCH 03/13] make sure no file has an unsupported mtime --- zipstream/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zipstream/__init__.py b/zipstream/__init__.py index a0d8e9f..5e22df4 100644 --- a/zipstream/__init__.py +++ b/zipstream/__init__.py @@ -227,6 +227,8 @@ def __write(self, filename, arcname=None, compress_type=None): isdir = stat.S_ISDIR(st.st_mode) mtime = time.localtime(st.st_mtime) + if (mtime.tm_year < 1980): + mtime = time.localtime() date_time = mtime[0:6] # Create ZipInfo instance to store file information if arcname is None: From e0152aa06a8d4dd86b1be98129bc2f882b3d1704 Mon Sep 17 00:00:00 2001 From: Konstantinos Koukopoulos Date: Mon, 10 Mar 2014 17:18:18 +0200 Subject: [PATCH 04/13] support pseudo-files that don't have fileno --- zipstream/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/zipstream/__init__.py b/zipstream/__init__.py index 5e22df4..fe1d4d7 100644 --- a/zipstream/__init__.py +++ b/zipstream/__init__.py @@ -216,9 +216,12 @@ def __write(self, filename, arcname=None, compress_type=None): "Attempt to write to ZIP archive 
that was already closed") fp = None - if hasattr(filename, 'fileno'): + if hasattr(filename, 'read'): fp = filename - st = os.fstat(filename.fileno()) + if hasattr(filename, 'fileno'): + st = os.fstat(filename.fileno()) + else: + st = os.fstat(0) if arcname is None: arcname = '' else: From aee13e0b617b433b2fb4453e19c883f32cac1b53 Mon Sep 17 00:00:00 2001 From: Konstantinos Koukopoulos Date: Tue, 20 May 2014 15:09:07 +0300 Subject: [PATCH 05/13] be more resilient when stating pseudo files --- zipstream/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zipstream/__init__.py b/zipstream/__init__.py index fe1d4d7..801e7f7 100644 --- a/zipstream/__init__.py +++ b/zipstream/__init__.py @@ -218,9 +218,9 @@ def __write(self, filename, arcname=None, compress_type=None): fp = None if hasattr(filename, 'read'): fp = filename - if hasattr(filename, 'fileno'): + try: st = os.fstat(filename.fileno()) - else: + except Exception: st = os.fstat(0) if arcname is None: arcname = '' From b5719152418303e9b8f8b10368ef3a1794815fa2 Mon Sep 17 00:00:00 2001 From: Konstantinos Koukopoulos Date: Fri, 23 May 2014 12:08:32 +0300 Subject: [PATCH 06/13] encode string to bytes for py3 --- tests/test_zipstream.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_zipstream.py b/tests/test_zipstream.py index d81b69a..17b9074 100644 --- a/tests/test_zipstream.py +++ b/tests/test_zipstream.py @@ -75,7 +75,8 @@ def test_write_socket(self): z = zipstream.ZipFile(mode='w') s, c = socket.socketpair(socket.AF_UNIX, socket.SOCK_STREAM) try: - s.send("FILE CONTENTS") + txt = "FILE CONTENTS" + s.send(txt.encode("utf-8")) z.write(c.makefile()) s.close() From 24805fb4b1e39e8fa8fe8d18edc136effd2f5a16 Mon Sep 17 00:00:00 2001 From: Konstantinos Koukopoulos Date: Fri, 23 May 2014 12:14:12 +0300 Subject: [PATCH 07/13] don't use unittest.skipUnless --- tests/test_zipstream.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git 
a/tests/test_zipstream.py b/tests/test_zipstream.py index 17b9074..8b66fef 100644 --- a/tests/test_zipstream.py +++ b/tests/test_zipstream.py @@ -7,6 +7,17 @@ import zipstream import zipfile import socket +import functools +from nose.plugins.skip import SkipTest + + +def skipIfNotPosix(f): + @functools.wraps(f) + def wrapper(*args, **kwargs): + if os.name == "posix": + return f(*args, **kwargs) + raise SkipTest("requires POSIX") + return wrapper class ZipInfoTestCase(unittest.TestCase): @@ -70,7 +81,7 @@ def test_write_fp(self): os.remove(f.name) - @unittest.skipUnless(os.name == "posix", "requires POSIX") + @skipIfNotPosix def test_write_socket(self): z = zipstream.ZipFile(mode='w') s, c = socket.socketpair(socket.AF_UNIX, socket.SOCK_STREAM) From c1e73278733c7acd70b97af901abb8f0273109c2 Mon Sep 17 00:00:00 2001 From: Konstantinos Koukopoulos Date: Fri, 23 May 2014 12:29:19 +0300 Subject: [PATCH 08/13] more python3 bytes fixing --- zipstream/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/zipstream/__init__.py b/zipstream/__init__.py index 801e7f7..53124e3 100644 --- a/zipstream/__init__.py +++ b/zipstream/__init__.py @@ -282,6 +282,7 @@ def __write(self, filename, arcname=None, compress_type=None): buf = fp.read(1024 * 8) if not buf: break + buf = buf.encode('utf-8') # ensure we have bytes file_size = file_size + len(buf) CRC = crc32(buf, CRC) & 0xffffffff if cmpr: From bf0ff4d3a4b4ca49c20e8f6132c45b1a030f4bc4 Mon Sep 17 00:00:00 2001 From: Konstantinos Koukopoulos Date: Fri, 23 May 2014 13:19:05 +0300 Subject: [PATCH 09/13] better fix for python 3 string encoding problems --- tests/test_zipstream.py | 8 ++++++-- zipstream/__init__.py | 1 - 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/test_zipstream.py b/tests/test_zipstream.py index 8b66fef..47a603b 100644 --- a/tests/test_zipstream.py +++ b/tests/test_zipstream.py @@ -87,8 +87,12 @@ def test_write_socket(self): s, c = socket.socketpair(socket.AF_UNIX, socket.SOCK_STREAM) 
try: txt = "FILE CONTENTS" - s.send(txt.encode("utf-8")) - z.write(c.makefile()) + s.send(txt.encode("ascii")) + try: + inf = c.makefile(mode='b') + except TypeError: + inf = c.makefile() + z.write(inf) s.close() f = tempfile.NamedTemporaryFile(suffix='zip', delete=False) diff --git a/zipstream/__init__.py b/zipstream/__init__.py index 53124e3..801e7f7 100644 --- a/zipstream/__init__.py +++ b/zipstream/__init__.py @@ -282,7 +282,6 @@ def __write(self, filename, arcname=None, compress_type=None): buf = fp.read(1024 * 8) if not buf: break - buf = buf.encode('utf-8') # ensure we have bytes file_size = file_size + len(buf) CRC = crc32(buf, CRC) & 0xffffffff if cmpr: From 189e0e58225238d249aee78e0beeb59e7a7eca04 Mon Sep 17 00:00:00 2001 From: Konstantinos Koukopoulos Date: Thu, 19 Jun 2014 17:08:36 +0300 Subject: [PATCH 10/13] Support specifying file size beforehand This solves the problem that some tools don't display the correct size when listing the files of the archive, but also fixes zip64 support. 
--- zipstream/__init__.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/zipstream/__init__.py b/zipstream/__init__.py index 801e7f7..8e6cb8d 100644 --- a/zipstream/__init__.py +++ b/zipstream/__init__.py @@ -208,7 +208,7 @@ def write(self, filename, arcname=None, compress_type=None): ((filename, ), {'arcname': arcname, 'compress_type': compress_type}), ) - def __write(self, filename, arcname=None, compress_type=None): + def __write(self, filename, arcname=None, compress_type=None, size=None): """Put the bytes from filename into the archive under the name arcname.""" if not self.fp: @@ -248,7 +248,7 @@ def __write(self, filename, arcname=None, compress_type=None): else: zinfo.compress_type = compress_type - zinfo.file_size = st.st_size + zinfo.file_size = size or st.st_size zinfo.flag_bits = 0x00 zinfo.flag_bits |= 0x08 # ZIP flag bits, bit 3 indicates presence of data descriptor zinfo.header_offset = self.fp.tell() # Start of header bytes @@ -279,7 +279,12 @@ def __write(self, filename, arcname=None, compress_type=None): yield self.fp.write(zinfo.FileHeader(zip64)) file_size = 0 while 1: - buf = fp.read(1024 * 8) + sz = 1024 * 8 + if zinfo.file_size > 0: # known size, read only that much + if zinfo.file_size == file_size: + break + sz = min(zinfo.file_size - file_size, sz) + buf = fp.read(sz) if not buf: break file_size = file_size + len(buf) @@ -296,6 +301,8 @@ def __write(self, filename, arcname=None, compress_type=None): else: zinfo.compress_size = file_size zinfo.CRC = CRC + if zinfo.file_size > 0 and zinfo.file_size != file_size: + raise RuntimeError('File size changed during compressing') zinfo.file_size = file_size if not zip64 and self._allowZip64: if file_size > ZIP64_LIMIT: From bf4fd2082cd1daf90d0359841a5e3368fb61a233 Mon Sep 17 00:00:00 2001 From: Konstantinos Koukopoulos Date: Thu, 19 Jun 2014 18:24:39 +0300 Subject: [PATCH 11/13] no need to overwrite file_size if specified --- zipstream/__init__.py | 3 ++- 1 file 
changed, 2 insertions(+), 1 deletion(-) diff --git a/zipstream/__init__.py b/zipstream/__init__.py index 8e6cb8d..28be245 100644 --- a/zipstream/__init__.py +++ b/zipstream/__init__.py @@ -303,7 +303,8 @@ def __write(self, filename, arcname=None, compress_type=None, size=None): zinfo.CRC = CRC if zinfo.file_size > 0 and zinfo.file_size != file_size: raise RuntimeError('File size changed during compressing') - zinfo.file_size = file_size + else: + zinfo.file_size = file_size if not zip64 and self._allowZip64: if file_size > ZIP64_LIMIT: raise RuntimeError('File size has increased during compressing') From 213abba20b282676bcc152bba31601f42ee76513 Mon Sep 17 00:00:00 2001 From: Konstantinos Koukopoulos Date: Mon, 8 Sep 2014 17:40:49 +0300 Subject: [PATCH 12/13] change mode b to rb for pypy compatibility --- tests/test_zipstream.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_zipstream.py b/tests/test_zipstream.py index 47a603b..374f0df 100644 --- a/tests/test_zipstream.py +++ b/tests/test_zipstream.py @@ -89,7 +89,7 @@ def test_write_socket(self): txt = "FILE CONTENTS" s.send(txt.encode("ascii")) try: - inf = c.makefile(mode='b') + inf = c.makefile(mode='rb') except TypeError: inf = c.makefile() z.write(inf) @@ -109,4 +109,4 @@ def test_write_socket(self): if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() From 452c5da041f376239b5c7b5bc911af6c3f39e8e3 Mon Sep 17 00:00:00 2001 From: Konstantinos Koukopoulos Date: Tue, 9 Sep 2014 12:49:31 +0300 Subject: [PATCH 13/13] refactor stream input method, create separate API --- tests/test_zipstream.py | 35 ++++++++++++++++++++++++++++++-- zipstream/__init__.py | 44 ++++++++++++++++++++++++++++------------- 2 files changed, 63 insertions(+), 16 deletions(-) diff --git a/tests/test_zipstream.py b/tests/test_zipstream.py index 374f0df..82bc2a6 100644 --- a/tests/test_zipstream.py +++ b/tests/test_zipstream.py @@ -69,7 +69,7 @@ def test_write_file(self): 
def test_write_fp(self): z = zipstream.ZipFile(mode='w') for fileobj in self.fileobjs: - z.write(fileobj) + z.write_stream(fileobj) f = tempfile.NamedTemporaryFile(suffix='zip', delete=False) for chunk in z: @@ -81,6 +81,37 @@ def test_write_fp(self): os.remove(f.name) + def test_write_fp_with_stat(self): + z = zipstream.ZipFile(mode='w') + # test mtime + z.write_stream(self.fileobjs[0], arcname="mtime", + mtime=315532900) + + # test with a specific file size + fdata = tempfile.NamedTemporaryFile(suffix='.data') + fdata.write(" "*15) + fdata.seek(0) + z.write_stream(fdata, arcname="size", size=15) + + # test isdir + z.write_stream(None, arcname="isdir", isdir=True) + + f = tempfile.NamedTemporaryFile(suffix='zip', delete=False) + for chunk in z: + f.write(chunk) + f.close() + fdata.close() + + z2 = zipfile.ZipFile(f.name, 'r') + z2.testzip() + self.assertEqual( + [zi.filename for zi in z2.filelist], + ['mtime', 'size', 'isdir/']) + self.assertEqual(z2.filelist[0].date_time[5], 40) + self.assertEqual(z2.filelist[1].file_size, 15) + + os.remove(f.name) + @skipIfNotPosix def test_write_socket(self): z = zipstream.ZipFile(mode='w') @@ -92,7 +123,7 @@ def test_write_socket(self): inf = c.makefile(mode='rb') except TypeError: inf = c.makefile() - z.write(inf) + z.write_stream(inf) s.close() f = tempfile.NamedTemporaryFile(suffix='zip', delete=False) diff --git a/zipstream/__init__.py b/zipstream/__init__.py index 28be245..1361a47 100644 --- a/zipstream/__init__.py +++ b/zipstream/__init__.py @@ -16,7 +16,7 @@ import zipfile from .compat import ( - str, bytes, + str, bytes, basestring, ZIP64_VERSION, ZIP_BZIP2, BZIP2_VERSION, ZIP_LZMA, LZMA_VERSION) @@ -32,6 +32,19 @@ stringDataDescriptor = b'PK\x07\x08' # magic number for data descriptor +def _stream_stat(mtime, isdir, size): + st = [0]*10 + st[0] = stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH # mode + st[7] = st[8] = st[9] = 315532800 # times + if isdir is True: + st[0] |= stat.S_IFDIR + if size is not None: + st[6] = 
size + if mtime is not None: + st[8] = mtime + return os.stat_result(st) + + def _get_compressor(compress_type): if compress_type == ZIP_DEFLATED: return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, @@ -208,25 +221,27 @@ def write(self, filename, arcname=None, compress_type=None): ((filename, ), {'arcname': arcname, 'compress_type': compress_type}), ) - def __write(self, filename, arcname=None, compress_type=None, size=None): + def write_stream(self, fp, arcname=None, compress_type=None, + mtime=None, isdir=None, size=None): + self.paths_to_write.append( + ((fp, ), {'arcname': arcname, 'compress_type': compress_type, + 'st': _stream_stat(mtime, isdir, size)}), + ) + + + def __write(self, fp, arcname=None, compress_type=None, st=None): """Put the bytes from filename into the archive under the name arcname.""" if not self.fp: raise RuntimeError( "Attempt to write to ZIP archive that was already closed") - fp = None - if hasattr(filename, 'read'): - fp = filename - try: - st = os.fstat(filename.fileno()) - except Exception: - st = os.fstat(0) - if arcname is None: - arcname = '' + if isinstance(fp, basestring): + filename, fp = (fp, None) + st = st or os.stat(filename) else: - fp = open(filename, 'rb') - st = os.stat(filename) + filename = '' + st = st or os.stat(0) isdir = stat.S_ISDIR(st.st_mode) mtime = time.localtime(st.st_mtime) @@ -248,7 +263,7 @@ def __write(self, filename, arcname=None, compress_type=None, size=None): else: zinfo.compress_type = compress_type - zinfo.file_size = size or st.st_size + zinfo.file_size = st.st_size zinfo.flag_bits = 0x00 zinfo.flag_bits |= 0x08 # ZIP flag bits, bit 3 indicates presence of data descriptor zinfo.header_offset = self.fp.tell() # Start of header bytes @@ -269,6 +284,7 @@ def __write(self, filename, arcname=None, compress_type=None, size=None): return cmpr = _get_compressor(zinfo.compress_type) + fp = fp or open(filename, 'rb') with fp: # Must overwrite CRC and sizes with correct data later zinfo.CRC = CRC = 0