From dc7502182a24e85faf7cbc27d3da5b272e458084 Mon Sep 17 00:00:00 2001 From: Arjan Schrijver Date: Thu, 23 Mar 2017 10:32:34 +0100 Subject: [PATCH 01/12] Allow passing buffer_size to write_iter and writestr --- zipstream/__init__.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/zipstream/__init__.py b/zipstream/__init__.py index a176935..b824d76 100644 --- a/zipstream/__init__.py +++ b/zipstream/__init__.py @@ -215,20 +215,20 @@ def write(self, filename, arcname=None, compress_type=None): kwargs = {'filename': filename, 'arcname': arcname, 'compress_type': compress_type} self.paths_to_write.append(kwargs) - def write_iter(self, arcname, iterable, compress_type=None): + def write_iter(self, arcname, iterable, compress_type=None, buffer_size=None): """Write the bytes iterable `iterable` to the archive under the name `arcname`.""" - kwargs = {'arcname': arcname, 'iterable': iterable, 'compress_type': compress_type} + kwargs = {'arcname': arcname, 'iterable': iterable, 'compress_type': compress_type, 'buffer_size': buffer_size} self.paths_to_write.append(kwargs) - def writestr(self, arcname, data, compress_type=None): + def writestr(self, arcname, data, compress_type=None, buffer_size=None): """ Writes a str into ZipFile by wrapping data as a generator """ def _iterable(): yield data - return self.write_iter(arcname, _iterable(), compress_type=compress_type) + return self.write_iter(arcname, _iterable(), compress_type=compress_type, buffer_size=buffer_size) - def __write(self, filename=None, iterable=None, arcname=None, compress_type=None): + def __write(self, filename=None, iterable=None, arcname=None, compress_type=None, buffer_size=None): """Put the bytes from filename into the archive under the name `arcname`.""" if not self.fp: @@ -265,7 +265,7 @@ def __write(self, filename=None, iterable=None, arcname=None, compress_type=None if st: zinfo.file_size = st[6] else: - zinfo.file_size = 0 + zinfo.file_size = buffer_size or 0 
zinfo.flag_bits = 0x00 zinfo.flag_bits |= 0x08 # ZIP flag bits, bit 3 indicates presence of data descriptor zinfo.header_offset = self.fp.tell() # Start of header bytes From 7e861b1f2a60420bb626158d4528d2aeec956cc6 Mon Sep 17 00:00:00 2001 From: Arjan Schrijver Date: Mon, 18 Mar 2019 10:44:01 +0100 Subject: [PATCH 02/12] New release of forked package --- README.markdown => README.md | 7 ++----- setup.py | 23 +++++++++++++++++------ 2 files changed, 19 insertions(+), 11 deletions(-) rename README.markdown => README.md (92%) diff --git a/README.markdown b/README.md similarity index 92% rename from README.markdown rename to README.md index 1fe72e8..994ec53 100644 --- a/README.markdown +++ b/README.md @@ -1,9 +1,6 @@ # python-zipstream -[![Build Status](https://travis-ci.org/allanlei/python-zipstream.png?branch=master)](https://travis-ci.org/allanlei/python-zipstream) -[![Coverage Status](https://coveralls.io/repos/allanlei/python-zipstream/badge.png)](https://coveralls.io/r/allanlei/python-zipstream) - zipstream.py is a zip archive generator based on python 3.3's zipfile.py. It was created to generate a zip file generator for streaming (ie web apps). This is beneficial for when you want to provide a downloadable archive of a large collection of regular files, which would be infeasible to @@ -78,12 +75,12 @@ archives. 
## Installation ``` -pip install zipstream +pip install zipstream-new ``` ## Requirements - * Python 2.6, 2.7, 3.2, 3.3, pypy + * Python 2.6+, 3.2+, pypy ## Examples diff --git a/setup.py b/setup.py index 4472cee..e7084df 100644 --- a/setup.py +++ b/setup.py @@ -2,15 +2,26 @@ from setuptools import setup, find_packages +with open("README.md", "r") as fh: + long_description = fh.read() + setup( - name='zipstream', - version='1.1.4', - description='Zipfile generator', - author='Allan Lei', - author_email='allanlei@helveticode.com', - url='https://github.com/allanlei/python-zipstream', + name='zipstream-new', + version='1.1.5', + description='Zipfile generator that takes input files as well as streams', + long_description=long_description, + long_description_content_type="text/markdown", + author='arjan5', + author_email='arjan@anymore.nl', + url='https://github.com/arjan-s/python-zipstream', packages=find_packages(exclude=['tests']), keywords='zip streaming', test_suite='nose.collector', tests_require=['nose'], + classifiers=[ + "Programming Language :: Python", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "Operating System :: OS Independent", + "Topic :: System :: Archiving :: Compression", + ], ) From 4ea51e39b8f5f08c791820ba57ddf45764e76d56 Mon Sep 17 00:00:00 2001 From: Franklyn Tackitt Date: Wed, 15 May 2019 11:20:02 -0700 Subject: [PATCH 03/12] Add partial flushing of ZipStreams I use this to flush partial zips as files are streamed into them from requests, and then at the end add manifest and error files to the end of the archive I've also added a related test and example of use --- README.md | 21 +++++++++++++++++++++ tests/test_zipstream.py | 25 +++++++++++++++++++++++++ tox.ini | 2 +- zipstream/__init__.py | 11 ++++++++--- 4 files changed, 55 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 994ec53..5c92a91 100644 --- a/README.md +++ b/README.md @@ -113,6 +113,27 @@ def zipball(): response = Response(z, 
mimetype='application/zip') response.headers['Content-Disposition'] = 'attachment; filename={}'.format('files.zip') return response + +# Partial flushing of the zip before closing + +@app.route('/package.zip', methods=['GET'], endpoint='zipball') +def zipball(): + def generate_zip_with_manifest(): + z = zipstream.ZipFile(mode='w', compression=ZIP_DEFLATED) + + manifest = [] + for filename in os.listdir('/path/to/files'): + z.write(os.path.join('/path/to/files', filename), arcname=filename) + yield from z.flush() + manifest.append(filename) + + z.writestr('manifest.json', json.dumps(manifest).encode()) + + yield from z + + response = Response(z, mimetype='application/zip') + response.headers['Content-Disposition'] = 'attachment; filename={}'.format('files.zip') + return response ``` ### django 1.5+ diff --git a/tests/test_zipstream.py b/tests/test_zipstream.py index 9910fe2..6bda736 100644 --- a/tests/test_zipstream.py +++ b/tests/test_zipstream.py @@ -92,6 +92,31 @@ def test_writestr(self): os.remove(f.name) + def test_partial_writes(self): + z = zipstream.ZipFile(mode='w') + f = tempfile.NamedTemporaryFile(suffix='zip', delete=False) + + with open(SAMPLE_FILE_RTF, 'rb') as fp: + z.writestr('sample1.rtf', fp.read()) + + for chunk in z.flush(): + f.write(chunk) + + with open(SAMPLE_FILE_RTF, 'rb') as fp: + z.writestr('sample2.rtf', fp.read()) + + for chunk in z.flush(): + f.write(chunk) + + for chunk in z: + f.write(chunk) + + f.close() + z2 = zipfile.ZipFile(f.name, 'r') + self.assertFalse(z2.testzip()) + + os.remove(f.name) + def test_write_iterable_no_archive(self): z = zipstream.ZipFile(mode='w') self.assertRaises(TypeError, z.write_iter, iterable=range(10)) diff --git a/tox.ini b/tox.ini index 8302cc5..d93b9c8 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py26, py27, py32, py33, py34, py35, pypy +envlist = py26, py27, py32, py33, py34, py35, py36, py37, pypy [testenv] deps=nose diff --git a/zipstream/__init__.py b/zipstream/__init__.py 
index b824d76..5d7accd 100644 --- a/zipstream/__init__.py +++ b/zipstream/__init__.py @@ -178,9 +178,8 @@ def __init__(self, fileobj=None, mode='w', compression=ZIP_STORED, allowZip64=Fa self.paths_to_write = [] def __iter__(self): - for kwargs in self.paths_to_write: - for data in self.__write(**kwargs): - yield data + for data in self.flush(): + yield data for data in self.__close(): yield data @@ -190,6 +189,12 @@ def __enter__(self): def __exit__(self, type, value, traceback): self.close() + def flush(self): + while self.paths_to_write: + kwargs = self.paths_to_write.pop() + for data in self.__write(**kwargs): + yield data + @property def comment(self): """The comment text associated with the ZIP file.""" From 981c2b84978bce8f01418216edcf3c9b2778a580 Mon Sep 17 00:00:00 2001 From: Arjan Schrijver Date: Thu, 6 Jun 2019 11:40:49 +0200 Subject: [PATCH 04/12] Release 1.1.6 --- setup.py | 2 +- tox.ini | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index e7084df..ff7afc9 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name='zipstream-new', - version='1.1.5', + version='1.1.6', description='Zipfile generator that takes input files as well as streams', long_description=long_description, long_description_content_type="text/markdown", diff --git a/tox.ini b/tox.ini index d93b9c8..99d9168 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py26, py27, py32, py33, py34, py35, py36, py37, pypy +envlist = py26, py27, py32, py33, py34, py35, py36, py37, pypy, pypy3 [testenv] deps=nose From de64a6eee7f3b5b6c0a364225e78666a4c97c3ef Mon Sep 17 00:00:00 2001 From: Ryan Lovett Date: Wed, 19 Jun 2019 13:18:12 -0700 Subject: [PATCH 05/12] Find compression type. 
--- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 5c92a91..1c204f7 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,7 @@ from flask import Response @app.route('/package.zip', methods=['GET'], endpoint='zipball') def zipball(): def generator(): - z = zipstream.ZipFile(mode='w', compression=ZIP_DEFLATED) + z = zipstream.ZipFile(mode='w', compression=zipstream.ZIP_DEFLATED) z.write('/path/to/file') @@ -107,7 +107,7 @@ def zipball(): @app.route('/package.zip', methods=['GET'], endpoint='zipball') def zipball(): - z = zipstream.ZipFile(mode='w', compression=ZIP_DEFLATED) + z = zipstream.ZipFile(mode='w', compression=zipstream.ZIP_DEFLATED) z.write('/path/to/file') response = Response(z, mimetype='application/zip') @@ -119,7 +119,7 @@ def zipball(): @app.route('/package.zip', methods=['GET'], endpoint='zipball') def zipball(): def generate_zip_with_manifest(): - z = zipstream.ZipFile(mode='w', compression=ZIP_DEFLATED) + z = zipstream.ZipFile(mode='w', compression=zipstream.ZIP_DEFLATED) manifest = [] for filename in os.listdir('/path/to/files'): @@ -142,7 +142,7 @@ def zipball(): from django.http import StreamingHttpResponse def zipball(request): - z = zipstream.ZipFile(mode='w', compression=ZIP_DEFLATED) + z = zipstream.ZipFile(mode='w', compression=zipstream.ZIP_DEFLATED) z.write('/path/to/file') response = StreamingHttpResponse(z, content_type='application/zip') From e46365184a41cebc32e90b5ceeac70873fc264ad Mon Sep 17 00:00:00 2001 From: Jake Poznanski Date: Fri, 18 Oct 2019 10:58:01 -0700 Subject: [PATCH 06/12] Stream data in order it was received When flushing, stream out the iterators in First in first out order. Python `pop()` with no arguments would take the last path but I think it makes sense to stream the first things first. We ran into this issue where we add a bunch of files which depend on long-running futures to provide the data. 
The futures hit a server which processes them roughly in order, so we get better streaming performance if we make this change. --- zipstream/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zipstream/__init__.py b/zipstream/__init__.py index 5d7accd..a086e3a 100644 --- a/zipstream/__init__.py +++ b/zipstream/__init__.py @@ -191,7 +191,7 @@ def __exit__(self, type, value, traceback): def flush(self): while self.paths_to_write: - kwargs = self.paths_to_write.pop() + kwargs = self.paths_to_write.pop(0) for data in self.__write(**kwargs): yield data From 574b22f051b3b6107776c03c58a5b03f979bc97b Mon Sep 17 00:00:00 2001 From: Arjan Schrijver Date: Tue, 22 Oct 2019 12:49:07 +0200 Subject: [PATCH 07/12] Release 1.1.7 --- README.md | 2 ++ setup.py | 2 +- tox.ini | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1c204f7..beaeaaf 100644 --- a/README.md +++ b/README.md @@ -167,3 +167,5 @@ def GET(self): With python version > 2.6, just run the following command: `python -m unittest discover` Alternatively, you can use `nose`. + +If you want to run the tests on all supported Python versions, run `tox`. 
diff --git a/setup.py b/setup.py index ff7afc9..bdfb2de 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name='zipstream-new', - version='1.1.6', + version='1.1.7', description='Zipfile generator that takes input files as well as streams', long_description=long_description, long_description_content_type="text/markdown", diff --git a/tox.ini b/tox.ini index 99d9168..971786d 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py26, py27, py32, py33, py34, py35, py36, py37, pypy, pypy3 +envlist = py26, py27, py32, py33, py34, py35, py36, py37, py38, pypy, pypy3 [testenv] deps=nose From 58bfa8e9a2463b0a443dd60a6707e2675be75961 Mon Sep 17 00:00:00 2001 From: arjan-s Date: Thu, 14 Nov 2019 10:51:31 +0100 Subject: [PATCH 08/12] Create CHANGELOG.md --- CHANGELOG.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..3029c67 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,12 @@ +# ChangeLog + +This file details the changes that were made after forking v1.1.4 from https://github.com/allanlei/python-zipstream. + +## v1.1.5 (2019-03-18) +* Support Zip64 when compressing iterables and strings (https://github.com/allanlei/python-zipstream/pull/25) + +## v1.1.6 (2019-06-06) +* Add partial flushing of ZipStreams (https://github.com/arjan-s/python-zipstream/pull/1) + +## v1.1.7 (2019-10-22) +* Stream data in the order it was received (https://github.com/arjan-s/python-zipstream/pull/4) From 1cf1fa9dd245f71d0901d7c1774ec609a4c5712b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oszk=C3=A1r=20Kmetti?= Date: Thu, 10 Sep 2020 09:43:58 +0200 Subject: [PATCH 09/12] New datetime parameter into write_iter. 
--- tests/test_zipstream.py | 23 +++++++++++++++++++++++ zipstream/__init__.py | 12 ++++++++---- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/tests/test_zipstream.py b/tests/test_zipstream.py index 6bda736..41bb368 100644 --- a/tests/test_zipstream.py +++ b/tests/test_zipstream.py @@ -3,6 +3,7 @@ import os import tempfile +import time import unittest import zipstream import zipfile @@ -76,6 +77,28 @@ def string_generator(): os.remove(f.name) + def test_write_iterable_with_date_time(self): + file_name_in_zip = "data_datetime" + file_date_time_in_zip = time.strptime("2011-04-19 22:30:21", "%Y-%m-%d %H:%M:%S") + + z = zipstream.ZipFile(mode='w') + def string_generator(): + for _ in range(10): + yield b'zipstream\x01\n' + z.write_iter(iterable=string_generator(), arcname=file_name_in_zip, date_time=file_date_time_in_zip) + + f = tempfile.NamedTemporaryFile(suffix='zip', delete=False) + for chunk in z: + f.write(chunk) + f.close() + + z2 = zipfile.ZipFile(f.name, 'r') + self.assertFalse(z2.testzip()) + + self.assertEqual(file_date_time_in_zip[0:5], z2.getinfo(file_name_in_zip).date_time[0:5]) + + os.remove(f.name) + def test_writestr(self): z = zipstream.ZipFile(mode='w') diff --git a/zipstream/__init__.py b/zipstream/__init__.py index a086e3a..bc03bb3 100644 --- a/zipstream/__init__.py +++ b/zipstream/__init__.py @@ -220,9 +220,9 @@ def write(self, filename, arcname=None, compress_type=None): kwargs = {'filename': filename, 'arcname': arcname, 'compress_type': compress_type} self.paths_to_write.append(kwargs) - def write_iter(self, arcname, iterable, compress_type=None, buffer_size=None): + def write_iter(self, arcname, iterable, compress_type=None, buffer_size=None, date_time=None): """Write the bytes iterable `iterable` to the archive under the name `arcname`.""" - kwargs = {'arcname': arcname, 'iterable': iterable, 'compress_type': compress_type, 'buffer_size': buffer_size} + kwargs = {'arcname': arcname, 'iterable': iterable, 'compress_type': 
compress_type, 'buffer_size': buffer_size, 'date_time': date_time} self.paths_to_write.append(kwargs) def writestr(self, arcname, data, compress_type=None, buffer_size=None): @@ -233,7 +233,7 @@ def _iterable(): yield data return self.write_iter(arcname, _iterable(), compress_type=compress_type, buffer_size=buffer_size) - def __write(self, filename=None, iterable=None, arcname=None, compress_type=None, buffer_size=None): + def __write(self, filename=None, iterable=None, arcname=None, compress_type=None, buffer_size=None, date_time=None): """Put the bytes from filename into the archive under the name `arcname`.""" if not self.fp: @@ -248,7 +248,11 @@ def __write(self, filename=None, iterable=None, arcname=None, compress_type=None mtime = time.localtime(st.st_mtime) date_time = mtime[0:6] else: - st, isdir, date_time = None, False, time.localtime()[0:6] + st, isdir = None, False + if date_time is not None and isinstance(date_time, time.struct_time): + date_time = date_time[0:6] + if date_time is None: + date_time = time.localtime()[0:6] # Create ZipInfo instance to store file information if arcname is None: arcname = filename From a2b5b8f7902c4e42ce0e832dbd9aa388dee6ffc4 Mon Sep 17 00:00:00 2001 From: Arjan Schrijver Date: Mon, 14 Sep 2020 10:48:53 +0200 Subject: [PATCH 10/12] Add date_time parameter to writestr() too --- CHANGELOG.md | 11 +++++++---- zipstream/__init__.py | 4 ++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3029c67..efb91d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,11 +2,14 @@ This file details the changes that were made after forking v1.1.4 from https://github.com/allanlei/python-zipstream. 
-## v1.1.5 (2019-03-18) -* Support Zip64 when compressing iterables and strings (https://github.com/allanlei/python-zipstream/pull/25) +## Unreleased +* New datetime parameter in write_iter (https://github.com/arjan-s/python-zipstream/pull/8) + +## v1.1.7 (2019-10-22) +* Stream data in the order it was received (https://github.com/arjan-s/python-zipstream/pull/4) ## v1.1.6 (2019-06-06) * Add partial flushing of ZipStreams (https://github.com/arjan-s/python-zipstream/pull/1) -## v1.1.7 (2019-10-22) -* Stream data in the order it was received (https://github.com/arjan-s/python-zipstream/pull/4) +## v1.1.5 (2019-03-18) +* Support Zip64 when compressing iterables and strings (https://github.com/allanlei/python-zipstream/pull/25) diff --git a/zipstream/__init__.py b/zipstream/__init__.py index bc03bb3..2bdf9c1 100644 --- a/zipstream/__init__.py +++ b/zipstream/__init__.py @@ -225,13 +225,13 @@ def write_iter(self, arcname, iterable, compress_type=None, buffer_size=None, da kwargs = {'arcname': arcname, 'iterable': iterable, 'compress_type': compress_type, 'buffer_size': buffer_size, 'date_time': date_time} self.paths_to_write.append(kwargs) - def writestr(self, arcname, data, compress_type=None, buffer_size=None): + def writestr(self, arcname, data, compress_type=None, buffer_size=None, date_time=None): """ Writes a str into ZipFile by wrapping data as a generator """ def _iterable(): yield data - return self.write_iter(arcname, _iterable(), compress_type=compress_type, buffer_size=buffer_size) + return self.write_iter(arcname, _iterable(), compress_type=compress_type, buffer_size=buffer_size, date_time=date_time) def __write(self, filename=None, iterable=None, arcname=None, compress_type=None, buffer_size=None, date_time=None): """Put the bytes from filename into the archive under the name From 961886fbb77530515bf6b3f595dd5fd29c1c1c97 Mon Sep 17 00:00:00 2001 From: Arjan Schrijver Date: Mon, 14 Sep 2020 11:22:20 +0200 Subject: [PATCH 11/12] Release 1.1.8 --- 
.bumpversion.cfg | 11 +++++++++++ CHANGELOG.md | 2 +- setup.py | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 .bumpversion.cfg diff --git a/.bumpversion.cfg b/.bumpversion.cfg new file mode 100644 index 0000000..39ce0fa --- /dev/null +++ b/.bumpversion.cfg @@ -0,0 +1,11 @@ +[bumpversion] +current_version = 1.1.8 +commit = true +message = Release {new_version} +tag = true + +[bumpversion:file:CHANGELOG.md] +search = Unreleased +replace = v{new_version} ({now:%Y-%m-%d}) + +[bumpversion:file:setup.py] diff --git a/CHANGELOG.md b/CHANGELOG.md index efb91d9..8ef4ef7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ This file details the changes that were made after forking v1.1.4 from https://github.com/allanlei/python-zipstream. -## Unreleased +## v1.1.8 (2020-09-14) * New datetime parameter in write_iter (https://github.com/arjan-s/python-zipstream/pull/8) ## v1.1.7 (2019-10-22) diff --git a/setup.py b/setup.py index bdfb2de..31e2ebc 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name='zipstream-new', - version='1.1.7', + version='1.1.8', description='Zipfile generator that takes input files as well as streams', long_description=long_description, long_description_content_type="text/markdown", From add237d8e1c251d89b2927ca1f78efce6bceb897 Mon Sep 17 00:00:00 2001 From: Janis Beckedorf Date: Sat, 12 Dec 2020 12:15:52 +0100 Subject: [PATCH 12/12] fix readme example --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index beaeaaf..89fa418 100644 --- a/README.md +++ b/README.md @@ -131,7 +131,7 @@ def zipball(): yield from z - response = Response(z, mimetype='application/zip') + response = Response(generate_zip_with_manifest(), mimetype='application/zip') response.headers['Content-Disposition'] = 'attachment; filename={}'.format('files.zip') return response ```