Skip to content

Commit d1acf60

Browse files
author
Valentin Thorey
authored
Replace shutil.make_archive by custom zip function (#182)
* Replace shutil.make_archive by custom zip function
* Add docstring
* Move make_zipfile into utils and stop writing folders
1 parent e3192b1 commit d1acf60

File tree

2 files changed

+24
-5
lines changed

2 files changed

+24
-5
lines changed

dataikuapi/dss/savedmodel.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from .ml import DSSTrainedClusteringModelDetails
66
from .ml import DSSTrainedPredictionModelDetails
77

8+
from ..utils import make_zipfile
9+
810
try:
911
basestring
1012
except NameError:
@@ -119,15 +121,15 @@ def get_origin_ml_task(self):
119121
if fmi is not None:
120122
return DSSMLTask.from_full_model_id(self.client, fmi, project_key=self.project_key)
121123

122-
def import_mlflow_version_from_path(self, version_id, path, code_env_name = "INHERIT"):
124+
def import_mlflow_version_from_path(self, version_id, path, code_env_name="INHERIT"):
123125
"""
124126
Create a new version for this saved model from a path containing a MLFlow model.
125127
126128
Requires the saved model to have been created using :meth:`dataikuapi.dss.project.DSSProject.create_mlflow_pyfunc_model`.
127129
128130
:param str version_id: Identifier of the version to create
129131
:param str path: An absolute path on the local filesystem. Must be a folder, and must contain a MLFlow model
130-
:param str code_env_name: Name of the code env to use for this model version. The code env must contain at least
132+
:param str code_env_name: Name of the code env to use for this model version. The code env must contain at least
131133
mlflow and the package(s) corresponding to the used MLFlow-compatible frameworks.
132134
If value is "INHERIT", the default active code env of the project will be used
133135
:return a :class:MLFlowVersionHandler in order to interact with the new MLFlow model version
@@ -136,13 +138,14 @@ def import_mlflow_version_from_path(self, version_id, path, code_env_name = "INH
136138
# TODO: cleanup the archive
137139
import shutil
138140
import os
141+
139142
archive_temp_dir = tempfile.mkdtemp()
140143
try:
141-
archive_filename = shutil.make_archive(os.path.join(archive_temp_dir, "tmpmodel"), "zip", path) #[, root_dir[, base_dir[, verbose[, dry_run[, owner[, group[, logger]]]]]]])
144+
archive_filename = make_zipfile(os.path.join(archive_temp_dir, "tmpmodel.zip"), path)
142145

143146
with open(archive_filename, "rb") as fp:
144147
self.client._perform_empty("POST", "/projects/%s/savedmodels/%s/versions/%s?codeEnvName=%s" % (self.project_key, self.sm_id, version_id, code_env_name),
145-
files={"file":(archive_filename, fp)})
148+
files={"file": (archive_filename, fp)})
146149
return self.get_mlflow_version_handler(version_id)
147150
finally:
148151
shutil.rmtree(archive_temp_dir)

dataikuapi/utils.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import csv, sys
22
from dateutil import parser as date_iso_parser
33
from contextlib import closing
4-
4+
import os
5+
import zipfile
56
import itertools
67

78
if sys.version_info > (3,0):
@@ -101,3 +102,18 @@ def __init__(self, val):
101102

102103
def __call__(self):
103104
return self.val
105+
106+
107+
def make_zipfile(output_filename, source_dir):
    """
    Create a deflate-compressed zip archive of the regular files found under a directory.

    Replaces ``shutil.make_archive``, which adds undesired folder entries to the
    archive in python 2.7 in some environments. Only files are written; directories
    get no entry of their own, so empty directories are not archived.

    :param str output_filename: Path of the zip file to create. An existing file is overwritten
    :param str source_dir: Folder whose content is archived. Entry names are relative to this folder
    :return: ``output_filename``, unchanged
    :rtype: str
    """
    relroot = os.path.abspath(source_dir)
    # The archive is created on disk as soon as it is opened. If it lives inside
    # source_dir, os.walk would list it and it would be added to itself, so
    # remember where it is in order to skip it.
    output_realpath = os.path.realpath(output_filename)
    with zipfile.ZipFile(output_filename, "w", zipfile.ZIP_DEFLATED) as zipfp:
        for root, _dirs, names in os.walk(source_dir):
            for name in names:
                filename = os.path.join(root, name)
                # Skip anything that is not a regular file (e.g. broken symlinks)
                if not os.path.isfile(filename):
                    continue
                if os.path.realpath(filename) == output_realpath:
                    continue
                # Name entries relative to the archived folder: "a.txt", "sub/b.txt"
                zipfp.write(filename, os.path.relpath(filename, relroot))
    return output_filename

0 commit comments

Comments
 (0)