Skip to content

Commit b78b172

Browse files
Valentin Thorey and instanceofme authored
upload_folder function for managed folders (#186)
* upload_folder function for managed folders
* Add import from managed folder for mlflow
* Improve doc mlflow
* Improve doc mlflow
* Add possibility to load mlflow version from managed_folder object
* Improve upload_folder utils function
* Rename smartFolderId into folderRef
* Remove useless argument check
* Update documentation
  Co-authored-by: Adrien Lavoillotte <adrien.lavoillotte@dataiku.com>
* Add inline comment
  Co-authored-by: Adrien Lavoillotte <adrien.lavoillotte@dataiku.com>
Co-authored-by: Adrien Lavoillotte <adrien.lavoillotte@dataiku.com>
1 parent e240760 commit b78b172

File tree

2 files changed

+48
-2
lines changed

2 files changed

+48
-2
lines changed

dataikuapi/dss/managedfolder.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from ..utils import DataikuUTF8CSVReader
33
from ..utils import DataikuStreamedHttpUTF8CSVReader
44
import json
5+
import os
56
from requests import utils
67
from .metrics import ComputedMetrics
78
from .future import DSSFuture
@@ -99,7 +100,7 @@ def delete_file(self, path):
99100
def put_file(self, path, f):
100101
"""
101102
Upload the file to the managed folder
102-
103+
103104
Args:
104105
f: the file contents, as a stream
105106
path: the path of the file
@@ -108,6 +109,21 @@ def put_file(self, path, f):
108109
"POST", "/projects/%s/managedfolders/%s/contents/%s" % (self.project_key, self.odb_id, utils.quote(path)),
109110
"", f)
110111

112+
def upload_folder(self, path, folder):
    """
    Upload a local folder and all of its content under ``path`` in the managed folder.

    The source folder is walked recursively and every file found is sent with
    :meth:`put_file`, keeping its location relative to the source folder.
    Empty directories are not created since only files are uploaded. If ``folder``
    does not exist or is not a directory, nothing is uploaded.

    :param str path: the destination path of the folder in the managed folder
    :param str folder: path (absolute or relative) of the source folder to upload
    """
    # Local import: managed folder paths always use "/" as separator, whatever
    # the separator of the operating system running this client is.
    import posixpath

    real_root = os.path.realpath(folder)
    for root, _, files in os.walk(real_root):
        for name in files:
            filename = os.path.join(root, name)
            # Split on the local separator so that the destination path can be
            # rebuilt with forward slashes (os.path.join would emit "\\" on Windows).
            rel_parts = os.path.relpath(filename, real_root).split(os.sep)
            with open(filename, "rb") as f:
                self.put_file(posixpath.join(path, *rel_parts), f)
126+
111127
########################################################
112128
# Managed folder actions
113129
########################################################

dataikuapi/dss/savedmodel.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from .ml import DSSMLTask
55
from .ml import DSSTrainedClusteringModelDetails
66
from .ml import DSSTrainedPredictionModelDetails
7+
from .managedfolder import DSSManagedFolder
78

89
from ..utils import _make_zipfile
910

@@ -135,7 +136,6 @@ def import_mlflow_version_from_path(self, version_id, path, code_env_name="INHER
135136
:return a :class:MLFlowVersionHandler in order to interact with the new MLFlow model version
136137
"""
137138
# TODO: Add a check that it's indeed a MLFlow model folder
138-
# TODO: cleanup the archive
139139
import shutil
140140
import os
141141

@@ -150,6 +150,36 @@ def import_mlflow_version_from_path(self, version_id, path, code_env_name="INHER
150150
finally:
151151
shutil.rmtree(archive_temp_dir)
152152

153+
def import_mlflow_version_from_managed_folder(self, version_id, managed_folder, path, code_env_name="INHERIT"):
    """
    Create a new version for this saved model from a path containing a MLFlow model in a managed folder.

    Requires the saved model to have been created using :meth:`dataikuapi.dss.project.DSSProject.create_mlflow_pyfunc_model`.

    :param str version_id: Identifier of the version to create
    :param managed_folder: Identifier of the managed folder, or a
        :class:`dataikuapi.dss.managedfolder.DSSManagedFolder` (or subclass) instance. An instance is
        converted to the reference ``"<project_key>.<id>"``; any other value is sent unchanged.
    :type managed_folder: str or :class:`dataikuapi.dss.managedfolder.DSSManagedFolder`
    :param str path: Path of the MLflow folder in the managed folder
    :param str code_env_name: Name of the code env to use for this model version. The code env must contain at least
        mlflow and the package(s) corresponding to the used MLFlow-compatible frameworks.
        If value is "INHERIT", the default active code env of the project will be used
    :return: a :class:`MLFlowVersionHandler` in order to interact with the new MLFlow model version
    """
    # TODO: Add a check that it's indeed a MLFlow model folder
    # isinstance (rather than an exact type comparison) so that subclasses of
    # DSSManagedFolder are also turned into a folder reference.
    if isinstance(managed_folder, DSSManagedFolder):
        folder_ref = "{}.{}".format(managed_folder.project_key, managed_folder.id)
    else:
        folder_ref = managed_folder

    self.client._perform_empty(
        "POST", "/projects/{project_id}/savedmodels/{saved_model_id}/versions/{version_id}?codeEnvName={codeEnvName}".format(
            project_id=self.project_key, saved_model_id=self.sm_id, version_id=version_id, codeEnvName=code_env_name
        ),
        params={"folderRef": folder_ref, "path": path},
        files={"file": (None, None)}  # required for backend-mandated multipart request
    )
    return self.get_mlflow_version_handler(version_id)
182+
153183
def get_mlflow_version_handler(self, version_id):
154184
"""
155185
Returns a :class:MLFlowVersionHandler to interact with a MLFlow model version

0 commit comments

Comments
 (0)