
Commit 60b146b

Merge remote-tracking branch 'origin/release/8.0' into feature/dss80-flow-zones
2 parents: 02c59d5 + 74840b8

19 files changed: 875 additions & 683 deletions

dataikuapi/dss/admin.py

Lines changed: 196 additions & 53 deletions
Large diffs are not rendered by default.

dataikuapi/dss/dataset.py

Lines changed: 43 additions & 10 deletions
@@ -11,7 +11,7 @@
 from . import recipe
 
 class DSSDatasetListItem(DSSTaggableObjectListItem):
-    """An item in a list of datasets. Do not instantiate this class"""
+    """An item in a list of datasets. Do not instantiate this class, use :meth:`dataikuapi.dss.project.DSSProject.list_datasets`"""
     def __init__(self, client, data):
         super(DSSDatasetListItem, self).__init__(data)
         self.client = client
@@ -51,7 +51,7 @@ def get_column(self, column):
 
 class DSSDataset(object):
     """
-    A dataset on the DSS instance
+    A dataset on the DSS instance. Do not instantiate this class, use :meth:`dataikuapi.dss.project.DSSProject.get_dataset`
     """
     def __init__(self, client, project_key, dataset_name):
         self.client = client
@@ -109,9 +109,9 @@ def get_settings(self):
         """
         data = self.client._perform_json("GET", "/projects/%s/datasets/%s" % (self.project_key, self.dataset_name))
 
-        if data["type"] in self.__class__.FS_TYPES:
+        if data["type"] in self.__class__._FS_TYPES:
             return FSLikeDatasetSettings(self, data)
-        elif data["type"] in self.__class__.SQL_TYPES:
+        elif data["type"] in self.__class__._SQL_TYPES:
             return SQLDatasetSettings(self, data)
         else:
             return DSSDatasetSettings(self, data)
@@ -558,33 +558,41 @@ def get_object_discussions(self):
     # Test / Autofill
     ########################################################
 
-    FS_TYPES = ["Filesystem", "UploadedFiles", "FilesInFolder",
+    _FS_TYPES = ["Filesystem", "UploadedFiles", "FilesInFolder",
                 "HDFS", "S3", "Azure", "GCS", "FTP", "SCP", "SFTP"]
     # HTTP is FSLike but not FS
 
-    SQL_TYPES = ["JDBC", "PostgreSQL", "MySQL", "Vertica", "Snowflake", "Redshift",
+    _SQL_TYPES = ["JDBC", "PostgreSQL", "MySQL", "Vertica", "Snowflake", "Redshift",
                  "Greenplum", "Teradata", "Oracle", "SQLServer", "SAPHANA", "Netezza",
                  "BigQuery", "Athena", "hiveserver2"]
 
     def test_and_detect(self, infer_storage_types=False):
+        """Used internally by autodetect_settings. It is not usually required to call this method"""
         settings = self.get_settings()
 
-        if settings.type in self.__class__.FS_TYPES:
+        if settings.type in self.__class__._FS_TYPES:
             future_resp = self.client._perform_json("POST",
                 "/projects/%s/datasets/%s/actions/testAndDetectSettings/fsLike"% (self.project_key, self.dataset_name),
                 body = {"detectPossibleFormats" : True, "inferStorageTypes" : infer_storage_types })
 
             return DSSFuture(self.client, future_resp.get('jobId', None), future_resp)
-        elif settings.type in self.__class__.SQL_TYPES:
+        elif settings.type in self.__class__._SQL_TYPES:
             return self.client._perform_json("POST",
                 "/projects/%s/datasets/%s/actions/testAndDetectSettings/externalSQL"% (self.project_key, self.dataset_name))
         else:
             raise ValueError("don't know how to test/detect on dataset type:%s" % settings.type)
 
     def autodetect_settings(self, infer_storage_types=False):
+        """
+        Detects appropriate settings for this dataset using the Dataiku detection engine
+
+        Returns new suggested settings that you can :meth:`DSSDatasetSettings.save`
+
+        :rtype: :class:`DSSDatasetSettings` or a subclass
+        """
         settings = self.get_settings()
 
-        if settings.type in self.__class__.FS_TYPES:
+        if settings.type in self.__class__._FS_TYPES:
             future = self.test_and_detect(infer_storage_types)
             result = future.wait_for_result()
 
@@ -597,7 +605,7 @@ def autodetect_settings(self, infer_storage_types=False):
 
             return settings
 
-        elif settings.type in self.__class__.SQL_TYPES:
+        elif settings.type in self.__class__._SQL_TYPES:
             result = self.test_and_detect()
 
             if not "schemaDetection" in result:
@@ -610,6 +618,7 @@
             raise ValueError("don't know how to test/detect on dataset type:%s" % settings.type)
 
     def get_as_core_dataset(self):
+        """Returns the :class:`dataiku.Dataset` object corresponding to this dataset"""
         import dataiku
         return dataiku.Dataset("%s.%s" % (self.project_key, self.dataset_name))
 
@@ -645,6 +654,13 @@ def new_recipe(self, type, recipe_name=None):
         return builder
 
 class DSSDatasetSettings(DSSTaggableObjectSettings):
+    """
+    Base settings class for a DSS dataset.
+    Do not instantiate this class directly, use :meth:`DSSDataset.get_settings`
+
+    Use :meth:`save` to save your changes
+    """
+
     def __init__(self, dataset, settings):
         super(DSSDatasetSettings, self).__init__(settings)
         self.dataset = dataset
@@ -662,6 +678,10 @@ def get_raw_params(self):
     def type(self):
         return self.settings["type"]
 
+    @property
+    def schema_columns(self):
+        return self.settings["schema"]["columns"]
+
     def remove_partitioning(self):
         self.settings["partitioning"] = {"dimensions" : []}
 
@@ -680,6 +700,13 @@ def save(self):
             body=self.settings)
 
 class FSLikeDatasetSettings(DSSDatasetSettings):
+    """
+    Settings for a files-based dataset. This class inherits from :class:`DSSDatasetSettings`.
+    Do not instantiate this class directly, use :meth:`DSSDataset.get_settings`
+
+    Use :meth:`save` to save your changes
+    """
+
     def __init__(self, dataset, settings):
         super(FSLikeDatasetSettings, self).__init__(dataset, settings)
 
@@ -711,6 +738,12 @@ def set_partitioning_file_pattern(self, pattern):
         self.settings["partitioning"]["filePathPattern"] = pattern
 
 class SQLDatasetSettings(DSSDatasetSettings):
+    """
+    Settings for a SQL dataset. This class inherits from :class:`DSSDatasetSettings`.
+    Do not instantiate this class directly, use :meth:`DSSDataset.get_settings`
+
+    Use :meth:`save` to save your changes
+    """
    def __init__(self, dataset, settings):
        super(SQLDatasetSettings, self).__init__(dataset, settings)
 
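Not part of the commit: a minimal usage sketch of the settings API this diff documents. The host, API key, project key and dataset name below are placeholders.

    import dataikuapi

    # Placeholder connection details
    client = dataikuapi.DSSClient("https://dss.example.com", "<api-key>")
    project = client.get_project("MYPROJECT")
    dataset = project.get_dataset("mydataset")

    # Run the detection engine; returns a DSSDatasetSettings subclass
    # (FSLikeDatasetSettings or SQLDatasetSettings, depending on the dataset type)
    settings = dataset.autodetect_settings(infer_storage_types=True)
    print(settings.type)
    print(settings.schema_columns)  # read-only property added by this commit
    settings.save()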

dataikuapi/dss/managedfolder.py

Lines changed: 7 additions & 7 deletions
@@ -2,6 +2,7 @@
 from ..utils import DataikuUTF8CSVReader
 from ..utils import DataikuStreamedHttpUTF8CSVReader
 import json
+from requests import utils
 from .metrics import ComputedMetrics
 from .future import DSSFuture
 from .discussion import DSSObjectDiscussions
@@ -82,27 +83,26 @@ def get_file(self, path):
             the file's content, as a stream
         """
         return self.client._perform_raw(
-                "GET", "/projects/%s/managedfolders/%s/contents/%s" % (self.project_key, self.odb_id, path))
+                "GET", "/projects/%s/managedfolders/%s/contents/%s" % (self.project_key, self.odb_id, utils.quote(path)))
 
     def delete_file(self, path):
         """
         Delete a file from the managed folder
         """
         return self.client._perform_empty(
-                "DELETE", "/projects/%s/managedfolders/%s/contents/%s" % (self.project_key, self.odb_id, path))
+                "DELETE", "/projects/%s/managedfolders/%s/contents/%s" % (self.project_key, self.odb_id, utils.quote(path)))
 
-    def put_file(self, name, f):
+    def put_file(self, path, f):
         """
         Upload the file to the managed folder
 
         Args:
             f: the file contents, as a stream
-            name: the name of the file
+            path: the path of the file
         """
-
         return self.client._perform_json_upload(
-                "POST", "/projects/%s/managedfolders/%s/contents/" % (self.project_key, self.odb_id),
-                name, f)
+                "POST", "/projects/%s/managedfolders/%s/contents/%s" % (self.project_key, self.odb_id, utils.quote(path)),
+                "", f)
 
     ########################################################
     # Managed folder actions
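The practical effect of the put_file rework: files are now uploaded to an explicit path inside the folder, and all content paths are URL-quoted client-side, so sub-directories and names with spaces survive the round trip. A sketch reusing the placeholder client and project from the dataset example above; the folder id and file names are made up.

    folder = project.get_managed_folder("FOLDERID")  # placeholder folder id

    # Upload to a path inside the folder (previously put_file took a bare name)
    with open("report.csv", "rb") as f:
        folder.put_file("archives/2020/report v2.csv", f)

    # get_file returns the content as a stream; the same quoting applies
    resp = folder.get_file("archives/2020/report v2.csv")
    with open("local_copy.csv", "wb") as out:
        out.write(resp.content)

    folder.delete_file("archives/2020/report v2.csv")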

dataikuapi/dss/ml.py

Lines changed: 34 additions & 1 deletion
@@ -1,3 +1,5 @@
+import re
+
 from ..utils import DataikuException
 from ..utils import DataikuUTF8CSVReader
 from ..utils import DataikuStreamedHttpUTF8CSVReader
@@ -524,6 +526,22 @@ def save_user_meta(self):
             "PUT", "/projects/%s/savedmodels/%s/versions/%s/user-meta" % (self.saved_model.project_key,
                 self.saved_model.sm_id, self.saved_model_version), body = um)
 
+    def get_origin_analysis_trained_model(self):
+        """
+        Fetch details about the analysis model from which this model has been exported. Returns None if the
+        deployed trained model does not have an origin analysis trained model.
+
+        :rtype: :class:`DSSTrainedModelDetails` | None
+        """
+        if self.saved_model is None:
+            return self
+        else:
+            fmi = self.get_raw().get("smOrigin", {}).get("fullModelId")
+            if fmi is not None:
+                origin_ml_task = DSSMLTask.from_full_model_id(self.saved_model.client, fmi,
+                                                              project_key=self.saved_model.project_key)
+                return origin_ml_task.get_trained_model_details(fmi)
+
 class DSSTreeNode(object):
     def __init__(self, tree, i):
         self.tree = tree
@@ -1425,6 +1443,17 @@ def get_scatter_plots(self):
 
 
 class DSSMLTask(object):
+
+    @staticmethod
+    def from_full_model_id(client, fmi, project_key=None):
+        match = re.match(r"^A-(\w+)-(\w+)-(\w+)-(s[0-9]+)-(pp[0-9]+(-part-(\w+)|-base)?)-(m[0-9]+)$", fmi)
+        if match is None:
+            raise DataikuException("Invalid model id: {}".format(fmi))
+        else:
+            if project_key is None:
+                project_key = match.group(1)
+            return DSSMLTask(client, project_key, match.group(2), match.group(3))
+
     """A handle to interact with a MLTask for prediction or clustering in a DSS visual analysis"""
     def __init__(self, client, project_key, analysis_id, mltask_id):
         self.client = client
@@ -1686,15 +1715,19 @@ def redeploy_to_flow(self, model_id, recipe_name=None, saved_model_id=None, acti
             "POST", "/projects/%s/models/lab/%s/%s/models/%s/actions/redeployToFlow" % (self.project_key, self.analysis_id, self.mltask_id, model_id),
             body = obj)
 
-    def guess(self, prediction_type=None):
+    def guess(self, prediction_type=None, reguess_level=None):
         """
         Guess the feature handling and the algorithms.
         :param string prediction_type: In case of a prediction problem the prediction type can be specified. Valid values are BINARY_CLASSIFICATION, REGRESSION, MULTICLASS.
+        :param string reguess_level: One of the following values: TARGET_CHANGE, TARGET_REGUESS and FULL_REGUESS. Only valid for prediction ML Tasks, cannot be specified if prediction_type is also set.
         """
         obj = {}
         if prediction_type is not None:
             obj["predictionType"] = prediction_type
 
+        if reguess_level is not None:
+            obj["reguessLevel"] = reguess_level
+
         self.client._perform_empty(
             "PUT",
             "/projects/%s/models/lab/%s/%s/guess" % (self.project_key, self.analysis_id, self.mltask_id),
