Task/dss1223 bump dataikuapi #285

Merged: 2 commits, Oct 12, 2023
6 changes: 6 additions & 0 deletions HISTORY.txt
@@ -1,6 +1,12 @@
Changelog
==========


12.2.3 (2023-10-12)
---------------------

* Initial release for DSS 12.2.3

12.1.0 (2023-06-29)
---------------------

2 changes: 1 addition & 1 deletion dataikuapi/apinode_admin_client.py
@@ -68,7 +68,7 @@ def import_code_env_in_cache(self, file_dir, language):
:param file_dir: path of an exported code env base folder
:param language: language of the code env (`python` or `R`)
"""
self._perform_empty("POST", "cached-code-envs", params={
return self._perform_json("POST", "cached-code-envs", params={
"fileDir": file_dir,
"language": language
})
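A minimal usage sketch of the change above (the call now returns the parsed JSON response instead of discarding it), assuming an API node admin client; the node URL, API key and export path are placeholders:

.. code-block:: python

    import dataikuapi

    # placeholders: point these at a real API node and an exported code env folder
    client = dataikuapi.APINodeAdminClient("https://apinode.example.com:12000", "admin-api-key")
    result = client.import_code_env_in_cache("/data/exported-code-env", "python")
    print(result)  # JSON report returned by the API node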
9 changes: 5 additions & 4 deletions dataikuapi/dss/admin.py
@@ -180,8 +180,9 @@ def get_settings(self):

# make details of a connection accessible to some groups
connection = client.get_connection("my_connection_name")
settings = connection.settings()
settings.set_readability(False, "group1", "group2")
settings = connection.get_settings()
readability = settings.details_readability
readability.set_readability(False, "group1", "group2")
settings.save()

:return: the settings of the connection
@@ -396,7 +397,7 @@ def set_usability(self, all, *groups):
Set who can use the connection.

:param boolean all: if True, anybody can use the connection
:param *string groups: a list of groups that can use the connection
:param \*string groups: a list of groups that can use the connection
"""
if all:
self.settings["usableBy"] = 'ALL'
@@ -453,7 +454,7 @@ def set_readability(self, all, *groups):
To make the details readable by nobody, pass all=False and no group.

:param boolean all: if True, anybody can use the connection
:param *string groups: a list of groups that can use the connection
:param \*string groups: a list of groups that can use the connection
"""
if all:
self._data["readableBy"] = 'ALL'
2 changes: 1 addition & 1 deletion dataikuapi/dss/apideployer.py
@@ -367,7 +367,7 @@ def delete(self, disable_first=False, ignore_pre_delete_errors=False):
If False, will raise an Exception if this deployment is enabled.

:param boolean ignore_pre_delete_errors: If True, any error that occurred during the actions performed prior to
deleting the deployment will be ignored and the delete action will be performed anyway.

"""

64 changes: 64 additions & 0 deletions dataikuapi/dss/llm.py
@@ -0,0 +1,64 @@
class DSSLLM(object):
"""
A handle to interact with a DSS-managed LLM
"""
def __init__(self, client, project_key, llm_id):
self.client = client
self.project_key = project_key
self.llm_id = llm_id

def new_completion(self):
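"""Create a new completion query for this LLM"""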
return DSSLLMCompletionQuery(self)

def new_embeddings(self):
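"""Create a new embeddings query for this LLM"""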
return DSSLLMEmbeddingsQuery(self)


class DSSLLMEmbeddingsQuery(object):
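"""A handle to build and run an embeddings query against a DSS-managed LLM"""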
def __init__(self, llm):
self.llm = llm
self.eq = {"queries": [], "llmId": llm.llm_id}

def add_text(self, text):
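"""Add a text to embed as part of this query"""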
self.eq["queries"].append({"text": text})

def execute(self):
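"""Run the embeddings query and return a DSSLLMEmbeddingsResponse"""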
ret = self.llm.client._perform_json("POST", "/projects/%s/llms/embeddings" % (self.llm.project_key), body=self.eq)
return DSSLLMEmbeddingsResponse(ret)

class DSSLLMEmbeddingsResponse(object):
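"""A handle on the response to an embeddings query"""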
def __init__(self, raw_resp):
self._raw = raw_resp

def get_embeddings(self):
"""
returns all embeddings as a list of list of floats
"""
return [r["embedding"] for r in self._raw["responses"]]

class DSSLLMCompletionQuery(object):
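"""A handle to build and run a completion query against a DSS-managed LLM"""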
def __init__(self, llm):
self.llm = llm
self.cq = {"messages": [], "settings": {}}

def with_message(self, message, role="user"):
self.cq["messages"].append({"content": message, "role": role})
return self

def execute(self):
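"""Run the completion query and return a DSSLLMCompletionResponse"""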
queries = {"queries": [self.cq], "llmId": self.llm.llm_id}
ret = self.llm.client._perform_json("POST", "/projects/%s/llms/completions" % (self.llm.project_key), body=queries)

return DSSLLMCompletionResponse(ret["responses"][0])

class DSSLLMCompletionResponse(object):
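"""A handle on the response to a completion query"""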
def __init__(self, raw_resp):
self._raw = raw_resp

@property
def success(self):
return self._raw["ok"]

@property
def text(self):
return self._raw["text"]
6 changes: 3 additions & 3 deletions dataikuapi/dss/ml.py
@@ -686,7 +686,7 @@ def set_grid_search(self, shuffle=True, seed=1337):
Sets the search strategy to "GRID", to perform a grid-search over the hyperparameters.

:param shuffle: if True (default), iterate over a shuffled grid as opposed to lexicographical
iteration over the cartesian product of the hyperparameters
:type shuffle: bool
:param seed: Seed value used to ensure reproducible results (defaults to **1337**)
:type seed: int
@@ -811,7 +811,7 @@ def set_single_split_validation(self, split_ratio=0.8, stratified=True, cv_seed=

The mode will be set to either "SHUFFLE" or "TIME_SERIES_SINGLE_SPLIT", depending on whether time-based ordering is enabled.

:param split_ratio: The ratio of the data used for training during hyperparameter search (defaults to **0.8*)
:param split_ratio: The ratio of the data used for training during hyperparameter search (defaults to **0.8**)
:type split_ratio: float
:param stratified: If True, keep the same proportion of each target classes in both splits (defaults to **True**)
:type stratified: bool
@@ -870,7 +870,7 @@ def set_search_distribution(self, distributed=False, n_containers=4):
Sets the distribution parameters for the hyperparameter search execution.

:param distributed: if True, distribute search in the Kubernetes cluster selected
in the runtime environment's containerized execution configuration (defaults to **False**)
:type distributed: bool
:param n_containers: number of containers to use for the distributed search (defaults to **4**)
:type n_containers: int
76 changes: 74 additions & 2 deletions dataikuapi/dss/modelevaluationstore.py
@@ -234,6 +234,78 @@ def compute_metrics(self, metric_ids=None, probes=None):
return self.client._perform_json(
"POST" , "%s/computeMetrics" % url)

def run_checks(self, evaluation_id='', checks=None):
"""
Run checks on a partition of this model evaluation store.

If the checks are not specified, the checks
set up on the model evaluation store are used.

:param str evaluation_id: (optional) id of evaluation on which checks should be run. Last evaluation is used if not specified.
:param list[string] checks: (optional) ids of the checks to run.

:returns: a checks computation report, as a dict.
:rtype: dict
"""
if checks is None:
return self.client._perform_json(
"POST", "/projects/%s/modelevaluationstores/%s/actions/runChecks" %(self.project_key, self.mes_id),
params={'evaluationId': evaluation_id})
else:
return self.client._perform_json(
"POST", "/projects/%s/modelevaluationstores/%s/actions/runChecks" %(self.project_key, self.mes_id),
params={'evaluationId': evaluation_id}, body=checks)
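A hedged usage sketch for run_checks, reusing the store id from the code sample below; the evaluation and check ids are hypothetical:

.. code-block:: python

    import dataiku

    client = dataiku.api_client()
    project = client.get_default_project()
    mes = project.get_model_evaluation_store("7vFZWNck")

    # run all checks configured on the store, against the last evaluation
    report = mes.run_checks()

    # run specific checks on a given evaluation (ids are hypothetical)
    report = mes.run_checks(evaluation_id="my-evaluation-id", checks=["my-check-id"])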

class MetricDefinition(dict):
def __init__(self, code, value, name=None, description=None):
dict.__init__(self, {"metricCode": code, "value": value, "name": name, "description": description})

class LabelDefinition(dict):
def __init__(self, key, value):
dict.__init__(self, {"key": key, "value": value})

def add_custom_model_evaluation(self, metrics, evaluation_id=None, name=None, labels=None, model=None):
"""
Adds a model evaluation with custom metrics to the model evaluation store.
:param list[DSSModelEvaluationStore.MetricDefinition] metrics: the metrics to add.
:param str evaluation_id: the id of the evaluation (optional)
:param str name: the human-readable name of the evaluation (optional)
:param list[DSSModelEvaluationStore.LabelDefinition] labels: labels to set on the model evaluation (optional). See below.
:param model: saved model version (full ID or DSSTrainedPredictionModelDetails) of the evaluated model (optional)
:type model: Union[str, DSSTrainedPredictionModelDetails]

Code sample:

.. code-block:: python

import dataiku
from dataikuapi.dss.modelevaluationstore import DSSModelEvaluationStore

client = dataiku.api_client()
project = client.get_default_project()
mes = project.get_model_evaluation_store("7vFZWNck")

accuracy = DSSModelEvaluationStore.MetricDefinition("accuracy", 0.95, "Accuracy")
other = DSSModelEvaluationStore.MetricDefinition("other", 42, "Other", "Other metric desc")
label = DSSModelEvaluationStore.LabelDefinition("custom:myLabel", "myValue")

mes.add_custom_model_evaluation([accuracy, other], labels=[label])
mes.run_checks()
"""
if hasattr(model, 'full_id'):
model = model.full_id

url = "/projects/%s/modelevaluationstores/%s/evaluations" % (self.project_key, self.mes_id)
return self.client._perform_json(
"POST", url,
body={
"evaluationId": evaluation_id,
"name": name,
"metrics": metrics,
"labels": labels,
"fullModelId": model
})


class DSSModelEvaluationStoreSettings:
"""
@@ -411,8 +483,8 @@ def __init__(self, model_evaluation, full_info):
self.model_full_id = self.full_info["evaluation"]["modelRef"]["fullId"] # type: str
else:
self.model_full_id = None
self.prediction_type = self.full_info["evaluation"]["predictionType"] # type: str
self.prediction_variable = self.full_info["evaluation"]["predictionVariable"] # type: str
self.prediction_type = self.full_info["evaluation"].get("predictionType") # type: str
self.prediction_variable = self.full_info["evaluation"].get("predictionVariable") # type: str
self.target_variable = self.full_info["evaluation"].get("targetVariable") # type: str
self.user_meta = self.full_info["evaluation"]["userMeta"] # type: dict
self.has_model = self.full_info["evaluation"]["hasModel"]
102 changes: 81 additions & 21 deletions dataikuapi/dss/project.py
@@ -31,6 +31,7 @@
DSSManagedStreamingEndpointCreationHelper
from .webapp import DSSWebApp, DSSWebAppListItem
from .wiki import DSSWiki
from .llm import DSSLLM
from ..dss_plugin_mlflow import MLflowHandle

class DSSProject(object):
@@ -942,50 +943,103 @@ def create_mlflow_pyfunc_model(self, name, prediction_type=None):
id = self.client._perform_json("POST", "/projects/%s/savedmodels/" % self.project_key, body=model)["id"]
return self.get_saved_model(id)

def create_proxy_model(self, name, prediction_type, protocol, region=None, auth_connection=None):
def create_external_model(self, name, prediction_type, configuration):
"""
Create a new Saved model that can contain proxied remote endpoints as versions.
EXPERIMENTAL. Creates a new Saved model that can contain external remote endpoints as versions.

:param string name: Human-readable name for the new saved model in the flow
:param string prediction_type: One of BINARY_CLASSIFICATION, MULTICLASS or REGRESSION
:param string protocol: Cloud provider. One of sagemaker, vertex-ai, azure-ml
:param string region: Deployed endpoint region, if applicable for your cloud vendor (eg. "eu-west-3")
:param string auth_connection: (optional) Name of the DSS connection to use for authentication. Credentials
will be derived from environment if not defined. The connection must be:
- an Amazon S3 connection for a SageMaker Saved model
- an Azure Blob Storage connection for an Azure ML Saved model
- a Google Cloud Storage connection for a Vertex AI Saved Model
:param dict configuration: A dictionary containing the desired external saved model configuration.

See reference documentation for details.
- For SageMaker, the syntax is:

* Example: create a saved model for SageMaker endpoints serving binary classification models in region eu-west-1
.. code-block:: python

.. code-block:: python
configuration = {
"protocol": "sagemaker",
"region": "<region-name>"
"connection": "<connection-name>"
}

Where the parameters have the following meaning:

- ``region``: The AWS region of the endpoint, e.g. ``eu-west-1``
- ``connection``: (optional) The DSS SageMaker connection to use for authentication. If not defined,
credentials will be derived from environment. See the reference documentation for details.

- For AzureML, syntax is:

.. code-block:: python

configuration = {
"protocol": "azure-ml",
"connection": "<connection-name>",
"subscription_id": "<id>",
"resource_group": "<rg>",
"workspace": "<workspace>"
}

Where the parameters have the following meaning:

- ``connection``: (optional) The DSS Azure ML connection to use for authentication. If not defined,
credentials will be derived from environment. See the reference documentation for details.
- ``subscription_id``: The Azure subscription ID
- ``resource_group``: The Azure resource group
- ``workspace``: The Azure ML workspace

- For Vertex AI, syntax is:

.. code-block:: python

configuration = {
"protocol": "vertex-ai",
"region": "<region-name>"
"connection": "<connection-name>",
"project_id": "<name> or <id>"
}

Where the parameters have the following meaning:

- ``region``: The GCP region of the endpoint, e.g. ``europe-west-1``
- ``connection``: (optional) The DSS Vertex AI connection to use for authentication. If not defined,
credentials will be derived from environment. See the reference documentation for details.
- ``project_id``: The ID or name of the GCP project

- Example: create a saved model for SageMaker endpoints serving binary classification models in region eu-west-1

.. code-block:: python

import dataiku
client = dataiku.api_client()
project = client.get_default_project()
sm = project.create_proxy_model("SaveMaker Proxy Model", "BINARY_CLASSIFICATION", "sagemaker", "eu-west-1")
configuration = {
"protocol": "sagemaker",
"region": "eu-west-1"
}
sm = project.create_external_model("SaveMaker Proxy Model", "BINARY_CLASSIFICATION", configuration)

* Example: create a saved model for Vertex AI endpoints serving regression models in region eu-west-1, performing
authentication using DSS connection "vertex_conn" of type GCS
- Example: create a saved model for Vertex AI endpoints serving binary classification models in region europe-west1, on
project "my-project", performing authentication using DSS connection "vertex_conn" of type "Vertex AI".

.. code-block:: python

import dataiku
client = dataiku.api_client()
project = client.get_default_project()
sm = project.create_proxy_model("Vertex AI Proxy Model", "BINARY_CLASSIFICATION", "vertex-ai",
"europe-west1", "vertex_conn")
configuration = {
"protocol": "vertex-ai",
"region": "europe-west1",
"connection": "vertex_conn"
"project_id": "my-project"
}
sm = project.create_external_model("Vertex AI Proxy Model", "BINARY_CLASSIFICATION", configuration)

"""
model = {
"savedModelType": "PROXY_MODEL",
"predictionType": prediction_type,
"name": name,
"protocol": protocol,
"region": region,
"authenticationConnection": auth_connection
"proxyModelConfiguration": configuration
}

saved_model_id = self.client._perform_json("POST", "/projects/%s/savedmodels/" % self.project_key, body=model)["id"]
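The docstring above only illustrates SageMaker and Vertex AI; here is a hedged sketch of the Azure ML variant it describes, with all identifiers as placeholders:

.. code-block:: python

    import dataiku

    client = dataiku.api_client()
    project = client.get_default_project()
    configuration = {
        "protocol": "azure-ml",
        "connection": "azureml_conn",  # hypothetical DSS Azure ML connection
        "subscription_id": "00000000-0000-0000-0000-000000000000",
        "resource_group": "my-resource-group",
        "workspace": "my-workspace"
    }
    sm = project.create_external_model("Azure ML External Model", "REGRESSION", configuration)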
@@ -2056,6 +2110,12 @@ def get_library(self):
"""
return DSSLibrary(self.client, self.project_key)

########################################################
# LLM
########################################################
def get_llm(self, llm_id):
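"""
Get a handle to interact with a DSS-managed LLM.

:param str llm_id: identifier of the LLM to use
:rtype: :class:`dataikuapi.dss.llm.DSSLLM`
"""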
return DSSLLM(self.client, self.project_key, llm_id)

########################################################
# Webapps
########################################################
2 changes: 1 addition & 1 deletion dataikuapi/dss/recipe.py
@@ -1199,7 +1199,7 @@ def set_column_aggregations(self, column, type=None, min=False, max=False, count
:param boolean stddev: whether the standard deviation aggregate is computed

:return: the settings of the aggregations on the column, as a dict. The name of the column is in a **column** field.
:rtype dict
:rtype: dict
"""
cs = self.get_or_create_column_settings(column)
if type is not None:
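A hedged sketch of using set_column_aggregations, assuming a grouping recipe whose get_settings() returns the settings class that defines this method; recipe and column names are placeholders:

.. code-block:: python

    import dataiku

    client = dataiku.api_client()
    project = client.get_default_project()
    recipe = project.get_recipe("group_orders_by_customer")  # placeholder recipe name
    settings = recipe.get_settings()
    # aggregate the "amount" column with min and max
    agg = settings.set_column_aggregations("amount", min=True, max=True)
    settings.save()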