Task/dss1223 bump dataikuapi #285

Merged: 2 commits, Oct 12, 2023
6 changes: 6 additions & 0 deletions HISTORY.txt
@@ -1,6 +1,12 @@
Changelog
==========


12.2.3 (2023-10-12)
---------------------

* Initial release for DSS 12.2.3

12.1.0 (2023-06-29)
---------------------

2 changes: 1 addition & 1 deletion dataikuapi/apinode_admin_client.py
@@ -68,7 +68,7 @@ def import_code_env_in_cache(self, file_dir, language):
:param file_dir: path of an exported code env base folder
:param language: language of the code env (`python` or `R`)
"""
self._perform_empty("POST", "cached-code-envs", params={
return self._perform_json("POST", "cached-code-envs", params={
"fileDir": file_dir,
"language": language
})
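A minimal usage sketch of the change above (the call now returns the parsed JSON response instead of discarding it), assuming an API node admin client; the node URL, API key and export path are placeholders:

.. code-block:: python

    import dataikuapi

    # placeholders: point these at a real API node and an exported code env folder
    client = dataikuapi.APINodeAdminClient("https://apinode.example.com:12000", "admin-api-key")
    result = client.import_code_env_in_cache("/data/exported-code-env", "python")
    print(result)  # JSON report returned by the API node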
9 changes: 5 additions & 4 deletions dataikuapi/dss/admin.py
@@ -180,8 +180,9 @@ def get_settings(self):

# make details of a connection accessible to some groups
connection = client.get_connection("my_connection_name")
settings = connection.settings()
settings.set_readability(False, "group1", "group2")
settings = connection.get_settings()
readability = settings.details_readability
readability.set_readability(False, "group1", "group2")
settings.save()

:return: the settings of the connection
@@ -396,7 +397,7 @@ def set_usability(self, all, *groups):
Set who can use the connection.

:param boolean all: if True, anybody can use the connection
:param *string groups: a list of groups that can use the connection
:param \*string groups: a list of groups that can use the connection
"""
if all:
self.settings["usableBy"] = 'ALL'
@@ -453,7 +454,7 @@ def set_readability(self, all, *groups):
To make the details readable by nobody, pass all=False and no group.

:param boolean all: if True, anybody can use the connection
:param *string groups: a list of groups that can use the connection
:param \*string groups: a list of groups that can use the connection
"""
if all:
self._data["readableBy"] = 'ALL'
2 changes: 1 addition & 1 deletion dataikuapi/dss/apideployer.py
@@ -367,7 +367,7 @@ def delete(self, disable_first=False, ignore_pre_delete_errors=False):
If False, will raise an Exception if this deployment is enabled.

:param boolean ignore_pre_delete_errors: If True, any error that occurred during the actions performed prior to
deleting the deployment will be ignored and the delete action will be performed anyway.

"""

64 changes: 64 additions & 0 deletions dataikuapi/dss/llm.py
@@ -0,0 +1,64 @@
class DSSLLM(object):
"""
A handle to interact with a DSS-managed LLM
"""
def __init__(self, client, project_key, llm_id):
self.client = client
self.project_key = project_key
self.llm_id = llm_id

def new_completion(self):
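"""Create a new completion query for this LLM"""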
return DSSLLMCompletionQuery(self)

def new_embeddings(self):
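"""Create a new embeddings query for this LLM"""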
return DSSLLMEmbeddingsQuery(self)


class DSSLLMEmbeddingsQuery(object):
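"""A handle to build and run an embeddings query against a DSS-managed LLM"""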
def __init__(self, llm):
self.llm = llm
self.eq = {"queries": [], "llmId": llm.llm_id}

def add_text(self, text):
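"""Add a text to embed as part of this query"""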
self.eq["queries"].append({"text": text})

def execute(self):
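"""Run the embeddings query and return a DSSLLMEmbeddingsResponse"""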
ret = self.llm.client._perform_json("POST", "/projects/%s/llms/embeddings" % (self.llm.project_key), body=self.eq)
return DSSLLMEmbeddingsResponse(ret)

class DSSLLMEmbeddingsResponse(object):
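"""A handle on the response to an embeddings query"""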
def __init__(self, raw_resp):
self._raw = raw_resp

def get_embeddings(self):
"""
returns all embeddings as a list of list of floats
"""
return [r["embedding"] for r in self._raw["responses"]]

class DSSLLMCompletionQuery(object):
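"""A handle to build and run a completion query against a DSS-managed LLM"""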
def __init__(self, llm):
self.llm = llm
self.cq = {"messages": [], "settings": {}}

def with_message(self, message, role="user"):
self.cq["messages"].append({"content": message, "role": role})
return self

def execute(self):
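"""Run the completion query and return a DSSLLMCompletionResponse"""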
queries = {"queries": [self.cq], "llmId": self.llm.llm_id}
ret = self.llm.client._perform_json("POST", "/projects/%s/llms/completions" % (self.llm.project_key), body=queries)

return DSSLLMCompletionResponse(ret["responses"][0])

class DSSLLMCompletionResponse(object):
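"""A handle on the response to a completion query"""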
def __init__(self, raw_resp):
self._raw = raw_resp

@property
def success(self):
return self._raw["ok"]

@property
def text(self):
return self._raw["text"]
6 changes: 3 additions & 3 deletions dataikuapi/dss/ml.py
@@ -686,7 +686,7 @@ def set_grid_search(self, shuffle=True, seed=1337):
Sets the search strategy to "GRID", to perform a grid-search over the hyperparameters.

:param shuffle: if True (default), iterate over a shuffled grid as opposed to lexicographical
iteration over the cartesian product of the hyperparameters
:type shuffle: bool
:param seed: Seed value used to ensure reproducible results (defaults to **1337**)
:type seed: int
@@ -811,7 +811,7 @@ def set_single_split_validation(self, split_ratio=0.8, stratified=True, cv_seed=

The mode will be set to either "SHUFFLE" or "TIME_SERIES_SINGLE_SPLIT", depending on whether time-based ordering is enabled.

:param split_ratio: The ratio of the data used for training during hyperparameter search (defaults to **0.8*)
:param split_ratio: The ratio of the data used for training during hyperparameter search (defaults to **0.8**)
:type split_ratio: float
:param stratified: If True, keep the same proportion of each target classes in both splits (defaults to **True**)
:type stratified: bool
@@ -870,7 +870,7 @@ def set_search_distribution(self, distributed=False, n_containers=4):
Sets the distribution parameters for the hyperparameter search execution.

:param distributed: if True, distribute search in the Kubernetes cluster selected
in the runtime environment's containerized execution configuration (defaults to **False**)
:type distributed: bool
:param n_containers: number of containers to use for the distributed search (defaults to **4**)
:type n_containers: int
76 changes: 74 additions & 2 deletions dataikuapi/dss/modelevaluationstore.py
@@ -234,6 +234,78 @@ def compute_metrics(self, metric_ids=None, probes=None):
return self.client._perform_json(
"POST" , "%s/computeMetrics" % url)

def run_checks(self, evaluation_id='', checks=None):
"""
Run checks on a partition of this model evaluation store.

If the checks are not specified, the checks
set up on the model evaluation store are used.

:param str evaluation_id: (optional) id of evaluation on which checks should be run. Last evaluation is used if not specified.
:param list[string] checks: (optional) ids of the checks to run.

:returns: a checks computation report, as a dict.
:rtype: dict
"""
if checks is None:
return self.client._perform_json(
"POST", "/projects/%s/modelevaluationstores/%s/actions/runChecks" %(self.project_key, self.mes_id),
params={'evaluationId': evaluation_id})
else:
return self.client._perform_json(
"POST", "/projects/%s/modelevaluationstores/%s/actions/runChecks" %(self.project_key, self.mes_id),
params={'evaluationId': evaluation_id}, body=checks)
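A hedged usage sketch for run_checks, reusing the store id from the code sample below; the evaluation and check ids are hypothetical:

.. code-block:: python

    import dataiku

    client = dataiku.api_client()
    project = client.get_default_project()
    mes = project.get_model_evaluation_store("7vFZWNck")

    # run all checks configured on the store, against the last evaluation
    report = mes.run_checks()

    # run specific checks on a given evaluation (ids are hypothetical)
    report = mes.run_checks(evaluation_id="my-evaluation-id", checks=["my-check-id"])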

class MetricDefinition(dict):
def __init__(self, code, value, name=None, description=None):
dict.__init__(self, {"metricCode": code, "value": value, "name": name, "description": description})

class LabelDefinition(dict):
def __init__(self, key, value):
dict.__init__(self, {"key": key, "value": value})

def add_custom_model_evaluation(self, metrics, evaluation_id=None, name=None, labels=None, model=None):
"""
Adds a model evaluation with custom metrics to the model evaluation store.
:param list[DSSModelEvaluationStore.MetricDefinition] metrics: the metrics to add.
:param str evaluation_id: the id of the evaluation (optional)
:param str name: the human-readable name of the evaluation (optional)
:param list[DSSModelEvaluationStore.LabelDefinition] labels: labels to set on the model evaluation (optional). See below.
:param model: saved model version (full ID or DSSTrainedPredictionModelDetails) of the evaluated model (optional)
:type model: Union[str, DSSTrainedPredictionModelDetails]

Code sample:

.. code-block:: python

import dataiku
from dataikuapi.dss.modelevaluationstore import DSSModelEvaluationStore

client = dataiku.api_client()
project = client.get_default_project()
mes = project.get_model_evaluation_store("7vFZWNck")

accuracy = DSSModelEvaluationStore.MetricDefinition("accuracy", 0.95, "Accuracy")
other = DSSModelEvaluationStore.MetricDefinition("other", 42, "Other", "Other metric desc")
label = DSSModelEvaluationStore.LabelDefinition("custom:myLabel", "myValue")

mes.add_custom_model_evaluation([accuracy, other], labels=[label])
mes.run_checks()
"""
if hasattr(model, 'full_id'):
model = model.full_id

url = "/projects/%s/modelevaluationstores/%s/evaluations" % (self.project_key, self.mes_id)
return self.client._perform_json(
"POST", url,
body={
"evaluationId": evaluation_id,
"name": name,
"metrics": metrics,
"labels": labels,
"fullModelId": model
})


class DSSModelEvaluationStoreSettings:
"""
@@ -411,8 +483,8 @@ def __init__(self, model_evaluation, full_info):
self.model_full_id = self.full_info["evaluation"]["modelRef"]["fullId"] # type: str
else:
self.model_full_id = None
self.prediction_type = self.full_info["evaluation"]["predictionType"] # type: str
self.prediction_variable = self.full_info["evaluation"]["predictionVariable"] # type: str
self.prediction_type = self.full_info["evaluation"].get("predictionType") # type: str
self.prediction_variable = self.full_info["evaluation"].get("predictionVariable") # type: str
self.target_variable = self.full_info["evaluation"].get("targetVariable") # type: str
self.user_meta = self.full_info["evaluation"]["userMeta"] # type: dict
self.has_model = self.full_info["evaluation"]["hasModel"]
102 changes: 81 additions & 21 deletions dataikuapi/dss/project.py
@@ -31,6 +31,7 @@
DSSManagedStreamingEndpointCreationHelper
from .webapp import DSSWebApp, DSSWebAppListItem
from .wiki import DSSWiki
from .llm import DSSLLM
from ..dss_plugin_mlflow import MLflowHandle

class DSSProject(object):
@@ -942,50 +943,103 @@ def create_mlflow_pyfunc_model(self, name, prediction_type=None):
id = self.client._perform_json("POST", "/projects/%s/savedmodels/" % self.project_key, body=model)["id"]
return self.get_saved_model(id)

def create_proxy_model(self, name, prediction_type, protocol, region=None, auth_connection=None):
def create_external_model(self, name, prediction_type, configuration):
"""
Create a new Saved model that can contain proxied remote endpoints as versions.
EXPERIMENTAL. Creates a new Saved model that can contain external remote endpoints as versions.

:param string name: Human-readable name for the new saved model in the flow
:param string prediction_type: One of BINARY_CLASSIFICATION, MULTICLASS or REGRESSION
:param string protocol: Cloud provider. One of sagemaker, vertex-ai, azure-ml
:param string region: Deployed endpoint region, if applicable for your cloud vendor (eg. "eu-west-3")
:param string auth_connection: (optional) Name of the DSS connection to use for authentication. Credentials
will be derived from environment if not defined. The connection must be:
- an Amazon S3 connection for a SageMaker Saved model
- an Azure Blob Storage connection for an Azure ML Saved model
- a Google Cloud Storage connection for a Vertex AI Saved Model
:param dict configuration: A dictionary containing the desired external saved model configuration.

See reference documentation for details.
- For SageMaker, the syntax is:

* Example: create a saved model for SageMaker endpoints serving binary classification models in region eu-west-1
.. code-block:: python

.. code-block:: python
configuration = {
"protocol": "sagemaker",
"region": "<region-name>"
"connection": "<connection-name>"
}

Where the parameters have the following meaning:

- ``region``: The AWS region of the endpoint, e.g. ``eu-west-1``
- ``connection``: (optional) The DSS SageMaker connection to use for authentication. If not defined,
credentials will be derived from environment. See the reference documentation for details.

- For AzureML, syntax is:

.. code-block:: python

configuration = {
"protocol": "azure-ml",
"connection": "<connection-name>",
"subscription_id": "<id>",
"resource_group": "<rg>",
"workspace": "<workspace>"
}

Where the parameters have the following meaning:

- ``connection``: (optional) The DSS Azure ML connection to use for authentication. If not defined,
credentials will be derived from environment. See the reference documentation for details.
- ``subscription_id``: The Azure subscription ID
- ``resource_group``: The Azure resource group
- ``workspace``: The Azure ML workspace

- For Vertex AI, syntax is:

.. code-block:: python

configuration = {
"protocol": "vertex-ai",
"region": "<region-name>"
"connection": "<connection-name>",
"project_id": "<name> or <id>"
}

Where the parameters have the following meaning:

- ``region``: The GCP region of the endpoint, e.g. ``europe-west-1``
- ``connection``: (optional) The DSS Vertex AI connection to use for authentication. If not defined,
credentials will be derived from environment. See the reference documentation for details.
- ``project_id``: The ID or name of the GCP project

- Example: create a saved model for SageMaker endpoints serving binary classification models in region eu-west-1

.. code-block:: python

import dataiku
client = dataiku.api_client()
project = client.get_default_project()
sm = project.create_proxy_model("SaveMaker Proxy Model", "BINARY_CLASSIFICATION", "sagemaker", "eu-west-1")
configuration = {
"protocol": "sagemaker",
"region": "eu-west-1"
}
sm = project.create_external_model("SaveMaker Proxy Model", "BINARY_CLASSIFICATION", configuration)

* Example: create a saved model for Vertex AI endpoints serving regression models in region eu-west-1, performing
authentication using DSS connection "vertex_conn" of type GCS
- Example: create a saved model for Vertex AI endpoints serving binary classification models in region europe-west1, on
project "my-project", performing authentication using DSS connection "vertex_conn" of type "Vertex AI".

.. code-block:: python

import dataiku
client = dataiku.api_client()
project = client.get_default_project()
sm = project.create_proxy_model("Vertex AI Proxy Model", "BINARY_CLASSIFICATION", "vertex-ai",
"europe-west1", "vertex_conn")
configuration = {
"protocol": "vertex-ai",
"region": "europe-west1",
"connection": "vertex_conn"
"project_id": "my-project"
}
sm = project.create_external_model("Vertex AI Proxy Model", "BINARY_CLASSIFICATION", configuration)

"""
model = {
"savedModelType": "PROXY_MODEL",
"predictionType": prediction_type,
"name": name,
"protocol": protocol,
"region": region,
"authenticationConnection": auth_connection
"proxyModelConfiguration": configuration
}

saved_model_id = self.client._perform_json("POST", "/projects/%s/savedmodels/" % self.project_key, body=model)["id"]
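The docstring above only illustrates SageMaker and Vertex AI; here is a hedged sketch of the Azure ML variant it describes, with all identifiers as placeholders:

.. code-block:: python

    import dataiku

    client = dataiku.api_client()
    project = client.get_default_project()
    configuration = {
        "protocol": "azure-ml",
        "connection": "azureml_conn",  # hypothetical DSS Azure ML connection
        "subscription_id": "00000000-0000-0000-0000-000000000000",
        "resource_group": "my-resource-group",
        "workspace": "my-workspace"
    }
    sm = project.create_external_model("Azure ML External Model", "REGRESSION", configuration)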
@@ -2056,6 +2110,12 @@ def get_library(self):
"""
return DSSLibrary(self.client, self.project_key)

########################################################
# LLM
########################################################
def get_llm(self, llm_id):
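"""
Get a handle to interact with a DSS-managed LLM.

:param str llm_id: identifier of the LLM to use
:rtype: :class:`dataikuapi.dss.llm.DSSLLM`
"""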
return DSSLLM(self.client, self.project_key, llm_id)

########################################################
# Webapps
########################################################
2 changes: 1 addition & 1 deletion dataikuapi/dss/recipe.py
@@ -1199,7 +1199,7 @@ def set_column_aggregations(self, column, type=None, min=False, max=False, count
:param boolean stddev: whether the standard deviation aggregate is computed

:return: the settings of the aggregations on the column, as a dict. The name of the column is in a **column** field.
:rtype dict
:rtype: dict
"""
cs = self.get_or_create_column_settings(column)
if type is not None:
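A hedged sketch of using set_column_aggregations, assuming a grouping recipe whose get_settings() returns the settings class that defines this method; recipe and column names are placeholders:

.. code-block:: python

    import dataiku

    client = dataiku.api_client()
    project = client.get_default_project()
    recipe = project.get_recipe("group_orders_by_customer")  # placeholder recipe name
    settings = recipe.get_settings()
    # aggregate the "amount" column with min and max
    agg = settings.set_column_aggregations("amount", min=True, max=True)
    settings.save()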