Skip to content

Commit 64c7845

Browse files
authored
Added SER recipe creation (#192)
* Added SER recipe creation * Cleanup * Typo * Added documentation for running the SER * Added documentation for running the ER * Add a warning in the SER API documentation about settings being changed to their default value * Added details in the documentation and cost matrix example * Slight doc changes * Changed the doc to make better use of the default payload
1 parent 19e26ea commit 64c7845

File tree

2 files changed

+76
-1
lines changed

2 files changed

+76
-1
lines changed

dataikuapi/dss/project.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1429,6 +1429,8 @@ def new_recipe(self, type, name=None):
14291429
return recipe.PredictionScoringRecipeCreator(name, self)
14301430
elif type == "evaluation":
14311431
return recipe.EvaluationRecipeCreator(name, self)
1432+
elif type == "standalone_evaluation":
1433+
return recipe.StandaloneEvaluationRecipeCreator(name, self)
14321434
elif type == "clustering_scoring":
14331435
return recipe.ClusteringScoringRecipeCreator(name, self)
14341436
elif type == "download":

dataikuapi/dss/recipe.py

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1350,6 +1350,7 @@ class EvaluationRecipeCreator(DSSRecipeCreator):
13501350
13511351
json_payload['dontComputePerformance'] = True
13521352
json_payload['outputProbabilities'] = False
1353+
json_payload['metrics'] = ["precision", "recall", "auc", "f1", "costMatrixGain"]
13531354
13541355
# Manage evaluation labels
13551356
@@ -1361,7 +1362,7 @@ class EvaluationRecipeCreator(DSSRecipeCreator):
13611362
er_settings.save()
13621363
13631364
new_recipe.run()
1364-
1365+
13651366
Outputs must exist. They can be created using the following:
13661367
13671368
.. code-block:: python
@@ -1397,6 +1398,78 @@ def with_output_evaluation_store(self, mes_id):
13971398
return self._with_output(mes_id, role="evaluationStore")
13981399

13991400

1401+
class StandaloneEvaluationRecipeCreator(DSSRecipeCreator):
1402+
"""
1403+
Builder for the creation of a new "Standalone Evaluate" recipe, from an
1404+
input dataset
1405+
1406+
.. code-block:: python
1407+
1408+
# Create a new standalone evaluation of a scored dataset
1409+
1410+
project = client.get_project("MYPROJECT")
1411+
builder = StandaloneEvaluationRecipeCreator("my_standalone_evaluation_recipe", project)
1412+
builder.with_input("scored_dataset_to_evaluate")
1413+
builder.with_output_evaluation_store(evaluation_store_id)
1414+
1415+
new_recipe = builder.create()
1416+
1417+
# Modify the model parameters in the SER settings
1418+
1419+
ser_settings = new_recipe.get_settings()
1420+
ser_json_payload = ser_settings.get_json_payload()
1421+
1422+
ser_json_payload['predictionType'] = "BINARY_CLASSIFICATION"
1423+
ser_json_payload['targetVariable'] = "Survived"
1424+
ser_json_payload['predictionVariable'] = "prediction"
1425+
ser_json_payload['isProbaAware'] = True
1426+
ser_json_payload['dontComputePerformance'] = False
1427+
1428+
# For a classification model with probabilities, the 'probas' section can be filled with the mapping of the class and the probability column
1429+
# e.g. for a binary classification model with 2 columns: proba_0 and proba_1
1430+
1431+
class_0 = dict(key=0, value="proba_0")
1432+
class_1 = dict(key=1, value="proba_1")
1433+
ser_json_payload['probas'] = [class_0, class_1]
1434+
1435+
# Change the 'features' settings for this standalone evaluation
1436+
# e.g. reject the features that you do not want to use in the evaluation
1437+
1438+
feature_passengerid = dict(name="Passenger_Id", role="REJECT", type="TEXT")
1439+
feature_ticket = dict(name="Ticket", role="REJECT", type="TEXT")
1440+
feature_cabin = dict(name="Cabin", role="REJECT", type="TEXT")
1441+
1442+
ser_json_payload['features'] = [feature_passengerid, feature_ticket, feature_cabin]
1443+
1444+
# To set the cost matrix properly, access the 'metricParams' section of the payload and set the cost matrix weights:
1445+
1446+
ser_json_payload['metricParams'] = dict(costMatrixWeights=dict(tpGain=0.4, fpGain=-1.0, tnGain=0.2, fnGain=-0.5))
1447+
1448+
# Add the modified json payload to the recipe settings and save the recipe
1449+
# Note that with this method, all the settings that were not explicitly set are instead set to their default value.
1450+
1451+
ser_settings = new_recipe.get_settings()
1452+
1453+
ser_settings.set_json_payload(ser_json_payload)
1454+
ser_settings.save()
1455+
1456+
new_recipe.run()
1457+
1458+
The output model evaluation store must exist. It can be created using the following:
1459+
1460+
.. code-block:: python
1461+
1462+
evaluation_store_id = project.create_model_evaluation_store("output_model_evaluation").mes_id
1463+
"""
1464+
1465+
def __init__(self, name, project):
1466+
DSSRecipeCreator.__init__(self, 'standalone_evaluation', name, project)
1467+
1468+
def with_output_evaluation_store(self, mes_id):
1469+
"""Sets the output model evaluation store"""
1470+
return self._with_output(mes_id, role="main")
1471+
1472+
14001473
class ClusteringScoringRecipeCreator(SingleOutputRecipeCreator):
14011474
"""
14021475
Builder for the creation of a new "Clustering scoring" recipe, from an

0 commit comments

Comments
 (0)