Added SER recipe creation (#192)

pmmayne · web-flow · commit 64c78452a5e8 · 2022-01-07T17:08:16.000+01:00
* Added SER recipe creation

* Cleanup

* Typo

* Added documentation for running the SER

* Added documentation for running the ER

* Add a warning in the SER API documentation about settings being changed to their default value

* Added details in the documentation and cost matrix example

* Slight doc changes

* Changed the doc to make better use of the default payload
diff --git a/dataikuapi/dss/project.py b/dataikuapi/dss/project.py
@@ -1429,6 +1429,8 @@ def new_recipe(self, type, name=None):
             return recipe.PredictionScoringRecipeCreator(name, self)
         elif type == "evaluation":
             return recipe.EvaluationRecipeCreator(name, self)
+        elif type == "standalone_evaluation":
+            return recipe.StandaloneEvaluationRecipeCreator(name, self)
         elif type == "clustering_scoring":
             return recipe.ClusteringScoringRecipeCreator(name, self)
         elif type == "download":
diff --git a/dataikuapi/dss/recipe.py b/dataikuapi/dss/recipe.py
@@ -1350,6 +1350,7 @@ class EvaluationRecipeCreator(DSSRecipeCreator):
 
         json_payload['dontComputePerformance'] = True
         json_payload['outputProbabilities'] = False
+        json_payload['metrics'] = ["precision", "recall", "auc", "f1", "costMatrixGain"]
 
         # Manage evaluation labels
 
@@ -1361,7 +1362,7 @@ class EvaluationRecipeCreator(DSSRecipeCreator):
         er_settings.save()
 
         new_recipe.run()
-    
+
     Outputs must exist. They can be created using the following:
 
     .. code-block:: python
@@ -1397,6 +1398,78 @@ def with_output_evaluation_store(self, mes_id):
         return self._with_output(mes_id, role="evaluationStore")
 
 
+class StandaloneEvaluationRecipeCreator(DSSRecipeCreator):
+    """
+    Builder for the creation of a new "Standalone Evaluate" recipe, from an
+    input dataset
+
+    .. code-block:: python
+
+        # Create a new standalone evaluation of a scored dataset
+
+        project = client.get_project("MYPROJECT")
+        builder = StandaloneEvaluationRecipeCreator("my_standalone_evaluation_recipe", project)
+        builder.with_input("scored_dataset_to_evaluate")
+        builder.with_output_evaluation_store(evaluation_store_id)
+
+        new_recipe = builder.create()
+
+        # Modify the model parameters in the SER settings
+
+        ser_settings = new_recipe.get_settings()
+        ser_json_payload = ser_settings.get_json_payload()
+
+        ser_json_payload['predictionType'] = "BINARY_CLASSIFICATION"
+        ser_json_payload['targetVariable'] = "Survived"
+        ser_json_payload['predictionVariable'] = "prediction"
+        ser_json_payload['isProbaAware'] = True
+        ser_json_payload['dontComputePerformance'] = False
+
+        # For a classification model with probabilities, the 'probas' section can be filled with the mapping of the class and the probability column
+        # e.g. for a binary classification model with 2 columns: proba_0 and proba_1
+
+        class_0 = dict(key=0, value="proba_0")
+        class_1 = dict(key=1, value="proba_1")
+        ser_json_payload['probas'] = [class_0, class_1]
+
+        # Change the 'features' settings for this standalone evaluation
+        # e.g. reject the features that you do not want to use in the evaluation
+
+        feature_passengerid = dict(name="Passenger_Id", role="REJECT", type="TEXT")
+        feature_ticket = dict(name="Ticket", role="REJECT", type="TEXT")
+        feature_cabin = dict(name="Cabin", role="REJECT", type="TEXT")
+
+        ser_json_payload['features'] = [feature_passengerid, feature_ticket, feature_cabin]
+
+        # To set the cost matrix properly, access the 'metricParams' section of the payload and set the cost matrix weights:
+
+        ser_json_payload['metricParams'] = dict(costMatrixWeights=dict(tpGain=0.4, fpGain=-1.0, tnGain=0.2, fnGain=-0.5))
+
+        # Add the modified json payload to the recipe settings and save the recipe
+        # Note that with this method, all the settings that were not explicitly set are instead set to their default value.
+
+        ser_settings = new_recipe.get_settings()
+
+        ser_settings.set_json_payload(ser_json_payload)
+        ser_settings.save()
+
+        new_recipe.run()
+
+    Output model evaluation store must exist. It can be created using the following:
+
+    .. code-block:: python
+
+        evaluation_store_id = project.create_model_evaluation_store("output_model_evaluation").mes_id
+    """
+
+    def __init__(self, name, project):
+        DSSRecipeCreator.__init__(self, 'standalone_evaluation', name, project)
+
+    def with_output_evaluation_store(self, mes_id):
+        """Sets the output model evaluation store"""
+        return self._with_output(mes_id, role="main")
+
+
 class ClusteringScoringRecipeCreator(SingleOutputRecipeCreator):
     """
     Builder for the creation of a new "Clustering scoring" recipe, from an