feature: Add support for Partial Dependence Plots (PDP) in SageMaker Clarify

keerthanvasist · keerthanvasist · commit 1db1ff8c5233 · 2021-10-20T22:29:26.000-07:00
diff --git a/src/sagemaker/clarify.py b/src/sagemaker/clarify.py
@@ -20,6 +20,7 @@
 import os
 import tempfile
 import re
+
 from sagemaker.processing import ProcessingInput, ProcessingOutput, Processor
 from sagemaker import image_uris, s3, utils
 
@@ -297,7 +298,30 @@ class ExplainabilityConfig(ABC):
     @abstractmethod
     def get_explainability_config(self):
         """Returns config."""
-        return None
+
+
+class PDPConfig(ExplainabilityConfig):
+    """Config class for Partial Dependence Plots (PDP)."""
+
+    def __init__(self, features=None, grid_resolution=None):
+        """Initializes config for PDP.
+
+        Args:
+            features (None or list): List of feature names or indices for which partial dependence
+                plots must be computed and plotted.
+            grid_resolution (int): In case of numerical features, this number represents the
+                number of buckets that the range of values must be divided into. This decides the
+                granularity of the grid in which the PDP are plotted.
+        """
+        self.pdp_config = {}
+        if features is not None:
+            self.pdp_config["features"] = features
+        if grid_resolution is not None:
+            self.pdp_config["grid_resolution"] = grid_resolution
+
+    def get_explainability_config(self):
+        """Returns config."""
+        return copy.deepcopy({"pdp": self.pdp_config})
 
 
 class SHAPConfig(ExplainabilityConfig):
@@ -471,7 +495,10 @@ def _run(
                 will be unassociated.
                 * `TrialComponentDisplayName` is used for display in Studio.
         """
-        analysis_config["methods"]["report"] = {"name": "report", "title": "Analysis Report"}
+        analysis_config["methods"]["report"] = {
+            "name": "report",
+            "title": "Analysis Report",
+        }
         with tempfile.TemporaryDirectory() as tmpdirname:
             analysis_config_file = os.path.join(tmpdirname, "analysis_config.json")
             with open(analysis_config_file, "w") as f:
@@ -573,7 +600,15 @@ def run_pre_training_bias(
                 job_name = utils.name_from_base(self.job_name_prefix)
             else:
                 job_name = utils.name_from_base("Clarify-Pretraining-Bias")
-        self._run(data_config, analysis_config, wait, logs, job_name, kms_key, experiment_config)
+        self._run(
+            data_config,
+            analysis_config,
+            wait,
+            logs,
+            job_name,
+            kms_key,
+            experiment_config,
+        )
 
     def run_post_training_bias(
         self,
@@ -651,7 +686,15 @@ def run_post_training_bias(
                 job_name = utils.name_from_base(self.job_name_prefix)
             else:
                 job_name = utils.name_from_base("Clarify-Posttraining-Bias")
-        self._run(data_config, analysis_config, wait, logs, job_name, kms_key, experiment_config)
+        self._run(
+            data_config,
+            analysis_config,
+            wait,
+            logs,
+            job_name,
+            kms_key,
+            experiment_config,
+        )
 
     def run_bias(
         self,
@@ -746,7 +789,15 @@ def run_bias(
                 job_name = utils.name_from_base(self.job_name_prefix)
             else:
                 job_name = utils.name_from_base("Clarify-Bias")
-        self._run(data_config, analysis_config, wait, logs, job_name, kms_key, experiment_config)
+        self._run(
+            data_config,
+            analysis_config,
+            wait,
+            logs,
+            job_name,
+            kms_key,
+            experiment_config,
+        )
 
     def run_explainability(
         self,
@@ -776,8 +827,9 @@ def run_explainability(
             data_config (:class:`~sagemaker.clarify.DataConfig`): Config of the input/output data.
             model_config (:class:`~sagemaker.clarify.ModelConfig`): Config of the model and its
                 endpoint to be created.
-            explainability_config (:class:`~sagemaker.clarify.ExplainabilityConfig`): Config of the
-                specific explainability method. Currently, only SHAP is supported.
+            explainability_config (:class:`~sagemaker.clarify.ExplainabilityConfig` or list):
+                Config of the specific explainability method or a list of ExplainabilityConfig
+                objects. Currently, SHAP and PDP are the two methods supported.
             model_scores(str|int|ModelPredictedLabelConfig):  Index or JSONPath location in the
                 model output for the predicted scores to be explained. This is not required if the
                 model output is a single score. Alternatively, an instance of
@@ -786,7 +838,7 @@ def run_explainability(
             logs (bool): Whether to show the logs produced by the job.
                 Only meaningful when ``wait`` is True (default: True).
             job_name (str): Processing job name. When ``job_name`` is not specified, if
-                ``job_name_prefix`` in :class:`SageMakerClarifyProcessor` specified, the job name
+                ``job_name_prefix`` in :class:`SageMakerClarifyProcessor` specified, the job name
                 will be composed of ``job_name_prefix`` and current timestamp; otherwise use
                 "Clarify-Explainability" as prefix.
             kms_key (str): The ARN of the KMS key that is used to encrypt the
@@ -806,19 +858,44 @@ def run_explainability(
         analysis_config = data_config.get_config()
         predictor_config = model_config.get_predictor_config()
         if isinstance(model_scores, ModelPredictedLabelConfig):
-            probability_threshold, predicted_label_config = model_scores.get_predictor_config()
+            (
+                probability_threshold,
+                predicted_label_config,
+            ) = model_scores.get_predictor_config()
             _set(probability_threshold, "probability_threshold", analysis_config)
             predictor_config.update(predicted_label_config)
         else:
             _set(model_scores, "label", predictor_config)
-        analysis_config["methods"] = explainability_config.get_explainability_config()
+
+        explainability_methods = {}
+        if isinstance(explainability_config, list):
+            assert (
+                len(explainability_config) > 0
+            ), "Please provide at least one explaianbility config."
+            for config in explainability_config:
+                explain_config = config.get_explainability_config()
+                explainability_methods.update(explain_config)
+            assert len(explainability_methods.keys()) == len(
+                explainability_config
+            ), "There are duplicate explainability configs"
+        else:
+            explainability_methods = explainability_config.get_explainability_config()
+        analysis_config["methods"] = explainability_methods
         analysis_config["predictor"] = predictor_config
         if job_name is None:
             if self.job_name_prefix:
                 job_name = utils.name_from_base(self.job_name_prefix)
             else:
                 job_name = utils.name_from_base("Clarify-Explainability")
-        self._run(data_config, analysis_config, wait, logs, job_name, kms_key, experiment_config)
+        self._run(
+            data_config,
+            analysis_config,
+            wait,
+            logs,
+            job_name,
+            kms_key,
+            experiment_config,
+        )
 
 
 def _upload_analysis_config(analysis_config_file, s3_output_path, sagemaker_session, kms_key):
diff --git a/tests/integ/test_clarify.py b/tests/integ/test_clarify.py
@@ -13,7 +13,6 @@
 
 from __future__ import print_function, absolute_import
 
-
 import json
 import math
 import numpy as np
@@ -31,14 +30,14 @@
     ModelConfig,
     ModelPredictedLabelConfig,
     SHAPConfig,
+    PDPConfig,
 )
 
 from sagemaker.amazon.linear_learner import LinearLearner, LinearLearnerPredictor
 from sagemaker import utils
 from tests import integ
 from tests.integ import timeout
 
-
 CLARIFY_DEFAULT_TIMEOUT_MINUTES = 15
 
 
@@ -177,6 +176,11 @@ def shap_config():
     )
 
 
+@pytest.fixture(scope="module")
+def pdp_config():
+    return PDPConfig(features=["F1"], grid_resolution=10)
+
+
 def test_pre_training_bias(clarify_processor, data_config, data_bias_config, sagemaker_session):
     with timeout.timeout(minutes=CLARIFY_DEFAULT_TIMEOUT_MINUTES):
         clarify_processor.run_pre_training_bias(
diff --git a/tests/unit/test_clarify.py b/tests/unit/test_clarify.py