feat: Add features_to_explain to shap config (#3951)

oyangz · web-flow · commit 6c1a3a15e9d0 · 2023-07-06T11:54:42.000-07:00
diff --git a/src/sagemaker/clarify.py b/src/sagemaker/clarify.py
@@ -267,6 +267,7 @@
                     },
                 },
                 SchemaOptional("seed"): int,
+                SchemaOptional("features_to_explain"): [Or(int, str)],
             },
             SchemaOptional("pre_training_bias"): {"methods": Or(str, [str])},
             SchemaOptional("post_training_bias"): {"methods": Or(str, [str])},
@@ -1308,6 +1309,7 @@ def __init__(
         num_clusters: Optional[int] = None,
         text_config: Optional[TextConfig] = None,
         image_config: Optional[ImageConfig] = None,
+        features_to_explain: Optional[List[Union[str, int]]] = None,
     ):
         """Initializes config for SHAP analysis.
 
@@ -1343,6 +1345,14 @@ def __init__(
                 text features. Default is None.
             image_config (:class:`~sagemaker.clarify.ImageConfig`): Config for handling image
                 features. Default is None.
+            features_to_explain (list[str or int]): Names or indices of the dataset features to
+                compute SHAP values for. If not provided, SHAP values are computed for all
+                features by default. Currently only supported for tabular datasets.
+
+        Raises:
+            ValueError: when ``agg_method`` is invalid, ``baseline`` and ``num_clusters`` are provided
+                together, or ``features_to_explain`` is specified when ``text_config`` or
+                ``image_config`` is provided.
         """  # noqa E501  # pylint: disable=c0301
         if agg_method is not None and agg_method not in [
             "mean_abs",
@@ -1376,6 +1386,13 @@ def __init__(
                 )
         if image_config:
             _set(image_config.get_image_config(), "image_config", self.shap_config)
+        if features_to_explain is not None and (
+            text_config is not None or image_config is not None
+        ):
+            raise ValueError(
+                "`features_to_explain` is not supported for datasets containing text features or images."
+            )
+        _set(features_to_explain, "features_to_explain", self.shap_config)
 
     def get_explainability_config(self):
         """Returns a shap config dictionary."""
diff --git a/tests/unit/test_clarify.py b/tests/unit/test_clarify.py
@@ -716,6 +716,37 @@ def test_valid_shap_config(baseline):
     assert expected_config == shap_config.get_explainability_config()
 
 
+def test_shap_config_features_to_explain():
+    baseline = [1, 2, 3]
+    num_samples = 100
+    agg_method = "mean_sq"
+    use_logit = True
+    save_local_shap_values = True
+    seed = 123
+    features_to_explain = [0, 1]
+    shap_config = SHAPConfig(
+        baseline=baseline,
+        num_samples=num_samples,
+        agg_method=agg_method,
+        use_logit=use_logit,
+        save_local_shap_values=save_local_shap_values,
+        seed=seed,
+        features_to_explain=features_to_explain,
+    )
+    expected_config = {
+        "shap": {
+            "baseline": baseline,
+            "num_samples": num_samples,
+            "agg_method": agg_method,
+            "use_logit": use_logit,
+            "save_local_shap_values": save_local_shap_values,
+            "seed": seed,
+            "features_to_explain": features_to_explain,
+        }
+    }
+    assert expected_config == shap_config.get_explainability_config()
+
+
 def test_shap_config_no_baseline():
     num_samples = 100
     agg_method = "mean_sq"
@@ -852,6 +883,17 @@ def test_invalid_shap_config():
         "Baseline and num_clusters cannot be provided together. Please specify one of the two."
         in str(error.value)
     )
+    with pytest.raises(ValueError) as error:
+        SHAPConfig(
+            baseline=[[1, 2]],
+            num_samples=1,
+            text_config=TextConfig(granularity="token", language="english"),
+            features_to_explain=[0],
+        )
+    assert (
+        "`features_to_explain` is not supported for datasets containing text features or images."
+        in str(error.value)
+    )
 
 
 @pytest.fixture(scope="module")