fix: Correct Clarify API docstrings by changing JSONPath to JMESPath

xgchena · xgchena · commit 0b860d52a3bd · 2022-12-10T07:43:44.000Z
SageMaker Clarify API docstrings use the term JSONPath, but the Clarify
processing container actually expects JMESPath expressions. This commit
corrects the term to avoid customer confusion.
diff --git a/src/sagemaker/clarify.py b/src/sagemaker/clarify.py
@@ -331,11 +331,11 @@ def __init__(
             s3_analysis_config_output_path (str): S3 prefix to store the analysis config output.
                 If this field is None, then the ``s3_output_path`` will be used
                 to store the ``analysis_config`` output.
-            label (str): Target attribute of the model required by bias metrics.
-                Specified as column name or index for CSV dataset or as JSONPath for JSONLines.
+            label (str): Target attribute of the model required by bias metrics. Specified as
+                column name or index for CSV dataset or as JMESPath expression for JSONLines.
                 *Required parameter* except for when the input dataset does not contain the label.
-            features (List[str]): JSONPath for locating the feature columns for bias metrics if the
-                dataset format is JSONLines.
+            features (List[str]): JMESPath expression to locate the feature columns for
+                bias metrics if the dataset format is JSONLines.
             dataset_type (str): Format of the dataset. Valid values are ``"text/csv"`` for CSV,
                 ``"application/jsonlines"`` for JSONLines, and
                 ``"application/x-parquet"`` for Parquet.
@@ -717,11 +717,11 @@ def __init__(
             ``label_headers=['cat','dog','fish']`` and infer the predicted label to be ``'fish'``.
 
         Args:
-            label (str or int): Index or JSONPath location in the model output for the prediction.
-                In case, this is a predicted label of the same type as the label in the dataset,
-                no further arguments need to be specified.
-            probability (str or int): Index or JSONPath location in the model output
-                for the predicted score(s).
+            label (str or int): Index or JMESPath expression to locate the prediction
+                in the model output. In case this is a predicted label of the same type
+                as the label in the dataset, no further arguments need to be specified.
+            probability (str or int): Index or JMESPath expression to locate the predicted score(s)
+                in the model output.
             probability_threshold (float): An optional value for binary prediction tasks in which
                 the model returns a probability, to indicate the threshold to convert the
                 prediction to a boolean value. Default is ``0.5``.
@@ -1646,9 +1646,9 @@ def run_explainability(
                 You can request multiple methods at once by passing in a list of
                 `~sagemaker.clarify.ExplainabilityConfig`.
             model_scores (int or str or :class:`~sagemaker.clarify.ModelPredictedLabelConfig`):
-                Index or JSONPath to locate the predicted scores in the model output. This is not
-                required if the model output is a single score. Alternatively, it can be an instance
-                of :class:`~sagemaker.clarify.SageMakerClarifyProcessor`
+                Index or JMESPath expression to locate the predicted scores in the model output.
+                This is not required if the model output is a single score. Alternatively,
+                it can be an instance of :class:`~sagemaker.clarify.ModelPredictedLabelConfig`
                 to provide more parameters like ``label_headers``.
             wait (bool): Whether the call should wait until the job completes (default: True).
             logs (bool): Whether to show the logs produced by the job.
@@ -1775,9 +1775,9 @@ def run_bias_and_explainability(
                 str or
                 :class:`~sagemaker.clarify.ModelPredictedLabelConfig`
             ):
-                Index or JSONPath to locate the predicted scores in the model output. This is not
-                required if the model output is a single score. Alternatively, it can be an instance
-                of :class:`~sagemaker.clarify.SageMakerClarifyProcessor`
+                Index or JMESPath expression to locate the predicted scores in the model output.
+                This is not required if the model output is a single score. Alternatively,
+                it can be an instance of :class:`~sagemaker.clarify.ModelPredictedLabelConfig`
                 to provide more parameters like ``label_headers``.
             wait (bool): Whether the call should wait until the job completes (default: True).
             logs (bool): Whether to show the logs produced by the job.
diff --git a/src/sagemaker/model_monitor/clarify_model_monitoring.py b/src/sagemaker/model_monitor/clarify_model_monitoring.py
@@ -842,8 +842,8 @@ def __init__(self, bias_config, headers=None, label=None):
             bias_config (sagemaker.clarify.BiasConfig): Config object related to bias
                 configurations.
             headers (list[str]): A list of column names in the input dataset.
-            label (str): Target attribute for the model required by bias metrics.
-                Specified as column name or index for CSV dataset, or as JSONPath for JSONLines.
+            label (str): Target attribute for the model required by bias metrics. Specified as
+                column name or index for CSV dataset, or as JMESPath expression for JSONLines.
         """
         self.analysis_config = bias_config.get_config()
         if headers is not None:
@@ -889,9 +889,10 @@ def suggest_baseline(
             model_config (:class:`~sagemaker.clarify.ModelConfig`): Config of the model and its
                 endpoint to be created.
             model_scores (int or str or :class:`~sagemaker.clarify.ModelPredictedLabelConfig`):
-                Index or JSONPath to locate the predicted scores in the model output. This is not
-                required if the model output is a single score. Alternatively, it can be an instance
-                of ModelPredictedLabelConfig to provide more parameters like label_headers.
+                Index or JMESPath expression to locate the predicted scores in the model output.
+                This is not required if the model output is a single score. Alternatively,
+                it can be an instance of ModelPredictedLabelConfig to provide more parameters
+                like label_headers.
             wait (bool): Whether the call should wait until the job completes (default: False).
             logs (bool): Whether to show the logs produced by the job.
                 Only meaningful when wait is True (default: False).
@@ -1302,12 +1303,12 @@ def __init__(
         Args:
             analysis_config (BiasAnalysisConfig or ExplainabilityAnalysisConfig): analysis config
                 from configurations of the baselining job.
-            features_attribute (str): JSONpath to locate features in predictor request payload.
-                Only required when predictor content type is JSONlines.
-            inference_attribute (str): Index, header or JSONpath to locate predicted label in
-                predictor response payload.
-            probability_attribute (str): Index or JSONpath location in the model output for
-                probabilities or scores to be used for explainability.
+            features_attribute (str): JMESPath expression to locate features in predictor request
+                payload. Only required when predictor content type is JSONLines.
+            inference_attribute (str): Index, header or JMESPath expression to locate predicted
+                label in predictor response payload.
+            probability_attribute (str): Index or JMESPath expression to locate probabilities or
+                scores in the model output for computing feature attribution.
             probability_threshold_attribute (float): Value to indicate the threshold to select
                 the binary label in the case of binary classification. Default is 0.5.
         """
diff --git a/src/sagemaker/model_monitor/model_monitoring.py b/src/sagemaker/model_monitor/model_monitoring.py
@@ -1061,12 +1061,13 @@ def _generate_env_map(
             dataset_format (dict): The format of the baseline_dataset.
             dataset_source_container_path (str): The path to the dataset source.
             inference_attribute (str): Index or JSONpath to locate predicted label(s).
-                Only used for ModelQualityMonitor, ModelBiasMonitor, and ModelExplainabilityMonitor
+                Only used for ModelQualityMonitor.
             probability_attribute (str or int): Index or JSONpath to locate probabilities.
-                Only used for ModelQualityMonitor, ModelBiasMonitor and ModelExplainabilityMonitor
-            ground_truth_attribute (str): Index or JSONpath to locate actual label(s).
+                Only used for ModelQualityMonitor.
+            ground_truth_attribute (str): Index to locate actual label(s).
+                Only used for ModelQualityMonitor.
             probability_threshold_attribute (float): threshold to convert probabilities to binaries
-                Only used for ModelQualityMonitor, ModelBiasMonitor and ModelExplainabilityMonitor
+                Only used for ModelQualityMonitor.
 
         Returns:
             dict: Dictionary of environment keys and values.
@@ -2600,10 +2601,13 @@ def suggest_baseline(
             problem_type (str): The type of problem of this model quality monitoring. Valid
                 values are "Regression", "BinaryClassification", "MulticlassClassification".
             inference_attribute (str): Index or JSONpath to locate predicted label(s).
+                Only used for ModelQualityMonitor.
             probability_attribute (str or int): Index or JSONpath to locate probabilities.
-            ground_truth_attribute (str): Index or JSONpath to locate actual label(s).
+                Only used for ModelQualityMonitor.
+            ground_truth_attribute (str): Index to locate actual label(s).
+                Only used for ModelQualityMonitor.
             probability_threshold_attribute (float): threshold to convert probabilities to binaries
-                Only used for ModelQualityMonitor, ModelBiasMonitor and ModelExplainabilityMonitor
+                Only used for ModelQualityMonitor.
             post_analytics_processor_script (str): The path to the record post-analytics processor
                 script. This can be a local path or an S3 uri.
             output_s3_uri (str): Desired S3 destination Destination of the constraint_violations
diff --git a/src/sagemaker/workflow/clarify_check_step.py b/src/sagemaker/workflow/clarify_check_step.py
@@ -132,8 +132,8 @@ class ModelExplainabilityCheckConfig(ClarifyCheckConfig):
         model_config (ModelConfig): Config of the model and its endpoint to be created.
         explainability_config (SHAPConfig): Config of the specific explainability method.
             Currently, only SHAP is supported.
-        model_scores (str or int or ModelPredictedLabelConfig): Index or JSONPath location
-            in the model output for the predicted scores to be explained (default: None).
+        model_scores (str or int or ModelPredictedLabelConfig): Index or JMESPath expression
+            to locate the predicted scores in the model output (default: None).
             This is not required if the model output is a single score. Alternatively,
             an instance of ModelPredictedLabelConfig can be provided
             but this field CANNOT be any type of the `PipelineVariable`.