diff --git a/src/sagemaker/clarify.py b/src/sagemaker/clarify.py index 4765630ce8..58d67124c1 100644 --- a/src/sagemaker/clarify.py +++ b/src/sagemaker/clarify.py @@ -331,11 +331,11 @@ def __init__( s3_analysis_config_output_path (str): S3 prefix to store the analysis config output. If this field is None, then the ``s3_output_path`` will be used to store the ``analysis_config`` output. - label (str): Target attribute of the model required by bias metrics. - Specified as column name or index for CSV dataset or as JSONPath for JSONLines. + label (str): Target attribute of the model required by bias metrics. Specified as + column name or index for CSV dataset or as JMESPath expression for JSONLines. *Required parameter* except for when the input dataset does not contain the label. - features (List[str]): JSONPath for locating the feature columns for bias metrics if the - dataset format is JSONLines. + features (List[str]): JMESPath expression to locate the feature columns for + bias metrics if the dataset format is JSONLines. dataset_type (str): Format of the dataset. Valid values are ``"text/csv"`` for CSV, ``"application/jsonlines"`` for JSONLines, and ``"application/x-parquet"`` for Parquet. @@ -717,11 +717,11 @@ def __init__( ``label_headers=['cat','dog','fish']`` and infer the predicted label to be ``'fish'``. Args: - label (str or int): Index or JSONPath location in the model output for the prediction. - In case, this is a predicted label of the same type as the label in the dataset, - no further arguments need to be specified. - probability (str or int): Index or JSONPath location in the model output - for the predicted score(s). + label (str or int): Index or JMESPath expression to locate the prediction + in the model output. If this is a predicted label of the same type + as the label in the dataset, no further arguments need to be specified. + probability (str or int): Index or JMESPath expression to locate the predicted score(s) + in the model output.
probability_threshold (float): An optional value for binary prediction tasks in which the model returns a probability, to indicate the threshold to convert the prediction to a boolean value. Default is ``0.5``. @@ -1646,9 +1646,9 @@ def run_explainability( You can request multiple methods at once by passing in a list of `~sagemaker.clarify.ExplainabilityConfig`. model_scores (int or str or :class:`~sagemaker.clarify.ModelPredictedLabelConfig`): - Index or JSONPath to locate the predicted scores in the model output. This is not - required if the model output is a single score. Alternatively, it can be an instance - of :class:`~sagemaker.clarify.SageMakerClarifyProcessor` + Index or JMESPath expression to locate the predicted scores in the model output. + This is not required if the model output is a single score. Alternatively, + it can be an instance of :class:`~sagemaker.clarify.ModelPredictedLabelConfig` to provide more parameters like ``label_headers``. wait (bool): Whether the call should wait until the job completes (default: True). logs (bool): Whether to show the logs produced by the job. @@ -1775,9 +1775,9 @@ def run_bias_and_explainability( str or :class:`~sagemaker.clarify.ModelPredictedLabelConfig` ): - Index or JSONPath to locate the predicted scores in the model output. This is not - required if the model output is a single score. Alternatively, it can be an instance - of :class:`~sagemaker.clarify.SageMakerClarifyProcessor` + Index or JMESPath expression to locate the predicted scores in the model output. + This is not required if the model output is a single score. Alternatively, + it can be an instance of :class:`~sagemaker.clarify.ModelPredictedLabelConfig` to provide more parameters like ``label_headers``. wait (bool): Whether the call should wait until the job completes (default: True). logs (bool): Whether to show the logs produced by the job.
diff --git a/src/sagemaker/model_monitor/clarify_model_monitoring.py b/src/sagemaker/model_monitor/clarify_model_monitoring.py index 1a788a0d53..030de7c6db 100644 --- a/src/sagemaker/model_monitor/clarify_model_monitoring.py +++ b/src/sagemaker/model_monitor/clarify_model_monitoring.py @@ -842,8 +842,8 @@ def __init__(self, bias_config, headers=None, label=None): bias_config (sagemaker.clarify.BiasConfig): Config object related to bias configurations. headers (list[str]): A list of column names in the input dataset. - label (str): Target attribute for the model required by bias metrics. - Specified as column name or index for CSV dataset, or as JSONPath for JSONLines. + label (str): Target attribute for the model required by bias metrics. Specified as + column name or index for CSV dataset, or as JMESPath expression for JSONLines. """ self.analysis_config = bias_config.get_config() if headers is not None: @@ -889,9 +889,10 @@ def suggest_baseline( model_config (:class:`~sagemaker.clarify.ModelConfig`): Config of the model and its endpoint to be created. model_scores (int or str or :class:`~sagemaker.clarify.ModelPredictedLabelConfig`): - Index or JSONPath to locate the predicted scores in the model output. This is not - required if the model output is a single score. Alternatively, it can be an instance - of ModelPredictedLabelConfig to provide more parameters like label_headers. + Index or JMESPath expression to locate the predicted scores in the model output. + This is not required if the model output is a single score. Alternatively, + it can be an instance of ModelPredictedLabelConfig to provide more parameters + like label_headers. wait (bool): Whether the call should wait until the job completes (default: False). logs (bool): Whether to show the logs produced by the job. Only meaningful when wait is True (default: False). 
@@ -1302,12 +1303,12 @@ def __init__( Args: analysis_config (BiasAnalysisConfig or ExplainabilityAnalysisConfig): analysis config from configurations of the baselining job. - features_attribute (str): JSONpath to locate features in predictor request payload. - Only required when predictor content type is JSONlines. - inference_attribute (str): Index, header or JSONpath to locate predicted label in - predictor response payload. - probability_attribute (str): Index or JSONpath location in the model output for - probabilities or scores to be used for explainability. + features_attribute (str): JMESPath expression to locate features in predictor request + payload. Only required when predictor content type is JSONLines. + inference_attribute (str): Index, header or JMESPath expression to locate predicted + label in predictor response payload. + probability_attribute (str): Index or JMESPath expression to locate probabilities or + scores in the model output for computing feature attribution. probability_threshold_attribute (float): Value to indicate the threshold to select the binary label in the case of binary classification. Default is 0.5. """ diff --git a/src/sagemaker/model_monitor/model_monitoring.py b/src/sagemaker/model_monitor/model_monitoring.py index 817d951255..2f8266a43a 100644 --- a/src/sagemaker/model_monitor/model_monitoring.py +++ b/src/sagemaker/model_monitor/model_monitoring.py @@ -1061,12 +1061,13 @@ def _generate_env_map( dataset_format (dict): The format of the baseline_dataset. dataset_source_container_path (str): The path to the dataset source. inference_attribute (str): Index or JSONpath to locate predicted label(s). - Only used for ModelQualityMonitor, ModelBiasMonitor, and ModelExplainabilityMonitor + Only used for ModelQualityMonitor. probability_attribute (str or int): Index or JSONpath to locate probabilities.
- Only used for ModelQualityMonitor, ModelBiasMonitor and ModelExplainabilityMonitor - ground_truth_attribute (str): Index or JSONpath to locate actual label(s). + Only used for ModelQualityMonitor. + ground_truth_attribute (str): Index to locate actual label(s). + Only used for ModelQualityMonitor. probability_threshold_attribute (float): threshold to convert probabilities to binaries - Only used for ModelQualityMonitor, ModelBiasMonitor and ModelExplainabilityMonitor + Only used for ModelQualityMonitor. Returns: dict: Dictionary of environment keys and values. @@ -2600,10 +2601,13 @@ def suggest_baseline( problem_type (str): The type of problem of this model quality monitoring. Valid values are "Regression", "BinaryClassification", "MulticlassClassification". inference_attribute (str): Index or JSONpath to locate predicted label(s). + Only used for ModelQualityMonitor. probability_attribute (str or int): Index or JSONpath to locate probabilities. - ground_truth_attribute (str): Index or JSONpath to locate actual label(s). + Only used for ModelQualityMonitor. + ground_truth_attribute (str): Index to locate actual label(s). + Only used for ModelQualityMonitor. probability_threshold_attribute (float): threshold to convert probabilities to binaries - Only used for ModelQualityMonitor, ModelBiasMonitor and ModelExplainabilityMonitor + Only used for ModelQualityMonitor. post_analytics_processor_script (str): The path to the record post-analytics processor script. This can be a local path or an S3 uri. 
output_s3_uri (str): Desired S3 destination Destination of the constraint_violations diff --git a/src/sagemaker/workflow/clarify_check_step.py b/src/sagemaker/workflow/clarify_check_step.py index 9d350b01f3..22b6fc2051 100644 --- a/src/sagemaker/workflow/clarify_check_step.py +++ b/src/sagemaker/workflow/clarify_check_step.py @@ -132,8 +132,8 @@ class ModelExplainabilityCheckConfig(ClarifyCheckConfig): model_config (ModelConfig): Config of the model and its endpoint to be created. explainability_config (SHAPConfig): Config of the specific explainability method. Currently, only SHAP is supported. - model_scores (str or int or ModelPredictedLabelConfig): Index or JSONPath location - in the model output for the predicted scores to be explained (default: None). + model_scores (str or int or ModelPredictedLabelConfig): Index or JMESPath expression + to locate the predicted scores in the model output (default: None). This is not required if the model output is a single score. Alternatively, an instance of ModelPredictedLabelConfig can be provided but this field CANNOT be any type of the `PipelineVariable`.