diff --git a/src/sagemaker/clarify.py b/src/sagemaker/clarify.py index 00ec6dc965..706e82aa8a 100644 --- a/src/sagemaker/clarify.py +++ b/src/sagemaker/clarify.py @@ -282,6 +282,7 @@ in ( "text/csv", "application/jsonlines", + "application/json", "image/jpeg", "image/png", "application/x-npy", @@ -296,6 +297,7 @@ SchemaOptional("probability"): Or(str, int), SchemaOptional("label_headers"): [Or(str, int)], SchemaOptional("content_template"): Or(str, {str: str}), + SchemaOptional("record_template"): str, SchemaOptional("custom_attributes"): str, }, } @@ -573,6 +575,7 @@ def __init__( accept_type: Optional[str] = None, content_type: Optional[str] = None, content_template: Optional[str] = None, + record_template: Optional[str] = None, custom_attributes: Optional[str] = None, accelerator_type: Optional[str] = None, endpoint_name_prefix: Optional[str] = None, @@ -599,14 +602,80 @@ def __init__( ``"application/jsonlines"`` for JSON Lines, and ``"application/json"`` for JSON. Default is the same as ``content_type``. content_type (str): The model input format to be used for getting inferences with the - shadow endpoint. Valid values are ``"text/csv"`` for CSV and - ``"application/jsonlines"`` for JSON Lines. Default is the same as - ``dataset_format``. + shadow endpoint. Valid values are ``"text/csv"`` for CSV, + ``"application/jsonlines"`` for JSON Lines, and ``"application/json"`` for JSON. + Default is the same as ``dataset_format``. content_template (str): A template string to be used to construct the model input from - dataset instances. It is only used when ``model_content_type`` is - ``"application/jsonlines"``. The template should have one and only one placeholder, - ``"features"``, which will be replaced by a features list to form the model - inference input. + dataset instances. It is only used, and required, when ``model_content_type`` is + ``"application/jsonlines"`` or ``"application/json"``. 
When ``model_content_type`` + is ``"application/jsonlines"``, the template should have one and only one + placeholder, ``$features``, which will be replaced by a features list for each + record to form the model inference input. When ``model_content_type`` is + ``"application/json"``, the template can have either placeholder ``$record``, which + will be replaced by a single record templated by ``record_template`` (so only a + single record at a time is sent to the model), or placeholder ``$records``, + which will be replaced by a list of records, each templated by ``record_template``. + record_template (str): A template string to be used to construct each record of the + model input from dataset instances. It is only used, and required, when + ``model_content_type`` is ``"application/json"``. + The template string may contain one of the following: + + * Placeholder ``$features`` that will be substituted by the array of feature values + and/or an optional placeholder ``$feature_names`` that will be substituted by the + array of feature names. + * Exactly one placeholder ``$features_kvp`` that will be substituted by the + key-value pairs of feature name and feature value. + * Or for each feature, if "A" is the feature name in the ``headers`` configuration, + then placeholder syntax ``"${A}"`` (the double-quotes are part of the + placeholder) will be substituted by the feature value. + + ``record_template`` will be used in conjunction with ``content_template`` to + construct the model input. 
+ + **Examples:** + + Given: + + * ``headers``: ``["A", "B"]`` + * ``features``: ``[[0, 1], [3, 4]]`` + + Example model input 1:: + + { + "instances": [[0, 1], [3, 4]], + "feature_names": ["A", "B"] + } + + content_template and record_template to construct above: + + * ``content_template``: ``"{\"instances\": $records}"`` + * ``record_template``: ``"$features"`` + + Example model input 2:: + + [ + { "A": 0, "B": 1 }, + { "A": 3, "B": 4 } + ] + + content_template and record_template to construct above: + + * ``content_template``: ``"$records"`` + * ``record_template``: ``"$features_kvp"`` + + Or, alternatively: + + * ``content_template``: ``"$records"`` + * ``record_template``: ``"{\"A\": \"${A}\", \"B\": \"${B}\"}"`` + + Example model input 3 (single record only):: + + { "A": 0, "B": 1 } + + content_template and record_template to construct above: + + * ``content_template``: ``"$record"`` + * ``record_template``: ``"$features_kvp"`` custom_attributes (str): Provides additional information about a request for an inference submitted to a model hosted at an Amazon SageMaker endpoint. The information is an opaque value that is forwarded verbatim. You could use this @@ -677,6 +746,7 @@ def __init__( if content_type not in [ "text/csv", "application/jsonlines", + "application/json", "image/jpeg", "image/jpg", "image/png", @@ -686,14 +756,32 @@ def __init__( f"Invalid content_type {content_type}." f" Please choose text/csv or application/jsonlines." ) + if content_type == "application/jsonlines": + if content_template is None: + raise ValueError( + f"content_template field is required for content_type {content_type}" + ) + if "$features" not in content_template: + raise ValueError( + f"Invalid content_template {content_template}." + f" Please include a placeholder $features." 
+ ) + if content_type == "application/json": + if content_template is None or record_template is None: + raise ValueError( + f"content_template and record_template are required for content_type " + f"{content_type}" + ) + if "$record" not in content_template: + raise ValueError( + f"Invalid content_template {content_template}." + f" Please include either placeholder $records or $record." + ) self.predictor_config["content_type"] = content_type if content_template is not None: - if "$features" not in content_template: - raise ValueError( - f"Invalid content_template {content_template}." - f" Please include a placeholder $features." - ) self.predictor_config["content_template"] = content_template + if record_template is not None: + self.predictor_config["record_template"] = record_template _set(custom_attributes, "custom_attributes", self.predictor_config) _set(accelerator_type, "accelerator_type", self.predictor_config) _set(target_model, "target_model", self.predictor_config) diff --git a/tests/unit/sagemaker/monitor/test_clarify_model_monitor.py b/tests/unit/sagemaker/monitor/test_clarify_model_monitor.py index 33800c9a1d..59d3c9b727 100644 --- a/tests/unit/sagemaker/monitor/test_clarify_model_monitor.py +++ b/tests/unit/sagemaker/monitor/test_clarify_model_monitor.py @@ -365,6 +365,7 @@ MODEL_NAME = "xgboost-model" ACCEPT_TYPE = "text/csv" CONTENT_TYPE = "application/jsonlines" +JSONLINES_CONTENT_TEMPLATE = '{"instances":$features}' EXPLAINABILITY_ANALYSIS_CONFIG = { "headers": ANALYSIS_CONFIG_HEADERS_OF_FEATURES, "methods": { @@ -382,6 +383,7 @@ "initial_instance_count": INSTANCE_COUNT, "accept_type": ACCEPT_TYPE, "content_type": CONTENT_TYPE, + "content_template": JSONLINES_CONTENT_TEMPLATE, }, } EXPLAINABILITY_ANALYSIS_CONFIG_WITH_LABEL_HEADERS = copy.deepcopy(EXPLAINABILITY_ANALYSIS_CONFIG) @@ -489,6 +491,7 @@ def model_config(): instance_count=INSTANCE_COUNT, content_type=CONTENT_TYPE, accept_type=ACCEPT_TYPE, + content_template=JSONLINES_CONTENT_TEMPLATE, ) 
diff --git a/tests/unit/test_clarify.py b/tests/unit/test_clarify.py index f0e8ffad57..0c80cfe004 100644 --- a/tests/unit/test_clarify.py +++ b/tests/unit/test_clarify.py @@ -393,6 +393,9 @@ def test_facet_of_bias_config(facet_name, facet_values_or_threshold, expected_re ("text/csv", "application/json"), ("application/jsonlines", "application/json"), ("application/jsonlines", "text/csv"), + ("application/json", "application/json"), + ("application/json", "application/jsonlines"), + ("application/json", "text/csv"), ("image/jpeg", "text/csv"), ("image/jpg", "text/csv"), ("image/png", "text/csv"), @@ -406,12 +409,22 @@ def test_valid_model_config(content_type, accept_type): custom_attributes = "c000b4f9-df62-4c85-a0bf-7c525f9104a4" target_model = "target_model_name" accelerator_type = "ml.eia1.medium" + content_template = ( + '{"instances":$features}' + if content_type == "application/jsonlines" + else "$records" + if content_type == "application/json" + else None + ) + record_template = "$features_kvp" if content_type == "application/json" else None model_config = ModelConfig( model_name=model_name, instance_type=instance_type, instance_count=instance_count, accept_type=accept_type, content_type=content_type, + content_template=content_template, + record_template=record_template, custom_attributes=custom_attributes, accelerator_type=accelerator_type, target_model=target_model, @@ -426,21 +439,79 @@ def test_valid_model_config(content_type, accept_type): "accelerator_type": accelerator_type, "target_model": target_model, } + if content_template is not None: + expected_config["content_template"] = content_template + if record_template is not None: + expected_config["record_template"] = record_template assert expected_config == model_config.get_predictor_config() -def test_invalid_model_config(): - with pytest.raises(ValueError) as error: +@pytest.mark.parametrize( + ("error", "content_type", "accept_type", "content_template", "record_template"), + [ + ( + "Invalid 
accept_type invalid_accept_type. Please choose text/csv or application/jsonlines.", + "text/csv", + "invalid_accept_type", + None, + None, + ), + ( + "Invalid content_type invalid_content_type. Please choose text/csv or application/jsonlines.", + "invalid_content_type", + "text/csv", + None, + None, + ), + ( + "content_template field is required for content_type", + "application/jsonlines", + "text/csv", + None, + None, + ), + ( + "content_template and record_template are required for content_type", + "application/json", + "text/csv", + None, + None, + ), + ( + "content_template and record_template are required for content_type", + "application/json", + "text/csv", + "$records", + None, + ), + ( + r"Invalid content_template invalid_content_template. Please include a placeholder \$features.", + "application/jsonlines", + "text/csv", + "invalid_content_template", + None, + ), + ( + r"Invalid content_template invalid_content_template. Please include either placeholder " + r"\$records or \$record.", + "application/json", + "text/csv", + "invalid_content_template", + "$features", + ), + ], +) +def test_invalid_model_config(error, content_type, accept_type, content_template, record_template): + with pytest.raises(ValueError, match=error): ModelConfig( model_name="xgboost-model", instance_type="ml.c5.xlarge", instance_count=1, - accept_type="invalid_accept_type", + content_type=content_type, + accept_type=accept_type, + content_template=content_template, + record_template=record_template, ) - assert ( - "Invalid accept_type invalid_accept_type. Please choose text/csv or application/jsonlines." - in str(error.value) - ) def test_invalid_model_config_with_bad_endpoint_name_prefix():