Skip to content

feature: Add support for JSON model inputs for Clarify Processor #3768

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 100 additions & 12 deletions src/sagemaker/clarify.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@
in (
"text/csv",
"application/jsonlines",
"application/json",
"image/jpeg",
"image/png",
"application/x-npy",
Expand All @@ -296,6 +297,7 @@
SchemaOptional("probability"): Or(str, int),
SchemaOptional("label_headers"): [Or(str, int)],
SchemaOptional("content_template"): Or(str, {str: str}),
SchemaOptional("record_template"): str,
SchemaOptional("custom_attributes"): str,
},
}
Expand Down Expand Up @@ -573,6 +575,7 @@ def __init__(
accept_type: Optional[str] = None,
content_type: Optional[str] = None,
content_template: Optional[str] = None,
record_template: Optional[str] = None,
custom_attributes: Optional[str] = None,
accelerator_type: Optional[str] = None,
endpoint_name_prefix: Optional[str] = None,
Expand All @@ -599,14 +602,80 @@ def __init__(
``"application/jsonlines"`` for JSON Lines, and ``"application/json"`` for JSON.
Default is the same as ``content_type``.
content_type (str): The model input format to be used for getting inferences with the
shadow endpoint. Valid values are ``"text/csv"`` for CSV and
``"application/jsonlines"`` for JSON Lines. Default is the same as
``dataset_format``.
shadow endpoint. Valid values are ``"text/csv"`` for CSV,
``"application/jsonlines"`` for JSON Lines, and ``"application/json"`` for JSON.
Default is the same as ``dataset_format``.
content_template (str): A template string to be used to construct the model input from
dataset instances. It is only used when ``model_content_type`` is
``"application/jsonlines"``. The template should have one and only one placeholder,
``"features"``, which will be replaced by a features list to form the model
inference input.
dataset instances. It is only used, and required, when ``content_type`` is
``"application/jsonlines"`` or ``"application/json"``. When ``content_type``
is ``"application/jsonlines"``, the template should have one and only one
placeholder, ``$features``, which will be replaced by a features list for each
record to form the model inference input. When ``content_type`` is
``"application/json"``, the template can have either the placeholder ``$record``,
which will be replaced by a single record templated by ``record_template`` (in
which case only a single record at a time will be sent to the model), or the
placeholder ``$records``, which will be replaced by a list of records, each
templated by ``record_template``.
record_template (str): A template string to be used to construct each record of the
model input from dataset instances. It is only used, and required, when
``content_type`` is ``"application/json"``.
The template string may contain one of the following:

* Placeholder ``$features`` that will be substituted by the array of feature values
and/or an optional placeholder ``$feature_names`` that will be substituted by the
array of feature names.
* Exactly one placeholder ``$features_kvp`` that will be substituted by the
key-value pairs of feature name and feature value.
* Or for each feature, if "A" is the feature name in the ``headers`` configuration,
then placeholder syntax ``"${A}"`` (the double-quotes are part of the
placeholder) will be substituted by the feature value.

``record_template`` will be used in conjunction with ``content_template`` to
construct the model input.

**Examples:**

Given:

* ``headers``: ``["A", "B"]``
* ``features``: ``[[0, 1], [3, 4]]``

Example model input 1::

{
"instances": [[0, 1], [3, 4]],
"feature_names": ["A", "B"]
}

content_template and record_template to construct above:

* ``content_template``: ``"{\"instances\": $records}"``
* ``record_template``: ``"$features"``

Example model input 2::

[
{ "A": 0, "B": 1 },
{ "A": 3, "B": 4 }
]

content_template and record_template to construct above:

* ``content_template``: ``"$records"``
* ``record_template``: ``"$features_kvp"``

Or, alternatively:

* ``content_template``: ``"$records"``
* ``record_template``: ``"{\"A\": \"${A}\", \"B\": \"${B}\"}"``

Example model input 3 (single record only)::

{ "A": 0, "B": 1 }

content_template and record_template to construct above:

* ``content_template``: ``"$record"``
* ``record_template``: ``"$features_kvp"``
custom_attributes (str): Provides additional information about a request for an
inference submitted to a model hosted at an Amazon SageMaker endpoint. The
information is an opaque value that is forwarded verbatim. You could use this
Expand Down Expand Up @@ -677,6 +746,7 @@ def __init__(
if content_type not in [
"text/csv",
"application/jsonlines",
"application/json",
"image/jpeg",
"image/jpg",
"image/png",
Expand All @@ -686,14 +756,32 @@ def __init__(
f"Invalid content_type {content_type}."
f" Please choose text/csv or application/jsonlines."
)
if content_type == "application/jsonlines":
if content_template is None:
raise ValueError(
f"content_template field is required for content_type {content_type}"
)
if "$features" not in content_template:
raise ValueError(
f"Invalid content_template {content_template}."
f" Please include a placeholder $features."
)
if content_type == "application/json":
if content_template is None or record_template is None:
raise ValueError(
f"content_template and record_template are required for content_type "
f"{content_type}"
)
if "$record" not in content_template:
raise ValueError(
f"Invalid content_template {content_template}."
f" Please include either placeholder $records or $record."
)
self.predictor_config["content_type"] = content_type
if content_template is not None:
if "$features" not in content_template:
raise ValueError(
f"Invalid content_template {content_template}."
f" Please include a placeholder $features."
)
self.predictor_config["content_template"] = content_template
if record_template is not None:
self.predictor_config["record_template"] = record_template
_set(custom_attributes, "custom_attributes", self.predictor_config)
_set(accelerator_type, "accelerator_type", self.predictor_config)
_set(target_model, "target_model", self.predictor_config)
Expand Down
3 changes: 3 additions & 0 deletions tests/unit/sagemaker/monitor/test_clarify_model_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,7 @@
MODEL_NAME = "xgboost-model"
ACCEPT_TYPE = "text/csv"
CONTENT_TYPE = "application/jsonlines"
JSONLINES_CONTENT_TEMPLATE = '{"instances":$features}'
EXPLAINABILITY_ANALYSIS_CONFIG = {
"headers": ANALYSIS_CONFIG_HEADERS_OF_FEATURES,
"methods": {
Expand All @@ -382,6 +383,7 @@
"initial_instance_count": INSTANCE_COUNT,
"accept_type": ACCEPT_TYPE,
"content_type": CONTENT_TYPE,
"content_template": JSONLINES_CONTENT_TEMPLATE,
},
}
EXPLAINABILITY_ANALYSIS_CONFIG_WITH_LABEL_HEADERS = copy.deepcopy(EXPLAINABILITY_ANALYSIS_CONFIG)
Expand Down Expand Up @@ -489,6 +491,7 @@ def model_config():
instance_count=INSTANCE_COUNT,
content_type=CONTENT_TYPE,
accept_type=ACCEPT_TYPE,
content_template=JSONLINES_CONTENT_TEMPLATE,
)


Expand Down
85 changes: 78 additions & 7 deletions tests/unit/test_clarify.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,9 @@ def test_facet_of_bias_config(facet_name, facet_values_or_threshold, expected_re
("text/csv", "application/json"),
("application/jsonlines", "application/json"),
("application/jsonlines", "text/csv"),
("application/json", "application/json"),
("application/json", "application/jsonlines"),
("application/json", "text/csv"),
("image/jpeg", "text/csv"),
("image/jpg", "text/csv"),
("image/png", "text/csv"),
Expand All @@ -406,12 +409,22 @@ def test_valid_model_config(content_type, accept_type):
custom_attributes = "c000b4f9-df62-4c85-a0bf-7c525f9104a4"
target_model = "target_model_name"
accelerator_type = "ml.eia1.medium"
content_template = (
'{"instances":$features}'
if content_type == "application/jsonlines"
else "$records"
if content_type == "application/json"
else None
)
record_template = "$features_kvp" if content_type == "application/json" else None
model_config = ModelConfig(
model_name=model_name,
instance_type=instance_type,
instance_count=instance_count,
accept_type=accept_type,
content_type=content_type,
content_template=content_template,
record_template=record_template,
custom_attributes=custom_attributes,
accelerator_type=accelerator_type,
target_model=target_model,
Expand All @@ -426,21 +439,79 @@ def test_valid_model_config(content_type, accept_type):
"accelerator_type": accelerator_type,
"target_model": target_model,
}
if content_template is not None:
expected_config["content_template"] = content_template
if record_template is not None:
expected_config["record_template"] = record_template
assert expected_config == model_config.get_predictor_config()


def test_invalid_model_config():
with pytest.raises(ValueError) as error:
@pytest.mark.parametrize(
("error", "content_type", "accept_type", "content_template", "record_template"),
[
(
"Invalid accept_type invalid_accept_type. Please choose text/csv or application/jsonlines.",
"text/csv",
"invalid_accept_type",
None,
None,
),
(
"Invalid content_type invalid_content_type. Please choose text/csv or application/jsonlines.",
"invalid_content_type",
"text/csv",
None,
None,
),
(
"content_template field is required for content_type",
"application/jsonlines",
"text/csv",
None,
None,
),
(
"content_template and record_template are required for content_type",
"application/json",
"text/csv",
None,
None,
),
(
"content_template and record_template are required for content_type",
"application/json",
"text/csv",
"$records",
None,
),
(
r"Invalid content_template invalid_content_template. Please include a placeholder \$features.",
"application/jsonlines",
"text/csv",
"invalid_content_template",
None,
),
(
r"Invalid content_template invalid_content_template. Please include either placeholder "
r"\$records or \$record.",
"application/json",
"text/csv",
"invalid_content_template",
"$features",
),
],
)
def test_invalid_model_config(error, content_type, accept_type, content_template, record_template):
with pytest.raises(ValueError, match=error):
ModelConfig(
model_name="xgboost-model",
instance_type="ml.c5.xlarge",
instance_count=1,
accept_type="invalid_accept_type",
content_type=content_type,
accept_type=accept_type,
content_template=content_template,
record_template=record_template,
)
assert (
"Invalid accept_type invalid_accept_type. Please choose text/csv or application/jsonlines."
in str(error.value)
)


def test_invalid_model_config_with_bad_endpoint_name_prefix():
Expand Down