Skip to content

documentation: Minor updates to Clarify API documentation #2550

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Aug 10, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 31 additions & 27 deletions src/sagemaker/clarify.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,17 @@ def __init__(
headers (list[str]): A list of column names in the input dataset.
features (str): JSONPath for locating the feature columns for bias metrics if the
dataset format is JSONLines.
dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV
and "application/jsonlines" for JSONLines.
dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV,
"application/jsonlines" for JSONLines, and "application/x-parquet" for Parquet.
s3_data_distribution_type (str): Valid options are "FullyReplicated" or
"ShardedByS3Key".
s3_compression_type (str): Valid options are "None" or "Gzip".
"""
if dataset_type not in ["text/csv", "application/jsonlines", "application/x-parquet"]:
raise ValueError(
f"Invalid dataset_type '{dataset_type}'."
f" Please check the API documentation for the supported dataset types."
)
self.s3_data_input_path = s3_data_input_path
self.s3_output_path = s3_output_path
self.s3_data_distribution_type = s3_data_distribution_type
Expand Down Expand Up @@ -508,7 +513,7 @@ def run_pre_training_bias(
kms_key=None,
experiment_config=None,
):
"""Runs a ProcessingJob to compute the requested bias 'methods' of the input data.
"""Runs a ProcessingJob to compute the pre-training bias methods of the input data.

Computes the requested methods that compare 'methods' (e.g. fraction of examples) for the
sensitive group vs the other examples.
Expand All @@ -517,14 +522,14 @@ def run_pre_training_bias(
data_config (:class:`~sagemaker.clarify.DataConfig`): Config of the input/output data.
data_bias_config (:class:`~sagemaker.clarify.BiasConfig`): Config of sensitive groups.
methods (str or list[str]): Selector of a subset of potential metrics:
["`CI <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ci.html>`_",
"`DPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-dpl.html>`_",
"`KL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-kl.html>`_",
"`JS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-js.html>`_",
"`LP <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-lp.html>`_",
"`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-tvd.html>`_",
"`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ks.html>`_",
"`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-cdd.html>`_"].
["`CI <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-bias-metric-class-imbalance.html>`_",
"`DPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-true-label-imbalance.html>`_",
"`KL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kl-divergence.html>`_",
"`JS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-jensen-shannon-divergence.html>`_",
"`LP <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-lp-norm.html>`_",
"`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-total-variation-distance.html>`_",
"`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kolmogorov-smirnov.html>`_",
"`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-cddl.html>`_"].
Defaults to computing all.
wait (bool): Whether the call should wait until the job completes (default: True).
logs (bool): Whether to show the logs produced by the job.
Expand All @@ -538,7 +543,7 @@ def run_pre_training_bias(
experiment_config (dict[str, str]): Experiment management configuration.
Dictionary contains three optional keys:
'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
"""
""" # noqa E501
analysis_config = data_config.get_config()
analysis_config.update(data_bias_config.get_config())
analysis_config["methods"] = {"pre_training_bias": {"methods": methods}}
Expand All @@ -562,7 +567,7 @@ def run_post_training_bias(
kms_key=None,
experiment_config=None,
):
"""Runs a ProcessingJob to compute the requested bias 'methods' of the model predictions.
"""Runs a ProcessingJob to compute the post-training bias methods of the model predictions.

Spins up a model endpoint, runs inference over the input example in the
's3_data_input_path' to obtain predicted labels. Computes a the requested methods that
Expand Down Expand Up @@ -633,12 +638,11 @@ def run_bias(
kms_key=None,
experiment_config=None,
):
"""Runs a ProcessingJob to compute the requested bias 'methods' of the model predictions.
"""Runs a ProcessingJob to compute the requested bias methods.

Spins up a model endpoint, runs inference over the input example in the
's3_data_input_path' to obtain predicted labels. Computes a the requested methods that
compare 'methods' (e.g. accuracy, precision, recall) for the sensitive group vs the other
examples.
It computes the metrics of both the pre-training methods and the post-training methods.
To calculate post-training methods, it needs to spin up a model endpoint, runs inference
over the input example in the 's3_data_input_path' to obtain predicted labels.

Args:
data_config (:class:`~sagemaker.clarify.DataConfig`): Config of the input/output data.
Expand All @@ -648,14 +652,14 @@ def run_bias(
model_predicted_label_config (:class:`~sagemaker.clarify.ModelPredictedLabelConfig`):
Config of how to extract the predicted label from the model output.
pre_training_methods (str or list[str]): Selector of a subset of potential metrics:
["`CI <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ci.html>`_",
"`DPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-dpl.html>`_",
"`KL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-kl.html>`_",
"`JS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-js.html>`_",
"`LP <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-lp.html>`_",
"`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-tvd.html>`_",
"`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ks.html>`_",
"`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-cdd.html>`_"].
["`CI <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-bias-metric-class-imbalance.html>`_",
"`DPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-true-label-imbalance.html>`_",
"`KL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kl-divergence.html>`_",
"`JS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-jensen-shannon-divergence.html>`_",
"`LP <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-lp-norm.html>`_",
"`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-total-variation-distance.html>`_",
"`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kolmogorov-smirnov.html>`_",
"`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-cddl.html>`_"].
Defaults to computing all.
post_training_methods (str or list[str]): Selector of a subset of potential metrics:
["`DPPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-dppl.html>`_"
Expand All @@ -682,7 +686,7 @@ def run_bias(
experiment_config (dict[str, str]): Experiment management configuration.
Dictionary contains three optional keys:
'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
"""
""" # noqa E501
analysis_config = data_config.get_config()
analysis_config.update(bias_config.get_config())
analysis_config["predictor"] = model_config.get_predictor_config()
Expand Down
9 changes: 9 additions & 0 deletions tests/unit/test_clarify.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,15 @@ def test_data_config():
assert "FullyReplicated" == data_config.s3_data_distribution_type


def test_invalid_data_config():
with pytest.raises(ValueError, match=r"^Invalid dataset_type"):
DataConfig(
s3_data_input_path="s3://bucket/inputpath",
s3_output_path="s3://bucket/outputpath",
dataset_type="whatnot_type",
)


def test_data_bias_config():
label_values = [1]
facet_name = "F1"
Expand Down