Merge branch 'dev' into support-for-pytorch-1-10-0

ahsan-z-khan · web-flow · commit ab0a3fee7e98 · 2022-01-07T12:43:00.000-08:00
diff --git a/.gitignore b/.gitignore
@@ -27,4 +27,5 @@ venv/
 *.swp
 .docker/
 env/
-.vscode/
+.vscode/
+.python-version
diff --git a/src/sagemaker/clarify.py b/src/sagemaker/clarify.py
@@ -290,11 +290,15 @@ def __init__(
             probability_threshold (float): An optional value for binary prediction tasks in which
                 the model returns a probability, to indicate the threshold to convert the
                 prediction to a boolean value. Default is 0.5.
-            label_headers (list): List of label values - one for each score of the ``probability``.
+            label_headers (list[str]): List of headers, each for a predicted score in model output.
+                For bias analysis, it is used to extract the label value with the highest score as
+                predicted label. For explainability job, It is used to beautify the analysis report
+                by replacing placeholders like "label0".
         """
         self.label = label
         self.probability = probability
         self.probability_threshold = probability_threshold
+        self.label_headers = label_headers
         if probability_threshold is not None:
             try:
                 float(probability_threshold)
@@ -1060,10 +1064,10 @@ def run_explainability(
             explainability_config (:class:`~sagemaker.clarify.ExplainabilityConfig` or list):
                 Config of the specific explainability method or a list of ExplainabilityConfig
                 objects. Currently, SHAP and PDP are the two methods supported.
-            model_scores(str|int|ModelPredictedLabelConfig):  Index or JSONPath location in the
-                model output for the predicted scores to be explained. This is not required if the
-                model output is a single score. Alternatively, an instance of
-                ModelPredictedLabelConfig can be provided.
+            model_scores (int or str or :class:`~sagemaker.clarify.ModelPredictedLabelConfig`):
+                Index or JSONPath to locate the predicted scores in the model output. This is not
+                required if the model output is a single score. Alternatively, it can be an instance
+                of ModelPredictedLabelConfig to provide more parameters like label_headers.
             wait (bool): Whether the call should wait until the job completes (default: True).
             logs (bool): Whether to show the logs produced by the job.
                 Only meaningful when ``wait`` is True (default: True).
diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
@@ -2343,6 +2343,7 @@ def _stage_user_code_in_s3(self):
             dependencies=self.dependencies,
             kms_key=kms_key,
             s3_resource=self.sagemaker_session.s3_resource,
+            settings=self.sagemaker_session.settings,
         )
 
     def _model_source_dir(self):
diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py
@@ -19,8 +19,10 @@
 import shutil
 import tempfile
 from collections import namedtuple
+from typing import Optional
 
 import sagemaker.image_uris
+from sagemaker.session_settings import SessionSettings
 import sagemaker.utils
 
 from sagemaker.deprecations import renamed_warning
@@ -216,6 +218,7 @@ def tar_and_upload_dir(
     dependencies=None,
     kms_key=None,
     s3_resource=None,
+    settings: Optional[SessionSettings] = None,
 ):
     """Package source files and upload a compress tar file to S3.
 
@@ -243,6 +246,9 @@ def tar_and_upload_dir(
         s3_resource (boto3.resource("s3")): Optional. Pre-instantiated Boto3 Resource
             for S3 connections, can be used to customize the configuration,
             e.g. set the endpoint URL (default: None).
+        settings (sagemaker.session_settings.SessionSettings): Optional. The settings
+            of the SageMaker ``Session``, can be used to override the default encryption
+            behavior (default: None).
     Returns:
         sagemaker.fw_utils.UserCode: An object with the S3 bucket and key (S3 prefix) and
             script name.
@@ -254,6 +260,7 @@ def tar_and_upload_dir(
     dependencies = dependencies or []
     key = "%s/sourcedir.tar.gz" % s3_key_prefix
     tmp = tempfile.mkdtemp()
+    encrypt_artifact = True if settings is None else settings.encrypt_repacked_artifacts
 
     try:
         source_files = _list_files_to_compress(script, directory) + dependencies
@@ -263,6 +270,10 @@ def tar_and_upload_dir(
 
         if kms_key:
             extra_args = {"ServerSideEncryption": "aws:kms", "SSEKMSKeyId": kms_key}
+        elif encrypt_artifact:
+            # encrypt the tarball at rest in S3 with the default AWS managed KMS key for S3
+            # see https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html#API_PutObject_RequestSyntax
+            extra_args = {"ServerSideEncryption": "aws:kms"}
         else:
             extra_args = None
 
diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py
@@ -1131,6 +1131,7 @@ def _upload_code(self, key_prefix, repack=False):
                 script=self.entry_point,
                 directory=self.source_dir,
                 dependencies=self.dependencies,
+                settings=self.sagemaker_session.settings,
             )
 
         if repack and self.model_data is not None and self.entry_point is not None:
diff --git a/src/sagemaker/model_monitor/clarify_model_monitoring.py b/src/sagemaker/model_monitor/clarify_model_monitoring.py
@@ -26,7 +26,7 @@
 from sagemaker import image_uris, s3
 from sagemaker.session import Session
 from sagemaker.utils import name_from_base
-from sagemaker.clarify import SageMakerClarifyProcessor
+from sagemaker.clarify import SageMakerClarifyProcessor, ModelPredictedLabelConfig
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -833,9 +833,10 @@ def suggest_baseline(
                 specific explainability method. Currently, only SHAP is supported.
             model_config (:class:`~sagemaker.clarify.ModelConfig`): Config of the model and its
                 endpoint to be created.
-            model_scores (int or str): Index or JSONPath location in the model output for the
-                predicted scores to be explained. This is not required if the model output is
-                a single score.
+            model_scores (int or str or :class:`~sagemaker.clarify.ModelPredictedLabelConfig`):
+                Index or JSONPath to locate the predicted scores in the model output. This is not
+                required if the model output is a single score. Alternatively, it can be an instance
+                of ModelPredictedLabelConfig to provide more parameters like label_headers.
             wait (bool): Whether the call should wait until the job completes (default: False).
             logs (bool): Whether to show the logs produced by the job.
                 Only meaningful when wait is True (default: False).
@@ -865,14 +866,24 @@ def suggest_baseline(
         headers = copy.deepcopy(data_config.headers)
         if headers and data_config.label in headers:
             headers.remove(data_config.label)
+        if model_scores is None:
+            inference_attribute = None
+            label_headers = None
+        elif isinstance(model_scores, ModelPredictedLabelConfig):
+            inference_attribute = str(model_scores.label)
+            label_headers = model_scores.label_headers
+        else:
+            inference_attribute = str(model_scores)
+            label_headers = None
         self.latest_baselining_job_config = ClarifyBaseliningConfig(
             analysis_config=ExplainabilityAnalysisConfig(
                 explainability_config=explainability_config,
                 model_config=model_config,
                 headers=headers,
+                label_headers=label_headers,
             ),
             features_attribute=data_config.features,
-            inference_attribute=model_scores if model_scores is None else str(model_scores),
+            inference_attribute=inference_attribute,
         )
         self.latest_baselining_job_name = baselining_job_name
         self.latest_baselining_job = ClarifyBaseliningJob(
@@ -1166,7 +1177,7 @@ def attach(cls, monitor_schedule_name, sagemaker_session=None):
 class ExplainabilityAnalysisConfig:
     """Analysis configuration for ModelExplainabilityMonitor."""
 
-    def __init__(self, explainability_config, model_config, headers=None):
+    def __init__(self, explainability_config, model_config, headers=None, label_headers=None):
         """Creates an analysis config dictionary.
 
         Args:
@@ -1175,13 +1186,19 @@ def __init__(self, explainability_config, model_config, headers=None):
             model_config (sagemaker.clarify.ModelConfig): Config object related to bias
                 configurations.
             headers (list[str]): A list of feature names (without label) of model/endpint input.
+            label_headers (list[str]): List of headers, each for a predicted score in model output.
+                It is used to beautify the analysis report by replacing placeholders like "label0".
+
         """
+        predictor_config = model_config.get_predictor_config()
         self.analysis_config = {
             "methods": explainability_config.get_explainability_config(),
-            "predictor": model_config.get_predictor_config(),
+            "predictor": predictor_config,
         }
         if headers is not None:
             self.analysis_config["headers"] = headers
+        if label_headers is not None:
+            predictor_config["label_headers"] = label_headers
 
     def _to_dict(self):
         """Generates a request dictionary using the parameters provided to the class."""
diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py
@@ -42,6 +42,7 @@
     sts_regional_endpoint,
 )
 from sagemaker import exceptions
+from sagemaker.session_settings import SessionSettings
 
 LOGGER = logging.getLogger("sagemaker")
 
@@ -85,6 +86,7 @@ def __init__(
         sagemaker_runtime_client=None,
         sagemaker_featurestore_runtime_client=None,
         default_bucket=None,
+        settings=SessionSettings(),
     ):
         """Initialize a SageMaker ``Session``.
 
@@ -110,13 +112,16 @@ def __init__(
                 If not provided, a default bucket will be created based on the following format:
                 "sagemaker-{region}-{aws-account-id}".
                 Example: "sagemaker-my-custom-bucket".
+            settings (sagemaker.session_settings.SessionSettings): Optional. Set of optional
+                parameters to apply to the session.
         """
         self._default_bucket = None
         self._default_bucket_name_override = default_bucket
         self.s3_resource = None
         self.s3_client = None
         self.config = None
         self.lambda_client = None
+        self.settings = settings
 
         self._initialize(
             boto_session=boto_session,
diff --git a/src/sagemaker/session_settings.py b/src/sagemaker/session_settings.py
@@ -0,0 +1,34 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+"""Defines classes to parametrize a SageMaker ``Session``."""
+
+from __future__ import absolute_import
+
+
+class SessionSettings(object):
+    """Optional container class for settings to apply to a SageMaker session."""
+
+    def __init__(self, encrypt_repacked_artifacts=True) -> None:
+        """Initialize the ``SessionSettings`` of a SageMaker ``Session``.
+
+        Args:
+            encrypt_repacked_artifacts (bool): Flag to indicate whether to encrypt the artifacts
+                at rest in S3 using the default AWS managed KMS key for S3 when a custom KMS key
+                is not provided (Default: True).
+        """
+        self._encrypt_repacked_artifacts = encrypt_repacked_artifacts
+
+    @property
+    def encrypt_repacked_artifacts(self) -> bool:
+        """Return True if repacked artifacts at rest in S3 should be encrypted by default."""
+        return self._encrypt_repacked_artifacts
diff --git a/src/sagemaker/utils.py b/src/sagemaker/utils.py
@@ -29,6 +29,7 @@
 from six.moves.urllib import parse
 
 from sagemaker import deprecations
+from sagemaker.session_settings import SessionSettings
 
 
 ECR_URI_PATTERN = r"^(\d+)(\.)dkr(\.)ecr(\.)(.+)(\.)(.*)(/)(.*:.*)$"
@@ -429,8 +430,15 @@ def _save_model(repacked_model_uri, tmp_model_path, sagemaker_session, kms_key):
         bucket, key = url.netloc, url.path.lstrip("/")
         new_key = key.replace(os.path.basename(key), os.path.basename(repacked_model_uri))
 
+        settings = (
+            sagemaker_session.settings if sagemaker_session is not None else SessionSettings()
+        )
+        encrypt_artifact = settings.encrypt_repacked_artifacts
+
         if kms_key:
             extra_args = {"ServerSideEncryption": "aws:kms", "SSEKMSKeyId": kms_key}
+        elif encrypt_artifact:
+            extra_args = {"ServerSideEncryption": "aws:kms"}
         else:
             extra_args = None
         sagemaker_session.boto_session.resource(
diff --git a/tests/integ/test_clarify_model_monitor.py b/tests/integ/test_clarify_model_monitor.py
@@ -53,6 +53,7 @@
 HEADER_OF_LABEL = "Label"
 HEADERS_OF_FEATURES = ["F1", "F2", "F3", "F4", "F5", "F6", "F7"]
 ALL_HEADERS = [*HEADERS_OF_FEATURES, HEADER_OF_LABEL]
+HEADER_OF_PREDICTION = "Decision"
 DATASET_TYPE = "text/csv"
 CONTENT_TYPE = DATASET_TYPE
 ACCEPT_TYPE = DATASET_TYPE
@@ -325,7 +326,7 @@ def scheduled_explainability_monitor(
 ):
     monitor_schedule_name = utils.unique_name_from_base("explainability-monitor")
     analysis_config = ExplainabilityAnalysisConfig(
-        shap_config, model_config, headers=HEADERS_OF_FEATURES
+        shap_config, model_config, headers=HEADERS_OF_FEATURES, label_headers=[HEADER_OF_PREDICTION]
     )
     s3_uri_monitoring_output = os.path.join(
         "s3://",
diff --git a/tests/unit/sagemaker/monitor/test_clarify_model_monitor.py b/tests/unit/sagemaker/monitor/test_clarify_model_monitor.py
@@ -279,6 +279,7 @@
 # for bias
 ANALYSIS_CONFIG_LABEL = "Label"
 ANALYSIS_CONFIG_HEADERS_OF_FEATURES = ["F1", "F2", "F3"]
+ANALYSIS_CONFIG_LABEL_HEADERS = ["Decision"]
 ANALYSIS_CONFIG_ALL_HEADERS = [*ANALYSIS_CONFIG_HEADERS_OF_FEATURES, ANALYSIS_CONFIG_LABEL]
 ANALYSIS_CONFIG_LABEL_VALUES = [1]
 ANALYSIS_CONFIG_FACET_NAME = "F1"
@@ -330,6 +331,11 @@
         "content_type": CONTENT_TYPE,
     },
 }
+EXPLAINABILITY_ANALYSIS_CONFIG_WITH_LABEL_HEADERS = copy.deepcopy(EXPLAINABILITY_ANALYSIS_CONFIG)
+# noinspection PyTypeChecker
+EXPLAINABILITY_ANALYSIS_CONFIG_WITH_LABEL_HEADERS["predictor"][
+    "label_headers"
+] = ANALYSIS_CONFIG_LABEL_HEADERS
 
 
 @pytest.fixture()
@@ -1048,25 +1054,44 @@ def test_explainability_analysis_config(shap_config, model_config):
         explainability_config=shap_config,
         model_config=model_config,
         headers=ANALYSIS_CONFIG_HEADERS_OF_FEATURES,
+        label_headers=ANALYSIS_CONFIG_LABEL_HEADERS,
     )
-    assert EXPLAINABILITY_ANALYSIS_CONFIG == config._to_dict()
+    assert EXPLAINABILITY_ANALYSIS_CONFIG_WITH_LABEL_HEADERS == config._to_dict()
 
 
+@pytest.mark.parametrize(
+    "model_scores,explainability_analysis_config",
+    [
+        (INFERENCE_ATTRIBUTE, EXPLAINABILITY_ANALYSIS_CONFIG),
+        (
+            ModelPredictedLabelConfig(
+                label=INFERENCE_ATTRIBUTE, label_headers=ANALYSIS_CONFIG_LABEL_HEADERS
+            ),
+            EXPLAINABILITY_ANALYSIS_CONFIG_WITH_LABEL_HEADERS,
+        ),
+    ],
+)
 def test_model_explainability_monitor_suggest_baseline(
-    model_explainability_monitor, sagemaker_session, data_config, shap_config, model_config
+    model_explainability_monitor,
+    sagemaker_session,
+    data_config,
+    shap_config,
+    model_config,
+    model_scores,
+    explainability_analysis_config,
 ):
     clarify_model_monitor = model_explainability_monitor
     # suggest baseline
     clarify_model_monitor.suggest_baseline(
         data_config=data_config,
         explainability_config=shap_config,
         model_config=model_config,
-        model_scores=INFERENCE_ATTRIBUTE,
+        model_scores=model_scores,
         job_name=BASELINING_JOB_NAME,
     )
     assert isinstance(clarify_model_monitor.latest_baselining_job, ClarifyBaseliningJob)
     assert (
-        EXPLAINABILITY_ANALYSIS_CONFIG
+        explainability_analysis_config
         == clarify_model_monitor.latest_baselining_job_config.analysis_config._to_dict()
     )
     clarify_baselining_job = clarify_model_monitor.latest_baselining_job
@@ -1081,6 +1106,7 @@ def test_model_explainability_monitor_suggest_baseline(
         analysis_config=None,  # will pick up config from baselining job
         baseline_job_name=BASELINING_JOB_NAME,
         endpoint_input=ENDPOINT_NAME,
+        explainability_analysis_config=explainability_analysis_config,
         #  will pick up attributes from baselining job
     )
 
@@ -1133,6 +1159,7 @@ def test_model_explainability_monitor_created_with_config(
         sagemaker_session=sagemaker_session,
         analysis_config=analysis_config,
         constraints=CONSTRAINTS,
+        explainability_analysis_config=EXPLAINABILITY_ANALYSIS_CONFIG,
     )
 
     # update schedule
@@ -1263,6 +1290,7 @@ def _test_model_explainability_monitor_create_schedule(
         features_attribute=FEATURES_ATTRIBUTE,
         inference_attribute=str(INFERENCE_ATTRIBUTE),
     ),
+    explainability_analysis_config=None,
 ):
     # create schedule
     with patch(
@@ -1278,7 +1306,7 @@ def _test_model_explainability_monitor_create_schedule(
         )
         if not isinstance(analysis_config, str):
             upload.assert_called_once()
-            assert json.loads(upload.call_args[0][0]) == EXPLAINABILITY_ANALYSIS_CONFIG
+            assert json.loads(upload.call_args[0][0]) == explainability_analysis_config
 
     # validation
     expected_arguments = {
diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py
diff --git a/tests/unit/test_fw_utils.py b/tests/unit/test_fw_utils.py
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
diff --git a/tox.ini b/tox.ini

Original file line number	Diff line number	Diff line change
`@@ -2343,6 +2343,7 @@ def _stage_user_code_in_s3(self):`
`2343`	`2343`	`dependencies=self.dependencies,`
`2344`	`2344`	`kms_key=kms_key,`
`2345`	`2345`	`s3_resource=self.sagemaker_session.s3_resource,`
	`2346`	`+ settings=self.sagemaker_session.settings,`
`2346`	`2347`	`)`
`2347`	`2348`
`2348`	`2349`	`def _model_source_dir(self):`
Original file line number	Diff line number	Diff line change
`@@ -1131,6 +1131,7 @@ def _upload_code(self, key_prefix, repack=False):`
`1131`	`1131`	`script=self.entry_point,`
`1132`	`1132`	`directory=self.source_dir,`
`1133`	`1133`	`dependencies=self.dependencies,`
	`1134`	`+ settings=self.sagemaker_session.settings,`
`1134`	`1135`	`)`
`1135`	`1136`
`1136`	`1137`	`if repack and self.model_data is not None and self.entry_point is not None:`