Commit 08a31b4

Author: Dan
Merge branch 'master' into mask-creds-local-mode
2 parents: 427ece5 + 6acde94

11 files changed (+164, -23 lines)

doc/api/training/sdp_versions/v1.0.0/smd_data_parallel_pytorch.rst (+1, -1)

@@ -155,7 +155,7 @@ PyTorch API
 
 **Supported versions:**
 
-- PyTorch 1.6
+- PyTorch 1.6.0
 
 
 .. function:: smdistributed.dataparallel.torch.distributed.is_available()

doc/api/training/sdp_versions/v1.0.0/smd_data_parallel_tensorflow.rst (+2, -2)

@@ -414,7 +414,7 @@ TensorFlow API
 
 .. function:: smdistributed.dataparallel.tensorflow.DistributedOptimizer
 
-Applicable if you use the ``tf.estimator`` API in TensorFlow 2.x (2.3).
+Applicable if you use the ``tf.estimator`` API in TensorFlow 2.x (2.3.1).
 
 Construct a new ``DistributedOptimizer`` , which uses TensorFlow
 optimizer under the hood for computing single-process gradient values

@@ -489,7 +489,7 @@ TensorFlow API
 
 .. function:: smdistributed.dataparallel.tensorflow.BroadcastGlobalVariablesHook
 
-Applicable if you use the ``tf.estimator`` API in TensorFlow 2.x (2.3).
+Applicable if you use the ``tf.estimator`` API in TensorFlow 2.x (2.3.1).
 
 
 ``SessionRunHook`` that will broadcast all global variables from root

doc/api/training/smd_model_parallel_release_notes/smd_model_parallel_change_log.md (+9, -5)

@@ -8,14 +8,18 @@
 
 ### PyTorch
 
-#### Add support for PyTorch 1.7
+#### Add support for PyTorch 1.7.1
 
-- Adds support for `gradient_as_bucket_view` (PyTorch 1.7 only), `find_unused_parameters` (PyTorch 1.7 only) and `broadcast_buffers` options to `smp.DistributedModel`. These options behave the same as the corresponding options (with the same names) in
+- Adds support for `gradient_as_bucket_view` (PyTorch 1.7.1 only), `find_unused_parameters` (PyTorch 1.7.1 only) and `broadcast_buffers` options to `smp.DistributedModel`. These options behave the same as the corresponding options (with the same names) in
 `torch.DistributedDataParallel` API. Please refer to the [SageMaker distributed model parallel API documentation](https://sagemaker.readthedocs.io/en/stable/api/training/smd_model_parallel_pytorch.html#smp.DistributedModel) for more information.
 
-- Adds support for `join` (PyTorch 1.7 only) context manager, which is to be used in conjunction with an instance of `smp.DistributedModel` to be able to train with uneven inputs across participating processes.
+- Adds support for `join` (PyTorch 1.7.1 only) context manager, which is to be used in conjunction with an instance of `smp.DistributedModel` to be able to train with uneven inputs across participating processes.
 
-- Adds support for `_register_comm_hook` (PyTorch 1.7 only) which will register the callable as a communication hook for DDP. NOTE: Like in DDP, this is an experimental API and subject to change.
+- Adds support for `_register_comm_hook` (PyTorch 1.7.1 only) which will register the callable as a communication hook for DDP. NOTE: Like in DDP, this is an experimental API and subject to change.
+
+### Tensorflow
+
+- Adds support for Tensorflow 2.4.1
 
 ## Bug Fixes

@@ -32,7 +36,7 @@ regular dicts.
 
 ### PyTorch
 
-- A performance regression was observed when training on SMP with PyTorch 1.7.1 compared to 1.6. The rootcause was found to be the slowdown in performance of `.grad` method calls in PyTorch 1.7.1 compared to 1.6. Please see the related discussion: https://github.com/pytorch/pytorch/issues/50636.
+- A performance regression was observed when training on SMP with PyTorch 1.7.1 compared to 1.6.0. The rootcause was found to be the slowdown in performance of `.grad` method calls in PyTorch 1.7.1 compared to 1.6.0. Please see the related discussion: https://github.com/pytorch/pytorch/issues/50636.
 
 
 # Sagemaker Distributed Model Parallel 1.1.0 Release Notes

doc/api/training/smp_versions/v1.1.0/smd_model_parallel_tensorflow.rst (+1, -1)

@@ -1,7 +1,7 @@
 TensorFlow API
 ==============
 
-**Supported version: 2.3**
+**Supported version: 2.3.1**
 
 **Important**: This API document assumes you use the following import statement in your training scripts.

doc/api/training/smp_versions/v1.2.0/smd_model_parallel_pytorch.rst (+3, -3)

@@ -6,7 +6,7 @@
 PyTorch API
 ===========
 
-**Supported versions: 1.7.1, 1.6**
+**Supported versions: 1.7.1, 1.6.0**
 
 This API document assumes you use the following import statements in your training scripts.

@@ -159,7 +159,7 @@ This API document assumes you use the following import statements in your traini
 This parameter is forwarded to the underlying ``DistributedDataParallel`` wrapper.
 Please see: `broadcast_buffer <https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html#torch.nn.parallel.DistributedDataParallel>`__.
 
-- ``gradient_as_bucket_view (PyTorch 1.7 only)`` (default: False): To be
+- ``gradient_as_bucket_view (PyTorch 1.7.1 only)`` (default: False): To be
 used with ``ddp=True``. This parameter is forwarded to the underlying
 ``DistributedDataParallel`` wrapper. Please see `gradient_as_bucket_view <https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html#torch.nn.parallel.DistributedDataParallel>`__.

@@ -257,7 +257,7 @@ This API document assumes you use the following import statements in your traini
 
 .. function:: join( )
 
-**Available for PyTorch 1.7 only**
+**Available for PyTorch 1.7.1 only**
 
 A context manager to be used in conjunction with an instance of
 ``smp.DistributedModel`` to be able to train with uneven inputs across

doc/api/training/smp_versions/v1.2.0/smd_model_parallel_tensorflow.rst (+1, -1)

@@ -1,7 +1,7 @@
 TensorFlow API
 ==============
 
-**Supported version: 2.3**
+**Supported version: 2.4.1, 2.3.1**
 
 **Important**: This API document assumes you use the following import statement in your training scripts.

src/sagemaker/estimator.py (+3)

@@ -349,6 +349,9 @@ def __init__(
         self.profiler_config = profiler_config
         self.disable_profiler = disable_profiler
 
+        if not _region_supports_profiler(self.sagemaker_session.boto_region_name):
+            self.disable_profiler = True
+
         self.profiler_rule_configs = None
         self.profiler_rules = None
         self.debugger_rules = None
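The three added lines force the profiler off in regions that cannot support it, overriding whatever the caller passed. A minimal, self-contained sketch of that gate pattern (the region set, the `Estimator` shape, and the `region_name` parameter here are illustrative assumptions, not the SDK's actual implementation):

```python
# Hypothetical list of regions without profiler support, for illustration only.
_PROFILER_UNSUPPORTED_REGIONS = {"cn-north-1", "cn-northwest-1"}


def _region_supports_profiler(region_name):
    """Return True when the profiler is assumed available in region_name."""
    return region_name not in _PROFILER_UNSUPPORTED_REGIONS


class Estimator:
    def __init__(self, region_name, disable_profiler=False):
        self.disable_profiler = disable_profiler
        # Force the profiler off when the region cannot support it,
        # regardless of what the caller requested.
        if not _region_supports_profiler(region_name):
            self.disable_profiler = True
```

The key design choice mirrored from the diff is that the override happens after the user-supplied flag is stored, so an unsupported region always wins.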

src/sagemaker/local/local_session.py (+18, -5)

@@ -426,17 +426,27 @@ def invoke_endpoint(
         CustomAttributes=None,
         TargetModel=None,
         TargetVariant=None,
+        InferenceId=None,
     ):
         """Invoke the endpoint.
 
         Args:
-            Body:
-            EndpointName:
-            Accept: (Default value = None)
-            CustomAttributes: (Default value = None)
+            Body: Input data for which you want the model to provide inference.
+            EndpointName: The name of the endpoint that you specified when you
+                created the endpoint using the CreateEndpoint API.
+            ContentType: The MIME type of the input data in the request body (Default value = None)
+            Accept: The desired MIME type of the inference in the response (Default value = None)
+            CustomAttributes: Provides additional information about a request for an inference
+                submitted to a model hosted at an Amazon SageMaker endpoint (Default value = None)
+            TargetModel: The model to request for inference when invoking a multi-model endpoint
+                (Default value = None)
+            TargetVariant: Specify the production variant to send the inference request to when
+                invoking an endpoint that is running two or more variants (Default value = None)
+            InferenceId: If you provide a value, it is added to the captured data when you enable
+                data capture on the endpoint (Default value = None)
 
         Returns:
-
+            object: Inference for the given input.
         """
         url = "http://localhost:%s/invocations" % self.serving_port
         headers = {}

@@ -456,6 +466,9 @@ def invoke_endpoint(
         if TargetVariant is not None:
             headers["X-Amzn-SageMaker-Target-Variant"] = TargetVariant
 
+        if InferenceId is not None:
+            headers["X-Amzn-SageMaker-Inference-Id"] = InferenceId
+
         r = self.http.request("POST", url, body=Body, preload_content=False, headers=headers)
 
         return {"Body": r, "ContentType": Accept}
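In local mode the new `InferenceId` parameter reaches the container the same way `TargetVariant` does: as an HTTP header on the POST to the local invocations endpoint. A minimal sketch of that header assembly (the function name is illustrative; the header names are the ones shown in the diff):

```python
def build_invocation_headers(TargetVariant=None, InferenceId=None):
    """Map optional invoke_endpoint parameters onto local-mode HTTP headers."""
    headers = {}
    # Only set headers for parameters the caller actually provided.
    if TargetVariant is not None:
        headers["X-Amzn-SageMaker-Target-Variant"] = TargetVariant
    if InferenceId is not None:
        headers["X-Amzn-SageMaker-Inference-Id"] = InferenceId
    return headers
```

Omitted parameters produce no header at all, matching the `is not None` guards in the diff rather than sending empty header values.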

src/sagemaker/predictor.py (+16, -5)

@@ -95,7 +95,9 @@ def __init__(
         self._model_names = self._get_model_names()
         self._context = None
 
-    def predict(self, data, initial_args=None, target_model=None, target_variant=None):
+    def predict(
+        self, data, initial_args=None, target_model=None, target_variant=None, inference_id=None
+    ):
         """Return the inference from the specified endpoint.
 
         Args:

@@ -111,8 +113,10 @@ def predict(self, data, initial_args=None, target_model=None, target_variant=Non
                 in case of a multi model endpoint. Does not apply to endpoints hosting
                 single model (Default: None)
             target_variant (str): The name of the production variant to run an inference
-                request on (Default: None). Note that the ProductionVariant identifies the model
-                you want to host and the resources you want to deploy for hosting it.
+                request on (Default: None). Note that the ProductionVariant identifies the
+                model you want to host and the resources you want to deploy for hosting it.
+            inference_id (str): If you provide a value, it is added to the captured data
+                when you enable data capture on the endpoint (Default: None).
 
         Returns:
             object: Inference for the given input. If a deserializer was specified when creating

@@ -121,7 +125,9 @@ def predict(self, data, initial_args=None, target_model=None, target_variant=Non
             as is.
         """
 
-        request_args = self._create_request_args(data, initial_args, target_model, target_variant)
+        request_args = self._create_request_args(
+            data, initial_args, target_model, target_variant, inference_id
+        )
         response = self.sagemaker_session.sagemaker_runtime_client.invoke_endpoint(**request_args)
         return self._handle_response(response)

@@ -131,7 +137,9 @@ def _handle_response(self, response):
         content_type = response.get("ContentType", "application/octet-stream")
         return self.deserializer.deserialize(response_body, content_type)
 
-    def _create_request_args(self, data, initial_args=None, target_model=None, target_variant=None):
+    def _create_request_args(
+        self, data, initial_args=None, target_model=None, target_variant=None, inference_id=None
+    ):
         """Placeholder docstring"""
         args = dict(initial_args) if initial_args else {}

@@ -150,6 +158,9 @@ def _create_request_args(self, data, initial_args=None, target_model=None, targe
         if target_variant:
             args["TargetVariant"] = target_variant
 
+        if inference_id:
+            args["InferenceId"] = inference_id
+
         data = self.serializer.serialize(data)
 
         args["Body"] = data
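Taken together, `predict` now threads `inference_id` through `_create_request_args` into the `InvokeEndpoint` request dict. A standalone sketch of that argument assembly, without importing `sagemaker` (the free function, the explicit `endpoint_name` parameter, and the skipped serialization step are simplifications of the real method):

```python
def create_request_args(
    endpoint_name, data, initial_args=None, target_model=None,
    target_variant=None, inference_id=None,
):
    """Assemble keyword arguments for an InvokeEndpoint call."""
    # Caller-supplied initial_args win; copy so we never mutate the input.
    args = dict(initial_args) if initial_args else {}
    args.setdefault("EndpointName", endpoint_name)
    # Optional parameters are only included when truthy, mirroring the diff.
    if target_model:
        args["TargetModel"] = target_model
    if target_variant:
        args["TargetVariant"] = target_variant
    if inference_id:
        args["InferenceId"] = inference_id
    args["Body"] = data  # serialization omitted in this sketch
    return args
```

A usage sketch: `create_request_args("my-endpoint", "1,2,3", inference_id="abc")` yields a dict ready to be splatted into `invoke_endpoint(**request_args)`.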
New file (+86)

@@ -0,0 +1,86 @@
+# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+import os
+import pytest
+
+import tests.integ
+import tests.integ.timeout
+
+from sagemaker import image_uris
+from sagemaker.model import Model
+from sagemaker.predictor import Predictor
+from sagemaker.serializers import CSVSerializer
+from sagemaker.utils import unique_name_from_base
+
+from tests.integ import DATA_DIR
+
+
+ROLE = "SageMakerRole"
+INSTANCE_COUNT = 1
+INSTANCE_TYPE = "ml.c5.xlarge"
+TEST_CSV_DATA = "42,42,42,42,42,42,42"
+XGBOOST_DATA_PATH = os.path.join(DATA_DIR, "xgboost_model")
+
+
+@pytest.yield_fixture(scope="module")
+def endpoint_name(sagemaker_session):
+    endpoint_name = unique_name_from_base("model-inference-id-integ")
+    xgb_model_data = sagemaker_session.upload_data(
+        path=os.path.join(XGBOOST_DATA_PATH, "xgb_model.tar.gz"),
+        key_prefix="integ-test-data/xgboost/model",
+    )
+
+    xgb_image = image_uris.retrieve(
+        "xgboost",
+        sagemaker_session.boto_region_name,
+        version="1",
+        image_scope="inference",
+    )
+
+    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(
+        endpoint_name=endpoint_name, sagemaker_session=sagemaker_session, hours=2
+    ):
+        xgb_model = Model(
+            model_data=xgb_model_data,
+            image_uri=xgb_image,
+            name=endpoint_name,  # model name
+            role=ROLE,
+            sagemaker_session=sagemaker_session,
+        )
+        xgb_model.deploy(INSTANCE_COUNT, INSTANCE_TYPE, endpoint_name=endpoint_name)
+        yield endpoint_name
+
+
+def test_predict_with_inference_id(sagemaker_session, endpoint_name):
+    predictor = Predictor(
+        endpoint_name=endpoint_name,
+        sagemaker_session=sagemaker_session,
+        serializer=CSVSerializer(),
+    )
+
+    # Validate that no exception is raised when the inference_id is specified.
+    response = predictor.predict(TEST_CSV_DATA, inference_id="foo")
+    assert response
+
+
+def test_invoke_endpoint_with_inference_id(sagemaker_session, endpoint_name):
+    response = sagemaker_session.sagemaker_runtime_client.invoke_endpoint(
+        EndpointName=endpoint_name,
+        Body=TEST_CSV_DATA,
+        ContentType="text/csv",
+        Accept="text/csv",
+        InferenceId="foo",
+    )
+    assert response

tests/unit/test_predictor.py (+24)

@@ -31,6 +31,7 @@
 RETURN_VALUE = 0
 CSV_RETURN_VALUE = "1,2,3\r\n"
 PRODUCTION_VARIANT_1 = "PRODUCTION_VARIANT_1"
+INFERENCE_ID = "inference-id"
 
 ENDPOINT_DESC = {"EndpointArn": "foo", "EndpointConfigName": ENDPOINT}

@@ -98,6 +99,29 @@ def test_predict_call_with_target_variant():
     assert result == RETURN_VALUE
 
 
+def test_predict_call_with_inference_id():
+    sagemaker_session = empty_sagemaker_session()
+    predictor = Predictor(ENDPOINT, sagemaker_session)
+
+    data = "untouched"
+    result = predictor.predict(data, inference_id=INFERENCE_ID)
+
+    assert sagemaker_session.sagemaker_runtime_client.invoke_endpoint.called
+
+    expected_request_args = {
+        "Accept": DEFAULT_ACCEPT,
+        "Body": data,
+        "ContentType": DEFAULT_CONTENT_TYPE,
+        "EndpointName": ENDPOINT,
+        "InferenceId": INFERENCE_ID,
+    }
+
+    call_args, kwargs = sagemaker_session.sagemaker_runtime_client.invoke_endpoint.call_args
+    assert kwargs == expected_request_args
+
+    assert result == RETURN_VALUE
+
+
 def test_multi_model_predict_call():
     sagemaker_session = empty_sagemaker_session()
     predictor = Predictor(ENDPOINT, sagemaker_session)
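The unit test leans on `unittest.mock` recording keyword arguments: unpacking `call_args` yields the positional tuple and the keyword dict, so the whole `InvokeEndpoint` request can be checked against an expected dict in one comparison. A minimal stdlib-only illustration of that inspection pattern (endpoint name and values are placeholders):

```python
from unittest.mock import Mock

# Stand-in for the mocked SageMaker runtime client used by the tests.
runtime_client = Mock()
runtime_client.invoke_endpoint(
    EndpointName="my-endpoint", Body="untouched", InferenceId="inference-id"
)

# call_args unpacks into (positional args, keyword args) for the last call.
call_args, kwargs = runtime_client.invoke_endpoint.call_args
assert kwargs["InferenceId"] == "inference-id"
```

Because the SDK passes everything to `invoke_endpoint` as keyword arguments, `call_args[0]` stays empty and the full request lives in the keyword dict.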
