Commit 3d042bd

feature: add support for async inference
1 parent 554d735 commit 3d042bd

22 files changed (+1482, -2 lines)

doc/api/inference/async_inference.rst

+19
@@ -0,0 +1,19 @@
Async Inference
---------------

This module contains classes related to Amazon SageMaker Async Inference

.. automodule:: sagemaker.async_inference.async_inference_config
    :members:
    :undoc-members:
    :show-inheritance:

.. automodule:: sagemaker.async_inference.async_inference_response
    :members:
    :undoc-members:
    :show-inheritance:

.. automodule:: sagemaker.async_inference.waiter_config
    :members:
    :undoc-members:
    :show-inheritance:

doc/api/inference/predictor_async.rst

+9
@@ -0,0 +1,9 @@
AsyncPredictor
--------------

Make async predictions against SageMaker endpoints with Python objects

.. autoclass:: sagemaker.predictor_async.AsyncPredictor
    :members:
    :undoc-members:
    :show-inheritance:
src/sagemaker/async_inference/__init__.py

+19
@@ -0,0 +1,19 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""Imports the classes in this module to simplify customer imports"""

from __future__ import absolute_import

from sagemaker.async_inference.async_inference_config import AsyncInferenceConfig  # noqa: F401
from sagemaker.async_inference.waiter_config import WaiterConfig  # noqa: F401
from sagemaker.async_inference.async_inference_response import AsyncInferenceResponse  # noqa: F401
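With these re-exports in place, the async inference classes can be imported from the package root rather than from their individual submodules. A minimal sketch of the intended import style:

# Package-level imports made possible by the re-exports above.
from sagemaker.async_inference import (
    AsyncInferenceConfig,
    AsyncInferenceResponse,
    WaiterConfig,
)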
src/sagemaker/async_inference/async_inference_config.py

+81
@@ -0,0 +1,81 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""A class for AsyncInferenceConfig

Codes are used for configuring async inference endpoint. Use it when deploying
the model to the endpoints.
"""
from __future__ import print_function, absolute_import


class AsyncInferenceConfig(object):
    """Configuration object passed in when deploying models to Amazon SageMaker Endpoints.

    This object specifies configuration related to async endpoint. Use this configuration
    when trying to create async endpoint and make async inference
    """

    def __init__(
        self,
        output_path=None,
        max_concurrent_invocations_per_instance=None,
        kms_key_id=None,
        notification_config=None,
    ):
        """Initialize an AsyncInferenceConfig object for async inference related configuration.

        Args:
            output_path (str): Optional. The Amazon S3 location that endpoints upload
                inference responses to. If no value is provided, Amazon SageMaker will
                use default Amazon S3 Async Inference output path. (Default: None)
            max_concurrent_invocations_per_instance (int): Optional. The maximum number of
                concurrent requests sent by the SageMaker client to the model container. If
                no value is provided, Amazon SageMaker will choose an optimal value for you.
                (Default: None)
            kms_key_id (str): Optional. The Amazon Web Services Key Management Service
                (Amazon Web Services KMS) key that Amazon SageMaker uses to encrypt the
                asynchronous inference output in Amazon S3. (Default: None)
            notification_config (dict): Optional. Specifies the configuration for notifications
                of inference results for asynchronous inference (Default: None):
                * success_topic (str): Amazon SNS topic to post a notification to when inference
                    completes successfully. If no topic is provided, no notification is sent on
                    success. The key in notification_config is 'SuccessTopic'.
                * error_topic (str): Amazon SNS topic to post a notification to when inference
                    fails. If no topic is provided, no notification is sent on failure.
                    The key in notification_config is 'ErrorTopic'.
        """
        self.output_path = output_path
        self.max_concurrent_invocations_per_instance = max_concurrent_invocations_per_instance
        self.kms_key_id = kms_key_id
        self.notification_config = notification_config

    def _to_request_dict(self):
        """Generates a request dictionary using the parameters provided to the class."""
        request_dict = {
            "OutputConfig": {
                "S3OutputPath": self.output_path,
            },
        }

        if self.max_concurrent_invocations_per_instance:
            request_dict["ClientConfig"] = {
                "MaxConcurrentInvocationsPerInstance": self.max_concurrent_invocations_per_instance
            }

        if self.kms_key_id:
            request_dict["OutputConfig"]["KmsKeyId"] = self.kms_key_id

        if self.notification_config:
            request_dict["OutputConfig"]["NotificationConfig"] = self.notification_config

        return request_dict
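As a rough usage sketch of the class above (the bucket name, KMS key alias, and SNS topic ARNs are placeholders, not values from this commit), a fully specified config and the request dictionary it assembles would look like this:

from sagemaker.async_inference import AsyncInferenceConfig

config = AsyncInferenceConfig(
    output_path="s3://example-bucket/async-output/",  # placeholder bucket
    max_concurrent_invocations_per_instance=4,
    kms_key_id="alias/example-key",  # placeholder KMS key
    notification_config={
        "SuccessTopic": "arn:aws:sns:us-east-1:111122223333:success",  # placeholder ARN
        "ErrorTopic": "arn:aws:sns:us-east-1:111122223333:error",  # placeholder ARN
    },
)

# _to_request_dict() nests KmsKeyId and NotificationConfig under OutputConfig and
# puts the concurrency limit under ClientConfig, per the method above:
# {
#     "OutputConfig": {
#         "S3OutputPath": "s3://example-bucket/async-output/",
#         "KmsKeyId": "alias/example-key",
#         "NotificationConfig": {"SuccessTopic": "...", "ErrorTopic": "..."},
#     },
#     "ClientConfig": {"MaxConcurrentInvocationsPerInstance": 4},
# }
print(config._to_request_dict())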
src/sagemaker/async_inference/async_inference_response.py

+98
@@ -0,0 +1,98 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""A class for AsyncInferenceResponse"""

from __future__ import print_function, absolute_import

from botocore.exceptions import ClientError
from sagemaker.s3 import parse_s3_url
from sagemaker.async_inference import WaiterConfig
from sagemaker.exceptions import ObjectNotExistedError, UnexpectedClientError


class AsyncInferenceResponse(object):
    """Response from Async Inference endpoint

    This response object provides a method to check the async Amazon S3
    output path. If result object exists in that path, decode and return
    the result
    """

    def __init__(
        self,
        predictor_async,
        output_path,
    ):
        """Initialize an AsyncInferenceResponse object.

        AsyncInferenceResponse can help users to get async inference result
        from the Amazon S3 output path

        Args:
            predictor_async (sagemaker.predictor.AsyncPredictor): The ``AsyncPredictor``
                that return this response.
            output_path (str): The Amazon S3 location that endpoints upload inference
                responses to.
        """
        self.predictor_async = predictor_async
        self.output_path = output_path
        self._result = None

    def get_result(
        self,
        waiter_config=None,
    ):
        """Get result from the async Amazon S3 output path

        Args:
            waiter_config (sagemaker.async_inference.waiter_config.WaiterConfig): Configuration
                for the waiter. The pre-defined value for the delay between poll is 15 seconds
                and the default max attempts is 60
        Raises:
            ValueError: If a wrong type of object is provided as ``waiter_config``
        Returns:
            object: Inference result in the given Amazon S3 output path. If a deserializer was
                specified when creating the AsyncPredictor, the result of the deserializer is
                returned. Otherwise the response returns the sequence of bytes
                as is.
        """
        if waiter_config is not None and not isinstance(waiter_config, WaiterConfig):
            raise ValueError("waiter_config should be a WaiterConfig object")

        if self._result is None:
            if waiter_config is None:
                self._result = self._get_result_from_s3(self.output_path)
            else:
                self._result = self.predictor_async._wait_for_output(
                    self.output_path, waiter_config
                )
        return self._result

    def _get_result_from_s3(
        self,
        output_path,
    ):
        """Get inference result from the output Amazon S3 path"""
        bucket, key = parse_s3_url(output_path)
        try:
            response = self.predictor_async.s3_client.get_object(Bucket=bucket, Key=key)
            return self.predictor_async.predictor._handle_response(response)
        except ClientError as ex:
            if ex.response["Error"]["Code"] == "NoSuchKey":
                raise ObjectNotExistedError(
                    message="Inference could still be running",
                    output_path=output_path,
                )
            raise UnexpectedClientError(
                message=ex.response["Error"]["Message"],
            )
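In normal use an AsyncInferenceResponse is handed back by the AsyncPredictor (defined in the predictor_async module, part of this commit but not reproduced in this excerpt) rather than constructed by hand. A minimal sketch of get_result, assuming ``response`` is such an object:

from sagemaker.async_inference import WaiterConfig
from sagemaker.exceptions import ObjectNotExistedError

# Without a WaiterConfig, get_result() checks the S3 output path once and raises
# ObjectNotExistedError if no result object has been written yet.
try:
    result = response.get_result()
except ObjectNotExistedError:
    result = None  # inference may still be running

# With a WaiterConfig, the predictor's _wait_for_output polls the path instead
# (here up to 20 attempts, 10 seconds apart).
result = response.get_result(WaiterConfig(max_attempts=20, delay=10))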
src/sagemaker/async_inference/waiter_config.py

+46
@@ -0,0 +1,46 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""A class for WaiterConfig used in async inference

Use it when using async inference and wait for the result.
"""

from __future__ import absolute_import


class WaiterConfig(object):
    """Configuration object passed in when using async inference and wait for the result."""

    def __init__(
        self,
        max_attempts=60,
        delay=15,
    ):
        """Initialize a WaiterConfig object that provides parameters to control waiting behavior.

        Args:
            max_attempts (int): The maximum number of attempts to be made. (Default: 60)
            delay (int): The amount of time in seconds to wait between attempts. (Default: 15)
        """

        self.max_attempts = max_attempts
        self.delay = delay

    def _to_waiter_dict(self):
        """Generates a dictionary using the parameters provided to the class."""
        waiter_dict = {
            "Delay": self.delay,
            "MaxAttempts": self.max_attempts,
        }

        return waiter_dict
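The keys produced by _to_waiter_dict mirror the Delay/MaxAttempts parameters used by botocore waiters, so the maximum time spent polling is roughly delay * max_attempts seconds. A small sketch:

from sagemaker.async_inference import WaiterConfig

waiter_config = WaiterConfig(max_attempts=40, delay=5)  # poll every 5 s, at most 40 times
print(waiter_config._to_waiter_dict())  # {"Delay": 5, "MaxAttempts": 40}
# Upper bound on polling before giving up: about 40 * 5 = 200 seconds.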

src/sagemaker/estimator.py

+7
@@ -864,6 +864,7 @@ def deploy(
         kms_key=None,
         data_capture_config=None,
         tags=None,
+        async_inference_config=None,
         **kwargs,
     ):
         """Deploy the trained model to an Amazon SageMaker endpoint.
@@ -910,6 +911,11 @@ def deploy(
             data_capture_config (sagemaker.model_monitor.DataCaptureConfig): Specifies
                 configuration related to Endpoint data capture for use with
                 Amazon SageMaker Model Monitoring. Default: None.
+            async_inference_config (sagemaker.async_inference.AsyncInferenceConfig): Specifies
+                configuration related to async endpoint. Use this configuration when trying
+                to create async endpoint and make async inference. If an empty config object is
+                passed through, we will use a default config to deploy the async endpoint.
+                (default: None)
             tags(List[dict[str, str]]): Optional. The list of tags to attach to this specific
                 endpoint. Example:
                 >>> tags = [{'Key': 'tagname', 'Value': 'tagvalue'}]
@@ -959,6 +965,7 @@ def deploy(
             wait=wait,
             kms_key=kms_key,
             data_capture_config=data_capture_config,
+            async_inference_config=async_inference_config,
         )

     def register(
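Putting the pieces together, deploying an async endpoint from a fitted estimator would look roughly like the sketch below; the estimator, instance type, and bucket are placeholders, and the returned predictor type comes from the predictor_async module documented earlier, not from this excerpt. Per the docstring above, passing AsyncInferenceConfig() with no arguments deploys an async endpoint with default settings.

from sagemaker.async_inference import AsyncInferenceConfig

# ``estimator`` stands in for any fitted SageMaker estimator.
async_predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.xlarge",  # placeholder instance type
    async_inference_config=AsyncInferenceConfig(
        output_path="s3://example-bucket/async-output/",  # placeholder bucket
        max_concurrent_invocations_per_instance=4,
    ),
)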

src/sagemaker/exceptions.py

+40
@@ -21,3 +21,43 @@ def __init__(self, message, allowed_statuses, actual_status):
         self.allowed_statuses = allowed_statuses
         self.actual_status = actual_status
         super(UnexpectedStatusException, self).__init__(message)
+
+
+class AsyncInferenceError(Exception):
+    """The base exception class for Async Inference exceptions."""
+
+    fmt = "An unspecified error occurred"
+
+    def __init__(self, **kwargs):
+        msg = self.fmt.format(**kwargs)
+        Exception.__init__(self, msg)
+        self.kwargs = kwargs
+
+
+class ObjectNotExistedError(AsyncInferenceError):
+    """Raised when Amazon S3 object not exist in the given path"""
+
+    fmt = "Object not exist at {output_path}. {message}"
+
+    def __init__(self, message, output_path):
+        super(ObjectNotExistedError, self).__init__(message=message, output_path=output_path)
+
+
+class PollingTimeoutError(AsyncInferenceError):
+    """Raised when wait longer than expected and no result object in Amazon S3 bucket yet"""
+
+    fmt = "No result at {output_path} after polling for {seconds} seconds. {message}"
+
+    def __init__(self, message, output_path, seconds):
+        super(PollingTimeoutError, self).__init__(
+            message=message, output_path=output_path, seconds=seconds
+        )
+
+
+class UnexpectedClientError(AsyncInferenceError):
+    """Raised when ClientError's error code is not expected"""
+
+    fmt = "Encountered unexpected client error: {message}"
+
+    def __init__(self, message):
+        super(UnexpectedClientError, self).__init__(message=message)
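Because AsyncInferenceError stores its keyword arguments on the instance, callers can branch on the specific failure mode and inspect e.kwargs. A brief sketch, assuming get_result surfaces these exceptions while polling as outlined earlier:

from sagemaker.exceptions import (
    ObjectNotExistedError,
    PollingTimeoutError,
    UnexpectedClientError,
)

try:
    result = async_response.get_result(waiter_config)  # ``async_response`` as sketched above
except ObjectNotExistedError as e:
    print("No result yet at", e.kwargs["output_path"])
except PollingTimeoutError as e:
    print("Gave up after", e.kwargs["seconds"], "seconds of polling")
except UnexpectedClientError:
    raise  # message comes from the underlying botocore ClientError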
