
Commit b9d1485

Merge branch 'master' into patch-1

2 parents d3779dd + 19115a2


43 files changed: +1753 −174 lines changed

CHANGELOG.md

Lines changed: 31 additions & 0 deletions
@@ -1,5 +1,36 @@
 # Changelog

+## v2.145.0 (2023-04-06)
+
+### Features
+
+ * add support for async inline error notifications
+ * Add methods for feature group to list feature metadata parameters and tags
+ * Support huggingface hub model_id for DJL Models
+
+### Bug Fixes and Other Changes
+
+ * load_sagemaker_config should lazy initialize a default S3 resource
+
+## v2.144.0 (2023-04-05)
+
+### Features
+
+ * support create Clarify explainer enabled endpoint for Clarify Online Explainability
+ * Combined inference and training script artifact
+ * jumpstart instance types
+ * Deprecation warning for framework profiling for TF 2.12 and on, PT 2.0 and on
+
+### Bug Fixes and Other Changes
+
+ * always delete temporary directory even during exception
+ * Fixes the completion_criteria_config dict in the to_input_req method
+ * Update CHANGELOG.md
+
+### Documentation Changes
+
+ * Update SageMaker Debugger doc
+
 ## v2.143.0 (2023-03-29)

 ### Features

VERSION

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-2.143.1.dev0
+2.145.1.dev0

doc/api/inference/explainer.rst

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+Online Explainability
+---------------------
+
+This module contains classes related to Amazon Sagemaker Clarify Online Explainability
+
+.. automodule:: sagemaker.explainer.explainer_config
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: sagemaker.explainer.clarify_explainer_config
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
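
These two automodules cover the configuration objects introduced for Clarify Online Explainability. A minimal sketch of how they are intended to be combined when deploying an endpoint follows; the ``ClarifyShapConfig``/``ClarifyShapBaselineConfig`` names, their keyword arguments, and the ``explainer_config`` argument to ``deploy()`` are assumptions to verify against the generated API reference, and ``model`` stands for an existing ``sagemaker.model.Model``.

    # Sketch only: enable online explainability at deployment time.
    # Class and argument names below are assumptions based on the modules
    # documented above; check the v2.145.0 API reference before relying on them.
    from sagemaker.explainer.explainer_config import ExplainerConfig
    from sagemaker.explainer.clarify_explainer_config import (
        ClarifyExplainerConfig,
        ClarifyShapBaselineConfig,
        ClarifyShapConfig,
    )

    shap_config = ClarifyShapConfig(
        shap_baseline_config=ClarifyShapBaselineConfig(shap_baseline="1,2,3,4")
    )
    explainer_config = ExplainerConfig(
        clarify_explainer_config=ClarifyExplainerConfig(shap_config=shap_config)
    )

    # `model` is an existing sagemaker.model.Model (hypothetical here).
    predictor = model.deploy(
        initial_instance_count=1,
        instance_type="ml.m5.xlarge",
        explainer_config=explainer_config,
    )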

doc/api/prep_data/feature_store.rst

Lines changed: 38 additions & 2 deletions
@@ -1,7 +1,7 @@
 Feature Store APIs
 ------------------

-Feature group
+Feature Group
 *************

 .. autoclass:: sagemaker.feature_store.feature_group.FeatureGroup
@@ -18,7 +18,7 @@ Feature group
     :show-inheritance:


-Feature definition
+Feature Definition
 ******************

 .. autoclass:: sagemaker.feature_store.feature_definition.FeatureDefinition
@@ -77,10 +77,46 @@ Inputs
     :members:
     :show-inheritance:

+.. autoclass:: sagemaker.feature_store.inputs.ResourceEnum
+    :members:
+    :show-inheritance:
+
+.. autoclass:: sagemaker.feature_store.inputs.SearchOperatorEnum
+    :members:
+    :show-inheritance:
+
+.. autoclass:: sagemaker.feature_store.inputs.SortOrderEnum
+    :members:
+    :show-inheritance:
+
+.. autoclass:: sagemaker.feature_store.inputs.FilterOperatorEnum
+    :members:
+    :show-inheritance:
+
+.. autoclass:: sagemaker.feature_store.inputs.Filter
+    :members:
+    :show-inheritance:
+
+.. autoclass:: sagemaker.feature_store.inputs.Identifier
+    :members:
+    :show-inheritance:
+
+.. autoclass:: sagemaker.feature_store.inputs.FeatureParameter
+    :members:
+    :show-inheritance:
+

 Dataset Builder
 ***************

 .. autoclass:: sagemaker.feature_store.dataset_builder.DatasetBuilder
     :members:
     :show-inheritance:
+
+
+Feature Store
+*************
+
+.. autoclass:: sagemaker.feature_store.feature_store.FeatureStore
+    :members:
+    :show-inheritance:
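
The newly documented ``inputs`` classes and the ``FeatureStore`` entry point are meant to be used together for search and batch reads. A rough sketch follows; the exact keyword names (for example ``record_identifiers_value_as_string``) are assumptions to check against the class docs above.

    # Sketch only: search feature groups and batch-read records.
    # Keyword argument names are assumptions; verify against the classes above.
    from sagemaker import Session
    from sagemaker.feature_store.feature_store import FeatureStore
    from sagemaker.feature_store.inputs import (
        Filter,
        FilterOperatorEnum,
        Identifier,
        ResourceEnum,
    )

    feature_store = FeatureStore(sagemaker_session=Session())

    # Search for feature groups whose name contains a substring.
    search_results = feature_store.search(
        resource=ResourceEnum.FEATURE_GROUP,
        filters=[
            Filter(
                name="FeatureGroupName",
                value="my-feature-group",
                operator=FilterOperatorEnum.CONTAINS,
            )
        ],
    )

    # Read several records from the online store in one call.
    records = feature_store.batch_get_record(
        identifiers=[
            Identifier(
                feature_group_name="my-feature-group",
                record_identifiers_value_as_string=["record-1", "record-2"],
            )
        ]
    )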

doc/frameworks/djl/using_djl.rst

Lines changed: 44 additions & 5 deletions
@@ -29,7 +29,7 @@ You can either deploy your model using DeepSpeed or HuggingFace Accelerate, or l

     # Create a DJL Model, backend is chosen automatically
     djl_model = DJLModel(
-        "s3://my_bucket/my_saved_model_artifacts/",
+        "s3://my_bucket/my_saved_model_artifacts/", # This can also be a HuggingFace Hub model id
         "my_sagemaker_role",
         data_type="fp16",
         task="text-generation",
@@ -46,7 +46,7 @@ If you want to use a specific backend, then you can create an instance of the co

     # Create a model using the DeepSpeed backend
     deepspeed_model = DeepSpeedModel(
-        "s3://my_bucket/my_saved_model_artifacts/",
+        "s3://my_bucket/my_saved_model_artifacts/", # This can also be a HuggingFace Hub model id
         "my_sagemaker_role",
         data_type="bf16",
         task="text-generation",
@@ -56,7 +56,7 @@ If you want to use a specific backend, then you can create an instance of the co
     # Create a model using the HuggingFace Accelerate backend

     hf_accelerate_model = HuggingFaceAccelerateModel(
-        "s3://my_bucket/my_saved_model_artifacts/",
+        "s3://my_bucket/my_saved_model_artifacts/", # This can also be a HuggingFace Hub model id
         "my_sagemaker_role",
         data_type="fp16",
         task="text-generation",
@@ -91,9 +91,37 @@ model server configuration.
 Model Artifacts
 ---------------

+DJL Serving supports two ways to load models for inference.
+1. A HuggingFace Hub model id.
+2. Uncompressed model artifacts stored in a S3 bucket.
+
+HuggingFace Hub model id
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Using a HuggingFace Hub model id is the easiest way to get started with deploying Large Models via DJL Serving on SageMaker.
+DJL Serving will use this model id to download the model at runtime via the HuggingFace Transformers ``from_pretrained`` API.
+This method makes it easy to deploy models quickly, but for very large models the download time can become unreasonable.
+
+For example, you can deploy the EleutherAI gpt-j-6B model like this:
+
+.. code::
+
+    model = DJLModel(
+        "EleutherAI/gpt-j-6B",
+        "my_sagemaker_role",
+        data_type="fp16",
+        number_of_partitions=2
+    )
+
+    predictor = model.deploy("ml.g5.12xlarge")
+
+Uncompressed Model Artifacts stored in a S3 bucket
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+For models that are larger than 20GB (total checkpoint size), we recommend that you store the model in S3.
+Download times will be much faster compared to downloading from the HuggingFace Hub at runtime.
 DJL Serving Models expect a different model structure than most of the other frameworks in the SageMaker Python SDK.
 Specifically, DJLModels do not support loading models stored in tar.gz format.
-You must provide an Amazon S3 url pointing to uncompressed model artifacts (bucket and prefix).
 This is because DJL Serving is optimized for large models, and it implements a fast downloading mechanism for large models that require the artifacts be uncompressed.

 For example, lets say you want to deploy the EleutherAI/gpt-j-6B model available on the HuggingFace Hub.
@@ -107,7 +135,18 @@ You can download the model and upload to S3 like this:
     # Upload to S3
     aws s3 sync gpt-j-6B s3://my_bucket/gpt-j-6B

-You would then pass "s3://my_bucket/gpt-j-6B" as ``model_s3_uri`` to the ``DJLModel``.
+You would then pass "s3://my_bucket/gpt-j-6B" as ``model_id`` to the ``DJLModel`` like this:
+
+.. code::
+
+    model = DJLModel(
+        "s3://my_bucket/gpt-j-6B",
+        "my_sagemaker_role",
+        data_type="fp16",
+        number_of_partitions=2
+    )
+
+    predictor = model.deploy("ml.g5.12xlarge")

 For language models we expect that the model weights, model config, and tokenizer config are provided in S3. The model
 should be loadable from the HuggingFace Transformers AutoModelFor<Task>.from_pretrained API, where task

doc/overview.rst

Lines changed: 7 additions & 3 deletions
@@ -1164,7 +1164,8 @@ More information about SageMaker Asynchronous Inference can be found in the `AWS

 To deploy asynchronous inference endpoint, you will need to create a ``AsyncInferenceConfig`` object.
 If you create ``AsyncInferenceConfig`` without specifying its arguments, the default ``S3OutputPath`` will
-be ``s3://sagemaker-{REGION}-{ACCOUNTID}/async-endpoint-outputs/{UNIQUE-JOB-NAME}``. (example shown below):
+be ``s3://sagemaker-{REGION}-{ACCOUNTID}/async-endpoint-outputs/{UNIQUE-JOB-NAME}``, ``S3FailurePath`` will
+be ``s3://sagemaker-{REGION}-{ACCOUNTID}/async-endpoint-failures/{UNIQUE-JOB-NAME}`` (example shown below):

 .. code:: python

@@ -1174,18 +1175,21 @@ be ``s3://sagemaker-{REGION}-{ACCOUNTID}/async-endpoint-outputs/{UNIQUE-JOB-NAME
     async_config = AsyncInferenceConfig()

 Or you can specify configurations in ``AsyncInferenceConfig`` as you like. All of those configuration parameters
-are optional but if you don’t specify the ``output_path``, Amazon SageMaker will use the default ``S3OutputPath``
+are optional but if you don’t specify the ``output_path`` or ``failure_path``, Amazon SageMaker will use the
+default ``S3OutputPath`` or ``S3FailurePath``
 mentioned above (example shown below):

 .. code:: python

-    # Specify S3OutputPath, MaxConcurrentInvocationsPerInstance and NotificationConfig in the async config object
+    # Specify S3OutputPath, S3FailurePath, MaxConcurrentInvocationsPerInstance and NotificationConfig
+    # in the async config object
     async_config = AsyncInferenceConfig(
         output_path="s3://{s3_bucket}/{bucket_prefix}/output",
         max_concurrent_invocations_per_instance=10,
         notification_config = {
             "SuccessTopic": "arn:aws:sns:aws-region:account-id:topic-name",
             "ErrorTopic": "arn:aws:sns:aws-region:account-id:topic-name",
+            "IncludeInferenceResponseIn": ["SUCCESS_NOTIFICATION_TOPIC","ERROR_NOTIFICATION_TOPIC"],
         }
     )
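
Because this commit also adds a ``failure_path`` keyword to ``AsyncInferenceConfig`` (see the Python change below), the failure location can be pinned explicitly as well. A minimal sketch, assuming ``failure_path`` mirrors ``output_path`` and ``model`` is an already-constructed SageMaker ``Model``:

    from sagemaker.async_inference import AsyncInferenceConfig

    # Sketch only: set both the success and failure S3 locations explicitly.
    async_config = AsyncInferenceConfig(
        output_path="s3://my-bucket/async-outputs",
        failure_path="s3://my-bucket/async-failures",
    )

    predictor = model.deploy(
        initial_instance_count=1,
        instance_type="ml.m5.xlarge",
        async_inference_config=async_config,
    )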

src/sagemaker/async_inference/async_inference_config.py

Lines changed: 11 additions & 0 deletions
@@ -31,6 +31,7 @@ def __init__(
         max_concurrent_invocations_per_instance=None,
         kms_key_id=None,
         notification_config=None,
+        failure_path=None,
     ):
         """Initialize an AsyncInferenceConfig object for async inference configuration.

@@ -45,6 +46,9 @@ def __init__(
             kms_key_id (str): Optional. The Amazon Web Services Key Management Service
                 (Amazon Web Services KMS) key that Amazon SageMaker uses to encrypt the
                 asynchronous inference output in Amazon S3. (Default: None)
+            failure_path (str): Optional. The Amazon S3 location that endpoints upload model
+                responses for failed requests. If no value is provided, Amazon SageMaker will
+                use default Amazon S3 Async Inference failure path. (Default: None)
             notification_config (dict): Optional. Specifies the configuration for notifications
                 of inference results for asynchronous inference. Only one notification is generated
                 per invocation request (Default: None):
@@ -54,17 +58,24 @@ def __init__(
             * error_topic (str): Amazon SNS topic to post a notification to when inference
                 fails. If no topic is provided, no notification is sent on failure.
                 The key in notification_config is 'ErrorTopic'.
+            * include_inference_response_in (list): Optional. When provided the inference
+                response will be included in the notification topics. If not provided,
+                a notification will still be generated on success/error, but will not
+                contain the inference response.
+                Valid options are SUCCESS_NOTIFICATION_TOPIC, ERROR_NOTIFICATION_TOPIC
         """
         self.output_path = output_path
         self.max_concurrent_invocations_per_instance = max_concurrent_invocations_per_instance
         self.kms_key_id = kms_key_id
         self.notification_config = notification_config
+        self.failure_path = failure_path

     def _to_request_dict(self):
         """Generates a request dictionary using the parameters provided to the class."""
         request_dict = {
             "OutputConfig": {
                 "S3OutputPath": self.output_path,
+                "S3FailurePath": self.failure_path,
             },
         }
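
For context, the hunk above means the request dictionary built by ``_to_request_dict`` now carries both S3 locations under ``OutputConfig``. A rough sketch of the round trip; the keys outside ``OutputConfig`` are assumptions about fields not shown in this hunk:

    from sagemaker.async_inference import AsyncInferenceConfig

    config = AsyncInferenceConfig(
        output_path="s3://my-bucket/async-outputs",
        failure_path="s3://my-bucket/async-failures",
        max_concurrent_invocations_per_instance=4,
    )

    # Expected shape, per the hunk above (other keys are assumptions):
    # {
    #     "OutputConfig": {
    #         "S3OutputPath": "s3://my-bucket/async-outputs",
    #         "S3FailurePath": "s3://my-bucket/async-failures",
    #     },
    #     ...
    # }
    request_dict = config._to_request_dict()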

src/sagemaker/async_inference/async_inference_response.py

Lines changed: 30 additions & 16 deletions
@@ -17,7 +17,11 @@
 from botocore.exceptions import ClientError
 from sagemaker.s3 import parse_s3_url
 from sagemaker.async_inference import WaiterConfig
-from sagemaker.exceptions import ObjectNotExistedError, UnexpectedClientError
+from sagemaker.exceptions import (
+    ObjectNotExistedError,
+    UnexpectedClientError,
+    AsyncInferenceModelError,
+)


 class AsyncInferenceResponse(object):
@@ -32,6 +36,7 @@ def __init__(
         self,
         predictor_async,
         output_path,
+        failure_path,
     ):
         """Initialize an AsyncInferenceResponse object.

@@ -43,10 +48,13 @@ def __init__(
                 that return this response.
             output_path (str): The Amazon S3 location that endpoints upload inference responses
                 to.
+            failure_path (str): The Amazon S3 location that endpoints upload model errors
+                for failed requests.
         """
         self.predictor_async = predictor_async
         self.output_path = output_path
         self._result = None
+        self.failure_path = failure_path

     def get_result(
         self,
@@ -71,28 +79,34 @@ def get_result(

         if self._result is None:
             if waiter_config is None:
-                self._result = self._get_result_from_s3(self.output_path)
+                self._result = self._get_result_from_s3(self.output_path, self.failure_path)
             else:
                 self._result = self.predictor_async._wait_for_output(
-                    self.output_path, waiter_config
+                    self.output_path, self.failure_path, waiter_config
                 )
         return self._result

-    def _get_result_from_s3(
-        self,
-        output_path,
-    ):
+    def _get_result_from_s3(self, output_path, failure_path):
         """Get inference result from the output Amazon S3 path"""
         bucket, key = parse_s3_url(output_path)
         try:
             response = self.predictor_async.s3_client.get_object(Bucket=bucket, Key=key)
             return self.predictor_async.predictor._handle_response(response)
-        except ClientError as ex:
-            if ex.response["Error"]["Code"] == "NoSuchKey":
-                raise ObjectNotExistedError(
-                    message="Inference could still be running",
-                    output_path=output_path,
-                )
-            raise UnexpectedClientError(
-                message=ex.response["Error"]["Message"],
-            )
+        except ClientError as e:
+            if e.response["Error"]["Code"] == "NoSuchKey":
+                try:
+                    failure_bucket, failure_key = parse_s3_url(failure_path)
+                    failure_response = self.predictor_async.s3_client.get_object(
+                        Bucket=failure_bucket, Key=failure_key
+                    )
+                    failure_response = self.predictor_async.predictor._handle_response(
+                        failure_response
+                    )
+                    raise AsyncInferenceModelError(message=failure_response)
+                except ClientError as ex:
+                    if ex.response["Error"]["Code"] == "NoSuchKey":
+                        raise ObjectNotExistedError(
+                            message="Inference could still be running", output_path=output_path
+                        )
+                    raise UnexpectedClientError(message=ex.response["Error"]["Message"])
+            raise UnexpectedClientError(message=e.response["Error"]["Message"])
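
From the caller's perspective, the effect of this change is that a failed asynchronous request now surfaces as ``AsyncInferenceModelError`` (read back from the failure path) instead of looking identical to a result that is still pending. A usage sketch, assuming ``predictor_async`` is an existing ``AsyncPredictor`` and ``payload`` is a valid input for the endpoint:

    from sagemaker.exceptions import AsyncInferenceModelError, ObjectNotExistedError

    # Sketch only: predictor_async and payload are assumed to exist already.
    response = predictor_async.predict_async(data=payload)

    try:
        result = response.get_result()
    except AsyncInferenceModelError as err:
        # New in this commit: the object uploaded to S3FailurePath is read back
        # and raised as a model error.
        print("Inference failed:", err)
    except ObjectNotExistedError:
        # Neither the output nor the failure object exists yet;
        # the inference could still be running.
        print("Result not ready yet")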
