aws
diff --git a/‎CHANGELOG.md
+32 b/‎CHANGELOG.md
+32
diff --git a/‎VERSION
+1-1 b/‎VERSION
+1-1
diff --git a/‎doc/api/training/sdp_versions/latest.rst
+2-2 b/‎doc/api/training/sdp_versions/latest.rst
+2-2
diff --git a/‎doc/api/training/sdp_versions/v1.2.x/smd_data_parallel_pytorch.rst
+1-1 b/‎doc/api/training/sdp_versions/v1.2.x/smd_data_parallel_pytorch.rst
+1-1
diff --git a/‎doc/api/training/smd_data_parallel_release_notes/smd_data_parallel_change_log.rst
+38-7 b/‎doc/api/training/smd_data_parallel_release_notes/smd_data_parallel_change_log.rst
+38-7
diff --git a/‎doc/api/training/smd_model_parallel_release_notes/smd_model_parallel_change_log.rst
+34-7 b/‎doc/api/training/smd_model_parallel_release_notes/smd_model_parallel_change_log.rst
+34-7
diff --git a/‎doc/api/training/smp_versions/latest.rst
+2-2 b/‎doc/api/training/smp_versions/latest.rst
+2-2
diff --git a/‎src/sagemaker/fw_utils.py
+6-2 b/‎src/sagemaker/fw_utils.py
+6-2
diff --git a/‎src/sagemaker/lambda_helper.py
+26-22 b/‎src/sagemaker/lambda_helper.py
+26-22
diff --git a/‎src/sagemaker/workflow/_utils.py
+12-7 b/‎src/sagemaker/workflow/_utils.py
+12-7
diff --git a/‎src/sagemaker/workflow/callback_step.py
+6-4 b/‎src/sagemaker/workflow/callback_step.py
+6-4
@@ -1,5 +1,37 @@
 # Changelog
 
+## v2.91.1 (2022-05-19)
+
+### Bug Fixes and Other Changes
+
+ * Revert Prevent passing PipelineVariable object into image_uris.retrieve
+
+## v2.91.0 (2022-05-19)
+
+### Features
+
+ * Support Properties for StepCollection
+
+### Bug Fixes and Other Changes
+
+ * Prevent passing PipelineVariable object into image_uris.retrieve
+ * support image_uri being property ref for model
+ * ResourceConflictException from AWS Lambda on pipeline upsert
+
+### Documentation Changes
+
+ * release notes for SMDDP 1.4.1 and SMDMP 1.9.0
+
+## v2.90.0 (2022-05-16)
+
+### Features
+
+ * Add ModelStep for SageMaker Model Building Pipeline
+
+### Bug Fixes and Other Changes
+
+ * update setup.py to add minimum python requirement of 3.6
+
 ## v2.89.0 (2022-05-11)
 
 ### Features
 
@@ -1 +1 @@
-2.89.1.dev0
+2.91.2.dev0
@@ -26,8 +26,8 @@ depending on the version of the library you use.
    <https://docs.aws.amazon.com/sagemaker/latest/dg/data-parallel-use-api.html#data-parallel-use-python-skd-api>`_
    for more information.
 
-Version 1.4.0 (Latest)
-======================
+Version 1.4.0, 1.4.1 (Latest)
+=============================
 
 .. toctree::
    :maxdepth: 1
 
@@ -266,7 +266,7 @@ PyTorch API
       .. note::
 
         The ``no_sync()`` context manager is available from smdistributed-dataparallel v1.2.2.
-        To find the release note, see :ref:`sdp_1.2.2_release_note`.
+        To find the release note, see :ref:`sdp_release_note`.
 
       **Example:**
 
 
@@ -1,4 +1,4 @@
-.. _sdp_1.2.2_release_note:
+.. _sdp_release_note:
 
 #############
 Release Notes
@@ -7,9 +7,45 @@ Release Notes
 New features, bug fixes, and improvements are regularly made to the SageMaker
 distributed data parallel library.
 
-SageMaker Distributed Data Parallel 1.4.0 Release Notes
+SageMaker Distributed Data Parallel 1.4.1 Release Notes
 =======================================================
 
+*Date: May. 3. 2022*
+
+**Currency Updates**
+
+* Added support for PyTorch 1.11.0
+
+**Known Issues**
+
+* The library currently does not support the PyTorch sub-process groups API (`torch.distributed.new_group <https://pytorch.org/docs/stable/distributed.html#torch.distributed.new_group>`_).
+
+
+**Migration to AWS Deep Learning Containers**
+
+This version passed benchmark testing and is migrated to the following AWS Deep Learning Containers (DLC):
+
+- PyTorch 1.11.0 DLC
+
+  .. code::
+
+    763104351884.dkr.ecr.<region>.amazonaws.com/pytorch-training:1.11.0-gpu-py38-cu113-ubuntu20.04-sagemaker
+
+The binary file of this version of the library for custom container users:
+
+  .. code::
+
+    https://smdataparallel.s3.amazonaws.com/binary/pytorch/1.11.0/cu113/2022-04-14/smdistributed_dataparallel-1.4.1-cp38-cp38-linux_x86_64.whl
+
+
+----
+
+Release History
+===============
+
+SageMaker Distributed Data Parallel 1.4.0 Release Notes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
 *Date: Feb. 24. 2022*
 
 **New Features**
@@ -72,11 +108,6 @@ This version passed benchmark testing and is migrated to the following AWS Deep
     763104351884.dkr.ecr.<region>.amazonaws.com/pytorch-training:1.10.2-gpu-py38-cu113-ubuntu20.04-sagemaker
 
 
-----
-
-Release History
-===============
-
 SageMaker Distributed Data Parallel 1.2.2 Release Notes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 
@@ -5,9 +5,41 @@ Release Notes
 New features, bug fixes, and improvements are regularly made to the SageMaker
 distributed model parallel library.
 
-SageMaker Distributed Model Parallel 1.8.1 Release Notes
+SageMaker Distributed Model Parallel 1.9.0 Release Notes
 ========================================================
 
+*Date: May. 3. 2022*
+
+**Currency Updates**
+
+* Added support for PyTorch 1.11.0
+
+**Migration to AWS Deep Learning Containers**
+
+This version passed benchmark testing and is migrated to the following AWS Deep Learning Containers (DLC):
+
+- PyTorch 1.11.0 DLC
+
+  .. code::
+
+    763104351884.dkr.ecr.<region>.amazonaws.com/pytorch-training:1.11.0-gpu-py38-cu113-ubuntu20.04-sagemaker
+
+The binary file of this version of the library for custom container users:
+
+  .. code::
+
+    https://sagemaker-distributed-model-parallel.s3.us-west-2.amazonaws.com/pytorch-1.11.0/build-artifacts/2022-04-20-17-05/smdistributed_modelparallel-1.9.0-cp38-cp38-linux_x86_64.whl
+
+
+
+----
+
+Release History
+===============
+
+SageMaker Distributed Model Parallel 1.8.1 Release Notes
+--------------------------------------------------------
+
 *Date: April. 23. 2022*
 
 **New Features**
@@ -59,11 +91,6 @@ This version passed benchmark testing and is migrated to the following AWS Deep
       https://sagemaker-distributed-model-parallel.s3.us-west-2.amazonaws.com/pytorch-1.10.0/build-artifacts/2022-04-14-03-58/smdistributed_modelparallel-1.8.1-cp38-cp38-linux_x86_64.whl
 
 
-----
-
-Release History
-===============
-
 SageMaker Distributed Model Parallel 1.8.0 Release Notes
 --------------------------------------------------------
 
@@ -91,7 +118,7 @@ This version passed benchmark testing and is migrated to the following AWS Deep
       763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:1.10.2-transformers4.17.0-gpu-py38-cu113-ubuntu20.04
 
 
-* The binary file of this version of the library for custom container users
+The binary file of this version of the library for custom container users:
 
     .. code::
 
 
@@ -10,8 +10,8 @@ depending on which version of the library you need to use.
 To use the library, reference the
 **Common API** documentation alongside the framework specific API documentation.
 
-Version 1.7.0, 1.8.0, 1.8.1 (Latest)
-====================================
+Version 1.7.0, 1.8.0, 1.8.1, 1.9.0 (Latest)
+===========================================
 
 To use the library, reference the Common API documentation alongside the framework specific API documentation.
 
 
@@ -16,6 +16,7 @@
 import logging
 import os
 import re
+import time
 import shutil
 import tempfile
 from collections import namedtuple
@@ -24,6 +25,7 @@
 import sagemaker.image_uris
 from sagemaker.session_settings import SessionSettings
 import sagemaker.utils
+from sagemaker.workflow import is_pipeline_variable
 
 from sagemaker.deprecations import renamed_warning
 
@@ -395,8 +397,10 @@ def model_code_key_prefix(code_location_key_prefix, model_name, image):
     Returns:
         str: the key prefix to be used in uploading code
     """
-    training_job_name = sagemaker.utils.name_from_image(image)
-    return "/".join(filter(None, [code_location_key_prefix, model_name or training_job_name]))
+    name_from_image = f"/model_code/{int(time.time())}"
+    if not is_pipeline_variable(image):
+        name_from_image = sagemaker.utils.name_from_image(image)
+    return "/".join(filter(None, [code_location_key_prefix, model_name or name_from_image]))
 
 
 def warn_if_parameter_server_with_multi_gpu(training_instance_type, distribution):
 
@@ -15,6 +15,7 @@
 
 from io import BytesIO
 import zipfile
+import time
 from botocore.exceptions import ClientError
 from sagemaker.session import Session
 
@@ -134,32 +135,35 @@ def update(self):
         Returns: boto3 response from Lambda's update_function method.
         """
         lambda_client = _get_lambda_client(self.session)
-
-        if self.script is not None:
-            try:
-                response = lambda_client.update_function_code(
-                    FunctionName=self.function_name, ZipFile=_zip_lambda_code(self.script)
-                )
-                return response
-            except ClientError as e:
-                error = e.response["Error"]
-                raise ValueError(error)
-        else:
+        retry_attempts = 7
+        for i in range(retry_attempts):
             try:
-                response = lambda_client.update_function_code(
-                    FunctionName=(self.function_name or self.function_arn),
-                    S3Bucket=self.s3_bucket,
-                    S3Key=_upload_to_s3(
-                        s3_client=_get_s3_client(self.session),
-                        function_name=self.function_name,
-                        zipped_code_dir=self.zipped_code_dir,
-                        s3_bucket=self.s3_bucket,
-                    ),
-                )
+                if self.script is not None:
+                    response = lambda_client.update_function_code(
+                        FunctionName=self.function_name, ZipFile=_zip_lambda_code(self.script)
+                    )
+                else:
+                    response = lambda_client.update_function_code(
+                        FunctionName=(self.function_name or self.function_arn),
+                        S3Bucket=self.s3_bucket,
+                        S3Key=_upload_to_s3(
+                            s3_client=_get_s3_client(self.session),
+                            function_name=self.function_name,
+                            zipped_code_dir=self.zipped_code_dir,
+                            s3_bucket=self.s3_bucket,
+                        ),
+                    )
                 return response
             except ClientError as e:
                 error = e.response["Error"]
-                raise ValueError(error)
+                code = error["Code"]
+                if code == "ResourceConflictException":
+                    if i == retry_attempts - 1:
+                        raise ValueError(error)
+                    # max wait time = 2**0 + 2**1 + .. + 2**5 = 63 seconds
+                    # (the final attempt raises instead of sleeping)
+                    time.sleep(2**i)
+                else:
+                    raise ValueError(error)
 
     def upsert(self):
         """Method to create a lambda function or update it if it already exists
 
@@ -17,7 +17,7 @@
 import shutil
 import tarfile
 import tempfile
-from typing import List, Union, Optional
+from typing import List, Union, Optional, TYPE_CHECKING
 from sagemaker import image_uris
 from sagemaker.inputs import TrainingInput
 from sagemaker.estimator import EstimatorBase
@@ -34,6 +34,9 @@
 from sagemaker.utils import _save_model, download_file_from_url
 from sagemaker.workflow.retry import RetryPolicy
 
+if TYPE_CHECKING:
+    from sagemaker.workflow.step_collections import StepCollection
+
 FRAMEWORK_VERSION = "0.23-1"
 INSTANCE_TYPE = "ml.m5.large"
 REPACK_SCRIPT = "_repack_model.py"
@@ -57,7 +60,7 @@ def __init__(
         description: str = None,
         source_dir: str = None,
         dependencies: List = None,
-        depends_on: Union[List[str], List[Step]] = None,
+        depends_on: Optional[List[Union[str, Step, "StepCollection"]]] = None,
         retry_policies: List[RetryPolicy] = None,
         subnets=None,
         security_group_ids=None,
@@ -124,8 +127,9 @@ def __init__(
                         >>>     |------ virtual-env
 
                     This is not supported with "local code" in Local Mode.
-            depends_on (List[str] or List[Step]): A list of step names or instances
-                    this step depends on (default: None).
+            depends_on (List[Union[str, Step, StepCollection]]): The list of `Step`/`StepCollection`
+                names or `Step` instances or `StepCollection` instances that the current `Step`
+                depends on (default: None).
             retry_policies (List[RetryPolicy]): The list of retry policies for the current step
                 (default: None).
             subnets (list[str]): List of subnet ids. If not specified, the re-packing
@@ -274,7 +278,7 @@ def __init__(
         compile_model_family=None,
         display_name: str = None,
         description=None,
-        depends_on: Optional[Union[List[str], List[Step]]] = None,
+        depends_on: Optional[List[Union[str, Step, "StepCollection"]]] = None,
         retry_policies: Optional[List[RetryPolicy]] = None,
         tags=None,
         container_def_list=None,
@@ -311,8 +315,9 @@ def __init__(
                 if specified, a compiled model will be used (default: None).
             display_name (str): The display name of this `_RegisterModelStep` step (default: None).
             description (str): Model Package description (default: None).
-            depends_on (List[str] or List[Step]): A list of step names or instances
-                this step depends on (default: None).
+            depends_on (List[Union[str, Step, StepCollection]]): The list of `Step`/`StepCollection`
+                names or `Step` instances or `StepCollection` instances that the current `Step`
+                depends on (default: None).
             retry_policies (List[RetryPolicy]): The list of retry policies for the current step
                 (default: None).
             tags (List[dict[str, str]]): A list of dictionaries containing key-value pairs used to
 
@@ -13,7 +13,7 @@
 """The step definitions for workflow."""
 from __future__ import absolute_import
 
-from typing import List, Dict, Union
+from typing import List, Dict, Union, Optional
 from enum import Enum
 
 import attr
@@ -27,6 +27,7 @@
 from sagemaker.workflow.entities import (
     DefaultEnumMeta,
 )
+from sagemaker.workflow.step_collections import StepCollection
 from sagemaker.workflow.steps import Step, StepTypeEnum, CacheConfig
 
 
@@ -86,7 +87,7 @@ def __init__(
         display_name: str = None,
         description: str = None,
         cache_config: CacheConfig = None,
-        depends_on: Union[List[str], List[Step]] = None,
+        depends_on: Optional[List[Union[str, Step, StepCollection]]] = None,
     ):
         """Constructs a CallbackStep.
 
@@ -99,8 +100,9 @@ def __init__(
             display_name (str): The display name of the callback step.
             description (str): The description of the callback step.
             cache_config (CacheConfig):  A `sagemaker.workflow.steps.CacheConfig` instance.
-            depends_on (List[str] or List[Step]): A list of step names or step instances
-                this `sagemaker.workflow.steps.CallbackStep` depends on
+            depends_on (List[Union[str, Step, StepCollection]]): A list of `Step`/`StepCollection`
+                names or `Step` instances or `StepCollection` instances that this `CallbackStep`
+                depends on.
         """
         super(CallbackStep, self).__init__(
             name, display_name, description, StepTypeEnum.CALLBACK, depends_on