@@ -6,39 +6,36 @@ SageMaker's distributed data parallel library extends SageMaker’s training
capabilities on deep learning models with near-linear scaling efficiency,
achieving fast time-to-train with minimal code changes.

- - optimizes your training job for AWS network infrastructure and EC2 instance topology.
- - takes advantage of gradient update to communicate between nodes with a custom AllReduce algorithm.
-
When training a model on a large amount of data, machine learning practitioners
will often turn to distributed training to reduce the time to train.
In some cases, where time is of the essence,
the business requirement is to finish training as quickly as possible or at
least within a constrained time period.
Then, distributed training is scaled to use a cluster of multiple nodes,
meaning not just multiple GPUs in a computing instance, but multiple instances
- with multiple GPUs. As the cluster size increases, so does the significant drop
- in performance. This drop in performance is primarily caused the communications
- overhead between nodes in a cluster.
+ with multiple GPUs. However, as the cluster size increases, you may see a significant drop
+ in performance due to communications overhead between nodes in a cluster.

- .. important::
-    The distributed data parallel library only supports training jobs using CUDA 11. When you define a PyTorch or TensorFlow
-    ``Estimator`` with ``dataparallel`` parameter ``enabled`` set to ``True``,
-    it uses CUDA 11. When you extend or customize your own training image
-    you must use a CUDA 11 base image. See
-    `SageMaker Python SDK's distributed data parallel library APIs
-    <https://docs.aws.amazon.com/sagemaker/latest/dg/data-parallel-use-api.html#data-parallel-use-python-skd-api>`__
-    for more information.
+ SageMaker's distributed data parallel library addresses communications overhead in two ways:

- .. rubric:: Customize your training script
+ 1. The library performs AllReduce, a key operation during distributed training that is responsible for a
+    large portion of communication overhead.
+ 2. The library performs optimized node-to-node communication by fully utilizing AWS’s network
+    infrastructure and Amazon EC2 instance topology.

- To customize your own training script, you will need the following:
+ To learn more about the core features of this library, see
+ `Introduction to SageMaker's Distributed Data Parallel Library
+ <https://docs.aws.amazon.com/sagemaker/latest/dg/data-parallel-intro.html>`_
+ in the SageMaker Developer Guide.

- .. raw:: html
+ Use with the SageMaker Python SDK
+ =================================

- <div data-section-style="5" style="">
+ To use the SageMaker distributed data parallel library with the SageMaker Python SDK, you will need the following:

- - You must provide TensorFlow / PyTorch training scripts that are
-   adapted to use the distributed data parallel library.
+ - A TensorFlow or PyTorch training script that is
+   adapted to use the distributed data parallel library. The :ref:`sdp_api_docs` includes
+   framework-specific examples of training scripts that are adapted to use this library;
+   a brief sketch also follows this list.
- Your input data must be in an S3 bucket or in FSx in the AWS region
  that you will use to launch your training job. If you use the Jupyter
  notebooks provided, create a SageMaker notebook instance in the same
@@ -47,26 +44,63 @@ To customize your own training script, you will need the following:
  the `SageMaker Python SDK data
  inputs <https://sagemaker.readthedocs.io/en/stable/overview.html#use-file-systems-as-training-inputs>`__ documentation.
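+
+ The following is a minimal sketch of what an adapted PyTorch training script might look like, assuming
+ the ``smdistributed.dataparallel`` PyTorch modules described in the API documentation; ``Net`` and the
+ training loop are placeholders for your own code:
+
+ .. code:: python
+
+    # Minimal sketch: initialize the library, pin a GPU, and wrap the model.
+    import torch
+    import smdistributed.dataparallel.torch.distributed as dist
+    from smdistributed.dataparallel.torch.parallel.distributed import DistributedDataParallel as DDP
+
+    dist.init_process_group()            # start the SageMaker data parallel backend
+    local_rank = dist.get_local_rank()   # GPU assigned to this process on the instance
+    torch.cuda.set_device(local_rank)
+
+    model = DDP(Net().to(local_rank))    # ``Net`` is a placeholder for your model class
+    # Shard your dataset using dist.get_rank() and dist.get_world_size(), then run your training loop.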

- .. raw:: html
+ When you define
+ a PyTorch or TensorFlow ``Estimator`` using the SageMaker Python SDK,
+ you must select ``dataparallel`` as your ``distribution`` strategy:
+
+ .. code::
+
+    distribution = { "smdistributed": { "dataparallel": { "enabled": True } } }
+
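+ For example, a PyTorch ``Estimator`` configured this way might look like the following sketch; the
+ entry point, IAM role, instance settings, framework versions, and S3 path are placeholder values to
+ replace with your own:
+
+ .. code:: python
+
+    from sagemaker.pytorch import PyTorch
+
+    # Sketch of a training job with the library enabled; all values below are placeholders.
+    estimator = PyTorch(
+        entry_point="train.py",          # your adapted training script
+        role="arn:aws:iam::123456789012:role/SageMakerRole",
+        framework_version="1.6.0",
+        py_version="py36",
+        instance_count=2,                # multiple instances, each with multiple GPUs
+        instance_type="ml.p3.16xlarge",
+        distribution={"smdistributed": {"dataparallel": {"enabled": True}}},
+    )
+
+    estimator.fit("s3://your-bucket/training-data")  # input data in S3 (or FSx) in the same region
+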
+ We recommend you use one of the example notebooks as your template to launch a training job. When
+ you use an example notebook, you’ll need to swap the training script that came with the notebook
+ for your own and modify any input functions as necessary. For instructions on how to get started using a
+ Jupyter Notebook example, see `Distributed Training Jupyter Notebook Examples
+ <https://docs.aws.amazon.com/sagemaker/latest/dg/distributed-training-notebook-examples.html>`_.
+
+ Once you have launched a training job, you can monitor it using CloudWatch. To learn more, see
+ `Monitor and Analyze Training Jobs Using Metrics
+ <https://docs.aws.amazon.com/sagemaker/latest/dg/training-metrics.html>`_.
+
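+ In addition to the CloudWatch console, a minimal sketch for pulling these metrics into a notebook with
+ the SageMaker Python SDK is shown below; the job name and metric name are placeholders:
+
+ .. code:: python
+
+    from sagemaker.analytics import TrainingJobAnalytics
+
+    # Placeholder job and metric names; use the names your training job actually emits.
+    analytics = TrainingJobAnalytics(
+        training_job_name="your-training-job-name",
+        metric_names=["train:loss"],
+    )
+    print(analytics.dataframe())  # metrics as a pandas DataFrame
+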

- </div>
+ After you train a model, you can deploy it to an endpoint for inference by
+ following one of the `example notebooks for deploying a model
+ <https://sagemaker-examples.readthedocs.io/en/latest/inference/index.html>`_.
+ For more information, see `Deploy Models for Inference
+ <https://docs.aws.amazon.com/sagemaker/latest/dg/deploy-model.html>`_.
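+
+ As a minimal sketch, deploying the trained estimator with the SageMaker Python SDK might look like the
+ following; the instance type and payload are placeholders, and PyTorch deployments may also require an
+ inference script:
+
+ .. code:: python
+
+    # Sketch: deploy the trained model to a real-time endpoint, run a prediction, then clean up.
+    predictor = estimator.deploy(
+        initial_instance_count=1,
+        instance_type="ml.g4dn.xlarge",       # placeholder inference instance type
+    )
+
+    result = predictor.predict(sample_input)  # ``sample_input`` is a placeholder payload
+    predictor.delete_endpoint()               # delete the endpoint when you are done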

- Use the API guides for each framework to see
- examples of training scripts that can be used to convert your training scripts.
- Then use one of the example notebooks as your template to launch a training job.
- You’ll need to swap your training script with the one that came with the
- notebook and modify any input functions as necessary.
- Once you have launched a training job, you can monitor it using CloudWatch.
+ .. _sdp_api_docs:

- Then you can see how to deploy your trained model to an endpoint by
- following one of the example notebooks for deploying a model. Finally,
- you can follow an example notebook to test inference on your deployed
- model.
+ API Documentation
+ =================

+ This section contains the SageMaker distributed data parallel API documentation. If you are a
+ new user of this library, we recommend that you use this guide alongside
+ `SageMaker's Distributed Data Parallel Library
+ <https://docs.aws.amazon.com/sagemaker/latest/dg/data-parallel.html>`_.
+
+ Select a version to see the API documentation for that version.

.. toctree::
-    :maxdepth: 2
+    :maxdepth: 1
+
+    sdp_versions/v1_0_0.rst
+
+ .. important::
+    The distributed data parallel library only supports training jobs using CUDA 11. When you define a PyTorch or TensorFlow
+    ``Estimator`` with ``dataparallel`` parameter ``enabled`` set to ``True``,
+    it uses CUDA 11. When you extend or customize your own training image,
+    you must use a CUDA 11 base image. See
+    `SageMaker Python SDK's distributed data parallel library APIs
+    <https://docs.aws.amazon.com/sagemaker/latest/dg/data-parallel-use-api.html#data-parallel-use-python-skd-api>`_
+    for more information.
+
+
+ Release Notes
+ =============
+
+ New features, bug fixes, and improvements are regularly made to the SageMaker distributed data parallel library.
+
-    sdp_versions/smd_data_parallel_pytorch
-    sdp_versions/smd_data_parallel_tensorflow
+ To see the latest changes made to the library, refer to the library
+ `Release Notes
+ <https://github.com/aws/sagemaker-python-sdk/blob/master/doc/api/training/smd_data_parallel_release_notes/>`_.