
Commit 8095031

Merge branch 'dev' of https://github.com/aws/sagemaker-python-sdk into smddp-1.4.0-doc
1 parent ee0757d commit 8095031

6 files changed: +80, -40 lines changed


.readthedocs.yml (+1, -1)

@@ -5,7 +5,7 @@
 version: 2

 python:
-  version: 3.6
+  version: 3.9
   install:
     - method: pip
       path: .

doc/api/training/sdp_versions/latest/smd_data_parallel_tensorflow.rst (+37, -18)
@@ -245,16 +245,25 @@ TensorFlow API

 .. function:: smdistributed.dataparallel.tensorflow.allreduce(tensor, param_index, num_params, compression=Compression.none, op=ReduceOp.AVERAGE)

-   Performs an all-reduce operation on a tensor (``tf.Tensor``).
+   Performs an ``allreduce`` operation on a tensor (``tf.Tensor``).
+
+   This is the ``smdistributed.dataparallel`` package's AllReduce API for TensorFlow,
+   used to allreduce gradient tensors. By default, ``smdistributed.dataparallel``
+   allreduce averages the gradient tensors across participating workers.
+
+   .. note::
+
+      :class:`smdistributed.dataparallel.tensorflow.allreduce()` should
+      only be used to allreduce gradient tensors.
+      For other (non-gradient) tensors, you must use
+      :class:`smdistributed.dataparallel.tensorflow.oob_allreduce()`.
+      If you use :class:`smdistributed.dataparallel.tensorflow.allreduce()`
+      for non-gradient tensors,
+      the distributed training job might stall or stop.

-   ``smdistributed.dataparallel`` AllReduce API can be used for all
-   reducing gradient tensors or any other tensors. By
-   default, ``smdistributed.dataparallel`` AllReduce averages the
-   tensors across the participating workers.
-
   **Inputs:**

-   - ``tensor (tf.Tensor)(required)``: The tensor to be all-reduced. The shape of the input must be identical across all ranks.
+   - ``tensor (tf.Tensor)(required)``: The tensor to be allreduced. The shape of the input must be identical across all ranks.
    - ``param_index (int)(required):`` 0 if you are reducing a single tensor. Index of the tensor if you are reducing a list of tensors.
    - ``num_params (int)(required):`` len(tensor).
    - ``compression (smdistributed.dataparallel.tensorflow.Compression)(optional)``: Compression algorithm used to reduce the amount of data sent and received by each worker node. Defaults to not using compression.
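To make the ``param_index`` and ``num_params`` contract concrete, here is a minimal sketch (not part of this commit) of allreducing a list of gradient tensors directly; ``grads`` is assumed to be the list returned by ``tape.gradient``:

    import smdistributed.dataparallel.tensorflow as sdp

    # Average each gradient tensor across workers. param_index is the position
    # of the tensor in the list; num_params is the length of the list.
    averaged_grads = [
        sdp.allreduce(grad, param_index=i, num_params=len(grads))
        for i, grad in enumerate(grads)
    ]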
@@ -308,9 +317,9 @@ TensorFlow API

 .. function:: smdistributed.dataparallel.tensorflow.oob_allreduce(tensor, compression=Compression.none, op=ReduceOp.AVERAGE)

-   OutOfBand (oob) AllReduce is simplified AllReduce function for use cases
+   Out-of-band (oob) AllReduce is a simplified AllReduce function for use cases
    such as calculating total loss across all the GPUs in the training.
-   oob_allreduce average the tensors, as reduction operation, across the
+   ``oob_allreduce`` averages the tensors, as the reduction operation, across the
    worker nodes.

   **Inputs:**
@@ -328,15 +337,25 @@ TensorFlow API

    - ``None``

-   .. rubric:: Notes
-
-   ``smdistributed.dataparallel.tensorflow.oob_allreduce``, in most
-   cases, is ~2x slower
-   than ``smdistributed.dataparallel.tensorflow.allreduce``  so it is not
-   recommended to be used for performing gradient reduction during the
-   training
-   process. ``smdistributed.dataparallel.tensorflow.oob_allreduce`` internally
-   uses NCCL AllReduce with ``ncclSum`` as the reduction operation.
+   .. note::
+
+      In most cases, the :class:`smdistributed.dataparallel.tensorflow.oob_allreduce()`
+      function is ~2x slower
+      than :class:`smdistributed.dataparallel.tensorflow.allreduce()`. It is not
+      recommended to use the :class:`smdistributed.dataparallel.tensorflow.oob_allreduce()`
+      function for performing gradient
+      reduction during the training process.
+      ``smdistributed.dataparallel.tensorflow.oob_allreduce`` internally
+      uses NCCL AllReduce with ``ncclSum`` as the reduction operation.
+
+   .. note::
+
+      :class:`smdistributed.dataparallel.tensorflow.oob_allreduce()` should
+      only be used to allreduce non-gradient tensors.
+      If you use :class:`smdistributed.dataparallel.tensorflow.allreduce()`
+      for non-gradient tensors,
+      the distributed training job might stall or stop.
+      To allreduce gradients, use :class:`smdistributed.dataparallel.tensorflow.allreduce()`.


 .. function:: smdistributed.dataparallel.tensorflow.overlap(tensor)
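In a TF2 training step the two collectives split cleanly: gradient tensors are allreduced (typically via ``DistributedGradientTape``, which calls ``allreduce`` internally), while a scalar such as the step loss goes through ``oob_allreduce``. A rough sketch under those assumptions; ``model``, ``loss_fn``, and ``opt`` are placeholders defined elsewhere, and ``sdp.init()`` is assumed to have been called:

    import tensorflow as tf
    import smdistributed.dataparallel.tensorflow as sdp

    @tf.function
    def training_step(images, labels):
        with tf.GradientTape() as tape:
            probs = model(images, training=True)
            loss_value = loss_fn(labels, probs)
        # Gradient tensors: wrap the tape so gradients are allreduced across workers.
        tape = sdp.DistributedGradientTape(tape)
        grads = tape.gradient(loss_value, model.trainable_variables)
        opt.apply_gradients(zip(grads, model.trainable_variables))
        # Non-gradient tensor: average the scalar loss with oob_allreduce.
        return sdp.oob_allreduce(loss_value)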

doc/conf.py (+1, -1)

@@ -10,7 +10,7 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
-"""Placeholder docstring"""
+"""Configuration for generating readthedocs docstrings."""
 from __future__ import absolute_import

 import pkg_resources

src/sagemaker/huggingface/estimator.py (+7, -6)

@@ -50,14 +50,15 @@ def __init__(
         compiler_config=None,
         **kwargs,
     ):
-        """This ``Estimator`` executes a HuggingFace script in a managed execution environment.
+        """This estimator runs a Hugging Face training script in a SageMaker training environment.

-        The managed HuggingFace environment is an Amazon-built Docker container that executes
-        functions defined in the supplied ``entry_point`` Python script within a SageMaker
-        Training Job.
+        The estimator initiates the SageMaker-managed Hugging Face environment
+        by using the pre-built Hugging Face Docker container and runs
+        the Hugging Face training script that the user provides through
+        the ``entry_point`` argument.

-        Training is started by calling
-        :meth:`~sagemaker.amazon.estimator.Framework.fit` on this Estimator.
+        After configuring the estimator class, use the
+        :meth:`~sagemaker.amazon.estimator.Framework.fit()` method to start a training job.

         Args:
             py_version (str): Python version you want to use for executing your model training
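The flow the new docstring describes, as a hedged sketch (the script name, role ARN, S3 URI, and framework versions below are placeholders, not values from this commit):

    from sagemaker.huggingface import HuggingFace

    huggingface_estimator = HuggingFace(
        entry_point="train.py",          # placeholder training script
        instance_type="ml.p3.2xlarge",
        instance_count=1,
        role="arn:aws:iam::111122223333:role/SageMakerRole",  # placeholder
        transformers_version="4.6",      # placeholder versions
        pytorch_version="1.7",
        py_version="py36",
    )

    # Start the training job on the given channel (placeholder S3 URI).
    huggingface_estimator.fit({"train": "s3://my-bucket/train"})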

src/sagemaker/model.py (+6, -3)

@@ -466,7 +466,7 @@ def _upload_code(self, key_prefix: str, repack: bool = False) -> None:
         )

     def _script_mode_env_vars(self):
-        """Placeholder docstring"""
+        """Returns a mapping of environment variables for script mode execution"""
         script_name = None
         dir_name = None
         if self.uploaded_code:

@@ -478,8 +478,11 @@ def _script_mode_env_vars(self):
         elif self.entry_point is not None:
             script_name = self.entry_point
             if self.source_dir is not None:
-                dir_name = "file://" + self.source_dir
-
+                dir_name = (
+                    self.source_dir
+                    if self.source_dir.startswith("s3://")
+                    else "file://" + self.source_dir
+                )
         return {
             SCRIPT_PARAM_NAME.upper(): script_name or str(),
             DIR_PARAM_NAME.upper(): dir_name or str(),
src/sagemaker/training_compiler/config.py (+28, -11)

@@ -18,11 +18,7 @@


 class TrainingCompilerConfig(object):
-    """The configuration class for accelerating SageMaker training jobs through compilation.
-
-    SageMaker Training Compiler speeds up training by optimizing the model execution graph.
-
-    """
+    """The SageMaker Training Compiler configuration class."""

     DEBUG_PATH = "/opt/ml/output/data/compiler/"
     SUPPORTED_INSTANCE_CLASS_PREFIXES = ["p3", "g4dn", "p4"]

@@ -37,9 +33,15 @@ def __init__(
     ):
         """This class initializes a ``TrainingCompilerConfig`` instance.

-        Pass the output of it to the ``compiler_config``
+        `Amazon SageMaker Training Compiler
+        <https://docs.aws.amazon.com/sagemaker/latest/dg/training-compiler.html>`_
+        is a feature of SageMaker Training
+        and speeds up training jobs by optimizing model execution graphs.
+
+        You can compile Hugging Face models
+        by passing the object of this configuration class to the ``compiler_config``
         parameter of the :class:`~sagemaker.huggingface.HuggingFace`
-        class.
+        estimator.

         Args:
             enabled (bool): Optional. Switch to enable SageMaker Training Compiler.

@@ -48,13 +50,28 @@
                 This comes with a potential performance slowdown.
                 The default is ``False``.

-        **Example**: The following example shows the basic ``compiler_config``
-        parameter configuration, enabling compilation with default parameter values.
+        **Example**: The following code shows the basic usage of the
+        :class:`sagemaker.huggingface.TrainingCompilerConfig()` class
+        to run a HuggingFace training job with the compiler.

         .. code-block:: python

-            from sagemaker.huggingface import TrainingCompilerConfig
-            compiler_config = TrainingCompilerConfig()
+            from sagemaker.huggingface import HuggingFace, TrainingCompilerConfig
+
+            huggingface_estimator=HuggingFace(
+                ...
+                compiler_config=TrainingCompilerConfig()
+            )
+
+        .. seealso::
+
+            For more information about how to enable SageMaker Training Compiler
+            for various training settings such as using TensorFlow-based models,
+            PyTorch-based models, and distributed training,
+            see `Enable SageMaker Training Compiler
+            <https://docs.aws.amazon.com/sagemaker/latest/dg/training-compiler-enable.html>`_
+            in the `Amazon SageMaker Training Compiler developer guide
+            <https://docs.aws.amazon.com/sagemaker/latest/dg/training-compiler.html>`_.

         """