
Commit 95e2ca9

Merge branch 'dev' into feat/custom-job-name-for-jumpstart
2 parents: b47b1d5 + 4325fcd

33 files changed: +3565 additions, -3106 deletions

.readthedocs.yml

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 version: 2

 python:
-  version: 3.6
+  version: 3.9
   install:
     - method: pip
       path: .

doc/api/training/sdp_versions/latest/smd_data_parallel_tensorflow.rst

Lines changed: 37 additions & 18 deletions
@@ -243,16 +243,25 @@ TensorFlow API


 .. function:: smdistributed.dataparallel.tensorflow.allreduce(tensor, param_index, num_params, compression=Compression.none, op=ReduceOp.AVERAGE)

-   Performs an all-reduce operation on a tensor (``tf.Tensor``).
+   Performs an ``allreduce`` operation on a tensor (``tf.Tensor``).
+
+   The ``smdistributed.dataparallel`` package's AllReduce API for TensorFlow is used to
+   allreduce gradient tensors. By default, ``smdistributed.dataparallel`` allreduce averages
+   the gradient tensors across participating workers.
+
+   .. note::
+
+      :class:`smdistributed.dataparallel.tensorflow.allreduce()` should
+      only be used to allreduce gradient tensors.
+      For other (non-gradient) tensors, you must use
+      :class:`smdistributed.dataparallel.tensorflow.oob_allreduce()`.
+      If you use :class:`smdistributed.dataparallel.tensorflow.allreduce()`
+      for non-gradient tensors,
+      the distributed training job might stall or stop.

-   ``smdistributed.dataparallel`` AllReduce API can be used for all
-   reducing gradient tensors or any other tensors. By
-   default, ``smdistributed.dataparallel`` AllReduce averages the
-   tensors across the participating workers.
-
    **Inputs:**

-   - ``tensor (tf.Tensor)(required)``: The tensor to be all-reduced. The shape of the input must be identical across all ranks.
+   - ``tensor (tf.Tensor)(required)``: The tensor to be allreduced. The shape of the input must be identical across all ranks.
    - ``param_index (int)(required):`` 0 if you are reducing a single tensor. Index of the tensor if you are reducing a list of tensors.
    - ``num_params (int)(required):`` len(tensor).
    - ``compression (smdistributed.dataparallel.tensorflow.Compression)(optional)``: Compression algorithm used to reduce the amount of data sent and received by each worker node. Defaults to not using compression.

@@ -306,9 +315,9 @@ TensorFlow API


 .. function:: smdistributed.dataparallel.tensorflow.oob_allreduce(tensor, compression=Compression.none, op=ReduceOp.AVERAGE)

-   OutOfBand (oob) AllReduce is simplified AllReduce function for use cases
+   Out-of-band (oob) AllReduce is a simplified AllReduce function for use cases
    such as calculating total loss across all the GPUs in the training.
-   oob_allreduce average the tensors, as reduction operation, across the
+   ``oob_allreduce`` averages the tensors, as a reduction operation, across the
    worker nodes.

    **Inputs:**

@@ -326,15 +335,25 @@ TensorFlow API

    - ``None``

-   .. rubric:: Notes
-
-   ``smdistributed.dataparallel.tensorflow.oob_allreduce``, in most
-   cases, is ~2x slower
-   than ``smdistributed.dataparallel.tensorflow.allreduce``  so it is not
-   recommended to be used for performing gradient reduction during the
-   training
-   process. ``smdistributed.dataparallel.tensorflow.oob_allreduce`` internally
-   uses NCCL AllReduce with ``ncclSum`` as the reduction operation.
+   .. note::
+
+      In most cases, the :class:`smdistributed.dataparallel.tensorflow.oob_allreduce()`
+      function is ~2x slower
+      than :class:`smdistributed.dataparallel.tensorflow.allreduce()`. It is not
+      recommended to use the :class:`smdistributed.dataparallel.tensorflow.oob_allreduce()`
+      function for performing gradient
+      reduction during the training process.
+      ``smdistributed.dataparallel.tensorflow.oob_allreduce`` internally
+      uses NCCL AllReduce with ``ncclSum`` as the reduction operation.
+
+   .. note::
+
+      :class:`smdistributed.dataparallel.tensorflow.oob_allreduce()` should
+      only be used to allreduce non-gradient tensors.
+      If you use :class:`smdistributed.dataparallel.tensorflow.allreduce()`
+      for non-gradient tensors,
+      the distributed training job might stall or stop.
+      To allreduce gradients, use :class:`smdistributed.dataparallel.tensorflow.allreduce()`.


 .. function:: smdistributed.dataparallel.tensorflow.overlap(tensor)
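The averaging semantics of the ``allreduce`` API above can be illustrated without a GPU cluster. The following is a minimal single-process sketch in plain Python (not the ``smdistributed`` API, which runs across distributed workers) of what an averaging allreduce computes:

```python
# Single-process sketch of averaging allreduce semantics.
# Illustrative only: the real smdistributed.dataparallel.tensorflow.allreduce
# averages tf.Tensor gradients across workers over the network.

def allreduce_average(per_worker_tensors):
    """Element-wise average of one tensor replicated across workers."""
    num_workers = len(per_worker_tensors)
    length = len(per_worker_tensors[0])
    return [
        sum(worker[i] for worker in per_worker_tensors) / num_workers
        for i in range(length)
    ]

# Each worker holds its own gradient for the same parameter;
# the shapes must be identical across ranks, as the API requires.
grads = [
    [1.0, 2.0, 3.0],  # worker 0
    [3.0, 4.0, 5.0],  # worker 1
]
print(allreduce_average(grads))  # [2.0, 3.0, 4.0]
```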

doc/conf.py

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
-"""Placeholder docstring"""
+"""Configuration for generating readthedocs docstrings."""
 from __future__ import absolute_import

 import pkg_resources

src/sagemaker/huggingface/estimator.py

Lines changed: 7 additions & 6 deletions
@@ -50,14 +50,15 @@ def __init__(
         compiler_config=None,
         **kwargs,
     ):
-        """This ``Estimator`` executes a HuggingFace script in a managed execution environment.
+        """This estimator runs a Hugging Face training script in a SageMaker training environment.

-        The managed HuggingFace environment is an Amazon-built Docker container that executes
-        functions defined in the supplied ``entry_point`` Python script within a SageMaker
-        Training Job.
+        The estimator initiates the SageMaker-managed Hugging Face environment
+        by using the pre-built Hugging Face Docker container and runs
+        the Hugging Face training script that the user provides through
+        the ``entry_point`` argument.

-        Training is started by calling
-        :meth:`~sagemaker.amazon.estimator.Framework.fit` on this Estimator.
+        After configuring the estimator class, use the class method
+        :meth:`~sagemaker.amazon.estimator.Framework.fit()` to start a training job.

         Args:
             py_version (str): Python version you want to use for executing your model training

src/sagemaker/image_uri_config/neo-tensorflow.json

Lines changed: 32 additions & 1 deletion
@@ -12,7 +12,8 @@
       "1.11.0": "1.15.3",
       "1.12.0": "1.15.3",
       "1.13.0": "1.15.3",
-      "1.14.0": "1.15.3"
+      "1.14.0": "1.15.3",
+      "2.4.2": "2.4.2"
     },
     "versions": {
       "1.15.3": {
@@ -44,6 +45,36 @@
         "us-west-2": "301217895009"
       },
       "repository": "sagemaker-inference-tensorflow"
+    },
+    "2.4.2": {
+      "py_versions": ["py3"],
+      "registries": {
+        "af-south-1": "774647643957",
+        "ap-east-1": "110948597952",
+        "ap-northeast-1": "941853720454",
+        "ap-northeast-2": "151534178276",
+        "ap-northeast-3": "925152966179",
+        "ap-south-1": "763008648453",
+        "ap-southeast-1": "324986816169",
+        "ap-southeast-2": "355873309152",
+        "ca-central-1": "464438896020",
+        "cn-north-1": "472730292857",
+        "cn-northwest-1": "474822919863",
+        "eu-central-1": "746233611703",
+        "eu-north-1": "601324751636",
+        "eu-south-1": "966458181534",
+        "eu-west-1": "802834080501",
+        "eu-west-2": "205493899709",
+        "eu-west-3": "254080097072",
+        "me-south-1": "836785723513",
+        "sa-east-1": "756306329178",
+        "us-east-1": "785573368785",
+        "us-east-2": "007439368137",
+        "us-gov-west-1": "263933020539",
+        "us-west-1": "710691900526",
+        "us-west-2": "301217895009"
+      },
+      "repository": "sagemaker-inference-tensorflow"
     }
   }
 }
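To show how a config file like ``neo-tensorflow.json`` is consumed, here is a hypothetical sketch of the version-alias and region-registry lookup. The ``resolve_image`` helper and the exact URI format are illustrative assumptions; the SDK's real resolution logic lives in ``sagemaker.image_uris`` and is more involved:

```python
# Hypothetical sketch (not the SDK's actual code) of resolving an image URI
# from a neo-tensorflow.json-style config: alias -> version -> registry.

config = {
    "version_aliases": {"1.14.0": "1.15.3", "2.4.2": "2.4.2"},
    "versions": {
        "1.15.3": {"registries": {"us-west-2": "301217895009"},
                   "repository": "sagemaker-inference-tensorflow"},
        "2.4.2": {"registries": {"us-west-2": "301217895009"},
                  "repository": "sagemaker-inference-tensorflow"},
    },
}

def resolve_image(config, framework_version, region):
    # Follow the alias table first; fall back to the version itself.
    version = config["version_aliases"].get(framework_version, framework_version)
    entry = config["versions"][version]
    account = entry["registries"][region]
    return f"{account}.dkr.ecr.{region}.amazonaws.com/{entry['repository']}:{version}"

print(resolve_image(config, "2.4.2", "us-west-2"))
# 301217895009.dkr.ecr.us-west-2.amazonaws.com/sagemaker-inference-tensorflow:2.4.2
```

Note how the new ``"2.4.2": "2.4.2"`` alias added by this commit makes the 2.4.2 entry reachable.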

src/sagemaker/model.py

Lines changed: 6 additions & 3 deletions
@@ -466,7 +466,7 @@ def _upload_code(self, key_prefix: str, repack: bool = False) -> None:
         )

     def _script_mode_env_vars(self):
-        """Placeholder docstring"""
+        """Returns a mapping of environment variables for script mode execution."""
         script_name = None
         dir_name = None
         if self.uploaded_code:
@@ -478,8 +478,11 @@ def _script_mode_env_vars(self):
         elif self.entry_point is not None:
             script_name = self.entry_point
             if self.source_dir is not None:
-                dir_name = "file://" + self.source_dir
-
+                dir_name = (
+                    self.source_dir
+                    if self.source_dir.startswith("s3://")
+                    else "file://" + self.source_dir
+                )
         return {
             SCRIPT_PARAM_NAME.upper(): script_name or str(),
             DIR_PARAM_NAME.upper(): dir_name or str(),
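The behavior of the new ``dir_name`` expression can be checked in isolation. A stand-alone sketch of just that branch (``resolve_dir_name`` is a hypothetical helper, not part of the SDK):

```python
# Stand-alone sketch of the changed dir_name logic: an S3 source_dir is
# passed through unchanged, while a local path gets a file:// prefix.

def resolve_dir_name(source_dir):
    if source_dir is None:
        return None
    return source_dir if source_dir.startswith("s3://") else "file://" + source_dir

assert resolve_dir_name("s3://bucket/prefix/code") == "s3://bucket/prefix/code"
assert resolve_dir_name("/opt/ml/code") == "file:///opt/ml/code"
```

Before this change, an S3 ``source_dir`` would have been mangled into ``file://s3://...``.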

src/sagemaker/serializers.py

Lines changed: 34 additions & 1 deletion
@@ -18,7 +18,6 @@
 import csv
 import io
 import json
-
 import numpy as np
 from six import with_metaclass

@@ -357,3 +356,37 @@ def serialize(self, data):
             return data.read()

         raise ValueError("Unable to handle input format: %s" % type(data))
+
+
+class DataSerializer(SimpleBaseSerializer):
+    """Serialize data in any file by extracting raw bytes from the file."""
+
+    def __init__(self, content_type="file-path/raw-bytes"):
+        """Initialize a ``DataSerializer`` instance.
+
+        Args:
+            content_type (str): The MIME type to signal to the inference endpoint when sending
+                request data (default: "file-path/raw-bytes").
+        """
+        super(DataSerializer, self).__init__(content_type=content_type)
+
+    def serialize(self, data):
+        """Serialize file data to raw bytes.
+
+        Args:
+            data (object): Data to be serialized. The data can be a string
+                representing a file path or the raw bytes from a file.
+        Returns:
+            raw-bytes: The data serialized as raw bytes from the input.
+        """
+        if isinstance(data, str):
+            try:
+                with open(data, "rb") as data_file:
+                    data_file_info = data_file.read()
+                    return data_file_info
+            except Exception as e:
+                raise ValueError(f"Could not open/read file: {data}. {e}")
+        if isinstance(data, bytes):
+            return data
+
+        raise ValueError(f"Object of type {type(data)} is not Data serializable.")
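The new serializer's behavior can be exercised without the SageMaker SDK installed; the following sketch mirrors the ``serialize`` logic added above as a free function and runs it against a temporary file:

```python
# Minimal stand-in mirroring DataSerializer.serialize above, so the
# file-path-or-bytes behavior can be tested without the SageMaker SDK.
import tempfile

def serialize(data):
    if isinstance(data, str):
        try:
            with open(data, "rb") as data_file:
                return data_file.read()
        except Exception as e:
            raise ValueError(f"Could not open/read file: {data}. {e}")
    if isinstance(data, bytes):
        return data
    raise ValueError(f"Object of type {type(data)} is not Data serializable.")

# A string is treated as a file path and read as raw bytes...
with tempfile.NamedTemporaryFile(suffix=".raw", delete=False) as f:
    f.write(b"\x00\x01payload")
    path = f.name
assert serialize(path) == b"\x00\x01payload"

# ...while bytes pass through unchanged.
assert serialize(b"abc") == b"abc"
```

This matches the test fixture added in this commit (``tests/data/cuteCat.raw``), which feeds a raw binary file to the serializer.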

src/sagemaker/training_compiler/config.py

Lines changed: 28 additions & 11 deletions
@@ -18,11 +18,7 @@


 class TrainingCompilerConfig(object):
-    """The configuration class for accelerating SageMaker training jobs through compilation.
-
-    SageMaker Training Compiler speeds up training by optimizing the model execution graph.
-
-    """
+    """The SageMaker Training Compiler configuration class."""

     DEBUG_PATH = "/opt/ml/output/data/compiler/"
     SUPPORTED_INSTANCE_CLASS_PREFIXES = ["p3", "g4dn", "p4"]

@@ -37,9 +33,15 @@ def __init__(
     ):
         """This class initializes a ``TrainingCompilerConfig`` instance.

-        Pass the output of it to the ``compiler_config``
+        `Amazon SageMaker Training Compiler
+        <https://docs.aws.amazon.com/sagemaker/latest/dg/training-compiler.html>`_
+        is a feature of SageMaker Training
+        and speeds up training jobs by optimizing model execution graphs.
+
+        You can compile Hugging Face models
+        by passing the object of this configuration class to the ``compiler_config``
         parameter of the :class:`~sagemaker.huggingface.HuggingFace`
-        class.
+        estimator.

         Args:
             enabled (bool): Optional. Switch to enable SageMaker Training Compiler.

@@ -48,13 +50,28 @@ def __init__(
                 This comes with a potential performance slowdown.
                 The default is ``False``.

-        **Example**: The following example shows the basic ``compiler_config``
-        parameter configuration, enabling compilation with default parameter values.
+        **Example**: The following code shows the basic usage of the
+        :class:`sagemaker.huggingface.TrainingCompilerConfig()` class
+        to run a HuggingFace training job with the compiler.

         .. code-block:: python

-            from sagemaker.huggingface import TrainingCompilerConfig
-            compiler_config = TrainingCompilerConfig()
+            from sagemaker.huggingface import HuggingFace, TrainingCompilerConfig
+
+            huggingface_estimator=HuggingFace(
+                ...
+                compiler_config=TrainingCompilerConfig()
+            )
+
+        .. seealso::
+
+            For more information about how to enable SageMaker Training Compiler
+            for various training settings such as using TensorFlow-based models,
+            PyTorch-based models, and distributed training,
+            see `Enable SageMaker Training Compiler
+            <https://docs.aws.amazon.com/sagemaker/latest/dg/training-compiler-enable.html>`_
+            in the `Amazon SageMaker Training Compiler developer guide
+            <https://docs.aws.amazon.com/sagemaker/latest/dg/training-compiler.html>`_.

         """
src/sagemaker/workflow/steps.py

Lines changed: 2 additions & 0 deletions
@@ -301,6 +301,8 @@ def arguments(self) -> RequestType:
         )
         request_dict = self.estimator.sagemaker_session._get_train_request(**train_args)
         request_dict.pop("TrainingJobName")
+        if "HyperParameters" in request_dict:
+            request_dict["HyperParameters"].pop("sagemaker_job_name", None)

         return request_dict
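The effect of the two added lines in ``arguments`` can be sketched on a plain dictionary (the sample ``request_dict`` contents here are illustrative, not a real training request):

```python
# Sketch of the change above: the SDK-injected sagemaker_job_name
# hyperparameter is stripped from the request, along with TrainingJobName,
# so pipeline step arguments do not pin a job name at definition time.

request_dict = {
    "TrainingJobName": "my-custom-job",
    "HyperParameters": {"sagemaker_job_name": '"my-custom-job"', "epochs": "10"},
    "AlgorithmSpecification": {"TrainingInputMode": "File"},
}

request_dict.pop("TrainingJobName", None)
if "HyperParameters" in request_dict:
    request_dict["HyperParameters"].pop("sagemaker_job_name", None)

# Only the user's real hyperparameters remain.
print(request_dict["HyperParameters"])  # {'epochs': '10'}
```

Using ``pop(..., None)`` keeps the step working even when no ``sagemaker_job_name`` hyperparameter was injected.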

tests/data/cuteCat.raw

6.43 KB
Binary file not shown.

tests/integ/__init__.py

Lines changed: 0 additions & 6 deletions
@@ -148,12 +148,6 @@
     "eu-west-2",
     "us-east-1",
 ]
-NO_SM_PIPELINE_MM_CLARIFY_CHECK_STEP_REGIONS = [
-    "ap-northeast-3",
-    "ap-south-1",
-    "eu-north-1",
-    "sa-east-1",
-]
 EDGE_PACKAGING_SUPPORTED_REGIONS = [
     "us-east-2",
     "us-west-2",

tests/integ/sagemaker/lineage/conftest.py

Lines changed: 3 additions & 1 deletion
@@ -26,7 +26,9 @@
     artifact,
 )
 from sagemaker.model import ModelPackage
-from tests.integ.test_workflow import test_end_to_end_pipeline_successful_execution
+from tests.integ.sagemaker.workflow.test_workflow import (
+    test_end_to_end_pipeline_successful_execution,
+)
 from sagemaker.workflow.pipeline import _PipelineExecution
 from sagemaker.session import get_execution_role
 from smexperiments import trial_component, trial, experiment

tests/integ/sagemaker/workflow/__init__.py

Whitespace-only changes.
