aws
diff --git a/‎CHANGELOG.md
+38 b/‎CHANGELOG.md
+38
diff --git a/‎VERSION
+1-1 b/‎VERSION
+1-1
diff --git a/‎doc/api/inference/model.rst
+5 b/‎doc/api/inference/model.rst
+5
diff --git a/‎doc/api/inference/predictors.rst
+5 b/‎doc/api/inference/predictors.rst
+5
diff --git a/‎doc/overview.rst
+44 b/‎doc/overview.rst
+44
diff --git a/‎doc/workflows/pipelines/sagemaker.workflow.pipelines.rst
-1 b/‎doc/workflows/pipelines/sagemaker.workflow.pipelines.rst
-1
diff --git a/‎src/sagemaker/clarify.py
+31-27 b/‎src/sagemaker/clarify.py
+31-27
diff --git a/‎src/sagemaker/dataset_definition/inputs.py
+2-2 b/‎src/sagemaker/dataset_definition/inputs.py
+2-2
diff --git a/‎src/sagemaker/huggingface/__init__.py
+1 b/‎src/sagemaker/huggingface/__init__.py
+1
diff --git a/‎src/sagemaker/huggingface/processing.py
+132 b/‎src/sagemaker/huggingface/processing.py
+132
@@ -1,5 +1,43 @@
 # Changelog
 
+## v2.52.2.post0 (2021-08-11)
+
+### Documentation Changes
+
+ * clarify that default_bucket creates a bucket
+ * Minor updates to Clarify API documentation
+
+## v2.52.2 (2021-08-10)
+
+### Bug Fixes and Other Changes
+
+ * sklearn integ tests, remove swallowing exception on feature group delete attempt
+ * sklearn integ test for custom bucket
+
+### Documentation Changes
+
+ * Fix dataset_definition links
+ * Document LambdaModel and LambdaPredictor classes
+
+## v2.52.1 (2021-08-06)
+
+### Bug Fixes and Other Changes
+
+ * revert #2251 changes for sklearn processor
+
+## v2.52.0 (2021-08-05)
+
+### Features
+
+ * processors that support multiple Python files, requirements.txt, and dependencies.
+ * support step object in step depends on list
+
+### Bug Fixes and Other Changes
+
+ * enable isolation while creating model from job
+ * update `sagemaker.serverless` integration test
+ * Use correct boto model name for RegisterModelStep properties
+
 ## v2.51.0 (2021-08-03)
 
 ### Features
 
@@ -1 +1 @@
-2.51.1.dev0
+2.52.3.dev0
@@ -15,3 +15,8 @@ Model
     :members:
     :undoc-members:
     :show-inheritance:
+
+.. autoclass:: sagemaker.serverless.model.LambdaModel
+    :members:
+    :undoc-members:
+    :show-inheritance:
@@ -7,3 +7,8 @@ Make real-time predictions against SageMaker endpoints with Python objects
     :members:
     :undoc-members:
     :show-inheritance:
+
+.. autoclass:: sagemaker.serverless.predictor.LambdaPredictor
+    :members:
+    :undoc-members:
+    :show-inheritance:
@@ -1063,6 +1063,50 @@ You can also find these notebooks in the **Advanced Functionality** section of t
 For information about using sample notebooks in a SageMaker notebook instance, see `Use Example Notebooks <https://docs.aws.amazon.com/sagemaker/latest/dg/howitworks-nbexamples.html>`__
 in the AWS documentation.
 
+********************
+Serverless Inference
+********************
+
+You can use the SageMaker Python SDK to perform serverless inference on Lambda.
+
+To deploy models to Lambda, you must complete the following prerequisites:
+
+- `Package your model and inference code as a container image. <https://docs.aws.amazon.com/lambda/latest/dg/images-create.html>`_
+- `Create a role that lists Lambda as a trusted entity. <https://docs.aws.amazon.com/lambda/latest/dg/lambda-intro-execution-role.html#permissions-executionrole-console>`_
+
+After completing the prerequisites, you can deploy your model to Lambda using
+the `LambdaModel`_ class.
+
+.. code:: python
+
+   from sagemaker.serverless import LambdaModel
+
+   image_uri = "123456789012.dkr.ecr.us-west-2.amazonaws.com/my-lambda-repository:latest"
+   role = "arn:aws:iam::123456789012:role/MyLambdaExecutionRole"
+
+   model = LambdaModel(image_uri=image_uri, role=role)
+   predictor = model.deploy("my-lambda-function", timeout=20, memory_size=4092)
+
+The ``deploy`` method returns a `LambdaPredictor`_  instance. Use the
+`LambdaPredictor`_ ``predict`` method to perform inference on Lambda.
+
+.. code:: python
+
+   url = "https://example.com/cat.jpeg"
+   predictor.predict({"url": url})  # {'class': 'tabby'}
+
+Once you are done performing inference on Lambda, free the `LambdaModel`_ and
+`LambdaPredictor`_ resources using the ``delete_model`` and ``delete_predictor``
+methods.
+
+.. code:: python
+
+   model.delete_model()
+   predictor.delete_predictor()
+
+.. _LambdaModel : https://sagemaker.readthedocs.io/en/stable/api/inference/model.html#sagemaker.serverless.model.LambdaModel
+.. _LambdaPredictor : https://sagemaker.readthedocs.io/en/stable/api/inference/predictors.html#sagemaker.serverless.predictor.LambdaPredictor
+
 ******************
 SageMaker Workflow
 ******************
 
@@ -5,7 +5,6 @@ ConditionStep
 -------------
 
 .. autoclass:: sagemaker.workflow.condition_step.ConditionStep
-
 .. deprecated:: sagemaker.workflow.condition_step.JsonGet
 
 Conditions
 
@@ -48,12 +48,17 @@ def __init__(
             headers (list[str]): A list of column names in the input dataset.
             features (str): JSONPath for locating the feature columns for bias metrics if the
                 dataset format is JSONLines.
-            dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV
-                and "application/jsonlines" for JSONLines.
+            dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV,
+                "application/jsonlines" for JSONLines, and "application/x-parquet" for Parquet.
             s3_data_distribution_type (str): Valid options are "FullyReplicated" or
                 "ShardedByS3Key".
             s3_compression_type (str): Valid options are "None" or "Gzip".
         """
+        if dataset_type not in ["text/csv", "application/jsonlines", "application/x-parquet"]:
+            raise ValueError(
+                f"Invalid dataset_type '{dataset_type}'."
+                f" Please check the API documentation for the supported dataset types."
+            )
         self.s3_data_input_path = s3_data_input_path
         self.s3_output_path = s3_output_path
         self.s3_data_distribution_type = s3_data_distribution_type
@@ -508,7 +513,7 @@ def run_pre_training_bias(
         kms_key=None,
         experiment_config=None,
     ):
-        """Runs a ProcessingJob to compute the requested bias 'methods' of the input data.
+        """Runs a ProcessingJob to compute the pre-training bias methods of the input data.
 
         Computes the requested methods that compare 'methods' (e.g. fraction of examples) for the
         sensitive group vs the other examples.
@@ -517,14 +522,14 @@ def run_pre_training_bias(
             data_config (:class:`~sagemaker.clarify.DataConfig`): Config of the input/output data.
             data_bias_config (:class:`~sagemaker.clarify.BiasConfig`): Config of sensitive groups.
             methods (str or list[str]): Selector of a subset of potential metrics:
-                ["`CI <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ci.html>`_",
-                "`DPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-dpl.html>`_",
-                "`KL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-kl.html>`_",
-                "`JS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-js.html>`_",
-                "`LP <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-lp.html>`_",
-                "`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-tvd.html>`_",
-                "`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ks.html>`_",
-                "`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-cdd.html>`_"].
+                ["`CI <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-bias-metric-class-imbalance.html>`_",
+                "`DPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-true-label-imbalance.html>`_",
+                "`KL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kl-divergence.html>`_",
+                "`JS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-jensen-shannon-divergence.html>`_",
+                "`LP <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-lp-norm.html>`_",
+                "`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-total-variation-distance.html>`_",
+                "`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kolmogorov-smirnov.html>`_",
+                "`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-cddl.html>`_"].
                 Defaults to computing all.
             wait (bool): Whether the call should wait until the job completes (default: True).
             logs (bool): Whether to show the logs produced by the job.
@@ -538,7 +543,7 @@ def run_pre_training_bias(
             experiment_config (dict[str, str]): Experiment management configuration.
                 Dictionary contains three optional keys:
                 'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
-        """
+        """  # noqa E501
         analysis_config = data_config.get_config()
         analysis_config.update(data_bias_config.get_config())
         analysis_config["methods"] = {"pre_training_bias": {"methods": methods}}
@@ -562,7 +567,7 @@ def run_post_training_bias(
         kms_key=None,
         experiment_config=None,
     ):
-        """Runs a ProcessingJob to compute the requested bias 'methods' of the model predictions.
+        """Runs a ProcessingJob to compute the post-training bias methods of the model predictions.
 
         Spins up a model endpoint, runs inference over the input example in the
         's3_data_input_path' to obtain predicted labels. Computes the requested methods that
@@ -633,12 +638,11 @@ def run_bias(
         kms_key=None,
         experiment_config=None,
     ):
-        """Runs a ProcessingJob to compute the requested bias 'methods' of the model predictions.
+        """Runs a ProcessingJob to compute the requested bias methods.
 
-        Spins up a model endpoint, runs inference over the input example in the
-        's3_data_input_path' to obtain predicted labels. Computes a the requested methods that
-        compare 'methods' (e.g. accuracy, precision, recall) for the sensitive group vs the other
-        examples.
+        It computes the metrics of both the pre-training methods and the post-training methods.
+        To calculate post-training methods, it needs to spin up a model endpoint and run inference
+        over the input example in the 's3_data_input_path' to obtain predicted labels.
 
         Args:
             data_config (:class:`~sagemaker.clarify.DataConfig`): Config of the input/output data.
@@ -648,14 +652,14 @@ def run_bias(
             model_predicted_label_config (:class:`~sagemaker.clarify.ModelPredictedLabelConfig`):
                 Config of how to extract the predicted label from the model output.
             pre_training_methods (str or list[str]): Selector of a subset of potential metrics:
-                ["`CI <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ci.html>`_",
-                "`DPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-dpl.html>`_",
-                "`KL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-kl.html>`_",
-                "`JS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-js.html>`_",
-                "`LP <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-lp.html>`_",
-                "`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-tvd.html>`_",
-                "`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ks.html>`_",
-                "`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-cdd.html>`_"].
+                ["`CI <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-bias-metric-class-imbalance.html>`_",
+                "`DPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-true-label-imbalance.html>`_",
+                "`KL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kl-divergence.html>`_",
+                "`JS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-jensen-shannon-divergence.html>`_",
+                "`LP <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-lp-norm.html>`_",
+                "`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-total-variation-distance.html>`_",
+                "`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kolmogorov-smirnov.html>`_",
+                "`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-cddl.html>`_"].
                 Defaults to computing all.
             post_training_methods (str or list[str]): Selector of a subset of potential metrics:
                 ["`DPPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-dppl.html>`_"
@@ -682,7 +686,7 @@ def run_bias(
             experiment_config (dict[str, str]): Experiment management configuration.
                 Dictionary contains three optional keys:
                 'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
-        """
+        """  # noqa E501
         analysis_config = data_config.get_config()
         analysis_config.update(bias_config.get_config())
         analysis_config["predictor"] = model_config.get_predictor_config()
 
@@ -99,9 +99,9 @@ class DatasetDefinition(ApiObject):
             Definition inputs to run a processing job. LocalPath is an absolute path to the input
             data. This is a required parameter when `AppManaged` is False (default).
         redshift_dataset_definition
-            (:class:`~sagemaker.dataset_definition.RedshiftDatasetDefinition`): Redshift
+            (:class:`~sagemaker.dataset_definition.inputs.RedshiftDatasetDefinition`): Redshift
             dataset definition.
-        athena_dataset_definition (:class:`~sagemaker.dataset_definition.AthenaDatasetDefinition`):
+        athena_dataset_definition (:class:`~sagemaker.dataset_definition.inputs.AthenaDatasetDefinition`):
             Configuration for Athena Dataset Definition input.
     """
 
 
@@ -15,3 +15,4 @@
 
 from sagemaker.huggingface.estimator import HuggingFace  # noqa: F401
 from sagemaker.huggingface.model import HuggingFaceModel, HuggingFacePredictor  # noqa: F401
+from sagemaker.huggingface.processing import HuggingFaceProcessor  # noqa:F401
@@ -0,0 +1,132 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+"""This module contains code related to HuggingFace Processors which are used for Processing jobs.
+
+These jobs let customers perform data pre-processing, post-processing, feature engineering,
+data validation, and model evaluation and interpretation on SageMaker.
+"""
+from __future__ import absolute_import
+
+from sagemaker.processing import FrameworkProcessor
+from sagemaker.huggingface.estimator import HuggingFace
+
+
+class HuggingFaceProcessor(FrameworkProcessor):
+    """Handles Amazon SageMaker processing tasks for jobs using HuggingFace containers."""
+
+    estimator_cls = HuggingFace
+
+    def __init__(
+        self,
+        role,
+        instance_count,
+        instance_type,
+        transformers_version=None,
+        tensorflow_version=None,
+        pytorch_version=None,
+        py_version="py36",
+        image_uri=None,
+        command=None,
+        volume_size_in_gb=30,
+        volume_kms_key=None,
+        output_kms_key=None,
+        code_location=None,
+        max_runtime_in_seconds=None,
+        base_job_name=None,
+        sagemaker_session=None,
+        env=None,
+        tags=None,
+        network_config=None,
+    ):
+        """This processor executes a Python script in a HuggingFace execution environment.
+
+        Unless ``image_uri`` is specified, the environment is an Amazon-built Docker container
+        that executes functions defined in the supplied ``code`` Python script.
+
+        The arguments have the same meaning as in ``FrameworkProcessor``, with the following
+        exceptions.
+
+        Args:
+            transformers_version (str): Transformers version you want to use for
+                executing your model training code. Defaults to ``None``. Required unless
+                ``image_uri`` is provided. The current supported version is ``4.4.2``.
+            tensorflow_version (str): TensorFlow version you want to use for
+                executing your model training code. Defaults to ``None``. Required unless
+                ``pytorch_version`` is provided. The current supported version is ``2.4.1``.
+            pytorch_version (str): PyTorch version you want to use for
+                executing your model training code. Defaults to ``None``. Required unless
+                ``tensorflow_version`` is provided. The current supported version is ``1.6.0``.
+            py_version (str): Python version you want to use for executing your model training
+                code. Defaults to ``py36``. Required unless ``image_uri`` is provided. If
+                using PyTorch, the current supported version is ``py36``. If using TensorFlow,
+                the current supported version is ``py37``.
+
+        .. tip::
+
+            You can find additional parameters for initializing this class at
+            :class:`~sagemaker.processing.FrameworkProcessor`.
+        """
+        self.pytorch_version = pytorch_version
+        self.tensorflow_version = tensorflow_version
+        super().__init__(
+            self.estimator_cls,
+            transformers_version,
+            role,
+            instance_count,
+            instance_type,
+            py_version,
+            image_uri,
+            command,
+            volume_size_in_gb,
+            volume_kms_key,
+            output_kms_key,
+            code_location,
+            max_runtime_in_seconds,
+            base_job_name,
+            sagemaker_session,
+            env,
+            tags,
+            network_config,
+        )
+
+    def _create_estimator(
+        self,
+        entry_point="",
+        source_dir=None,
+        dependencies=None,
+        git_config=None,
+    ):
+        """Override default estimator factory function for HuggingFace's different parameters
+
+        HuggingFace estimators have 3 framework version parameters instead of one: The version for
+        Transformers, PyTorch, and TensorFlow.
+        """
+        return self.estimator_cls(
+            transformers_version=self.framework_version,
+            tensorflow_version=self.tensorflow_version,
+            pytorch_version=self.pytorch_version,
+            py_version=self.py_version,
+            entry_point=entry_point,
+            source_dir=source_dir,
+            dependencies=dependencies,
+            git_config=git_config,
+            code_location=self.code_location,
+            enable_network_isolation=False,
+            image_uri=self.image_uri,
+            role=self.role,
+            instance_count=self.instance_count,
+            instance_type=self.instance_type,
+            sagemaker_session=self.sagemaker_session,
+            debugger_hook_config=False,
+            disable_profiler=True,
+        )
Original file line number	Diff line number	Diff line change
`@@ -15,3 +15,4 @@`
`15`	`15`
`16`	`16`	`from sagemaker.huggingface.estimator import HuggingFace # noqa: F401`
`17`	`17`	`from sagemaker.huggingface.model import HuggingFaceModel, HuggingFacePredictor # noqa: F401`
	`18`	`+from sagemaker.huggingface.processing import HuggingFaceProcessor # noqa:F401`