aws
diff --git a/‎CHANGELOG.md
Lines changed: 54 additions & 0 deletions b/‎CHANGELOG.md
Lines changed: 54 additions & 0 deletions
diff --git a/‎VERSION
Lines changed: 1 addition & 1 deletion b/‎VERSION
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/api/utility/inputs.rst
Lines changed: 5 additions & 1 deletion b/‎doc/api/utility/inputs.rst
Lines changed: 5 additions & 1 deletion
diff --git a/‎doc/workflows/pipelines/sagemaker.workflow.pipelines.rst
Lines changed: 0 additions & 1 deletion b/‎doc/workflows/pipelines/sagemaker.workflow.pipelines.rst
Lines changed: 0 additions & 1 deletion
diff --git a/‎src/sagemaker/automl/automl.py
Lines changed: 1 addition & 0 deletions b/‎src/sagemaker/automl/automl.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/sagemaker/clarify.py
Lines changed: 31 additions & 27 deletions b/‎src/sagemaker/clarify.py
Lines changed: 31 additions & 27 deletions
diff --git a/‎src/sagemaker/dataset_definition/inputs.py
Lines changed: 7 additions & 8 deletions b/‎src/sagemaker/dataset_definition/inputs.py
Lines changed: 7 additions & 8 deletions
diff --git a/‎src/sagemaker/feature_store/feature_group.py
Lines changed: 32 additions & 10 deletions b/‎src/sagemaker/feature_store/feature_group.py
Lines changed: 32 additions & 10 deletions
diff --git a/‎src/sagemaker/image_uri_config/model-monitor.json
Lines changed: 1 addition & 0 deletions b/‎src/sagemaker/image_uri_config/model-monitor.json
Lines changed: 1 addition & 0 deletions
@@ -1,5 +1,59 @@
 # Changelog
 
+## v2.55.0 (2021-08-25)
+
+### Features
+
+ * Add information of Amazon-provided analysis image used by Model Monitor
+
+### Bug Fixes and Other Changes
+
+ * Update Changelog to fix release
+ * Fixing the order of populating container list
+ * pass network isolation config to pipelineModel
+ * Dereference symbolic links when creating tar file
+ * multiprocess issue in feature_group.py
+ * deprecate tag logic on Association
+
+### Documentation Changes
+
+ * add dataset_definition to processing page
+
+## v2.54.0 (2021-08-16)
+
+### Features
+
+ * add pytorch 1.5.1 eia configuration
+
+### Bug Fixes and Other Changes
+
+ * issue #2253 where Processing job in Local mode would call Describe API
+
+## v2.53.0 (2021-08-12)
+
+### Features
+
+ * support tuning step parameter range parameterization + support retry strategy in tuner
+
+## v2.52.2.post0 (2021-08-11)
+
+### Documentation Changes
+
+ * clarify that default_bucket creates a bucket
+ * Minor updates to Clarify API documentation
+
+## v2.52.2 (2021-08-10)
+
+### Bug Fixes and Other Changes
+
+ * sklearn integ tests, remove swallowing exception on feature group delete attempt
+ * sklearn integ test for custom bucket
+
+### Documentation Changes
+
+ * Fix dataset_definition links
+ * Document LambdaModel and LambdaPredictor classes
+
 ## v2.52.1 (2021-08-06)
 
 ### Bug Fixes and Other Changes
 
@@ -1 +1 @@
-2.52.2.dev0
+2.55.1.dev0
@@ -5,4 +5,8 @@ Inputs
     :members:
     :undoc-members:
     :show-inheritance:
-    :noindex:
+
+.. automodule:: sagemaker.dataset_definition.inputs
+    :members:
+    :undoc-members:
+    :show-inheritance:
@@ -5,7 +5,6 @@ ConditionStep
 -------------
 
 .. autoclass:: sagemaker.workflow.condition_step.ConditionStep
-
 .. deprecated:: sagemaker.workflow.condition_step.JsonGet
 
 Conditions
 
@@ -328,6 +328,7 @@ def create_model(
             predictor_cls=predictor_cls,
             name=name,
             vpc_config=vpc_config,
+            enable_network_isolation=enable_network_isolation,
             sagemaker_session=sagemaker_session or self.sagemaker_session,
         )
         return pipeline
 
@@ -48,12 +48,17 @@ def __init__(
             headers (list[str]): A list of column names in the input dataset.
             features (str): JSONPath for locating the feature columns for bias metrics if the
                 dataset format is JSONLines.
-            dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV
-                and "application/jsonlines" for JSONLines.
+            dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV,
+                "application/jsonlines" for JSONLines, and "application/x-parquet" for Parquet.
             s3_data_distribution_type (str): Valid options are "FullyReplicated" or
                 "ShardedByS3Key".
             s3_compression_type (str): Valid options are "None" or "Gzip".
         """
+        if dataset_type not in ["text/csv", "application/jsonlines", "application/x-parquet"]:
+            raise ValueError(
+                f"Invalid dataset_type '{dataset_type}'."
+                f" Please check the API documentation for the supported dataset types."
+            )
         self.s3_data_input_path = s3_data_input_path
         self.s3_output_path = s3_output_path
         self.s3_data_distribution_type = s3_data_distribution_type
@@ -508,7 +513,7 @@ def run_pre_training_bias(
         kms_key=None,
         experiment_config=None,
     ):
-        """Runs a ProcessingJob to compute the requested bias 'methods' of the input data.
+        """Runs a ProcessingJob to compute the pre-training bias methods of the input data.
 
         Computes the requested methods that compare 'methods' (e.g. fraction of examples) for the
         sensitive group vs the other examples.
@@ -517,14 +522,14 @@ def run_pre_training_bias(
             data_config (:class:`~sagemaker.clarify.DataConfig`): Config of the input/output data.
             data_bias_config (:class:`~sagemaker.clarify.BiasConfig`): Config of sensitive groups.
             methods (str or list[str]): Selector of a subset of potential metrics:
-                ["`CI <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ci.html>`_",
-                "`DPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-dpl.html>`_",
-                "`KL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-kl.html>`_",
-                "`JS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-js.html>`_",
-                "`LP <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-lp.html>`_",
-                "`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-tvd.html>`_",
-                "`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ks.html>`_",
-                "`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-cdd.html>`_"].
+                ["`CI <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-bias-metric-class-imbalance.html>`_",
+                "`DPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-true-label-imbalance.html>`_",
+                "`KL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kl-divergence.html>`_",
+                "`JS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-jensen-shannon-divergence.html>`_",
+                "`LP <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-lp-norm.html>`_",
+                "`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-total-variation-distance.html>`_",
+                "`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kolmogorov-smirnov.html>`_",
+                "`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-cddl.html>`_"].
                 Defaults to computing all.
             wait (bool): Whether the call should wait until the job completes (default: True).
             logs (bool): Whether to show the logs produced by the job.
@@ -538,7 +543,7 @@ def run_pre_training_bias(
             experiment_config (dict[str, str]): Experiment management configuration.
                 Dictionary contains three optional keys:
                 'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
-        """
+        """  # noqa E501
         analysis_config = data_config.get_config()
         analysis_config.update(data_bias_config.get_config())
         analysis_config["methods"] = {"pre_training_bias": {"methods": methods}}
@@ -562,7 +567,7 @@ def run_post_training_bias(
         kms_key=None,
         experiment_config=None,
     ):
-        """Runs a ProcessingJob to compute the requested bias 'methods' of the model predictions.
+        """Runs a ProcessingJob to compute the post-training bias methods of the model predictions.
 
         Spins up a model endpoint, runs inference over the input example in the
         's3_data_input_path' to obtain predicted labels. Computes the requested methods that
@@ -633,12 +638,11 @@ def run_bias(
         kms_key=None,
         experiment_config=None,
     ):
-        """Runs a ProcessingJob to compute the requested bias 'methods' of the model predictions.
+        """Runs a ProcessingJob to compute the requested bias methods.
 
-        Spins up a model endpoint, runs inference over the input example in the
-        's3_data_input_path' to obtain predicted labels. Computes a the requested methods that
-        compare 'methods' (e.g. accuracy, precision, recall) for the sensitive group vs the other
-        examples.
+        It computes the metrics of both the pre-training methods and the post-training methods.
+        To calculate post-training methods, it needs to spin up a model endpoint and run inference
+        over the input example in the 's3_data_input_path' to obtain predicted labels.
 
         Args:
             data_config (:class:`~sagemaker.clarify.DataConfig`): Config of the input/output data.
@@ -648,14 +652,14 @@ def run_bias(
             model_predicted_label_config (:class:`~sagemaker.clarify.ModelPredictedLabelConfig`):
                 Config of how to extract the predicted label from the model output.
             pre_training_methods (str or list[str]): Selector of a subset of potential metrics:
-                ["`CI <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ci.html>`_",
-                "`DPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-dpl.html>`_",
-                "`KL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-kl.html>`_",
-                "`JS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-js.html>`_",
-                "`LP <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-lp.html>`_",
-                "`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-tvd.html>`_",
-                "`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ks.html>`_",
-                "`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-cdd.html>`_"].
+                ["`CI <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-bias-metric-class-imbalance.html>`_",
+                "`DPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-true-label-imbalance.html>`_",
+                "`KL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kl-divergence.html>`_",
+                "`JS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-jensen-shannon-divergence.html>`_",
+                "`LP <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-lp-norm.html>`_",
+                "`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-total-variation-distance.html>`_",
+                "`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kolmogorov-smirnov.html>`_",
+                "`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-cddl.html>`_"].
                 Defaults to computing all.
             post_training_methods (str or list[str]): Selector of a subset of potential metrics:
                 ["`DPPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-dppl.html>`_"
@@ -682,7 +686,7 @@ def run_bias(
             experiment_config (dict[str, str]): Experiment management configuration.
                 Dictionary contains three optional keys:
                 'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
-        """
+        """  # noqa E501
         analysis_config = data_config.get_config()
         analysis_config.update(bias_config.get_config())
         analysis_config["predictor"] = model_config.get_predictor_config()
 
@@ -27,7 +27,7 @@ class RedshiftDatasetDefinition(ApiObject):
 
     With this input, SQL queries will be executed using Redshift to generate datasets to S3.
 
-    Attributes:
+    Parameters:
         cluster_id (str): The Redshift cluster Identifier.
         database (str): The name of the Redshift database used in Redshift query execution.
         db_user (str): The database user name used in Redshift query execution.
@@ -60,7 +60,7 @@ class AthenaDatasetDefinition(ApiObject):
 
     With this input, SQL queries will be executed using Athena to generate datasets to S3.
 
-    Attributes:
+    Parameters:
         catalog (str): The name of the data catalog used in Athena query execution.
         database (str): The name of the database used in the Athena query execution.
         query_string (str): The SQL query statements, to be executed.
@@ -87,7 +87,7 @@ class AthenaDatasetDefinition(ApiObject):
 class DatasetDefinition(ApiObject):
     """DatasetDefinition input.
 
-    Attributes:
+    Parameters:
         data_distribution_type (str): Whether the generated dataset is FullyReplicated or
             ShardedByS3Key (default).
         input_mode (str): Whether to use File or Pipe input mode. In File (default) mode, Amazon
@@ -98,10 +98,9 @@ class DatasetDefinition(ApiObject):
         local_path (str): The local path where you want Amazon SageMaker to download the Dataset
             Definition inputs to run a processing job. LocalPath is an absolute path to the input
             data. This is a required parameter when `AppManaged` is False (default).
-        redshift_dataset_definition
-            (:class:`~sagemaker.dataset_definition.RedshiftDatasetDefinition`): Redshift
-            dataset definition.
-        athena_dataset_definition (:class:`~sagemaker.dataset_definition.AthenaDatasetDefinition`):
+        redshift_dataset_definition (:class:`~sagemaker.dataset_definition.inputs.RedshiftDatasetDefinition`):
+            Configuration for Redshift Dataset Definition input.
+        athena_dataset_definition (:class:`~sagemaker.dataset_definition.inputs.AthenaDatasetDefinition`):
             Configuration for Athena Dataset Definition input.
     """
 
@@ -126,7 +125,7 @@ class S3Input(ApiObject):
     S3 list operations are not strongly consistent.
     Use ManifestFile if strong consistency is required.
 
-    Attributes:
+    Parameters:
         s3_uri (str): the path to a specific S3 object or a S3 prefix
         local_path (str): the path to a local directory. If not provided, skips data download
             by SageMaker platform.
 
@@ -207,7 +207,8 @@ def _ingest_single_batch(
         for row in data_frame[start_index:end_index].itertuples():
             record = [
                 FeatureValue(
-                    feature_name=data_frame.columns[index - 1], value_as_string=str(row[index])
+                    feature_name=data_frame.columns[index - 1],
+                    value_as_string=str(row[index]),
                 )
                 for index in range(1, len(row))
                 if pd.notna(row[index])
@@ -270,13 +271,24 @@ def _run_multi_process(self, data_frame: DataFrame, wait=True, timeout=None):
             timeout (Union[int, float]): ``concurrent.futures.TimeoutError`` will be raised
                 if timeout is reached.
         """
+        # pylint: disable=I1101
         batch_size = math.ceil(data_frame.shape[0] / self.max_processes)
+        # pylint: enable=I1101
 
         args = []
         for i in range(self.max_processes):
             start_index = min(i * batch_size, data_frame.shape[0])
             end_index = min(i * batch_size + batch_size, data_frame.shape[0])
-            args += [(data_frame[start_index:end_index], start_index, timeout)]
+            args += [
+                (
+                    self.max_workers,
+                    self.feature_group_name,
+                    self.sagemaker_fs_runtime_client_config,
+                    data_frame[start_index:end_index],
+                    start_index,
+                    timeout,
+                )
+            ]
 
         def init_worker():
             # ignore keyboard interrupts in child processes.
@@ -285,13 +297,21 @@ def init_worker():
         self._processing_pool = ProcessingPool(self.max_processes, init_worker)
         self._processing_pool.restart(force=True)
 
-        f = lambda x: self._run_multi_threaded(*x)  # noqa: E731
+        f = lambda x: IngestionManagerPandas._run_multi_threaded(*x)  # noqa: E731
         self._async_result = self._processing_pool.amap(f, args)
 
         if wait:
             self.wait(timeout=timeout)
 
-    def _run_multi_threaded(self, data_frame: DataFrame, row_offset=0, timeout=None) -> List[int]:
+    @staticmethod
+    def _run_multi_threaded(
+        max_workers: int,
+        feature_group_name: str,
+        sagemaker_fs_runtime_client_config: Config,
+        data_frame: DataFrame,
+        row_offset=0,
+        timeout=None,
+    ) -> List[int]:
         """Start the ingestion process.
 
         Args:
@@ -305,21 +325,23 @@ def _run_multi_threaded(self, data_frame: DataFrame, row_offset=0, timeout=None)
         Returns:
             List of row indices that failed to be ingested.
         """
-        executor = ThreadPoolExecutor(max_workers=self.max_workers)
-        batch_size = math.ceil(data_frame.shape[0] / self.max_workers)
+        executor = ThreadPoolExecutor(max_workers=max_workers)
+        # pylint: disable=I1101
+        batch_size = math.ceil(data_frame.shape[0] / max_workers)
+        # pylint: enable=I1101
 
         futures = {}
-        for i in range(self.max_workers):
+        for i in range(max_workers):
             start_index = min(i * batch_size, data_frame.shape[0])
             end_index = min(i * batch_size + batch_size, data_frame.shape[0])
             futures[
                 executor.submit(
-                    self._ingest_single_batch,
-                    feature_group_name=self.feature_group_name,
+                    IngestionManagerPandas._ingest_single_batch,
+                    feature_group_name=feature_group_name,
                     data_frame=data_frame,
                     start_index=start_index,
                     end_index=end_index,
-                    client_config=self.sagemaker_fs_runtime_client_config,
+                    client_config=sagemaker_fs_runtime_client_config,
                 )
             ] = (start_index + row_offset, end_index + row_offset)
 
 
@@ -7,6 +7,7 @@
                 "ap-east-1": "001633400207",
                 "ap-northeast-1": "574779866223",
                 "ap-northeast-2": "709848358524",
+                "ap-northeast-3": "990339680094",
                 "ap-south-1": "126357580389",
                 "ap-southeast-1": "245545462676",
                 "ap-southeast-2": "563025443158",
Original file line number	Diff line number	Diff line change
`@@ -328,6 +328,7 @@ def create_model(`
`328`	`328`	`predictor_cls=predictor_cls,`
`329`	`329`	`name=name,`
`330`	`330`	`vpc_config=vpc_config,`
	`331`	`+ enable_network_isolation=enable_network_isolation,`
`331`	`332`	`sagemaker_session=sagemaker_session or self.sagemaker_session,`
`332`	`333`	`)`
`333`	`334`	`return pipeline`