aws
diff --git a/‎.githooks/pre-push
+16 b/‎.githooks/pre-push
+16
diff --git a/‎CHANGELOG.md
+99 b/‎CHANGELOG.md
+99
diff --git a/‎README.rst
+12 b/‎README.rst
+12
diff --git a/‎VERSION
+1-1 b/‎VERSION
+1-1
diff --git a/‎ci-scripts/queue_build.py
+7-3 b/‎ci-scripts/queue_build.py
+7-3
diff --git a/‎doc/v2.rst
+6 b/‎doc/v2.rst
+6
diff --git a/‎src/sagemaker/clarify.py
+33-12 b/‎src/sagemaker/clarify.py
+33-12
diff --git a/‎src/sagemaker/deprecations.py
+62 b/‎src/sagemaker/deprecations.py
+62
@@ -0,0 +1,16 @@
+#!/bin/sh
+# Pre-push hook: runs the style checks, the docs build and the py36/py37/py38 unit tests via tox.
+
+set -e  # stop at the first failing step so the hook exits non-zero
+
+export TOX_PARALLEL_NO_SPINNER=1  # exported (no trailing comma) so the tox process inherits it
+export PY_COLORS=0  # a plain assignment would stay local to this shell
+start_time=$(date +%s)
+tox -e flake8,pylint,docstyle,black-check,twine --parallel all
+./ci-scripts/displaytime.sh 'flake8,pylint,docstyle,black-check,twine' "$start_time"
+start_time=$(date +%s)
+tox -e sphinx,doc8 --parallel all
+./ci-scripts/displaytime.sh 'sphinx,doc8' "$start_time"
+start_time=$(date +%s)
+tox -e py36,py37,py38 --parallel all -- tests/unit
+./ci-scripts/displaytime.sh 'py36,py37,py38 unit' "$start_time"
@@ -1,5 +1,104 @@
 # Changelog
 
+## v2.66.2.post0 (2021-10-28)
+
+### Documentation Changes
+
+ * Update estimator docstrings to add Fast File Mode
+
+## v2.66.2 (2021-10-27)
+
+### Bug Fixes and Other Changes
+
+ * expose num_clusters parameter for clarify shap in shapconfig
+ * Update cron job to run hourly
+
+## v2.66.1 (2021-10-26)
+
+### Bug Fixes and Other Changes
+
+ * HuggingFace image_uri generation for inference
+ * Replace '_' and '/' with '-' in filename creation
+
+## v2.66.0 (2021-10-25)
+
+### Features
+
+ * Add image_uris.retrieve() support for AutoGluon
+
+### Documentation Changes
+
+ * fix documentation for input types in estimator.fit
+ * Add JsonGet v2 deprecation
+
+## v2.65.0 (2021-10-21)
+
+### Features
+
+ * modify RLEstimator to use newly generated Ray image (1.6.0)
+ * network isolation mode for xgboost
+ * update clarify imageURI for PDT
+
+### Bug Fixes and Other Changes
+
+ * retry downstream_trials test
+ * Add retries to pipeline execution
+
+## v2.64.0 (2021-10-20)
+
+### Deprecations and Removals
+
+ * warn for deprecation - Lambda model-predictor
+
+### Features
+
+ * Add support for TF 2.5
+ * Add a pre-push git hook
+
+### Bug Fixes and Other Changes
+
+ * add s3_analysis_config_output_path field in DataConfig constructor
+ * make marketplace jobnames random
+
+## v2.63.2 (2021-10-18)
+
+### Bug Fixes and Other Changes
+
+ * Update timeouts for integ tests from 20 to 40
+
+## v2.63.1 (2021-10-14)
+
+### Bug Fixes and Other Changes
+
+ * HF estimator attach modified to work with py38
+
+## v2.63.0 (2021-10-13)
+
+### Features
+
+ * support configurable retry for pipeline steps
+
+## v2.62.0 (2021-10-12)
+
+### Features
+
+ * Hugging Face Transformers 4.10 for PT1.8/TF2.4 & Transformers 4.11 for PT1.9/TF2.5
+
+### Bug Fixes and Other Changes
+
+ * repack_model script used in pipelines to support source_dir and dependencies
+
+## v2.61.0 (2021-10-11)
+
+### Features
+
+ * add support for PyTorch 1.9.0
+
+### Bug Fixes and Other Changes
+
+ * Update TRAINING_DEFAULT_TIMEOUT_MINUTES to 40 min
+ * notebook test for parallel PRs
+
 ## v2.60.0 (2021-10-08)
 
 ### Features
 
@@ -154,6 +154,18 @@ You can also run them in parallel:
     tox -- -n auto tests/integ
 
 
+Git Hooks
+~~~~~~~~~
+
+To enable all git hooks in the .githooks directory, run these commands in the repository directory:
+
+::
+
+    find .git/hooks -type l -exec rm {} \;
+    find .githooks -type f -exec ln -sf ../../{} .git/hooks/ \;
+
+To enable an individual git hook, simply copy or symlink it from the .githooks/ directory into the .git/hooks/ directory.
+
 Building Sphinx docs
 ~~~~~~~~~~~~~~~~~~~~
 
 
@@ -1 +1 @@
-2.60.1.dev0
+2.66.3.dev0
@@ -13,7 +13,9 @@
 from __future__ import absolute_import
 
 import os
+import re
 import time
+
 import boto3
 
 account = boto3.client(
@@ -23,9 +25,11 @@
 
 
 def queue_build():
-    build_id = os.environ.get("CODEBUILD_BUILD_ID", "CODEBUILD-BUILD-ID")
-    source_version = os.environ.get("CODEBUILD_SOURCE_VERSION", "CODEBUILD-SOURCE-VERSION").replace(
-        "/", "-"
+    build_id = re.sub("[_/]", "-", os.environ.get("CODEBUILD_BUILD_ID", "CODEBUILD-BUILD-ID"))
+    source_version = re.sub(
+        "[_/]",
+        "-",
+        os.environ.get("CODEBUILD_SOURCE_VERSION", "CODEBUILD-SOURCE-VERSION"),
     )
     ticket_number = int(1000 * time.time())
     filename = "%s_%s_%s" % (ticket_number, build_id, source_version)
 
@@ -265,6 +265,12 @@ Please use :func:`sagemaker.s3.parse_s3_url` instead.
 
 The class ``sagemaker.session.ModelContainer`` has been deprecated, as it is not needed for creating inference pipelines.
 
+``sagemaker.workflow.condition_step.JsonGet``
+---------------------------------------------
+
+The class ``sagemaker.workflow.condition_step.JsonGet`` has been deprecated.
+Please use :class:`sagemaker.workflow.functions.JsonGet` instead.
+
 Parameter and Class Name Changes
 ================================
 
 
@@ -31,6 +31,7 @@ def __init__(
         self,
         s3_data_input_path,
         s3_output_path,
+        s3_analysis_config_output_path=None,
         label=None,
         headers=None,
         features=None,
@@ -43,6 +44,9 @@ def __init__(
         Args:
             s3_data_input_path (str): Dataset S3 prefix/object URI.
             s3_output_path (str): S3 prefix to store the output.
+            s3_analysis_config_output_path (str): S3 prefix to store the analysis_config output
+                If this field is None, then the s3_output_path will be used
+                to store the analysis_config output
             label (str): Target attribute of the model required by bias metrics (optional for SHAP)
                 Specified as column name or index for CSV dataset, or as JSONPath for JSONLines.
             headers (list[str]): A list of column names in the input dataset.
@@ -61,6 +65,7 @@ def __init__(
             )
         self.s3_data_input_path = s3_data_input_path
         self.s3_output_path = s3_output_path
+        self.s3_analysis_config_output_path = s3_analysis_config_output_path
         self.s3_data_distribution_type = s3_data_distribution_type
         self.s3_compression_type = s3_compression_type
         self.label = label
@@ -300,12 +305,13 @@ class SHAPConfig(ExplainabilityConfig):
 
     def __init__(
         self,
-        baseline,
-        num_samples,
-        agg_method,
+        baseline=None,
+        num_samples=None,
+        agg_method=None,
         use_logit=False,
         save_local_shap_values=True,
         seed=None,
+        num_clusters=None,
     ):
         """Initializes config for SHAP.
 
@@ -315,34 +321,49 @@ def __init__(
                 be the same as the dataset format. Each row should contain only the feature
                 columns/values and omit the label column/values. If None a baseline will be
                 calculated automatically by using K-means or K-prototypes in the input dataset.
-            num_samples (int): Number of samples to be used in the Kernel SHAP algorithm.
+            num_samples (None or int): Number of samples to be used in the Kernel SHAP algorithm.
                 This number determines the size of the generated synthetic dataset to compute the
-                SHAP values.
-            agg_method (str): Aggregation method for global SHAP values. Valid values are
+                SHAP values. If not provided then Clarify job will choose a proper value according
+                to the count of features.
+            agg_method (None or str): Aggregation method for global SHAP values. Valid values are
                 "mean_abs" (mean of absolute SHAP values for all instances),
                 "median" (median of SHAP values for all instances) and
                 "mean_sq" (mean of squared SHAP values for all instances).
+                If not provided then Clarify job uses method "mean_abs"
             use_logit (bool): Indicator of whether the logit function is to be applied to the model
                 predictions. Default is False. If "use_logit" is true then the SHAP values will
                 have log-odds units.
             save_local_shap_values (bool): Indicator of whether to save the local SHAP values
                 in the output location. Default is True.
             seed (int): seed value to get deterministic SHAP values. Default is None.
+            num_clusters (None or int): If a baseline is not provided, Clarify automatically
+                computes a baseline dataset via a clustering algorithm (K-means/K-prototypes).
+                num_clusters is a parameter for this algorithm. num_clusters will be the resulting
+                size of the baseline dataset. If not provided, Clarify job will use a default value.
         """
-        if agg_method not in ["mean_abs", "median", "mean_sq"]:
+        if agg_method is not None and agg_method not in ["mean_abs", "median", "mean_sq"]:
             raise ValueError(
                 f"Invalid agg_method {agg_method}." f" Please choose mean_abs, median, or mean_sq."
             )
-
+        if num_clusters is not None and baseline is not None:
+            raise ValueError(
+                "Baseline and num_clusters cannot be provided together. "
+                "Please specify one of the two."
+            )
         self.shap_config = {
-            "baseline": baseline,
-            "num_samples": num_samples,
-            "agg_method": agg_method,
             "use_logit": use_logit,
             "save_local_shap_values": save_local_shap_values,
         }
+        if baseline is not None:
+            self.shap_config["baseline"] = baseline
+        if num_samples is not None:
+            self.shap_config["num_samples"] = num_samples
+        if agg_method is not None:
+            self.shap_config["agg_method"] = agg_method
         if seed is not None:
             self.shap_config["seed"] = seed
+        if num_clusters is not None:
+            self.shap_config["num_clusters"] = num_clusters
 
     def get_explainability_config(self):
         """Returns config."""
@@ -473,7 +494,7 @@ def _run(
                 json.dump(analysis_config, f)
             s3_analysis_config_file = _upload_analysis_config(
                 analysis_config_file,
-                data_config.s3_output_path,
+                data_config.s3_analysis_config_output_path or data_config.s3_output_path,
                 self.sagemaker_session,
                 kms_key,
             )
 
@@ -50,6 +50,46 @@ def renamed_warning(phrase):
     _warn(f"{phrase} has been renamed")
 
 
+def deprecation_warn(name, date, msg=None):
+    """Raise a warning for soon to be deprecated feature in sagemaker>=2
+
+    Args:
+        name (str): Name of the feature
+        date (str): the date when the feature will be deprecated
+        msg (str): optional text appended right after the date; left out when None.
+    """
+    _warn(f"{name} will be deprecated on {date}.{msg or ''}")
+
+
+def deprecation_warning(date, msg=None):
+    """Decorator for raising deprecation warning for a feature in sagemaker>=2
+
+    Args:
+        date (str): the date when the feature will be deprecated
+        msg (str): optional text appended to the warning message, after the date.
+
+    Usage:
+        @deprecation_warning(msg="message", date="date")
+        def sample_function():
+            print("xxxx....")
+
+        @deprecation_warning(msg="message", date="date")
+        class SampleClass():
+            def __init__(self):
+                print("xxxx....")
+    """
+    import functools  # pylint: disable=C0415
+
+    def deprecate(obj):
+        def wrapper(*args, **kwargs):
+            deprecation_warn(obj.__name__, date, msg)
+            return obj(*args, **kwargs)
+
+        return functools.wraps(obj)(wrapper)  # keep obj's __name__/__doc__ on the wrapper
+
+    return deprecate
+
+
 def renamed_kwargs(old_name, new_name, value, kwargs):
     """Checks if the deprecated argument is in kwargs
 
@@ -106,6 +146,28 @@ def func(*args, **kwargs):  # pylint: disable=W0613
     return func
 
 
+def deprecated(obj):
+    """Decorator for raising deprecated warning for a feature in sagemaker>=2
+
+    Usage:
+        @deprecated
+        def sample_function():
+            print("xxxx....")
+
+        @deprecated
+        class SampleClass():
+            def __init__(self):
+                print("xxxx....")
+    """
+    import functools  # pylint: disable=C0415
+
+    def wrapper(*args, **kwargs):
+        removed_warning(obj.__name__)
+        return obj(*args, **kwargs)
+
+    return functools.wraps(obj)(wrapper)  # keep obj's __name__/__doc__ on the wrapper
+
+
 def deprecated_function(func, name):
     """Wrap a function with a deprecation warning.