Merge branch 'master' into modernize-metadata

navinsoni · web-flow · commit 3c51dfc03ba3 · 2022-12-19T10:16:19.000-08:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,22 @@
 # Changelog
 
+## v2.124.0 (2022-12-16)
+
+### Features
+
+ * Doc update for TableFormatEnum
+ * Add p4de to smddp supported instance types
+ * Add disable_profiler field in config and propagate changes
+ * Added doc update for dataset builder
+
+### Bug Fixes and Other Changes
+
+ * Use Async Inference Config when available for endpoint update
+
+### Documentation Changes
+
+ * smdistributed libraries release notes
+
 ## v2.123.0 (2022-12-15)
 
 ### Features
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-2.123.1.dev0
+2.124.1.dev0
diff --git a/doc/api/prep_data/feature_store.rst b/doc/api/prep_data/feature_store.rst
@@ -73,6 +73,10 @@ Inputs
     :members:
     :show-inheritance:
 
+.. autoclass:: sagemaker.feature_store.inputs.TableFormatEnum
+    :members:
+    :show-inheritance:
+
 
 Dataset Builder
 ***************
diff --git a/src/sagemaker/clarify.py b/src/sagemaker/clarify.py
@@ -330,11 +330,11 @@ def __init__(
             s3_analysis_config_output_path (str): S3 prefix to store the analysis config output.
                 If this field is None, then the ``s3_output_path`` will be used
                 to store the ``analysis_config`` output.
-            label (str): Target attribute of the model required by bias metrics.
-                Specified as column name or index for CSV dataset or as JSONPath for JSONLines.
+            label (str): Target attribute of the model required by bias metrics. Specified as
+                column name or index for CSV dataset or as JMESPath expression for JSONLines.
                 *Required parameter* except for when the input dataset does not contain the label.
-            features (List[str]): JSONPath for locating the feature columns for bias metrics if the
-                dataset format is JSONLines.
+            features (List[str]): JMESPath expression to locate the feature columns for
+                bias metrics if the dataset format is JSONLines.
             dataset_type (str): Format of the dataset. Valid values are ``"text/csv"`` for CSV,
                 ``"application/jsonlines"`` for JSONLines, and
                 ``"application/x-parquet"`` for Parquet.
@@ -716,11 +716,11 @@ def __init__(
             ``label_headers=['cat','dog','fish']`` and infer the predicted label to be ``'fish'``.
 
         Args:
-            label (str or int): Index or JSONPath location in the model output for the prediction.
-                In case, this is a predicted label of the same type as the label in the dataset,
-                no further arguments need to be specified.
-            probability (str or int): Index or JSONPath location in the model output
-                for the predicted score(s).
+            label (str or int): Index or JMESPath expression to locate the prediction
+                in the model output. If this is a predicted label of the same type
+                as the label in the dataset, no further arguments need to be specified.
+            probability (str or int): Index or JMESPath expression to locate the predicted score(s)
+                in the model output.
             probability_threshold (float): An optional value for binary prediction tasks in which
                 the model returns a probability, to indicate the threshold to convert the
                 prediction to a boolean value. Default is ``0.5``.
@@ -1645,9 +1645,9 @@ def run_explainability(
                 You can request multiple methods at once by passing in a list of
                 `~sagemaker.clarify.ExplainabilityConfig`.
             model_scores (int or str or :class:`~sagemaker.clarify.ModelPredictedLabelConfig`):
-                Index or JSONPath to locate the predicted scores in the model output. This is not
-                required if the model output is a single score. Alternatively, it can be an instance
-                of :class:`~sagemaker.clarify.SageMakerClarifyProcessor`
+                Index or JMESPath expression to locate the predicted scores in the model output.
+                This is not required if the model output is a single score. Alternatively,
+                it can be an instance of :class:`~sagemaker.clarify.ModelPredictedLabelConfig`
                 to provide more parameters like ``label_headers``.
             wait (bool): Whether the call should wait until the job completes (default: True).
             logs (bool): Whether to show the logs produced by the job.
@@ -1774,9 +1774,9 @@ def run_bias_and_explainability(
                 str or
                 :class:`~sagemaker.clarify.ModelPredictedLabelConfig`
             ):
-                Index or JSONPath to locate the predicted scores in the model output. This is not
-                required if the model output is a single score. Alternatively, it can be an instance
-                of :class:`~sagemaker.clarify.SageMakerClarifyProcessor`
+                Index or JMESPath expression to locate the predicted scores in the model output.
+                This is not required if the model output is a single score. Alternatively,
+                it can be an instance of :class:`~sagemaker.clarify.ModelPredictedLabelConfig`
                 to provide more parameters like ``label_headers``.
             wait (bool): Whether the call should wait until the job completes (default: True).
             logs (bool): Whether to show the logs produced by the job.
diff --git a/src/sagemaker/model_monitor/clarify_model_monitoring.py b/src/sagemaker/model_monitor/clarify_model_monitoring.py
@@ -842,8 +842,8 @@ def __init__(self, bias_config, headers=None, label=None):
             bias_config (sagemaker.clarify.BiasConfig): Config object related to bias
                 configurations.
             headers (list[str]): A list of column names in the input dataset.
-            label (str): Target attribute for the model required by bias metrics.
-                Specified as column name or index for CSV dataset, or as JSONPath for JSONLines.
+            label (str): Target attribute for the model required by bias metrics. Specified as
+                column name or index for CSV dataset, or as JMESPath expression for JSONLines.
         """
         self.analysis_config = bias_config.get_config()
         if headers is not None:
@@ -889,9 +889,10 @@ def suggest_baseline(
             model_config (:class:`~sagemaker.clarify.ModelConfig`): Config of the model and its
                 endpoint to be created.
             model_scores (int or str or :class:`~sagemaker.clarify.ModelPredictedLabelConfig`):
-                Index or JSONPath to locate the predicted scores in the model output. This is not
-                required if the model output is a single score. Alternatively, it can be an instance
-                of ModelPredictedLabelConfig to provide more parameters like label_headers.
+                Index or JMESPath expression to locate the predicted scores in the model output.
+                This is not required if the model output is a single score. Alternatively,
+                it can be an instance of ModelPredictedLabelConfig to provide more parameters
+                like label_headers.
             wait (bool): Whether the call should wait until the job completes (default: False).
             logs (bool): Whether to show the logs produced by the job.
                 Only meaningful when wait is True (default: False).
@@ -1302,12 +1303,12 @@ def __init__(
         Args:
             analysis_config (BiasAnalysisConfig or ExplainabilityAnalysisConfig): analysis config
                 from configurations of the baselining job.
-            features_attribute (str): JSONpath to locate features in predictor request payload.
-                Only required when predictor content type is JSONlines.
-            inference_attribute (str): Index, header or JSONpath to locate predicted label in
-                predictor response payload.
-            probability_attribute (str): Index or JSONpath location in the model output for
-                probabilities or scores to be used for explainability.
+            features_attribute (str): JMESPath expression to locate features in predictor request
+                payload. Only required when predictor content type is JSONlines.
+            inference_attribute (str): Index, header or JMESPath expression to locate predicted
+                label in predictor response payload.
+            probability_attribute (str): Index or JMESPath expression to locate probabilities or
+                scores in the model output for computing feature attribution.
             probability_threshold_attribute (float): Value to indicate the threshold to select
                 the binary label in the case of binary classification. Default is 0.5.
         """
diff --git a/src/sagemaker/model_monitor/model_monitoring.py b/src/sagemaker/model_monitor/model_monitoring.py
@@ -1061,12 +1061,13 @@ def _generate_env_map(
             dataset_format (dict): The format of the baseline_dataset.
             dataset_source_container_path (str): The path to the dataset source.
             inference_attribute (str): Index or JSONpath to locate predicted label(s).
-                Only used for ModelQualityMonitor, ModelBiasMonitor, and ModelExplainabilityMonitor
+                Only used for ModelQualityMonitor.
             probability_attribute (str or int): Index or JSONpath to locate probabilities.
-                Only used for ModelQualityMonitor, ModelBiasMonitor and ModelExplainabilityMonitor
-            ground_truth_attribute (str): Index or JSONpath to locate actual label(s).
+                Only used for ModelQualityMonitor.
+            ground_truth_attribute (str): Index to locate actual label(s).
+                Only used for ModelQualityMonitor.
             probability_threshold_attribute (float): threshold to convert probabilities to binaries
-                Only used for ModelQualityMonitor, ModelBiasMonitor and ModelExplainabilityMonitor
+                Only used for ModelQualityMonitor.
 
         Returns:
             dict: Dictionary of environment keys and values.
@@ -2600,10 +2601,13 @@ def suggest_baseline(
             problem_type (str): The type of problem of this model quality monitoring. Valid
                 values are "Regression", "BinaryClassification", "MulticlassClassification".
             inference_attribute (str): Index or JSONpath to locate predicted label(s).
+                Only used for ModelQualityMonitor.
             probability_attribute (str or int): Index or JSONpath to locate probabilities.
-            ground_truth_attribute (str): Index or JSONpath to locate actual label(s).
+                Only used for ModelQualityMonitor.
+            ground_truth_attribute (str): Index to locate actual label(s).
+                Only used for ModelQualityMonitor.
             probability_threshold_attribute (float): threshold to convert probabilities to binaries
-                Only used for ModelQualityMonitor, ModelBiasMonitor and ModelExplainabilityMonitor
+                Only used for ModelQualityMonitor.
             post_analytics_processor_script (str): The path to the record post-analytics processor
                 script. This can be a local path or an S3 uri.
             output_s3_uri (str): Desired S3 destination of the constraint_violations
diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py
@@ -2146,6 +2146,7 @@ def tune(  # noqa: C901
         use_spot_instances=False,
         checkpoint_s3_uri=None,
         checkpoint_local_path=None,
+        random_seed=None,
     ):
         """Create an Amazon SageMaker hyperparameter tuning job.
 
@@ -2226,6 +2227,9 @@ def tune(  # noqa: C901
                 started. If the path is unset then SageMaker assumes the
                 checkpoints will be provided under `/opt/ml/checkpoints/`.
                 (default: ``None``).
+            random_seed (int): An initial value used to initialize a pseudo-random number generator.
+                Setting a random seed makes the hyperparameter tuning search strategies
+                produce more consistent configurations for the same tuning job. (default: ``None``).
         """
 
         tune_request = {
@@ -2238,6 +2242,7 @@ def tune(  # noqa: C901
                 objective_metric_name=objective_metric_name,
                 parameter_ranges=parameter_ranges,
                 early_stopping_type=early_stopping_type,
+                random_seed=random_seed,
                 strategy_config=strategy_config,
             ),
             "TrainingJobDefinition": self._map_training_config(
@@ -2394,6 +2399,7 @@ def _map_tuning_config(
         objective_type=None,
         objective_metric_name=None,
         parameter_ranges=None,
+        random_seed=None,
         strategy_config=None,
     ):
         """Construct tuning job configuration dictionary.
@@ -2412,6 +2418,9 @@ def _map_tuning_config(
             objective_metric_name (str): Name of the metric for evaluating training jobs.
             parameter_ranges (dict): Dictionary of parameter ranges. These parameter ranges can
                 be one of three types: Continuous, Integer, or Categorical.
+            random_seed (int): An initial value used to initialize a pseudo-random number generator.
+                Setting a random seed makes the hyperparameter tuning search strategies
+                produce more consistent configurations for the same tuning job.
             strategy_config (dict): A configuration for the hyperparameter tuning job optimisation
                 strategy.
 
@@ -2430,6 +2439,9 @@ def _map_tuning_config(
             "TrainingJobEarlyStoppingType": early_stopping_type,
         }
 
+        if random_seed is not None:
+            tuning_config["RandomSeed"] = random_seed
+
         tuning_objective = cls._map_tuning_objective(objective_type, objective_metric_name)
         if tuning_objective is not None:
             tuning_config["HyperParameterTuningJobObjective"] = tuning_objective
diff --git a/src/sagemaker/tuner.py b/src/sagemaker/tuner.py
@@ -413,6 +413,7 @@ def __init__(
         strategy_config: Optional[StrategyConfig] = None,
         early_stopping_type: Union[str, PipelineVariable] = "Off",
         estimator_name: Optional[str] = None,
+        random_seed: Optional[int] = None,
     ):
         """Creates a ``HyperparameterTuner`` instance.
 
@@ -470,6 +471,9 @@ def __init__(
             estimator_name (str): A unique name to identify an estimator within the
                 hyperparameter tuning job, when more than one estimator is used with
                 the same tuning job (default: None).
+            random_seed (int): An initial value used to initialize a pseudo-random number generator.
+                Setting a random seed makes the hyperparameter tuning search strategies
+                produce more consistent configurations for the same tuning job.
         """
         if hyperparameter_ranges is None or len(hyperparameter_ranges) == 0:
             raise ValueError("Need to specify hyperparameter ranges")
@@ -516,6 +520,7 @@ def __init__(
         self.latest_tuning_job = None
         self.warm_start_config = warm_start_config
         self.early_stopping_type = early_stopping_type
+        self.random_seed = random_seed
 
     def _prepare_for_tuning(self, job_name=None, include_cls_metadata=False):
         """Prepare the tuner instance for tuning (fit)."""
@@ -1222,6 +1227,9 @@ def _prepare_init_params_from_job_description(cls, job_details):
             "base_tuning_job_name": base_from_name(job_details["HyperParameterTuningJobName"]),
         }
 
+        if "RandomSeed" in tuning_config:
+            params["random_seed"] = tuning_config["RandomSeed"]
+
         if "HyperParameterTuningJobObjective" in tuning_config:
             params["objective_metric_name"] = tuning_config["HyperParameterTuningJobObjective"][
                 "MetricName"
@@ -1483,6 +1491,7 @@ def _create_warm_start_tuner(self, additional_parents, warm_start_type, estimato
                     warm_start_type=warm_start_type, parents=all_parents
                 ),
                 early_stopping_type=self.early_stopping_type,
+                random_seed=self.random_seed,
             )
 
         if len(self.estimator_dict) > 1:
@@ -1508,6 +1517,7 @@ def _create_warm_start_tuner(self, additional_parents, warm_start_type, estimato
             max_parallel_jobs=self.max_parallel_jobs,
             warm_start_config=WarmStartConfig(warm_start_type=warm_start_type, parents=all_parents),
             early_stopping_type=self.early_stopping_type,
+            random_seed=self.random_seed,
         )
 
     @classmethod
@@ -1526,6 +1536,7 @@ def create(
         tags=None,
         warm_start_config=None,
         early_stopping_type="Off",
+        random_seed=None,
     ):
         """Factory method to create a ``HyperparameterTuner`` instance.
 
@@ -1586,6 +1597,9 @@ def create(
                 Can be either 'Auto' or 'Off' (default: 'Off'). If set to 'Off', early stopping
                 will not be attempted. If set to 'Auto', early stopping of some training jobs may
                 happen, but is not guaranteed to.
+            random_seed (int): An initial value used to initialize a pseudo-random number generator.
+                Setting a random seed makes the hyperparameter tuning search strategies
+                produce more consistent configurations for the same tuning job.
 
         Returns:
             sagemaker.tuner.HyperparameterTuner: a new ``HyperparameterTuner`` object that can
@@ -1624,6 +1638,7 @@ def create(
             tags=tags,
             warm_start_config=warm_start_config,
             early_stopping_type=early_stopping_type,
+            random_seed=random_seed,
         )
 
         for estimator_name in estimator_names[1:]:
@@ -1775,6 +1790,9 @@ def _get_tuner_args(cls, tuner, inputs):
             "early_stopping_type": tuner.early_stopping_type,
         }
 
+        if tuner.random_seed is not None:
+            tuning_config["random_seed"] = tuner.random_seed
+
         if tuner.strategy_config is not None:
             tuning_config["strategy_config"] = tuner.strategy_config.to_input_req()
 
diff --git a/src/sagemaker/workflow/clarify_check_step.py b/src/sagemaker/workflow/clarify_check_step.py
@@ -132,8 +132,8 @@ class ModelExplainabilityCheckConfig(ClarifyCheckConfig):
         model_config (ModelConfig): Config of the model and its endpoint to be created.
         explainability_config (SHAPConfig): Config of the specific explainability method.
             Currently, only SHAP is supported.
-        model_scores (str or int or ModelPredictedLabelConfig): Index or JSONPath location
-            in the model output for the predicted scores to be explained (default: None).
+        model_scores (str or int or ModelPredictedLabelConfig): Index or JMESPath expression
+            to locate the predicted scores in the model output (default: None).
             This is not required if the model output is a single score. Alternatively,
             an instance of ModelPredictedLabelConfig can be provided
             but this field CANNOT be any type of the `PipelineVariable`.
diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py
diff --git a/tests/unit/test_tuner.py b/tests/unit/test_tuner.py
diff --git a/tests/unit/tuner_test_utils.py b/tests/unit/tuner_test_utils.py