Skip to content

Commit 3c51dfc

Browse files
authored
Merge branch 'master' into modernize-metadata
2 parents eb43677 + ea0d053 commit 3c51dfc

File tree

12 files changed

+99
-35
lines changed

12 files changed

+99
-35
lines changed

CHANGELOG.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,22 @@
11
# Changelog
22

3+
## v2.124.0 (2022-12-16)
4+
5+
### Features
6+
7+
* Doc update for TableFormatEnum
8+
* Add p4de to smddp supported instance types
9+
* Add disable_profiler field in config and propagate changes
10+
* Added doc update for dataset builder
11+
12+
### Bug Fixes and Other Changes
13+
14+
* Use Async Inference Config when available for endpoint update
15+
16+
### Documentation Changes
17+
18+
* smdistributed libraries release notes
19+
320
## v2.123.0 (2022-12-15)
421

522
### Features

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.123.1.dev0
1+
2.124.1.dev0

doc/api/prep_data/feature_store.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,10 @@ Inputs
7373
:members:
7474
:show-inheritance:
7575

76+
.. autoclass:: sagemaker.feature_store.inputs.TableFormatEnum
77+
:members:
78+
:show-inheritance:
79+
7680

7781
Dataset Builder
7882
***************

src/sagemaker/clarify.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -330,11 +330,11 @@ def __init__(
330330
s3_analysis_config_output_path (str): S3 prefix to store the analysis config output.
331331
If this field is None, then the ``s3_output_path`` will be used
332332
to store the ``analysis_config`` output.
333-
label (str): Target attribute of the model required by bias metrics.
334-
Specified as column name or index for CSV dataset or as JSONPath for JSONLines.
333+
label (str): Target attribute of the model required by bias metrics. Specified as
334+
column name or index for CSV dataset or as JMESPath expression for JSONLines.
335335
*Required parameter* except for when the input dataset does not contain the label.
336-
features (List[str]): JSONPath for locating the feature columns for bias metrics if the
337-
dataset format is JSONLines.
336+
features (List[str]): JMESPath expression to locate the feature columns for
337+
bias metrics if the dataset format is JSONLines.
338338
dataset_type (str): Format of the dataset. Valid values are ``"text/csv"`` for CSV,
339339
``"application/jsonlines"`` for JSONLines, and
340340
``"application/x-parquet"`` for Parquet.
@@ -716,11 +716,11 @@ def __init__(
716716
``label_headers=['cat','dog','fish']`` and infer the predicted label to be ``'fish'``.
717717
718718
Args:
719-
label (str or int): Index or JSONPath location in the model output for the prediction.
720-
In case, this is a predicted label of the same type as the label in the dataset,
721-
no further arguments need to be specified.
722-
probability (str or int): Index or JSONPath location in the model output
723-
for the predicted score(s).
719+
label (str or int): Index or JMESPath expression to locate the prediction
720+
in the model output. In case this is a predicted label of the same type
721+
as the label in the dataset, no further arguments need to be specified.
722+
probability (str or int): Index or JMESPath expression to locate the predicted score(s)
723+
in the model output.
724724
probability_threshold (float): An optional value for binary prediction tasks in which
725725
the model returns a probability, to indicate the threshold to convert the
726726
prediction to a boolean value. Default is ``0.5``.
@@ -1645,9 +1645,9 @@ def run_explainability(
16451645
You can request multiple methods at once by passing in a list of
16461646
`~sagemaker.clarify.ExplainabilityConfig`.
16471647
model_scores (int or str or :class:`~sagemaker.clarify.ModelPredictedLabelConfig`):
1648-
Index or JSONPath to locate the predicted scores in the model output. This is not
1649-
required if the model output is a single score. Alternatively, it can be an instance
1650-
of :class:`~sagemaker.clarify.SageMakerClarifyProcessor`
1648+
Index or JMESPath expression to locate the predicted scores in the model output.
1649+
This is not required if the model output is a single score. Alternatively,
1650+
it can be an instance of :class:`~sagemaker.clarify.ModelPredictedLabelConfig`
16511651
to provide more parameters like ``label_headers``.
16521652
wait (bool): Whether the call should wait until the job completes (default: True).
16531653
logs (bool): Whether to show the logs produced by the job.
@@ -1774,9 +1774,9 @@ def run_bias_and_explainability(
17741774
str or
17751775
:class:`~sagemaker.clarify.ModelPredictedLabelConfig`
17761776
):
1777-
Index or JSONPath to locate the predicted scores in the model output. This is not
1778-
required if the model output is a single score. Alternatively, it can be an instance
1779-
of :class:`~sagemaker.clarify.SageMakerClarifyProcessor`
1777+
Index or JMESPath expression to locate the predicted scores in the model output.
1778+
This is not required if the model output is a single score. Alternatively,
1779+
it can be an instance of :class:`~sagemaker.clarify.ModelPredictedLabelConfig`
17801780
to provide more parameters like ``label_headers``.
17811781
wait (bool): Whether the call should wait until the job completes (default: True).
17821782
logs (bool): Whether to show the logs produced by the job.

src/sagemaker/model_monitor/clarify_model_monitoring.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -842,8 +842,8 @@ def __init__(self, bias_config, headers=None, label=None):
842842
bias_config (sagemaker.clarify.BiasConfig): Config object related to bias
843843
configurations.
844844
headers (list[str]): A list of column names in the input dataset.
845-
label (str): Target attribute for the model required by bias metrics.
846-
Specified as column name or index for CSV dataset, or as JSONPath for JSONLines.
845+
label (str): Target attribute for the model required by bias metrics. Specified as
846+
column name or index for CSV dataset, or as JMESPath expression for JSONLines.
847847
"""
848848
self.analysis_config = bias_config.get_config()
849849
if headers is not None:
@@ -889,9 +889,10 @@ def suggest_baseline(
889889
model_config (:class:`~sagemaker.clarify.ModelConfig`): Config of the model and its
890890
endpoint to be created.
891891
model_scores (int or str or :class:`~sagemaker.clarify.ModelPredictedLabelConfig`):
892-
Index or JSONPath to locate the predicted scores in the model output. This is not
893-
required if the model output is a single score. Alternatively, it can be an instance
894-
of ModelPredictedLabelConfig to provide more parameters like label_headers.
892+
Index or JMESPath expression to locate the predicted scores in the model output.
893+
This is not required if the model output is a single score. Alternatively,
894+
it can be an instance of ModelPredictedLabelConfig to provide more parameters
895+
like label_headers.
895896
wait (bool): Whether the call should wait until the job completes (default: False).
896897
logs (bool): Whether to show the logs produced by the job.
897898
Only meaningful when wait is True (default: False).
@@ -1302,12 +1303,12 @@ def __init__(
13021303
Args:
13031304
analysis_config (BiasAnalysisConfig or ExplainabilityAnalysisConfig): analysis config
13041305
from configurations of the baselining job.
1305-
features_attribute (str): JSONpath to locate features in predictor request payload.
1306-
Only required when predictor content type is JSONlines.
1307-
inference_attribute (str): Index, header or JSONpath to locate predicted label in
1308-
predictor response payload.
1309-
probability_attribute (str): Index or JSONpath location in the model output for
1310-
probabilities or scores to be used for explainability.
1306+
features_attribute (str): JMESPath expression to locate features in predictor request
1307+
payload. Only required when predictor content type is JSONlines.
1308+
inference_attribute (str): Index, header or JMESPath expression to locate predicted
1309+
label in predictor response payload.
1310+
probability_attribute (str): Index or JMESPath expression to locate probabilities or
1311+
scores in the model output for computing feature attribution.
13111312
probability_threshold_attribute (float): Value to indicate the threshold to select
13121313
the binary label in the case of binary classification. Default is 0.5.
13131314
"""

src/sagemaker/model_monitor/model_monitoring.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1061,12 +1061,13 @@ def _generate_env_map(
10611061
dataset_format (dict): The format of the baseline_dataset.
10621062
dataset_source_container_path (str): The path to the dataset source.
10631063
inference_attribute (str): Index or JSONpath to locate predicted label(s).
1064-
Only used for ModelQualityMonitor, ModelBiasMonitor, and ModelExplainabilityMonitor
1064+
Only used for ModelQualityMonitor.
10651065
probability_attribute (str or int): Index or JSONpath to locate probabilities.
1066-
Only used for ModelQualityMonitor, ModelBiasMonitor and ModelExplainabilityMonitor
1067-
ground_truth_attribute (str): Index or JSONpath to locate actual label(s).
1066+
Only used for ModelQualityMonitor.
1067+
ground_truth_attribute (str): Index to locate actual label(s).
1068+
Only used for ModelQualityMonitor.
10681069
probability_threshold_attribute (float): threshold to convert probabilities to binaries
1069-
Only used for ModelQualityMonitor, ModelBiasMonitor and ModelExplainabilityMonitor
1070+
Only used for ModelQualityMonitor.
10701071
10711072
Returns:
10721073
dict: Dictionary of environment keys and values.
@@ -2600,10 +2601,13 @@ def suggest_baseline(
26002601
problem_type (str): The type of problem of this model quality monitoring. Valid
26012602
values are "Regression", "BinaryClassification", "MulticlassClassification".
26022603
inference_attribute (str): Index or JSONpath to locate predicted label(s).
2604+
Only used for ModelQualityMonitor.
26032605
probability_attribute (str or int): Index or JSONpath to locate probabilities.
2604-
ground_truth_attribute (str): Index or JSONpath to locate actual label(s).
2606+
Only used for ModelQualityMonitor.
2607+
ground_truth_attribute (str): Index to locate actual label(s).
2608+
Only used for ModelQualityMonitor.
26052609
probability_threshold_attribute (float): threshold to convert probabilities to binaries
2606-
Only used for ModelQualityMonitor, ModelBiasMonitor and ModelExplainabilityMonitor
2610+
Only used for ModelQualityMonitor.
26072611
post_analytics_processor_script (str): The path to the record post-analytics processor
26082612
script. This can be a local path or an S3 uri.
26092613
output_s3_uri (str): Desired S3 destination of the constraint_violations

src/sagemaker/session.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2146,6 +2146,7 @@ def tune( # noqa: C901
21462146
use_spot_instances=False,
21472147
checkpoint_s3_uri=None,
21482148
checkpoint_local_path=None,
2149+
random_seed=None,
21492150
):
21502151
"""Create an Amazon SageMaker hyperparameter tuning job.
21512152
@@ -2226,6 +2227,9 @@ def tune( # noqa: C901
22262227
started. If the path is unset then SageMaker assumes the
22272228
checkpoints will be provided under `/opt/ml/checkpoints/`.
22282229
(default: ``None``).
2230+
random_seed (int): An initial value used to initialize a pseudo-random number generator.
2231+
Setting a random seed will make the hyperparameter tuning search strategies
2232+
produce more consistent configurations for the same tuning job. (default: ``None``).
22292233
"""
22302234

22312235
tune_request = {
@@ -2238,6 +2242,7 @@ def tune( # noqa: C901
22382242
objective_metric_name=objective_metric_name,
22392243
parameter_ranges=parameter_ranges,
22402244
early_stopping_type=early_stopping_type,
2245+
random_seed=random_seed,
22412246
strategy_config=strategy_config,
22422247
),
22432248
"TrainingJobDefinition": self._map_training_config(
@@ -2394,6 +2399,7 @@ def _map_tuning_config(
23942399
objective_type=None,
23952400
objective_metric_name=None,
23962401
parameter_ranges=None,
2402+
random_seed=None,
23972403
strategy_config=None,
23982404
):
23992405
"""Construct tuning job configuration dictionary.
@@ -2412,6 +2418,9 @@ def _map_tuning_config(
24122418
objective_metric_name (str): Name of the metric for evaluating training jobs.
24132419
parameter_ranges (dict): Dictionary of parameter ranges. These parameter ranges can
24142420
be one of three types: Continuous, Integer, or Categorical.
2421+
random_seed (int): An initial value used to initialize a pseudo-random number generator.
2422+
Setting a random seed will make the hyperparameter tuning search strategies
2423+
produce more consistent configurations for the same tuning job.
24152424
strategy_config (dict): A configuration for the hyperparameter tuning job optimisation
24162425
strategy.
24172426
@@ -2430,6 +2439,9 @@ def _map_tuning_config(
24302439
"TrainingJobEarlyStoppingType": early_stopping_type,
24312440
}
24322441

2442+
if random_seed is not None:
2443+
tuning_config["RandomSeed"] = random_seed
2444+
24332445
tuning_objective = cls._map_tuning_objective(objective_type, objective_metric_name)
24342446
if tuning_objective is not None:
24352447
tuning_config["HyperParameterTuningJobObjective"] = tuning_objective

src/sagemaker/tuner.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,7 @@ def __init__(
413413
strategy_config: Optional[StrategyConfig] = None,
414414
early_stopping_type: Union[str, PipelineVariable] = "Off",
415415
estimator_name: Optional[str] = None,
416+
random_seed: Optional[int] = None,
416417
):
417418
"""Creates a ``HyperparameterTuner`` instance.
418419
@@ -470,6 +471,9 @@ def __init__(
470471
estimator_name (str): A unique name to identify an estimator within the
471472
hyperparameter tuning job, when more than one estimator is used with
472473
the same tuning job (default: None).
474+
random_seed (int): An initial value used to initialize a pseudo-random number generator.
475+
Setting a random seed will make the hyperparameter tuning search strategies
476+
produce more consistent configurations for the same tuning job.
473477
"""
474478
if hyperparameter_ranges is None or len(hyperparameter_ranges) == 0:
475479
raise ValueError("Need to specify hyperparameter ranges")
@@ -516,6 +520,7 @@ def __init__(
516520
self.latest_tuning_job = None
517521
self.warm_start_config = warm_start_config
518522
self.early_stopping_type = early_stopping_type
523+
self.random_seed = random_seed
519524

520525
def _prepare_for_tuning(self, job_name=None, include_cls_metadata=False):
521526
"""Prepare the tuner instance for tuning (fit)."""
@@ -1222,6 +1227,9 @@ def _prepare_init_params_from_job_description(cls, job_details):
12221227
"base_tuning_job_name": base_from_name(job_details["HyperParameterTuningJobName"]),
12231228
}
12241229

1230+
if "RandomSeed" in tuning_config:
1231+
params["random_seed"] = tuning_config["RandomSeed"]
1232+
12251233
if "HyperParameterTuningJobObjective" in tuning_config:
12261234
params["objective_metric_name"] = tuning_config["HyperParameterTuningJobObjective"][
12271235
"MetricName"
@@ -1483,6 +1491,7 @@ def _create_warm_start_tuner(self, additional_parents, warm_start_type, estimato
14831491
warm_start_type=warm_start_type, parents=all_parents
14841492
),
14851493
early_stopping_type=self.early_stopping_type,
1494+
random_seed=self.random_seed,
14861495
)
14871496

14881497
if len(self.estimator_dict) > 1:
@@ -1508,6 +1517,7 @@ def _create_warm_start_tuner(self, additional_parents, warm_start_type, estimato
15081517
max_parallel_jobs=self.max_parallel_jobs,
15091518
warm_start_config=WarmStartConfig(warm_start_type=warm_start_type, parents=all_parents),
15101519
early_stopping_type=self.early_stopping_type,
1520+
random_seed=self.random_seed,
15111521
)
15121522

15131523
@classmethod
@@ -1526,6 +1536,7 @@ def create(
15261536
tags=None,
15271537
warm_start_config=None,
15281538
early_stopping_type="Off",
1539+
random_seed=None,
15291540
):
15301541
"""Factory method to create a ``HyperparameterTuner`` instance.
15311542
@@ -1586,6 +1597,9 @@ def create(
15861597
Can be either 'Auto' or 'Off' (default: 'Off'). If set to 'Off', early stopping
15871598
will not be attempted. If set to 'Auto', early stopping of some training jobs may
15881599
happen, but is not guaranteed to.
1600+
random_seed (int): An initial value used to initialize a pseudo-random number generator.
1601+
Setting a random seed will make the hyperparameter tuning search strategies
1602+
produce more consistent configurations for the same tuning job.
15891603
15901604
Returns:
15911605
sagemaker.tuner.HyperparameterTuner: a new ``HyperparameterTuner`` object that can
@@ -1624,6 +1638,7 @@ def create(
16241638
tags=tags,
16251639
warm_start_config=warm_start_config,
16261640
early_stopping_type=early_stopping_type,
1641+
random_seed=random_seed,
16271642
)
16281643

16291644
for estimator_name in estimator_names[1:]:
@@ -1775,6 +1790,9 @@ def _get_tuner_args(cls, tuner, inputs):
17751790
"early_stopping_type": tuner.early_stopping_type,
17761791
}
17771792

1793+
if tuner.random_seed is not None:
1794+
tuning_config["random_seed"] = tuner.random_seed
1795+
17781796
if tuner.strategy_config is not None:
17791797
tuning_config["strategy_config"] = tuner.strategy_config.to_input_req()
17801798

src/sagemaker/workflow/clarify_check_step.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,8 @@ class ModelExplainabilityCheckConfig(ClarifyCheckConfig):
132132
model_config (ModelConfig): Config of the model and its endpoint to be created.
133133
explainability_config (SHAPConfig): Config of the specific explainability method.
134134
Currently, only SHAP is supported.
135-
model_scores (str or int or ModelPredictedLabelConfig): Index or JSONPath location
136-
in the model output for the predicted scores to be explained (default: None).
135+
model_scores (str or int or ModelPredictedLabelConfig): Index or JMESPath expression
136+
to locate the predicted scores in the model output (default: None).
137137
This is not required if the model output is a single score. Alternatively,
138138
an instance of ModelPredictedLabelConfig can be provided
139139
but this field CANNOT be any type of the `PipelineVariable`.

0 commit comments

Comments
 (0)