
Commit ea0d053

feature: add RandomSeed to support reproducible HPO (#3519)

Authored by timyber (Tim Song) and trajanikant (Rajanikant Tenguria)

* feature: add RandomSeed to support reproducible HPO
* fix pylint

Co-authored-by: Tim Song <[email protected]>
Co-authored-by: Rajanikant Tenguria <[email protected]>
1 parent 53108b6 commit ea0d053

File tree

5 files changed: +38 −0 lines
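In user terms, the commit lets callers pin the pseudo-random number generator behind the tuning search. A minimal usage sketch (not part of the diff; the estimator, metric name, and S3 path are placeholders):

from sagemaker.tuner import ContinuousParameter, HyperparameterTuner

# `my_estimator` is a placeholder for any configured SageMaker Estimator.
tuner = HyperparameterTuner(
    estimator=my_estimator,
    objective_metric_name="val-score",
    hyperparameter_ranges={"learning_rate": ContinuousParameter(0.01, 0.2)},
    strategy="Bayesian",
    max_jobs=100,
    max_parallel_jobs=5,
    random_seed=0,  # new in this commit: seeds the search strategy
)
tuner.fit({"train": "s3://my-bucket/train"})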

src/sagemaker/session.py (+12 lines)
@@ -2146,6 +2146,7 @@ def tune( # noqa: C901
         use_spot_instances=False,
         checkpoint_s3_uri=None,
         checkpoint_local_path=None,
+        random_seed=None,
     ):
         """Create an Amazon SageMaker hyperparameter tuning job.
@@ -2226,6 +2227,9 @@ def tune( # noqa: C901
                 started. If the path is unset then SageMaker assumes the
                 checkpoints will be provided under `/opt/ml/checkpoints/`.
                 (default: ``None``).
+            random_seed (int): An initial value used to initialize a pseudo-random number generator.
+                Setting a random seed will make the hyperparameter tuning search strategies
+                produce more consistent configurations for the same tuning job. (default: ``None``).
         """

         tune_request = {
@@ -2238,6 +2242,7 @@ def tune( # noqa: C901
                 objective_metric_name=objective_metric_name,
                 parameter_ranges=parameter_ranges,
                 early_stopping_type=early_stopping_type,
+                random_seed=random_seed,
                 strategy_config=strategy_config,
             ),
             "TrainingJobDefinition": self._map_training_config(
@@ -2394,6 +2399,7 @@ def _map_tuning_config(
         objective_type=None,
         objective_metric_name=None,
         parameter_ranges=None,
+        random_seed=None,
         strategy_config=None,
     ):
         """Construct tuning job configuration dictionary.
@@ -2412,6 +2418,9 @@ def _map_tuning_config(
             objective_metric_name (str): Name of the metric for evaluating training jobs.
             parameter_ranges (dict): Dictionary of parameter ranges. These parameter ranges can
                 be one of three types: Continuous, Integer, or Categorical.
+            random_seed (int): An initial value used to initialize a pseudo-random number generator.
+                Setting a random seed will make the hyperparameter tuning search strategies
+                produce more consistent configurations for the same tuning job.
             strategy_config (dict): A configuration for the hyperparameter tuning job optimisation
                 strategy.
@@ -2430,6 +2439,9 @@ def _map_tuning_config(
             "TrainingJobEarlyStoppingType": early_stopping_type,
         }

+        if random_seed is not None:
+            tuning_config["RandomSeed"] = random_seed
+
         tuning_objective = cls._map_tuning_objective(objective_type, objective_metric_name)
         if tuning_objective is not None:
             tuning_config["HyperParameterTuningJobObjective"] = tuning_objective

src/sagemaker/tuner.py (+18 lines)
@@ -413,6 +413,7 @@ def __init__(
         strategy_config: Optional[StrategyConfig] = None,
         early_stopping_type: Union[str, PipelineVariable] = "Off",
         estimator_name: Optional[str] = None,
+        random_seed: Optional[int] = None,
     ):
         """Creates a ``HyperparameterTuner`` instance.
@@ -470,6 +471,9 @@ def __init__(
             estimator_name (str): A unique name to identify an estimator within the
                 hyperparameter tuning job, when more than one estimator is used with
                 the same tuning job (default: None).
+            random_seed (int): An initial value used to initialize a pseudo-random number generator.
+                Setting a random seed will make the hyperparameter tuning search strategies
+                produce more consistent configurations for the same tuning job.
         """
         if hyperparameter_ranges is None or len(hyperparameter_ranges) == 0:
             raise ValueError("Need to specify hyperparameter ranges")
@@ -516,6 +520,7 @@ def __init__(
         self.latest_tuning_job = None
         self.warm_start_config = warm_start_config
         self.early_stopping_type = early_stopping_type
+        self.random_seed = random_seed

     def _prepare_for_tuning(self, job_name=None, include_cls_metadata=False):
         """Prepare the tuner instance for tuning (fit)."""
@@ -1222,6 +1227,9 @@ def _prepare_init_params_from_job_description(cls, job_details):
             "base_tuning_job_name": base_from_name(job_details["HyperParameterTuningJobName"]),
         }

+        if "RandomSeed" in tuning_config:
+            params["random_seed"] = tuning_config["RandomSeed"]
+
         if "HyperParameterTuningJobObjective" in tuning_config:
             params["objective_metric_name"] = tuning_config["HyperParameterTuningJobObjective"][
                 "MetricName"
@@ -1483,6 +1491,7 @@ def _create_warm_start_tuner(self, additional_parents, warm_start_type, estimato
                 warm_start_type=warm_start_type, parents=all_parents
             ),
             early_stopping_type=self.early_stopping_type,
+            random_seed=self.random_seed,
         )

         if len(self.estimator_dict) > 1:
@@ -1508,6 +1517,7 @@ def _create_warm_start_tuner(self, additional_parents, warm_start_type, estimato
             max_parallel_jobs=self.max_parallel_jobs,
             warm_start_config=WarmStartConfig(warm_start_type=warm_start_type, parents=all_parents),
             early_stopping_type=self.early_stopping_type,
+            random_seed=self.random_seed,
         )

     @classmethod
@@ -1526,6 +1536,7 @@ def create(
         tags=None,
         warm_start_config=None,
         early_stopping_type="Off",
+        random_seed=None,
     ):
         """Factory method to create a ``HyperparameterTuner`` instance.
@@ -1586,6 +1597,9 @@ def create(
                 Can be either 'Auto' or 'Off' (default: 'Off'). If set to 'Off', early stopping
                 will not be attempted. If set to 'Auto', early stopping of some training jobs may
                 happen, but is not guaranteed to.
+            random_seed (int): An initial value used to initialize a pseudo-random number generator.
+                Setting a random seed will make the hyperparameter tuning search strategies
+                produce more consistent configurations for the same tuning job.

         Returns:
             sagemaker.tuner.HyperparameterTuner: a new ``HyperparameterTuner`` object that can
@@ -1624,6 +1638,7 @@ def create(
             tags=tags,
             warm_start_config=warm_start_config,
             early_stopping_type=early_stopping_type,
+            random_seed=random_seed,
         )

         for estimator_name in estimator_names[1:]:
@@ -1775,6 +1790,9 @@ def _get_tuner_args(cls, tuner, inputs):
             "early_stopping_type": tuner.early_stopping_type,
         }

+        if tuner.random_seed is not None:
+            tuning_config["random_seed"] = tuner.random_seed
+
         if tuner.strategy_config is not None:
             tuning_config["strategy_config"] = tuner.strategy_config.to_input_req()
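Because _create_warm_start_tuner and create now forward the seed, warm-start child tuners should inherit it from their parent. A hedged sketch, reusing the tuner from the first example with a placeholder parent job name:

# transfer_learning_tuner routes through _create_warm_start_tuner, so the
# child carries the parent's random_seed forward.
child_tuner = tuner.transfer_learning_tuner(additional_parents={"parent-tuning-job"})
assert child_tuner.random_seed == tuner.random_seed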

tests/unit/test_session.py (+6 lines)
@@ -897,6 +897,7 @@ def test_train_pack_to_request(sagemaker_session):
         "ResourceLimits": {"MaxNumberOfTrainingJobs": 100, "MaxParallelTrainingJobs": 5},
         "ParameterRanges": SAMPLE_PARAM_RANGES,
         "TrainingJobEarlyStoppingType": "Off",
+        "RandomSeed": 0,
     },
     "TrainingJobDefinition": {
         "StaticHyperParameters": STATIC_HPs,
@@ -989,6 +990,7 @@ def assert_create_tuning_job_request(**kwrags):
     sagemaker_session.tune(
         job_name="dummy-tuning-1",
         strategy="Bayesian",
+        random_seed=0,
         objective_type="Maximize",
         objective_metric_name="val-score",
         max_jobs=100,
@@ -1080,6 +1082,7 @@ def assert_create_tuning_job_request(**kwrags):
             "max_jobs": 100,
             "max_parallel_jobs": 5,
             "parameter_ranges": SAMPLE_PARAM_RANGES,
+            "random_seed": 0,
         },
         training_config={
             "static_hyperparameters": STATIC_HPs,
@@ -1170,6 +1173,7 @@ def assert_create_tuning_job_request(**kwrags):
     sagemaker_session.tune(
         job_name="dummy-tuning-1",
         strategy="Bayesian",
+        random_seed=0,
         objective_type="Maximize",
         objective_metric_name="val-score",
         max_jobs=100,
@@ -1246,6 +1250,7 @@ def assert_create_tuning_job_request(**kwrags):
     sagemaker_session.tune(
         job_name="dummy-tuning-1",
         strategy="Bayesian",
+        random_seed=0,
         objective_type="Maximize",
         objective_metric_name="val-score",
         max_jobs=100,
@@ -1289,6 +1294,7 @@ def assert_create_tuning_job_request(**kwargs):
     sagemaker_session.tune(
         job_name="dummy-tuning-1",
         strategy="Bayesian",
+        random_seed=0,
         objective_type="Maximize",
         objective_metric_name="val-score",
         max_jobs=100,

tests/unit/test_tuner.py (+1 line)
@@ -545,6 +545,7 @@ def test_attach_tuning_job_with_estimator_from_hyperparameters(sagemaker_session
     assert tuner.strategy == "Bayesian"
     assert tuner.objective_type == "Minimize"
     assert tuner.early_stopping_type == "Off"
+    assert tuner.random_seed == 0

     assert isinstance(tuner.estimator, PCA)
     assert tuner.estimator.role == ROLE

tests/unit/tuner_test_utils.py (+1 line)
@@ -112,6 +112,7 @@
             ],
         },
         "TrainingJobEarlyStoppingType": "Off",
+        "RandomSeed": 0,
     },
     "HyperParameterTuningJobName": JOB_NAME,
     "TrainingJobDefinition": {
