feature: add RandomSeed to support reproducible HPO #3519

Merged: 6 commits, Dec 16, 2022
Changes shown from 5 commits
12 changes: 12 additions & 0 deletions src/sagemaker/session.py
@@ -2146,6 +2146,7 @@ def tune( # noqa: C901
use_spot_instances=False,
checkpoint_s3_uri=None,
checkpoint_local_path=None,
random_seed=None,
):
"""Create an Amazon SageMaker hyperparameter tuning job.

@@ -2226,6 +2227,9 @@ def tune( # noqa: C901
started. If the path is unset then SageMaker assumes the
checkpoints will be provided under `/opt/ml/checkpoints/`.
(default: ``None``).
random_seed (int): An initial value used to initialize a pseudo-random number generator.
Setting a random seed will make the hyperparameter tuning search strategies
produce more consistent configurations for the same tuning job. (default: ``None``).
"""

tune_request = {
@@ -2238,6 +2242,7 @@ def tune( # noqa: C901
objective_metric_name=objective_metric_name,
parameter_ranges=parameter_ranges,
early_stopping_type=early_stopping_type,
random_seed=random_seed,
strategy_config=strategy_config,
),
"TrainingJobDefinition": self._map_training_config(
@@ -2394,6 +2399,7 @@ def _map_tuning_config(
objective_type=None,
objective_metric_name=None,
parameter_ranges=None,
random_seed=None,
strategy_config=None,
):
"""Construct tuning job configuration dictionary.
@@ -2412,6 +2418,9 @@ def _map_tuning_config(
objective_metric_name (str): Name of the metric for evaluating training jobs.
parameter_ranges (dict): Dictionary of parameter ranges. These parameter ranges can
be one of three types: Continuous, Integer, or Categorical.
random_seed (int): An initial value used to initialize a pseudo-random number generator.
Setting a random seed will make the hyperparameter tuning search strategies
produce more consistent configurations for the same tuning job.
strategy_config (dict): A configuration for the hyperparameter tuning job optimisation
strategy.

@@ -2430,6 +2439,9 @@ def _map_tuning_config(
"TrainingJobEarlyStoppingType": early_stopping_type,
}

if random_seed is not None:
tuning_config["RandomSeed"] = random_seed

tuning_objective = cls._map_tuning_objective(objective_type, objective_metric_name)
if tuning_objective is not None:
tuning_config["HyperParameterTuningJobObjective"] = tuning_objective
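For reference, a minimal sketch (not part of the PR) of how the new argument surfaces in the HyperParameterTuningJobConfig that ``Session.tune()`` assembles; the surrounding values are illustrative placeholders taken from the unit tests below. Because the key is only added when the seed is not ``None``, existing callers see no change in the request body.

# Illustrative sketch: how _map_tuning_config folds the seed into the request.
random_seed = 0
tuning_config = {
    "Strategy": "Bayesian",
    "ResourceLimits": {"MaxNumberOfTrainingJobs": 100, "MaxParallelTrainingJobs": 5},
    "TrainingJobEarlyStoppingType": "Off",
}
if random_seed is not None:
    tuning_config["RandomSeed"] = random_seed  # field name in the CreateHyperParameterTuningJob request
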
18 changes: 18 additions & 0 deletions src/sagemaker/tuner.py
@@ -413,6 +413,7 @@ def __init__(
strategy_config: Optional[StrategyConfig] = None,
early_stopping_type: Union[str, PipelineVariable] = "Off",
estimator_name: Optional[str] = None,
random_seed: Optional[int] = None,
):
"""Creates a ``HyperparameterTuner`` instance.

@@ -470,6 +471,9 @@ def __init__(
estimator_name (str): A unique name to identify an estimator within the
hyperparameter tuning job, when more than one estimator is used with
the same tuning job (default: None).
random_seed (int): An initial value used to initialize a pseudo-random number generator.
Setting a random seed will make the hyperparameter tuning search strategies
produce more consistent configurations for the same tuning job.
"""
if hyperparameter_ranges is None or len(hyperparameter_ranges) == 0:
raise ValueError("Need to specify hyperparameter ranges")
@@ -516,6 +520,7 @@ def __init__(
self.latest_tuning_job = None
self.warm_start_config = warm_start_config
self.early_stopping_type = early_stopping_type
self.random_seed = random_seed

def _prepare_for_tuning(self, job_name=None, include_cls_metadata=False):
"""Prepare the tuner instance for tuning (fit)."""
@@ -1222,6 +1227,9 @@ def _prepare_init_params_from_job_description(cls, job_details):
"base_tuning_job_name": base_from_name(job_details["HyperParameterTuningJobName"]),
}

if "RandomSeed" in tuning_config:
params["random_seed"] = tuning_config["RandomSeed"]

if "HyperParameterTuningJobObjective" in tuning_config:
params["objective_metric_name"] = tuning_config["HyperParameterTuningJobObjective"][
"MetricName"
@@ -1483,6 +1491,7 @@ def _create_warm_start_tuner(self, additional_parents, warm_start_type, estimato
warm_start_type=warm_start_type, parents=all_parents
),
early_stopping_type=self.early_stopping_type,
random_seed=self.random_seed,
)

if len(self.estimator_dict) > 1:
@@ -1508,6 +1517,7 @@ def _create_warm_start_tuner(self, additional_parents, warm_start_type, estimato
max_parallel_jobs=self.max_parallel_jobs,
warm_start_config=WarmStartConfig(warm_start_type=warm_start_type, parents=all_parents),
early_stopping_type=self.early_stopping_type,
random_seed=self.random_seed,
)

@classmethod
@@ -1526,6 +1536,7 @@ def create(
tags=None,
warm_start_config=None,
early_stopping_type="Off",
random_seed=None,
):
"""Factory method to create a ``HyperparameterTuner`` instance.

@@ -1586,6 +1597,9 @@ def create(
Can be either 'Auto' or 'Off' (default: 'Off'). If set to 'Off', early stopping
will not be attempted. If set to 'Auto', early stopping of some training jobs may
happen, but is not guaranteed to.
random_seed (int): An initial value used to initialize a pseudo-random number generator.
Setting a random seed will make the hyperparameter tuning search strategies
produce more consistent configurations for the same tuning job.

Returns:
sagemaker.tuner.HyperparameterTuner: a new ``HyperparameterTuner`` object that can
@@ -1624,6 +1638,7 @@ def create(
tags=tags,
warm_start_config=warm_start_config,
early_stopping_type=early_stopping_type,
random_seed=random_seed,
)

for estimator_name in estimator_names[1:]:
@@ -1775,6 +1790,9 @@ def _get_tuner_args(cls, tuner, inputs):
"early_stopping_type": tuner.early_stopping_type,
}

if tuner.random_seed is not None:
tuning_config["random_seed"] = tuner.random_seed

if tuner.strategy_config is not None:
tuning_config["strategy_config"] = tuner.strategy_config.to_input_req()

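A short usage sketch of the new parameter at the ``HyperparameterTuner`` level. The estimator, metric name, hyperparameter range, and S3 path below are hypothetical placeholders, not values from this PR.

from sagemaker.tuner import HyperparameterTuner, ContinuousParameter

# `estimator` is assumed to be an already configured SageMaker estimator.
tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name="validation:accuracy",  # hypothetical metric name
    hyperparameter_ranges={"eta": ContinuousParameter(0.01, 0.3)},
    max_jobs=20,
    max_parallel_jobs=2,
    random_seed=42,  # new argument: seeds the search strategy for reproducible HPO
)
tuner.fit({"train": "s3://my-bucket/train"})  # hypothetical S3 input

Re-running the same tuning configuration with the same seed should yield more consistent hyperparameter suggestions across jobs.
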
6 changes: 6 additions & 0 deletions tests/unit/test_session.py
@@ -897,6 +897,7 @@ def test_train_pack_to_request(sagemaker_session):
"ResourceLimits": {"MaxNumberOfTrainingJobs": 100, "MaxParallelTrainingJobs": 5},
"ParameterRanges": SAMPLE_PARAM_RANGES,
"TrainingJobEarlyStoppingType": "Off",
"RandomSeed": 0,
},
"TrainingJobDefinition": {
"StaticHyperParameters": STATIC_HPs,
@@ -989,6 +990,7 @@ def assert_create_tuning_job_request(**kwrags):
sagemaker_session.tune(
job_name="dummy-tuning-1",
strategy="Bayesian",
random_seed=0,
objective_type="Maximize",
objective_metric_name="val-score",
max_jobs=100,
@@ -1080,6 +1082,7 @@ def assert_create_tuning_job_request(**kwrags):
"max_jobs": 100,
"max_parallel_jobs": 5,
"parameter_ranges": SAMPLE_PARAM_RANGES,
"random_seed": 0,
},
training_config={
"static_hyperparameters": STATIC_HPs,
@@ -1170,6 +1173,7 @@ def assert_create_tuning_job_request(**kwrags):
sagemaker_session.tune(
job_name="dummy-tuning-1",
strategy="Bayesian",
random_seed=0,
objective_type="Maximize",
objective_metric_name="val-score",
max_jobs=100,
@@ -1246,6 +1250,7 @@ def assert_create_tuning_job_request(**kwrags):
sagemaker_session.tune(
job_name="dummy-tuning-1",
strategy="Bayesian",
random_seed=0,
objective_type="Maximize",
objective_metric_name="val-score",
max_jobs=100,
@@ -1289,6 +1294,7 @@ def assert_create_tuning_job_request(**kwargs):
sagemaker_session.tune(
job_name="dummy-tuning-1",
strategy="Bayesian",
random_seed=0,
objective_type="Maximize",
objective_metric_name="val-score",
max_jobs=100,
1 change: 1 addition & 0 deletions tests/unit/test_tuner.py
@@ -545,6 +545,7 @@ def test_attach_tuning_job_with_estimator_from_hyperparameters(sagemaker_session
assert tuner.strategy == "Bayesian"
assert tuner.objective_type == "Minimize"
assert tuner.early_stopping_type == "Off"
assert tuner.random_seed == 0

assert isinstance(tuner.estimator, PCA)
assert tuner.estimator.role == ROLE
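A sketch of the attach path exercised by the test above: when a tuner is re-attached to an existing tuning job, the seed is read back from the job description (the ``RandomSeed`` key handled in ``_prepare_init_params_from_job_description``). The job name and session below are hypothetical.

from sagemaker.tuner import HyperparameterTuner

# Re-attach to a previously created tuning job (hypothetical name and session).
attached = HyperparameterTuner.attach(
    "existing-tuning-job",
    sagemaker_session=sagemaker_session,
)
print(attached.random_seed)  # restored from "RandomSeed" in the job description, if present
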
1 change: 1 addition & 0 deletions tests/unit/tuner_test_utils.py
@@ -112,6 +112,7 @@
],
},
"TrainingJobEarlyStoppingType": "Off",
"RandomSeed": 0,
},
"HyperParameterTuningJobName": JOB_NAME,
"TrainingJobDefinition": {