feature: support the GridSearch strategy for hyperparameter optimization

Anton Repushko · Anton Repushko · commit be76bb145a66 · 2022-10-26T15:59:31.000+02:00
diff --git a/src/sagemaker/tuner.py b/src/sagemaker/tuner.py
@@ -60,6 +60,7 @@
 HYPERPARAMETER_TUNING_JOB_NAME = "HyperParameterTuningJobName"
 PARENT_HYPERPARAMETER_TUNING_JOBS = "ParentHyperParameterTuningJobs"
 WARM_START_TYPE = "WarmStartType"
+GRID_SEARCH = "GridSearch"
 
 logger = logging.getLogger(__name__)
 
@@ -219,7 +220,7 @@ def __init__(
         metric_definitions: Optional[List[Dict[str, Union[str, PipelineVariable]]]] = None,
         strategy: Union[str, PipelineVariable] = "Bayesian",
         objective_type: Union[str, PipelineVariable] = "Maximize",
-        max_jobs: Union[int, PipelineVariable] = 1,
+        max_jobs: Union[int, PipelineVariable] = None,
         max_parallel_jobs: Union[int, PipelineVariable] = 1,
         tags: Optional[List[Dict[str, Union[str, PipelineVariable]]]] = None,
         base_tuning_job_name: Optional[str] = None,
@@ -258,7 +259,8 @@ def __init__(
                 evaluating training jobs. This value can be either 'Minimize' or
                 'Maximize' (default: 'Maximize').
             max_jobs (int or PipelineVariable): Maximum total number of training jobs to start for
-                the hyperparameter tuning job (default: 1).
+                the hyperparameter tuning job. The default value is unspecified for the GridSearch
+                strategy and the default value is 1 for all other strategies (default: None).
             max_parallel_jobs (int or PipelineVariable): Maximum number of parallel training jobs to
                 start (default: 1).
             tags (list[dict[str, str] or list[dict[str, PipelineVariable]]): List of tags for
@@ -311,7 +313,11 @@ def __init__(
 
         self.strategy = strategy
         self.objective_type = objective_type
+        # GridSearch keeps max_jobs as None (recalculated later); other strategies default to 1.
+        # Compare by value: an identity check can miss an equal but distinct string object.
         self.max_jobs = max_jobs
+        if max_jobs is None and strategy != GRID_SEARCH:
+            self.max_jobs = 1
         self.max_parallel_jobs = max_parallel_jobs
 
         self.tags = tags
@@ -1301,7 +1307,7 @@ def create(
         base_tuning_job_name=None,
         strategy="Bayesian",
         objective_type="Maximize",
-        max_jobs=1,
+        max_jobs=None,
         max_parallel_jobs=1,
         tags=None,
         warm_start_config=None,
@@ -1351,7 +1357,8 @@ def create(
             objective_type (str): The type of the objective metric for evaluating training jobs.
                 This value can be either 'Minimize' or 'Maximize' (default: 'Maximize').
             max_jobs (int): Maximum total number of training jobs to start for the hyperparameter
-                tuning job (default: 1).
+                tuning job. The default value is unspecified for the GridSearch strategy
+                and the value is 1 for all other strategies (default: None).
             max_parallel_jobs (int): Maximum number of parallel training jobs to start
                 (default: 1).
             tags (list[dict]): List of tags for labeling the tuning job (default: None). For more,
diff --git a/tests/unit/test_tuner.py b/tests/unit/test_tuner.py
@@ -1774,3 +1774,21 @@ def test_no_tags_prefixes_non_jumpstart_models(
     assert sagemaker_session.create_model.call_args_list[0][1]["tags"] == []
 
     assert sagemaker_session.endpoint_from_production_variants.call_args_list[0][1]["tags"] == []
+
+def test_create_tuner_with_grid_search_strategy():
+    tuner = HyperparameterTuner.create(
+        base_tuning_job_name=BASE_JOB_NAME,
+        estimator_dict={ESTIMATOR_NAME: ESTIMATOR},
+        objective_metric_name_dict={ESTIMATOR_NAME: OBJECTIVE_METRIC_NAME},
+        hyperparameter_ranges_dict={ESTIMATOR_NAME: HYPERPARAMETER_RANGES},
+        metric_definitions_dict={ESTIMATOR_NAME: METRIC_DEFINITIONS},
+        strategy="GridSearch",  # max_jobs is deliberately not passed
+        objective_type="Minimize",
+        max_parallel_jobs=1,
+        tags=TAGS,
+        warm_start_config=WARM_START_CONFIG,
+        early_stopping_type="Auto",
+    )
+
+    assert tuner is not None
+    assert tuner.max_jobs is None  # no fallback to 1 for GridSearch