Commit 4bac185

Add support for Hyperparameter Tuning Early Stopping (#550)
1 parent 3db7807 commit 4bac185

10 files changed: +91 -18 lines changed

CHANGELOG.rst

+3 -2

@@ -2,8 +2,8 @@
 CHANGELOG
 =========
 
-1.16.2.dev
-==========
+1.16.2
+======
 
 * enhancement: Check for S3 paths being passed as entry point
 * feature: Add support for AugmentedManifestFile and ShuffleConfig
@@ -15,6 +15,7 @@ CHANGELOG
 * bug-fix: Update PyYAML version to avoid conflicts with docker-compose
 * doc-fix: Correct the numbered list in the table of contents
 * doc-fix: Add Airflow API documentation
+* feature: HyperparameterTuner: add Early Stopping support
 
 1.16.1.post1
 ============

README.rst

+16

@@ -614,6 +614,22 @@ A hyperparameter range can be one of three types: continuous, integer, or catego
 The SageMaker Python SDK provides corresponding classes for defining these different types.
 You can define up to 20 hyperparameters to search over, but each value of a categorical hyperparameter range counts against that limit.
 
+By default, training job early stopping is turned off. To enable early stopping for the tuning job, you need to set the ``early_stopping_type`` parameter to ``Auto``:
+
+.. code:: python
+
+    # Enable early stopping
+    my_tuner = HyperparameterTuner(estimator=my_estimator,  # previously-configured Estimator object
+                                   objective_metric_name='validation-accuracy',
+                                   hyperparameter_ranges={'learning-rate': ContinuousParameter(0.05, 0.06)},
+                                   metric_definitions=[{'Name': 'validation-accuracy', 'Regex': 'validation-accuracy=(\d\.\d+)'}],
+                                   max_jobs=100,
+                                   max_parallel_jobs=10,
+                                   early_stopping_type='Auto')
+
+When early stopping is turned on, Amazon SageMaker will automatically stop a training job if it appears unlikely to produce a model of better quality than other jobs.
+If not using built-in Amazon SageMaker algorithms, note that, for early stopping to be effective, the objective metric should be emitted at epoch level.
+
 If you are using an Amazon SageMaker built-in algorithm, you don't need to pass in anything for ``metric_definitions``.
 In addition, the ``fit()`` call uses a list of ``RecordSet`` objects instead of a dictionary:
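A note on the README addition above: for custom (non-built-in) algorithms, emitting the objective metric "at epoch level" simply means the training script should write one matching line to its log per epoch, since the metric is scraped from the job's log output using the ``Regex`` supplied in ``metric_definitions``. A minimal sketch of such a training loop, using a dummy accuracy value purely for illustration:

.. code:: python

    # Hypothetical per-epoch metric logging for a custom training script.
    # Each printed line matches the 'validation-accuracy=(\d\.\d+)' regex shown above.
    num_epochs = 10

    for epoch in range(num_epochs):
        # ... train for one epoch, then compute validation accuracy ...
        accuracy = 0.050 + 0.001 * epoch  # stand-in value, not a real measurement
        print('validation-accuracy={:.4f}'.format(accuracy))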

doc/conf.py

+1 -1

@@ -32,7 +32,7 @@ def __getattr__(cls, name):
                 'numpy', 'scipy', 'scipy.sparse']
 sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)
 
-version = '1.16.1.post1'
+version = '1.16.2'
 project = u'sagemaker'
 
 # Add any Sphinx extension module names here, as strings. They can be extensions

setup.py

+1 -1

@@ -33,7 +33,7 @@ def read(fname):
 
 
 # Declare minimal set for installation
-required_packages = ['boto3>=1.9.55', 'numpy>=1.9.0', 'protobuf>=3.1', 'scipy>=0.19.0',
+required_packages = ['boto3>=1.9.64', 'numpy>=1.9.0', 'protobuf>=3.1', 'scipy>=0.19.0',
                      'urllib3>=1.21', 'PyYAML>=3.2, <4', 'protobuf3-to-dict>=0.1.5',
                      'docker-compose>=1.23.0', 'requests>=2.20.0, <2.21']

src/sagemaker/__init__.py

+1 -1

@@ -39,4 +39,4 @@
 from sagemaker.session import s3_input  # noqa: F401
 from sagemaker.session import get_execution_role  # noqa: F401
 
-__version__ = '1.16.1.post1'
+__version__ = '1.16.2'

src/sagemaker/session.py

+6 -1

@@ -350,7 +350,8 @@ def tune(self, job_name, strategy, objective_type, objective_metric_name,
              max_jobs, max_parallel_jobs, parameter_ranges,
              static_hyperparameters, input_mode, metric_definitions,
              role, input_config, output_config, resource_config, stop_condition, tags,
-             warm_start_config, enable_network_isolation=False, image=None, algorithm_arn=None):
+             warm_start_config, enable_network_isolation=False, image=None, algorithm_arn=None,
+             early_stopping_type='Off'):
         """Create an Amazon SageMaker hyperparameter tuning job
 
         Args:
@@ -396,6 +397,9 @@ def tune(self, job_name, strategy, objective_type, objective_metric_name,
                 https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html.
             warm_start_config (dict): Configuration defining the type of warm start and
                 other required configurations.
+            early_stopping_type (str): Specifies whether early stopping is enabled for the job.
+                Can be either 'Auto' or 'Off'. If set to 'Off', early stopping will not be attempted.
+                If set to 'Auto', early stopping of some training jobs may happen, but is not guaranteed to.
         """
         tune_request = {
             'HyperParameterTuningJobName': job_name,
@@ -410,6 +414,7 @@ def tune(self, job_name, strategy, objective_type, objective_metric_name,
                     'MaxParallelTrainingJobs': max_parallel_jobs,
                 },
                 'ParameterRanges': parameter_ranges,
+                'TrainingJobEarlyStoppingType': early_stopping_type,
             },
             'TrainingJobDefinition': {
                 'StaticHyperParameters': static_hyperparameters,
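For context on the change above: ``early_stopping_type`` is passed through unchanged into the ``HyperParameterTuningJobConfig`` part of the request that ``tune()`` builds for CreateHyperParameterTuningJob. A rough sketch of the resulting fragment (the job name, limits, and ranges below are placeholder values, and the training job definition is omitted):

.. code:: python

    tune_request = {
        'HyperParameterTuningJobName': 'my-tuning-job',  # placeholder name
        'HyperParameterTuningJobConfig': {
            'ResourceLimits': {
                'MaxNumberOfTrainingJobs': 100,   # placeholder
                'MaxParallelTrainingJobs': 10,    # placeholder
            },
            'ParameterRanges': {},                # elided in this sketch
            # New in this commit, forwarded from the early_stopping_type argument:
            'TrainingJobEarlyStoppingType': 'Auto',
        },
        # 'TrainingJobDefinition': {...}          # unchanged by this commit
    }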

src/sagemaker/tuner.py

+8 -2

@@ -165,7 +165,7 @@ class HyperparameterTuner(object):
 
     def __init__(self, estimator, objective_metric_name, hyperparameter_ranges, metric_definitions=None,
                  strategy='Bayesian', objective_type='Maximize', max_jobs=1, max_parallel_jobs=1,
-                 tags=None, base_tuning_job_name=None, warm_start_config=None):
+                 tags=None, base_tuning_job_name=None, warm_start_config=None, early_stopping_type='Off'):
         """Initialize a ``HyperparameterTuner``. It takes an estimator to obtain configuration information
         for training jobs that are created as the result of a hyperparameter tuning job.
@@ -194,6 +194,9 @@ def __init__(self, estimator, objective_metric_name, hyperparameter_ranges, metr
                 a default job name is generated, based on the training image name and current timestamp.
             warm_start_config (sagemaker.tuner.WarmStartConfig): A ``WarmStartConfig`` object that has been initialized
                 with the configuration defining the nature of warm start tuning job.
+            early_stopping_type (str): Specifies whether early stopping is enabled for the job.
+                Can be either 'Auto' or 'Off' (default: 'Off'). If set to 'Off', early stopping will not be attempted.
+                If set to 'Auto', early stopping of some training jobs may happen, but is not guaranteed to.
         """
         self._hyperparameter_ranges = hyperparameter_ranges
         if self._hyperparameter_ranges is None or len(self._hyperparameter_ranges) == 0:
@@ -214,6 +217,7 @@ def __init__(self, estimator, objective_metric_name, hyperparameter_ranges, metr
         self._current_job_name = None
         self.latest_tuning_job = None
         self.warm_start_config = warm_start_config
+        self.early_stopping_type = early_stopping_type
 
     def _prepare_for_training(self, job_name=None, include_cls_metadata=True):
         if job_name is not None:
@@ -445,7 +449,8 @@ def _prepare_init_params_from_job_description(cls, job_details):
             'strategy': tuning_config['Strategy'],
             'max_jobs': tuning_config['ResourceLimits']['MaxNumberOfTrainingJobs'],
             'max_parallel_jobs': tuning_config['ResourceLimits']['MaxParallelTrainingJobs'],
-            'warm_start_config': WarmStartConfig.from_job_desc(job_details.get('WarmStartConfig', None))
+            'warm_start_config': WarmStartConfig.from_job_desc(job_details.get('WarmStartConfig', None)),
+            'early_stopping_type': tuning_config['TrainingJobEarlyStoppingType']
         }
 
     @classmethod
@@ -625,6 +630,7 @@ def start_new(cls, tuner, inputs):
             tuner_args['metric_definitions'] = tuner.metric_definitions
             tuner_args['tags'] = tuner.tags
             tuner_args['warm_start_config'] = warm_start_config_req
+            tuner_args['early_stopping_type'] = tuner.early_stopping_type
 
             del tuner_args['vpc_config']
             if isinstance(tuner.estimator, sagemaker.algorithm.AlgorithmEstimator):
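Because ``_prepare_init_params_from_job_description()`` now reads ``TrainingJobEarlyStoppingType`` back out of the tuning job description, the setting survives a round trip through ``HyperparameterTuner.attach()``. A small usage sketch (the job name is hypothetical):

.. code:: python

    from sagemaker.tuner import HyperparameterTuner

    # Hypothetical name of an existing tuning job in your account.
    tuning_job_name = 'my-tuning-job'

    # attach() rebuilds the tuner from the DescribeHyperParameterTuningJob response,
    # so the original job's early stopping setting is restored on the new object.
    attached_tuner = HyperparameterTuner.attach(tuning_job_name)
    print(attached_tuner.early_stopping_type)  # 'Auto' or 'Off'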

tests/integ/test_tuner.py

+21 -9

@@ -83,15 +83,16 @@ def hyperparameter_ranges():
 
 def _tune_and_deploy(kmeans_estimator, kmeans_train_set, sagemaker_session,
                      hyperparameter_ranges=None, job_name=None,
-                     warm_start_config=None):
+                     warm_start_config=None, early_stopping_type='Off'):
     tuner = _tune(kmeans_estimator, kmeans_train_set,
                   hyperparameter_ranges=hyperparameter_ranges, warm_start_config=warm_start_config,
-                  job_name=job_name)
-    _deploy(kmeans_train_set, sagemaker_session, tuner)
+                  job_name=job_name, early_stopping_type=early_stopping_type)
+    _deploy(kmeans_train_set, sagemaker_session, tuner, early_stopping_type)
 
 
-def _deploy(kmeans_train_set, sagemaker_session, tuner):
+def _deploy(kmeans_train_set, sagemaker_session, tuner, early_stopping_type):
     best_training_job = tuner.best_training_job()
+    assert tuner.early_stopping_type == early_stopping_type
     with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
         predictor = tuner.deploy(1, 'ml.c4.xlarge')
@@ -105,7 +106,7 @@ def _deploy(kmeans_train_set, sagemaker_session, tuner):
 
 def _tune(kmeans_estimator, kmeans_train_set, tuner=None,
           hyperparameter_ranges=None, job_name=None, warm_start_config=None,
-          wait_till_terminal=True, max_jobs=2, max_parallel_jobs=2):
+          wait_till_terminal=True, max_jobs=2, max_parallel_jobs=2, early_stopping_type='Off'):
     with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
 
         if not tuner:
@@ -115,7 +116,8 @@ def _tune(kmeans_estimator, kmeans_train_set, tuner=None,
                                             objective_type='Minimize',
                                             max_jobs=max_jobs,
                                             max_parallel_jobs=max_parallel_jobs,
-                                            warm_start_config=warm_start_config)
+                                            warm_start_config=warm_start_config,
+                                            early_stopping_type=early_stopping_type)
 
         records = kmeans_estimator.record_set(kmeans_train_set[0][:100])
         test_record_set = kmeans_estimator.record_set(kmeans_train_set[0][:100], channel='test')
@@ -332,16 +334,23 @@ def test_tuning_lda(sagemaker_session):
     tuner = HyperparameterTuner(estimator=lda, objective_metric_name=objective_metric_name,
                                 hyperparameter_ranges=hyperparameter_ranges,
                                 objective_type='Maximize', max_jobs=2,
-                                max_parallel_jobs=2)
+                                max_parallel_jobs=2,
+                                early_stopping_type='Auto')
 
     tuning_job_name = unique_name_from_base('test-lda', max_length=32)
     tuner.fit([record_set, test_record_set], mini_batch_size=1, job_name=tuning_job_name)
 
-    print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)
+    latest_tuning_job_name = tuner.latest_tuning_job.name
+
+    print('Started hyperparameter tuning job with name:' + latest_tuning_job_name)
 
     time.sleep(15)
     tuner.wait()
 
+    desc = tuner.latest_tuning_job.sagemaker_session.sagemaker_client \
+        .describe_hyper_parameter_tuning_job(HyperParameterTuningJobName=latest_tuning_job_name)
+    assert desc['HyperParameterTuningJobConfig']['TrainingJobEarlyStoppingType'] == 'Auto'
+
     best_training_job = tuner.best_training_job()
     with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
         predictor = tuner.deploy(1, 'ml.c4.xlarge')
@@ -555,7 +564,8 @@ def test_attach_tuning_pytorch(sagemaker_session):
 
     tuner = HyperparameterTuner(estimator, objective_metric_name, hyperparameter_ranges,
                                 metric_definitions,
-                                max_jobs=2, max_parallel_jobs=2)
+                                max_jobs=2, max_parallel_jobs=2,
+                                early_stopping_type='Auto')
 
     training_data = estimator.sagemaker_session.upload_data(
         path=os.path.join(mnist_dir, 'training'),
@@ -571,6 +581,8 @@ def test_attach_tuning_pytorch(sagemaker_session):
 
     attached_tuner = HyperparameterTuner.attach(tuning_job_name,
                                                 sagemaker_session=sagemaker_session)
+    assert attached_tuner.early_stopping_type == 'Auto'
+
     best_training_job = tuner.best_training_job()
     with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
         predictor = attached_tuner.deploy(1, 'ml.c4.xlarge')

tests/unit/test_session.py

+1

@@ -301,6 +301,7 @@ def test_train_pack_to_request(sagemaker_session):
             'MaxParallelTrainingJobs': 5,
         },
         'ParameterRanges': SAMPLE_PARAM_RANGES,
+        'TrainingJobEarlyStoppingType': 'Off'
     },
     'TrainingJobDefinition': {
         'StaticHyperParameters': STATIC_HPs,

tests/unit/test_tuner.py

+33 -1

@@ -74,7 +74,8 @@
                 'MinValue': '10',
             },
         ]
-    }
+    },
+    'TrainingJobEarlyStoppingType': 'Off'
     },
     'HyperParameterTuningJobName': JOB_NAME,
     'TrainingJobDefinition': {
@@ -241,9 +242,26 @@ def test_fit_pca(sagemaker_session, tuner):
     assert len(tune_kwargs['parameter_ranges']['IntegerParameterRanges']) == 1
     assert tune_kwargs['job_name'].startswith('pca')
     assert tune_kwargs['tags'] == tags
+    assert tune_kwargs['early_stopping_type'] == 'Off'
     assert tuner.estimator.mini_batch_size == 9999
 
 
+def test_fit_pca_with_early_stopping(sagemaker_session, tuner):
+    pca = PCA(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, NUM_COMPONENTS,
+              base_job_name='pca', sagemaker_session=sagemaker_session)
+
+    tuner.estimator = pca
+    tuner.early_stopping_type = 'Auto'
+
+    records = RecordSet(s3_data=INPUTS, num_records=1, feature_dim=1)
+    tuner.fit(records, mini_batch_size=9999)
+
+    _, _, tune_kwargs = sagemaker_session.tune.mock_calls[0]
+
+    assert tune_kwargs['job_name'].startswith('pca')
+    assert tune_kwargs['early_stopping_type'] == 'Auto'
+
+
 def test_attach_tuning_job_with_estimator_from_hyperparameters(sagemaker_session):
     job_details = copy.deepcopy(TUNING_JOB_DETAILS)
     sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job = Mock(name='describe_tuning_job',
@@ -257,6 +275,7 @@ def test_attach_tuning_job_with_estimator_from_hyperparameters(sagemaker_session
     assert tuner.metric_definitions == METRIC_DEFINTIONS
     assert tuner.strategy == 'Bayesian'
     assert tuner.objective_type == 'Minimize'
+    assert tuner.early_stopping_type == 'Off'
 
     assert isinstance(tuner.estimator, PCA)
     assert tuner.estimator.role == ROLE
@@ -270,6 +289,19 @@ def test_attach_tuning_job_with_estimator_from_hyperparameters(sagemaker_session
     assert tuner.estimator.hyperparameters()['num_components'] == '1'
 
 
+def test_attach_tuning_job_with_estimator_from_hyperparameters_with_early_stopping(sagemaker_session):
+    job_details = copy.deepcopy(TUNING_JOB_DETAILS)
+    job_details['HyperParameterTuningJobConfig']['TrainingJobEarlyStoppingType'] = 'Auto'
+    sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job = Mock(name='describe_tuning_job',
+                                                                                  return_value=job_details)
+    tuner = HyperparameterTuner.attach(JOB_NAME, sagemaker_session=sagemaker_session)
+
+    assert tuner.latest_tuning_job.name == JOB_NAME
+    assert tuner.early_stopping_type == 'Auto'
+
+    assert isinstance(tuner.estimator, PCA)
+
+
 def test_attach_tuning_job_with_job_details(sagemaker_session):
     job_details = copy.deepcopy(TUNING_JOB_DETAILS)
     HyperparameterTuner.attach(JOB_NAME, sagemaker_session=sagemaker_session, job_details=job_details)
