HYPERBAND_MIN_RESOURCE = "MinResource"
HYPERBAND_MAX_RESOURCE = "MaxResource"
GRID_SEARCH = "GridSearch"
+MAX_NUMBER_OF_TRAINING_JOBS_NOT_IMPROVING = "MaxNumberOfTrainingJobsNotImproving"
+BEST_OBJECTIVE_NOT_IMPROVING = "BestObjectiveNotImproving"
+CONVERGENCE_DETECTED = "ConvergenceDetected"
+COMPLETE_ON_CONVERGENCE_DETECTED = "CompleteOnConvergence"
+TARGET_OBJECTIVE_METRIC_VALUE = "TargetObjectiveMetricValue"
+MAX_RUNTIME_IN_SECONDS = "MaxRuntimeInSeconds"

logger = logging.getLogger(__name__)
@@ -383,6 +389,109 @@ def to_input_req(self):
        }

+class TuningJobCompletionCriteriaConfig(object):
+    """The configuration for a job completion criteria."""
+
+    def __init__(
+        self,
+        max_number_of_training_jobs_not_improving: int = None,
+        complete_on_convergence: bool = None,
+        target_objective_metric_value: float = None,
+    ):
+        """Creates a ``TuningJobCompletionCriteriaConfig`` with the provided criteria.
+
+        Args:
+            max_number_of_training_jobs_not_improving (int): The number of training jobs that
+                have failed to improve model performance by 1% or greater over prior training
+                jobs as evaluated against an objective function.
+            complete_on_convergence (bool): A flag to stop your hyperparameter tuning job if
+                automatic model tuning (AMT) has detected that your model has converged as
+                evaluated against your objective function.
+            target_objective_metric_value (float): The value of the objective metric.
+        """
+        self.max_number_of_training_jobs_not_improving = (
+            max_number_of_training_jobs_not_improving
+        )
+        self.complete_on_convergence = complete_on_convergence
+        self.target_objective_metric_value = target_objective_metric_value
+
+    @classmethod
+    def from_job_desc(cls, completion_criteria_config):
+        """Creates a ``TuningJobCompletionCriteriaConfig`` from a completion criteria response.
+
+        This is the completion criteria configuration from the DescribeTuningJob response.
+
+        Args:
+            completion_criteria_config (dict): The completion criteria configuration, which
+                contains up to three first-class fields: ``BestObjectiveNotImproving``,
+                ``ConvergenceDetected``, and ``TargetObjectiveMetricValue``.
+
+        Returns:
+            sagemaker.tuner.TuningJobCompletionCriteriaConfig: De-serialized instance of
+            TuningJobCompletionCriteriaConfig containing the completion criteria.
+        """
+        complete_on_convergence = None
+        if CONVERGENCE_DETECTED in completion_criteria_config:
+            if completion_criteria_config[CONVERGENCE_DETECTED][COMPLETE_ON_CONVERGENCE_DETECTED]:
+                complete_on_convergence = (
+                    completion_criteria_config[CONVERGENCE_DETECTED][
+                        COMPLETE_ON_CONVERGENCE_DETECTED
+                    ]
+                    == "Enabled"
+                )
+
+        max_number_of_training_jobs_not_improving = None
+        if BEST_OBJECTIVE_NOT_IMPROVING in completion_criteria_config:
+            if completion_criteria_config[BEST_OBJECTIVE_NOT_IMPROVING][
+                MAX_NUMBER_OF_TRAINING_JOBS_NOT_IMPROVING
+            ]:
+                max_number_of_training_jobs_not_improving = completion_criteria_config[
+                    BEST_OBJECTIVE_NOT_IMPROVING
+                ][MAX_NUMBER_OF_TRAINING_JOBS_NOT_IMPROVING]
+
+        target_objective_metric_value = None
+        if TARGET_OBJECTIVE_METRIC_VALUE in completion_criteria_config:
+            target_objective_metric_value = completion_criteria_config[
+                TARGET_OBJECTIVE_METRIC_VALUE
+            ]
+
+        return cls(
+            max_number_of_training_jobs_not_improving=max_number_of_training_jobs_not_improving,
+            complete_on_convergence=complete_on_convergence,
+            target_objective_metric_value=target_objective_metric_value,
+        )
+
+    def to_input_req(self):
+        """Converts the ``self`` instance to the desired input request format.
+
+        Examples:
+            >>> completion_criteria_config = TuningJobCompletionCriteriaConfig(
+                    max_number_of_training_jobs_not_improving=5,
+                    complete_on_convergence=True,
+                    target_objective_metric_value=0.42,
+                )
+            >>> completion_criteria_config.to_input_req()
+            {
+                "BestObjectiveNotImproving": {
+                    "MaxNumberOfTrainingJobsNotImproving": 5
+                },
+                "ConvergenceDetected": {
+                    "CompleteOnConvergence": "Enabled"
+                },
+                "TargetObjectiveMetricValue": 0.42
+            }
+
+        Returns:
+            dict: Containing the completion criteria configurations.
+        """
+        completion_criteria_config = {}
+        if self.max_number_of_training_jobs_not_improving is not None:
+            completion_criteria_config[BEST_OBJECTIVE_NOT_IMPROVING] = {
+                MAX_NUMBER_OF_TRAINING_JOBS_NOT_IMPROVING: (
+                    self.max_number_of_training_jobs_not_improving
+                )
+            }
+
+        if self.target_objective_metric_value is not None:
+            completion_criteria_config[TARGET_OBJECTIVE_METRIC_VALUE] = (
+                self.target_objective_metric_value
+            )
+
+        if self.complete_on_convergence is not None:
+            completion_criteria_config[CONVERGENCE_DETECTED] = {
+                COMPLETE_ON_CONVERGENCE_DETECTED: (
+                    "Enabled" if self.complete_on_convergence else "Disabled"
+                )
+            }
+
+        return completion_criteria_config
+
+
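Review note: a minimal round-trip sketch of the class added above. Values are invented, and the import path assumes the class is exported from sagemaker.tuner, as the docstrings indicate.

# Illustrative only: construct the config, serialize it, and parse it back.
from sagemaker.tuner import TuningJobCompletionCriteriaConfig

config = TuningJobCompletionCriteriaConfig(
    max_number_of_training_jobs_not_improving=5,
    complete_on_convergence=True,
    target_objective_metric_value=0.42,
)

request_fragment = config.to_input_req()
# {"BestObjectiveNotImproving": {"MaxNumberOfTrainingJobsNotImproving": 5},
#  "TargetObjectiveMetricValue": 0.42,
#  "ConvergenceDetected": {"CompleteOnConvergence": "Enabled"}}

# The same shape comes back in a DescribeHyperParameterTuningJob response,
# which from_job_desc de-serializes again:
round_tripped = TuningJobCompletionCriteriaConfig.from_job_desc(request_fragment)
assert round_tripped.complete_on_convergence is True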
class HyperparameterTuner(object):
    """Defines interaction with Amazon SageMaker hyperparameter tuning jobs.
@@ -407,10 +516,12 @@ def __init__(
        objective_type: Union[str, PipelineVariable] = "Maximize",
        max_jobs: Union[int, PipelineVariable] = None,
        max_parallel_jobs: Union[int, PipelineVariable] = 1,
+        max_runtime_in_seconds: Optional[Union[int, PipelineVariable]] = None,
        tags: Optional[List[Dict[str, Union[str, PipelineVariable]]]] = None,
        base_tuning_job_name: Optional[str] = None,
        warm_start_config: Optional[WarmStartConfig] = None,
        strategy_config: Optional[StrategyConfig] = None,
+        completion_criteria_config: Optional[TuningJobCompletionCriteriaConfig] = None,
        early_stopping_type: Union[str, PipelineVariable] = "Off",
        estimator_name: Optional[str] = None,
        random_seed: Optional[int] = None,
@@ -450,6 +561,8 @@ def __init__(
                strategy and the default value is 1 for all other strategies (default: None).
            max_parallel_jobs (int or PipelineVariable): Maximum number of parallel training
                jobs to start (default: 1).
+            max_runtime_in_seconds (int or PipelineVariable): The maximum time in seconds
+                that a training job launched by a hyperparameter tuning job can run.
            tags (list[dict[str, str]] or list[dict[str, PipelineVariable]]): List of tags for
                labeling the tuning job (default: None). For more, see
                https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html.
@@ -463,6 +576,8 @@ def __init__(
                configuration defining the nature of warm start tuning job.
            strategy_config (sagemaker.tuner.StrategyConfig): A configuration for "Hyperparameter"
                tuning job optimisation strategy.
+            completion_criteria_config (sagemaker.tuner.TuningJobCompletionCriteriaConfig): A
+                configuration for the completion criteria.
            early_stopping_type (str or PipelineVariable): Specifies whether early stopping is
                enabled for the job. Can be either 'Auto' or 'Off' (default:
                'Off'). If set to 'Off', early stopping will not be attempted.
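Review note: a sketch of how the two new constructor arguments would be used together. `my_estimator` and the metric/range names below are placeholders, not values from this change.

from sagemaker.parameter import ContinuousParameter
from sagemaker.tuner import HyperparameterTuner, TuningJobCompletionCriteriaConfig

tuner = HyperparameterTuner(
    estimator=my_estimator,  # placeholder: any configured SageMaker estimator
    objective_metric_name="validation:auc",
    hyperparameter_ranges={"learning_rate": ContinuousParameter(0.01, 0.2)},
    max_jobs=50,
    max_parallel_jobs=5,
    # New: cap the runtime of each training job launched by the tuning job.
    max_runtime_in_seconds=3600,
    # New: stop tuning early once AMT detects the model has converged.
    completion_criteria_config=TuningJobCompletionCriteriaConfig(
        complete_on_convergence=True,
    ),
)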
@@ -505,6 +620,7 @@ def __init__(
        self.strategy = strategy
        self.strategy_config = strategy_config
+        self.completion_criteria_config = completion_criteria_config
        self.objective_type = objective_type
        # For the GridSearch strategy we expect max_jobs to be None and recalculate it later.
        # For all other strategies, for backward compatibility, we keep
@@ -513,6 +629,7 @@ def __init__(
        if max_jobs is None and strategy is not GRID_SEARCH:
            self.max_jobs = 1
        self.max_parallel_jobs = max_parallel_jobs
+        self.max_runtime_in_seconds = max_runtime_in_seconds

        self.tags = tags
        self.base_tuning_job_name = base_tuning_job_name
@@ -1227,6 +1344,9 @@ def _prepare_init_params_from_job_description(cls, job_details):
            "base_tuning_job_name": base_from_name(job_details["HyperParameterTuningJobName"]),
        }

+        if MAX_RUNTIME_IN_SECONDS in tuning_config["ResourceLimits"]:
+            params["max_runtime_in_seconds"] = tuning_config["ResourceLimits"][
+                MAX_RUNTIME_IN_SECONDS
+            ]
+
        if "RandomSeed" in tuning_config:
            params["random_seed"] = tuning_config["RandomSeed"]
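Review note: the membership check above guards an optional field. The ResourceLimits block of a DescribeHyperParameterTuningJob response is assumed to look roughly like this (values invented for illustration):

tuning_config = {
    "ResourceLimits": {
        "MaxNumberOfTrainingJobs": 50,
        "MaxParallelTrainingJobs": 5,
        "MaxRuntimeInSeconds": 3600,  # optional, hence the `in` check above
    },
    "RandomSeed": 42,  # also optional, guarded the same way below
}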
@@ -1484,9 +1604,11 @@ def _create_warm_start_tuner(self, additional_parents, warm_start_type, estimato
            hyperparameter_ranges=self._hyperparameter_ranges,
            strategy=self.strategy,
            strategy_config=self.strategy_config,
+            completion_criteria_config=self.completion_criteria_config,
            objective_type=self.objective_type,
            max_jobs=self.max_jobs,
            max_parallel_jobs=self.max_parallel_jobs,
+            max_runtime_in_seconds=self.max_runtime_in_seconds,
            warm_start_config=WarmStartConfig(
                warm_start_type=warm_start_type, parents=all_parents
            ),
@@ -1512,9 +1634,11 @@ def _create_warm_start_tuner(self, additional_parents, warm_start_type, estimato
            metric_definitions_dict=self.metric_definitions_dict,
            strategy=self.strategy,
            strategy_config=self.strategy_config,
+            completion_criteria_config=self.completion_criteria_config,
            objective_type=self.objective_type,
            max_jobs=self.max_jobs,
            max_parallel_jobs=self.max_parallel_jobs,
+            max_runtime_in_seconds=self.max_runtime_in_seconds,
            warm_start_config=WarmStartConfig(warm_start_type=warm_start_type, parents=all_parents),
            early_stopping_type=self.early_stopping_type,
            random_seed=self.random_seed,
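Review note: since both warm-start constructors now forward the new fields, a child tuner should inherit them from its parent. A hypothetical check (`parent_tuner` is a placeholder for an already-configured tuner):

child = parent_tuner.transfer_learning_tuner(additional_parents={"other-tuning-job"})
assert child.max_runtime_in_seconds == parent_tuner.max_runtime_in_seconds
assert child.completion_criteria_config is parent_tuner.completion_criteria_config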
@@ -1530,9 +1654,11 @@ def create(
        base_tuning_job_name=None,
        strategy="Bayesian",
        strategy_config=None,
+        completion_criteria_config=None,
        objective_type="Maximize",
        max_jobs=None,
        max_parallel_jobs=1,
+        max_runtime_in_seconds=None,
        tags=None,
        warm_start_config=None,
        early_stopping_type="Off",
@@ -1581,13 +1707,16 @@ def create(
                (default: 'Bayesian').
            strategy_config (dict): The configuration for a training job launched by a
                hyperparameter tuning job.
+            completion_criteria_config (dict): The configuration for tuning job completion
+                criteria.
            objective_type (str): The type of the objective metric for evaluating training jobs.
                This value can be either 'Minimize' or 'Maximize' (default: 'Maximize').
            max_jobs (int): Maximum total number of training jobs to start for the hyperparameter
                tuning job. The default value is unspecified for the GridSearch strategy
                and the value is 1 for all other strategies (default: None).
            max_parallel_jobs (int): Maximum number of parallel training jobs to start
                (default: 1).
+            max_runtime_in_seconds (int): The maximum time in seconds
+                that a training job launched by a hyperparameter tuning job can run.
            tags (list[dict]): List of tags for labeling the tuning job (default: None). For more,
                see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html.
            warm_start_config (sagemaker.tuner.WarmStartConfig): A ``WarmStartConfig`` object that
@@ -1632,9 +1761,11 @@ def create(
            metric_definitions=metric_definitions,
            strategy=strategy,
            strategy_config=strategy_config,
+            completion_criteria_config=completion_criteria_config,
            objective_type=objective_type,
            max_jobs=max_jobs,
            max_parallel_jobs=max_parallel_jobs,
+            max_runtime_in_seconds=max_runtime_in_seconds,
            tags=tags,
            warm_start_config=warm_start_config,
            early_stopping_type=early_stopping_type,
@@ -1790,6 +1921,9 @@ def _get_tuner_args(cls, tuner, inputs):
            "early_stopping_type": tuner.early_stopping_type,
        }

+        if tuner.max_runtime_in_seconds is not None:
+            tuning_config["max_runtime_in_seconds"] = tuner.max_runtime_in_seconds
+
        if tuner.random_seed is not None:
            tuning_config["random_seed"] = tuner.random_seed
@@ -1804,6 +1938,9 @@ def _get_tuner_args(cls, tuner, inputs):
        if parameter_ranges is not None:
            tuning_config["parameter_ranges"] = parameter_ranges

+        if tuner.completion_criteria_config is not None:
+            tuning_config["completion_criteria_config"] = (
+                tuner.completion_criteria_config.to_input_req()
+            )
+
        tuner_args = {
            "job_name": tuner._current_job_name,
            "tuning_config": tuning_config,