HYPERBAND_MIN_RESOURCE = "MinResource"
HYPERBAND_MAX_RESOURCE = "MaxResource"
GRID_SEARCH = "GridSearch"
+ MAX_NUMBER_OF_TRAINING_JOBS_NOT_IMPROVING = "MaxNumberOfTrainingJobsNotImproving"
+ BEST_OBJECTIVE_NOT_IMPROVING = "BestObjectiveNotImproving"
+ CONVERGENCE_DETECTED = "ConvergenceDetected"
+ COMPLETE_ON_CONVERGENCE_DETECTED = "CompleteOnConvergence"
+ TARGET_OBJECTIVE_METRIC_VALUE = "TargetObjectiveMetricValue"
+ MAX_RUNTIME_IN_SECONDS = "MaxRuntimeInSeconds"

logger = logging.getLogger(__name__)
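These new module-level constants mirror key names in the `CreateHyperParameterTuningJob` API. For orientation, here is a hedged sketch of where they sit in the request payload (nesting per the AWS API documentation; the values are illustrative, not part of this diff):

```python
# Illustrative HyperParameterTuningJobConfig fragment; the constants above
# are the dict keys used to read and write these fields.
HYPERPARAMETER_TUNING_JOB_CONFIG = {
    "ResourceLimits": {
        "MaxNumberOfTrainingJobs": 100,
        "MaxParallelTrainingJobs": 10,
        "MaxRuntimeInSeconds": 3600,  # MAX_RUNTIME_IN_SECONDS
    },
    "TuningJobCompletionCriteria": {
        "TargetObjectiveMetricValue": 0.95,  # TARGET_OBJECTIVE_METRIC_VALUE
        "BestObjectiveNotImproving": {  # BEST_OBJECTIVE_NOT_IMPROVING
            "MaxNumberOfTrainingJobsNotImproving": 10
        },
        "ConvergenceDetected": {  # CONVERGENCE_DETECTED
            "CompleteOnConvergence": "Enabled"  # COMPLETE_ON_CONVERGENCE_DETECTED
        },
    },
}
```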
@@ -383,6 +389,116 @@ def to_input_req(self):
        }


+ class TuningJobCompletionCriteriaConfig(object):
+     """The configuration for a job completion criteria."""
+
+     def __init__(
+         self,
+         max_number_of_training_jobs_not_improving: int = None,
+         complete_on_convergence: bool = None,
+         target_objective_metric_value: float = None,
+     ):
+         """Creates a ``TuningJobCompletionCriteriaConfig`` with the provided criteria.
+
+         Args:
+             max_number_of_training_jobs_not_improving (int): The number of training jobs that
+                 do not improve the best objective metric, after which the tuning job will stop.
+             complete_on_convergence (bool): A flag to stop the hyperparameter tuning job once
+                 automatic model tuning (AMT) has detected that the model has converged, as
+                 evaluated against the objective function.
+             target_objective_metric_value (float): The target value of the objective metric;
+                 the tuning job completes once the best objective metric reaches this value.
+         """
+
+         self.max_number_of_training_jobs_not_improving = max_number_of_training_jobs_not_improving
+         self.complete_on_convergence = complete_on_convergence
+         self.target_objective_metric_value = target_objective_metric_value
+
+     @classmethod
+     def from_job_desc(cls, completion_criteria_config):
+         """Creates a ``TuningJobCompletionCriteriaConfig`` from a configuration response.
+
+         This is the completion criteria configuration from the DescribeTuningJob response.
+
+         Args:
+             completion_criteria_config (dict): The completion criteria configuration from the
+                 job description. It contains up to three first-class fields:
+                 ``BestObjectiveNotImproving``, ``ConvergenceDetected`` and
+                 ``TargetObjectiveMetricValue``.
+
+         Returns:
+             sagemaker.tuner.TuningJobCompletionCriteriaConfig: De-serialized instance of
+             TuningJobCompletionCriteriaConfig containing the completion criteria.
+         """
+         complete_on_convergence = None
+         if CONVERGENCE_DETECTED in completion_criteria_config:
+             if completion_criteria_config[CONVERGENCE_DETECTED][COMPLETE_ON_CONVERGENCE_DETECTED]:
+                 complete_on_convergence = bool(
+                     completion_criteria_config[CONVERGENCE_DETECTED][
+                         COMPLETE_ON_CONVERGENCE_DETECTED
+                     ]
+                     == "Enabled"
+                 )
+
+         max_number_of_training_jobs_not_improving = None
+         if BEST_OBJECTIVE_NOT_IMPROVING in completion_criteria_config:
+             if completion_criteria_config[BEST_OBJECTIVE_NOT_IMPROVING][
+                 MAX_NUMBER_OF_TRAINING_JOBS_NOT_IMPROVING
+             ]:
+                 max_number_of_training_jobs_not_improving = completion_criteria_config[
+                     BEST_OBJECTIVE_NOT_IMPROVING
+                 ][MAX_NUMBER_OF_TRAINING_JOBS_NOT_IMPROVING]
+
+         target_objective_metric_value = None
+         if TARGET_OBJECTIVE_METRIC_VALUE in completion_criteria_config:
+             target_objective_metric_value = completion_criteria_config[
+                 TARGET_OBJECTIVE_METRIC_VALUE
+             ]
+
+         return cls(
+             max_number_of_training_jobs_not_improving=max_number_of_training_jobs_not_improving,
+             complete_on_convergence=complete_on_convergence,
+             target_objective_metric_value=target_objective_metric_value,
+         )
+
+     def to_input_req(self):
+         """Converts the ``self`` instance to the desired input request format.
+
+         Examples:
+             >>> completion_criteria_config = TuningJobCompletionCriteriaConfig(
+                     max_number_of_training_jobs_not_improving=5,
+                     complete_on_convergence=True,
+                     target_objective_metric_value=0.42,
+                 )
+             >>> completion_criteria_config.to_input_req()
+             {
+                 "BestObjectiveNotImproving": {
+                     "MaxNumberOfTrainingJobsNotImproving": 5
+                 },
+                 "ConvergenceDetected": {
+                     "CompleteOnConvergence": "Enabled"
+                 },
+                 "TargetObjectiveMetricValue": 0.42
+             }
+
+         Returns:
+             dict: Containing the completion criteria configurations.
+         """
+         completion_criteria_config = {}
+         if self.max_number_of_training_jobs_not_improving is not None:
+             # Initialize the nested dict before assigning the leaf field.
+             completion_criteria_config[BEST_OBJECTIVE_NOT_IMPROVING] = {}
+             completion_criteria_config[BEST_OBJECTIVE_NOT_IMPROVING][
+                 MAX_NUMBER_OF_TRAINING_JOBS_NOT_IMPROVING
+             ] = self.max_number_of_training_jobs_not_improving
+
+         if self.target_objective_metric_value is not None:
+             completion_criteria_config[
+                 TARGET_OBJECTIVE_METRIC_VALUE
+             ] = self.target_objective_metric_value
+
+         if self.complete_on_convergence is not None:
+             completion_criteria_config[CONVERGENCE_DETECTED] = {}
+             completion_criteria_config[CONVERGENCE_DETECTED][COMPLETE_ON_CONVERGENCE_DETECTED] = (
+                 "Enabled" if self.complete_on_convergence else "Disabled"
+             )
+
+         return completion_criteria_config
+
+
class HyperparameterTuner(object):
    """Defines interaction with Amazon SageMaker hyperparameter tuning jobs.
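To show how the new class and parameter fit together, here is a minimal usage sketch. The estimator, metric name, and hyperparameter range are placeholders for illustration, not part of this diff:

```python
from sagemaker.tuner import (
    ContinuousParameter,
    HyperparameterTuner,
    TuningJobCompletionCriteriaConfig,
)

# Stop tuning early when any of these criteria is met.
completion_criteria = TuningJobCompletionCriteriaConfig(
    max_number_of_training_jobs_not_improving=5,
    complete_on_convergence=True,
    target_objective_metric_value=0.42,
)

# `my_estimator` stands in for any configured SageMaker Estimator.
tuner = HyperparameterTuner(
    estimator=my_estimator,
    objective_metric_name="validation:accuracy",
    hyperparameter_ranges={"learning_rate": ContinuousParameter(0.001, 0.1)},
    completion_criteria_config=completion_criteria,
    max_runtime_in_seconds=3600,  # runtime limit (see docstring below)
)
```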
@@ -407,10 +523,12 @@ def __init__(
        objective_type: Union[str, PipelineVariable] = "Maximize",
        max_jobs: Union[int, PipelineVariable] = None,
        max_parallel_jobs: Union[int, PipelineVariable] = 1,
+         max_runtime_in_seconds: Optional[Union[int, PipelineVariable]] = None,
        tags: Optional[List[Dict[str, Union[str, PipelineVariable]]]] = None,
        base_tuning_job_name: Optional[str] = None,
        warm_start_config: Optional[WarmStartConfig] = None,
        strategy_config: Optional[StrategyConfig] = None,
+         completion_criteria_config: Optional[TuningJobCompletionCriteriaConfig] = None,
        early_stopping_type: Union[str, PipelineVariable] = "Off",
        estimator_name: Optional[str] = None,
        random_seed: Optional[int] = None,
@@ -450,6 +568,8 @@ def __init__(
                strategy and the default value is 1 for all other strategies (default: None).
            max_parallel_jobs (int or PipelineVariable): Maximum number of parallel training jobs to
                start (default: 1).
+             max_runtime_in_seconds (int or PipelineVariable): The maximum time in seconds
+                 that a training job launched by a hyperparameter tuning job can run.
            tags (list[dict[str, str] or dict[str, PipelineVariable]]): List of tags for
                labeling the tuning job (default: None). For more, see
                https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html.
@@ -463,6 +583,8 @@ def __init__(
                configuration defining the nature of the warm start tuning job.
            strategy_config (sagemaker.tuner.StrategyConfig): A configuration for the
                hyperparameter tuning job optimization strategy.
+             completion_criteria_config (sagemaker.tuner.TuningJobCompletionCriteriaConfig): A
+                 configuration for the completion criteria.
            early_stopping_type (str or PipelineVariable): Specifies whether early stopping is
                enabled for the job. Can be either 'Auto' or 'Off' (default:
                'Off'). If set to 'Off', early stopping will not be attempted.
@@ -505,6 +627,7 @@ def __init__(

        self.strategy = strategy
        self.strategy_config = strategy_config
+         self.completion_criteria_config = completion_criteria_config
        self.objective_type = objective_type
        # For the GridSearch strategy we expect max_jobs to equal None and recalculate it later.
        # For all other strategies, for backward compatibility, we keep
@@ -513,6 +636,7 @@ def __init__(
        if max_jobs is None and strategy is not GRID_SEARCH:
            self.max_jobs = 1
        self.max_parallel_jobs = max_parallel_jobs
+         self.max_runtime_in_seconds = max_runtime_in_seconds

        self.tags = tags
        self.base_tuning_job_name = base_tuning_job_name
@@ -1227,6 +1351,16 @@ def _prepare_init_params_from_job_description(cls, job_details):
            "base_tuning_job_name": base_from_name(job_details["HyperParameterTuningJobName"]),
        }

+         if "TuningJobCompletionCriteria" in tuning_config:
+             params["completion_criteria_config"] = TuningJobCompletionCriteriaConfig.from_job_desc(
+                 tuning_config["TuningJobCompletionCriteria"]
+             )
+
+         if MAX_RUNTIME_IN_SECONDS in tuning_config["ResourceLimits"]:
+             params["max_runtime_in_seconds"] = tuning_config["ResourceLimits"][
+                 MAX_RUNTIME_IN_SECONDS
+             ]
+
        if "RandomSeed" in tuning_config:
            params["random_seed"] = tuning_config["RandomSeed"]

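To make the parsing concrete, here is a hedged sketch of the `HyperParameterTuningJobConfig` fragment from a `DescribeHyperParameterTuningJob` response that this hunk consumes (field values are illustrative):

```python
from sagemaker.tuner import TuningJobCompletionCriteriaConfig

# Illustrative tuning_config fragment; values are examples only.
tuning_config = {
    "ResourceLimits": {"MaxNumberOfTrainingJobs": 100, "MaxRuntimeInSeconds": 3600},
    "TuningJobCompletionCriteria": {
        "BestObjectiveNotImproving": {"MaxNumberOfTrainingJobsNotImproving": 5},
        "ConvergenceDetected": {"CompleteOnConvergence": "Enabled"},
        "TargetObjectiveMetricValue": 0.42,
    },
}

# De-serialize the completion criteria exactly as the hunk above does.
config = TuningJobCompletionCriteriaConfig.from_job_desc(
    tuning_config["TuningJobCompletionCriteria"]
)
assert config.complete_on_convergence is True
assert config.max_number_of_training_jobs_not_improving == 5
assert config.target_objective_metric_value == 0.42
```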
@@ -1484,9 +1618,11 @@ def _create_warm_start_tuner(self, additional_parents, warm_start_type, estimato
            hyperparameter_ranges=self._hyperparameter_ranges,
            strategy=self.strategy,
            strategy_config=self.strategy_config,
+             completion_criteria_config=self.completion_criteria_config,
            objective_type=self.objective_type,
            max_jobs=self.max_jobs,
            max_parallel_jobs=self.max_parallel_jobs,
+             max_runtime_in_seconds=self.max_runtime_in_seconds,
            warm_start_config=WarmStartConfig(
                warm_start_type=warm_start_type, parents=all_parents
            ),
@@ -1512,9 +1648,11 @@ def _create_warm_start_tuner(self, additional_parents, warm_start_type, estimato
            metric_definitions_dict=self.metric_definitions_dict,
            strategy=self.strategy,
            strategy_config=self.strategy_config,
+             completion_criteria_config=self.completion_criteria_config,
            objective_type=self.objective_type,
            max_jobs=self.max_jobs,
            max_parallel_jobs=self.max_parallel_jobs,
+             max_runtime_in_seconds=self.max_runtime_in_seconds,
            warm_start_config=WarmStartConfig(warm_start_type=warm_start_type, parents=all_parents),
            early_stopping_type=self.early_stopping_type,
            random_seed=self.random_seed,
@@ -1530,9 +1668,11 @@ def create(
        base_tuning_job_name=None,
        strategy="Bayesian",
        strategy_config=None,
+         completion_criteria_config=None,
        objective_type="Maximize",
        max_jobs=None,
        max_parallel_jobs=1,
+         max_runtime_in_seconds=None,
        tags=None,
        warm_start_config=None,
        early_stopping_type="Off",
@@ -1581,13 +1721,16 @@ def create(
                (default: 'Bayesian').
            strategy_config (dict): The configuration for the tuning job optimization strategy.
+             completion_criteria_config (dict): The configuration for tuning job completion criteria.
            objective_type (str): The type of the objective metric for evaluating training jobs.
                This value can be either 'Minimize' or 'Maximize' (default: 'Maximize').
            max_jobs (int): Maximum total number of training jobs to start for the hyperparameter
                tuning job. The default value is unspecified for the GridSearch strategy
                and the value is 1 for all other strategies (default: None).
            max_parallel_jobs (int): Maximum number of parallel training jobs to start
                (default: 1).
+             max_runtime_in_seconds (int): The maximum time in seconds
+                 that a training job launched by a hyperparameter tuning job can run.
            tags (list[dict]): List of tags for labeling the tuning job (default: None). For more,
                see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html.
            warm_start_config (sagemaker.tuner.WarmStartConfig): A ``WarmStartConfig`` object that
@@ -1632,9 +1775,11 @@ def create(
            metric_definitions=metric_definitions,
            strategy=strategy,
            strategy_config=strategy_config,
+             completion_criteria_config=completion_criteria_config,
            objective_type=objective_type,
            max_jobs=max_jobs,
            max_parallel_jobs=max_parallel_jobs,
+             max_runtime_in_seconds=max_runtime_in_seconds,
            tags=tags,
            warm_start_config=warm_start_config,
            early_stopping_type=early_stopping_type,
@@ -1790,6 +1935,9 @@ def _get_tuner_args(cls, tuner, inputs):
            "early_stopping_type": tuner.early_stopping_type,
        }

+         if tuner.max_runtime_in_seconds is not None:
+             tuning_config["max_runtime_in_seconds"] = tuner.max_runtime_in_seconds
+
        if tuner.random_seed is not None:
            tuning_config["random_seed"] = tuner.random_seed

@@ -1804,6 +1952,11 @@ def _get_tuner_args(cls, tuner, inputs):
        if parameter_ranges is not None:
            tuning_config["parameter_ranges"] = parameter_ranges

+         if tuner.completion_criteria_config is not None:
+             tuning_config[
+                 "completion_criteria_config"
+             ] = tuner.completion_criteria_config.to_input_req()
+
        tuner_args = {
            "job_name": tuner._current_job_name,
            "tuning_config": tuning_config,