# language governing permissions and limitations under the License.
from __future__ import absolute_import
+import importlib
import inspect
import json

from sagemaker.analytics import HyperparameterTuningJobAnalytics
from sagemaker.estimator import Framework
from sagemaker.job import _Job
+from sagemaker.session import Session
from sagemaker.utils import base_name_from_image, name_from_base

+# TODO: probably move these somewhere to Amazon Estimator land after
+# the circular dependency issue is resolved
+AMAZON_ESTIMATOR_MODULE = 'sagemaker'
+AMAZON_ESTIMATOR_CLS_NAMES = {
+    'factorization-machines': 'FactorizationMachines',
+    'kmeans': 'KMeans',
+    'lda': 'LDA',
+    'linear-learner': 'LinearLearner',
+    'ntm': 'NTM',
+    'pca': 'PCA',
+    'randomcutforest': 'RandomCutForest',
+}
+

class _ParameterRange(object):
    __all_types__ = ['Continuous', 'Categorical', 'Integer']
@@ -66,8 +81,11 @@ def __init__(self, min_value, max_value):
class HyperparameterTuner(object):
-    SAGEMAKER_ESTIMATOR_CLASS_NAME = 'sagemaker_estimator_class_name'
    SAGEMAKER_ESTIMATOR_MODULE = 'sagemaker_estimator_module'
+    SAGEMAKER_ESTIMATOR_CLASS_NAME = 'sagemaker_estimator_class_name'
+
+    DEFAULT_ESTIMATOR_MODULE = 'sagemaker.estimator'
+    DEFAULT_ESTIMATOR_CLS_NAME = 'Estimator'

    def __init__(self, estimator, objective_metric_name, hyperparameter_ranges, metric_definitions, strategy='Bayesian',
                 objective_type='Maximize', max_jobs=1, max_parallel_jobs=1, base_tuning_job_name=None):
@@ -100,8 +118,8 @@ def prepare_for_training(self):
        from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase

        if not isinstance(self.estimator, AmazonAlgorithmEstimatorBase):
-            self.static_hyperparameters[self.SAGEMAKER_ESTIMATOR_CLASS_NAME] = self.estimator.__class__.__name__
-            self.static_hyperparameters[self.SAGEMAKER_ESTIMATOR_MODULE] = self.estimator.__module__
+            self.static_hyperparameters[self.SAGEMAKER_ESTIMATOR_CLASS_NAME] = json.dumps(self.estimator.__class__.__name__)
+            self.static_hyperparameters[self.SAGEMAKER_ESTIMATOR_MODULE] = json.dumps(self.estimator.__module__)

    def fit(self, inputs, job_name=None, **kwargs):
        """Start a hyperparameter tuning job.
@@ -124,6 +142,24 @@ def fit(self, inputs, job_name=None, **kwargs):
        self.prepare_for_training()
        self.latest_tuning_job = _TuningJob.start_new(self, inputs)

+    @classmethod
+    def attach(cls, tuning_job_name, sagemaker_session=None, job_details=None, estimator_cls=None):
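+        # Reconstruct a HyperparameterTuner from an existing tuning job: describe the
+        # job (unless job_details is supplied), rebuild the estimator and the tuner's
+        # init parameters from that description, and bind the result to the job.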
+        sagemaker_session = sagemaker_session or Session()
+
+        if job_details is None:
+            job_details = sagemaker_session.sagemaker_client \
+                .describe_hyper_parameter_tuning_job(HyperParameterTuningJobName=tuning_job_name)
+
+        estimator_cls = cls._prepare_estimator_cls(estimator_cls, job_details['TrainingJobDefinition'])
+        estimator = cls._prepare_estimator_from_job_description(estimator_cls, job_details['TrainingJobDefinition'],
+                                                                sagemaker_session)
+        init_params = cls._prepare_init_params_from_job_description(job_details)
+
+        tuner = cls(estimator=estimator, **init_params)
+        tuner.latest_tuning_job = _TuningJob(sagemaker_session=sagemaker_session, tuning_job_name=tuning_job_name)
+
+        return tuner
+
    def deploy(self, initial_instance_count, instance_type, endpoint_name=None, **kwargs):
        """Deploy the best trained or user specified model to an Amazon SageMaker endpoint and return a
        ``sagemaker.RealTimePredictor``
@@ -182,6 +218,75 @@ def _ensure_last_tuning_job(self):
        if self.latest_tuning_job is None:
            raise ValueError('No tuning job available')

+    @classmethod
+    def _prepare_estimator_cls(cls, estimator_cls, training_details):
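+        # Resolve which estimator class to use, in priority order: an explicitly
+        # passed estimator_cls, the class recorded in the job's static hyperparameters,
+        # a recognized first-party algorithm image, and finally the generic Estimator.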
+        # Check for customer-specified estimator first
+        if estimator_cls is not None:
+            module, cls_name = estimator_cls.rsplit('.', 1)
+            return getattr(importlib.import_module(module), cls_name)
+
+        # Then check for estimator class in hyperparameters
+        hyperparameters = training_details['StaticHyperParameters']
+        if cls.SAGEMAKER_ESTIMATOR_CLASS_NAME in hyperparameters and cls.SAGEMAKER_ESTIMATOR_MODULE in hyperparameters:
+            module = hyperparameters.get(cls.SAGEMAKER_ESTIMATOR_MODULE)
+            cls_name = hyperparameters.get(cls.SAGEMAKER_ESTIMATOR_CLASS_NAME)
+            return getattr(importlib.import_module(json.loads(module)), json.loads(cls_name))
+
+        # Then try to derive the estimator from the image name for 1P algorithms
+        image_name = training_details['AlgorithmSpecification']['TrainingImage']
+        algorithm = image_name[image_name.find('/') + 1:image_name.find(':')]
+        if algorithm in AMAZON_ESTIMATOR_CLS_NAMES:
+            cls_name = AMAZON_ESTIMATOR_CLS_NAMES[algorithm]
+            return getattr(importlib.import_module(AMAZON_ESTIMATOR_MODULE), cls_name)
+
+        # Default to the BYO estimator
+        return getattr(importlib.import_module(cls.DEFAULT_ESTIMATOR_MODULE), cls.DEFAULT_ESTIMATOR_CLS_NAME)
+
+    @classmethod
+    def _prepare_estimator_from_job_description(cls, estimator_cls, training_details, sagemaker_session):
+        # Swap name for static hyperparameters to what an estimator would expect
+        training_details['HyperParameters'] = training_details['StaticHyperParameters']
+        del training_details['StaticHyperParameters']
+
+        # Remove hyperparameter reserved by SageMaker for tuning jobs
+        del training_details['HyperParameters']['_tuning_objective_metric']
+
+        # Add items expected by the estimator (but aren't needed otherwise)
+        training_details['TrainingJobName'] = ''
+        if 'KmsKeyId' not in training_details['OutputDataConfig']:
+            training_details['OutputDataConfig']['KmsKeyId'] = ''
+
+        estimator_init_params = estimator_cls._prepare_init_params_from_job_description(training_details)
+        return estimator_cls(sagemaker_session=sagemaker_session, **estimator_init_params)
+
+    @classmethod
+    def _prepare_init_params_from_job_description(cls, job_details):
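+        # Translate the tuning job description into HyperparameterTuner constructor arguments.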
+        tuning_config = job_details['HyperParameterTuningJobConfig']
+        return {
+            'metric_definitions': job_details['TrainingJobDefinition']['AlgorithmSpecification']['MetricDefinitions'],
+            'objective_metric_name': tuning_config['HyperParameterTuningJobObjective']['MetricName'],
+            'objective_type': tuning_config['HyperParameterTuningJobObjective']['Type'],
+            'hyperparameter_ranges': cls._prepare_parameter_ranges(tuning_config['ParameterRanges']),
+            'strategy': tuning_config['Strategy'],
+            'max_jobs': tuning_config['ResourceLimits']['MaxNumberOfTrainingJobs'],
+            'max_parallel_jobs': tuning_config['ResourceLimits']['MaxParallelTrainingJobs'],
+        }
+
+    @classmethod
+    def _prepare_parameter_ranges(cls, parameter_ranges):
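+        # Rebuild {name: parameter object} mappings from the API's ParameterRanges structure.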
+        ranges = {}
+
+        for parameter in parameter_ranges['CategoricalParameterRanges']:
+            ranges[parameter['Name']] = CategoricalParameter(parameter['Values'])
+
+        for parameter in parameter_ranges['ContinuousParameterRanges']:
+            ranges[parameter['Name']] = ContinuousParameter(float(parameter['MinValue']), float(parameter['MaxValue']))
+
+        for parameter in parameter_ranges['IntegerParameterRanges']:
+            ranges[parameter['Name']] = IntegerParameter(int(parameter['MinValue']), int(parameter['MaxValue']))
+
+        return ranges
+
    def hyperparameter_ranges(self):
        """Return collections of ``ParameterRanges``
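Taken together, the changes above let a tuner be rebuilt from a tuning job that already exists. A minimal usage sketch of the new `attach` flow, assuming this module is importable as `sagemaker.tuner`, that AWS credentials are configured, and that the job name and instance type below are placeholder values:

```python
from sagemaker.tuner import HyperparameterTuner

# Rebuild the tuner (and its estimator) from a previously started tuning job.
tuner = HyperparameterTuner.attach('my-tuning-job')  # hypothetical job name

# Deploy the best model found by that tuning job to a real-time endpoint.
predictor = tuner.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')
```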