
Commit f9c460a

Remove kmeans metric definitions (aws#44)
1 parent 56f737c commit f9c460a

File tree

4 files changed: +12 additions, -38 deletions

src/sagemaker/amazon/kmeans.py

Lines changed: 1 addition & 26 deletions
@@ -91,32 +91,7 @@ def __init__(self, role, train_instance_count, train_instance_type, k, init_meth
             the score shall be reported in terms of all requested metrics.
         **kwargs: base class keyword argument values.
         """
-        # TODO: shouldn't be defined here, delete this once HPO fixes validation
-        metric_definitions = [
-            {
-                "Name": "test:msd",
-                "Regex": "#quality_metric: host=\\S+, test msd <loss>=(\\S+)"
-            },
-            {
-                "Name": "test:ssd",
-                "Regex": "#quality_metric: host=\\S+, test ssd <loss>=(\\S+)"
-            },
-            {
-                "Name": "train:msd",
-                "Regex": "#quality_metric: host=\\S+, train msd <loss>=(\\S+)"
-            },
-            {
-                "Name": "train:progress",
-                "Regex": "#progress_metric: host=\\S+, completed (\\S+) %"
-            },
-            # updated below basing on current log format
-            {
-                "Name": "train:throughput",
-                "Regex": "#throughput_metric: train throughput in records/second: (\\S+)"
-            }
-        ]
-        super(KMeans, self).__init__(role, train_instance_count, train_instance_type,
-                                     metric_definitions=metric_definitions, **kwargs)
+        super(KMeans, self).__init__(role, train_instance_count, train_instance_type, **kwargs)
         self.k = k
         self.init_method = init_method
         self.max_iterations = max_iterations
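
With the hardcoded list gone, KMeans forwards its keyword arguments untouched to the base estimator. Because the removed code passed metric_definitions to super().__init__, the base class evidently accepts that keyword, so a caller who still wants custom metrics can supply them directly. A minimal sketch; the role name, instance settings, and regex are illustrative, not part of this commit:

    from sagemaker.amazon.kmeans import KMeans

    # A minimal sketch: KMeans no longer injects metric definitions, so
    # anything passed here simply flows through **kwargs to the base
    # estimator. Role, instance settings, and the regex are illustrative.
    kmeans = KMeans(role='SageMakerRole',
                    train_instance_count=1,
                    train_instance_type='ml.c4.xlarge',
                    k=10,
                    metric_definitions=[
                        {'Name': 'test:msd',
                         'Regex': '#quality_metric: host=\\S+, test msd <loss>=(\\S+)'}])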

src/sagemaker/session.py

Lines changed: 1 addition & 2 deletions
@@ -279,7 +279,7 @@ def tune(self, job_name, strategy, objective_type, objective_metric_name,
                 * 'File' - Amazon SageMaker copies the training dataset from the S3 location to
                     a directory in the Docker container.
                 * 'Pipe' - Amazon SageMaker streams data directly from S3 to the container via a Unix-named pipe.
-            metric_definitions (str):
+            metric_definitions (list[dict]): Metrics definition with 'name' and 'regex' keys.
             role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs
                 that create Amazon SageMaker endpoints use this role to access training data and model artifacts.
                 You must grant sufficient permissions to this role.
@@ -312,7 +312,6 @@ def tune(self, job_name, strategy, objective_type, objective_metric_name,
             'AlgorithmSpecification': {
                 'TrainingImage': image,
                 'TrainingInputMode': input_mode,
-                'MetricDefinitions': metric_definitions,
             },
             'RoleArn': role,
             'InputDataConfig': input_config,
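
The updated docstring describes metric_definitions as a list of dicts. For the concrete shape, the KMeans defaults deleted above are the best reference in this commit; note they use capitalized 'Name'/'Regex' keys, whereas the new docstring says 'name' and 'regex'. A sketch copied from those deleted defaults:

    # A sketch of the list[dict] shape, copied from the KMeans defaults
    # deleted above; the regexes match that training image's log format.
    metric_definitions = [
        {'Name': 'test:msd',
         'Regex': '#quality_metric: host=\\S+, test msd <loss>=(\\S+)'},
        {'Name': 'train:throughput',
         'Regex': '#throughput_metric: train throughput in records/second: (\\S+)'},
    ]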

src/sagemaker/tuner.py

Lines changed: 7 additions & 6 deletions
@@ -89,8 +89,9 @@ class HyperparameterTuner(object):
     DEFAULT_ESTIMATOR_MODULE = 'sagemaker.estimator'
     DEFAULT_ESTIMATOR_CLS_NAME = 'Estimator'
 
-    def __init__(self, estimator, objective_metric_name, hyperparameter_ranges, metric_definitions, strategy='Bayesian',
-                 objective_type='Maximize', max_jobs=1, max_parallel_jobs=1, base_tuning_job_name=None):
+    def __init__(self, estimator, objective_metric_name, hyperparameter_ranges, metric_definitions=None,
+                 strategy='Bayesian', objective_type='Maximize', max_jobs=1, max_parallel_jobs=1,
+                 base_tuning_job_name=None):
         self._hyperparameter_ranges = hyperparameter_ranges
         if self._hyperparameter_ranges is None or len(self._hyperparameter_ranges) == 0:
             raise ValueError('Need to specify hyperparameter ranges')
@@ -102,7 +103,6 @@ def __init__(self, estimator, objective_metric_name, hyperparameter_ranges, metr
 
         self.strategy = strategy
         self.objective_type = objective_type
-
         self.max_jobs = max_jobs
         self.max_parallel_jobs = max_parallel_jobs
 
@@ -124,7 +124,8 @@ def prepare_for_training(self, job_name=None):
         # For attach() to know what estimator to use for non-1P algorithms
         # (1P algorithms don't accept extra hyperparameters)
         if not isinstance(self.estimator, AmazonAlgorithmEstimatorBase):
-            self.static_hyperparameters[self.SAGEMAKER_ESTIMATOR_CLASS_NAME] = json.dumps(self.estimator.__class__.__name__)
+            self.static_hyperparameters[self.SAGEMAKER_ESTIMATOR_CLASS_NAME] = json.dumps(
+                self.estimator.__class__.__name__)
         self.static_hyperparameters[self.SAGEMAKER_ESTIMATOR_MODULE] = json.dumps(self.estimator.__module__)
 
     def fit(self, inputs, job_name=None, **kwargs):
@@ -150,7 +151,7 @@ def attach(cls, tuning_job_name, sagemaker_session=None, job_details=None, estim
         sagemaker_session = sagemaker_session or Session()
 
         if job_details is None:
-            job_details = sagemaker_session.sagemaker_client\
+            job_details = sagemaker_session.sagemaker_client \
                 .describe_hyper_parameter_tuning_job(HyperParameterTuningJobName=tuning_job_name)
 
         estimator_cls = cls._prepare_estimator_cls(estimator_cls, job_details['TrainingJobDefinition'])
@@ -249,7 +250,7 @@ def _prepare_estimator_cls(cls, estimator_cls, training_details):
 
         # Then try to derive the estimator from the image name for 1P algorithms
         image_name = training_details['AlgorithmSpecification']['TrainingImage']
-        algorithm = image_name[image_name.find('/')+1:image_name.find(':')]
+        algorithm = image_name[image_name.find('/') + 1:image_name.find(':')]
         if algorithm in AMAZON_ESTIMATOR_CLS_NAMES:
             cls_name = AMAZON_ESTIMATOR_CLS_NAMES[algorithm]
             return getattr(importlib.import_module(AMAZON_ESTIMATOR_MODULE), cls_name)
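
Since metric_definitions now defaults to None, tuners for 1P algorithms can omit it entirely (the service resolves those metrics), while custom estimators would still pass it explicitly. A minimal sketch of the latter; my_estimator, the metric name, and the regex are hypothetical:

    from sagemaker.tuner import ContinuousParameter, HyperparameterTuner

    # A minimal sketch for a custom (non-1P) estimator, where metric
    # definitions are still supplied explicitly now that the parameter
    # defaults to None. `my_estimator` and the regex are hypothetical.
    tuner = HyperparameterTuner(estimator=my_estimator,
                                objective_metric_name='validation:loss',
                                hyperparameter_ranges={'learning_rate': ContinuousParameter(0.01, 0.2)},
                                metric_definitions=[{'Name': 'validation:loss',
                                                     'Regex': 'val_loss=(\\S+)'}],
                                objective_type='Minimize')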

tests/integ/test_tuner.py

Lines changed: 3 additions & 4 deletions
@@ -51,13 +51,12 @@ def test_fit_1p(sagemaker_session):
     # specify which hp you want to optimize over
     hyperparameter_ranges = {'extra_center_factor': IntegerParameter(1, 10),
                              'mini_batch_size': IntegerParameter(10, 100),
-                             'local_lloyd_tol': ContinuousParameter(0.5, 0.75),
-                             'local_lloyd_init_method': CategoricalParameter(['kmeans++', 'random'])}
+                             'epochs': IntegerParameter(1, 2),
+                             'init_method': CategoricalParameter(['kmeans++', 'random'])}
     objective_metric_name = 'test:msd'
 
     tuner = HyperparameterTuner(estimator=kmeans, objective_metric_name=objective_metric_name,
-                                hyperparameter_ranges=hyperparameter_ranges,
-                                metric_definitions=kmeans.metric_definitions, objective_type='Minimize', max_jobs=8,
+                                hyperparameter_ranges=hyperparameter_ranges, objective_type='Minimize', max_jobs=2,
                                 max_parallel_jobs=2)
 
     tuner.fit(records)
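
A tuner started this way can later be reconstructed by name via the attach() path touched in tuner.py above. A minimal sketch; 'my-tuning-job' is a hypothetical job name:

    from sagemaker.tuner import HyperparameterTuner

    # A minimal sketch of resuming an existing tuning job via attach(),
    # whose signature appears in the tuner.py diff above.
    tuner = HyperparameterTuner.attach('my-tuning-job')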
