
Commit f9c460a

Remove kmeans metric definitions (aws#44)
1 parent 56f737c commit f9c460a

File tree

4 files changed: +12 additions, -38 deletions

src/sagemaker/amazon/kmeans.py

Lines changed: 1 addition & 26 deletions
@@ -91,32 +91,7 @@ def __init__(self, role, train_instance_count, train_instance_type, k, init_meth
             the score shall be reported in terms of all requested metrics.
         **kwargs: base class keyword argument values.
         """
-        # TODO: shouldn't be defined here, delete this once HPO fixes validation
-        metric_definitions = [
-            {
-                "Name": "test:msd",
-                "Regex": "#quality_metric: host=\\S+, test msd <loss>=(\\S+)"
-            },
-            {
-                "Name": "test:ssd",
-                "Regex": "#quality_metric: host=\\S+, test ssd <loss>=(\\S+)"
-            },
-            {
-                "Name": "train:msd",
-                "Regex": "#quality_metric: host=\\S+, train msd <loss>=(\\S+)"
-            },
-            {
-                "Name": "train:progress",
-                "Regex": "#progress_metric: host=\\S+, completed (\\S+) %"
-            },
-            # updated below basing on current log format
-            {
-                "Name": "train:throughput",
-                "Regex": "#throughput_metric: train throughput in records/second: (\\S+)"
-            }
-        ]
-        super(KMeans, self).__init__(role, train_instance_count, train_instance_type,
-                                     metric_definitions=metric_definitions, **kwargs)
+        super(KMeans, self).__init__(role, train_instance_count, train_instance_type, **kwargs)
         self.k = k
         self.init_method = init_method
         self.max_iterations = max_iterations
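
With the hardcoded list gone, KMeans forwards its keyword arguments untouched to the base estimator. Because the removed code passed metric_definitions to super().__init__, the base class evidently accepts that keyword, so a caller who still wants custom metrics can supply them directly. A minimal sketch; the role name, instance settings, and regex are illustrative, not part of this commit:

    from sagemaker.amazon.kmeans import KMeans

    # A minimal sketch: KMeans no longer injects metric definitions, so
    # anything passed here simply flows through **kwargs to the base
    # estimator. Role, instance settings, and the regex are illustrative.
    kmeans = KMeans(role='SageMakerRole',
                    train_instance_count=1,
                    train_instance_type='ml.c4.xlarge',
                    k=10,
                    metric_definitions=[
                        {'Name': 'test:msd',
                         'Regex': '#quality_metric: host=\\S+, test msd <loss>=(\\S+)'}])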

src/sagemaker/session.py

Lines changed: 1 addition & 2 deletions
@@ -279,7 +279,7 @@ def tune(self, job_name, strategy, objective_type, objective_metric_name,
                 * 'File' - Amazon SageMaker copies the training dataset from the S3 location to
                     a directory in the Docker container.
                 * 'Pipe' - Amazon SageMaker streams data directly from S3 to the container via a Unix-named pipe.
-            metric_definitions (str):
+            metric_definitions (list[dict]): Metrics definition with 'name' and 'regex' keys.
             role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs
                 that create Amazon SageMaker endpoints use this role to access training data and model artifacts.
                 You must grant sufficient permissions to this role.
@@ -312,7 +312,6 @@ def tune(self, job_name, strategy, objective_type, objective_metric_name,
             'AlgorithmSpecification': {
                 'TrainingImage': image,
                 'TrainingInputMode': input_mode,
-                'MetricDefinitions': metric_definitions,
             },
             'RoleArn': role,
             'InputDataConfig': input_config,
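
The updated docstring describes metric_definitions as a list of dicts. For the concrete shape, the KMeans defaults deleted above are the best reference in this commit; note they use capitalized 'Name'/'Regex' keys, whereas the new docstring says 'name' and 'regex'. A sketch copied from those deleted defaults:

    # A sketch of the list[dict] shape, copied from the KMeans defaults
    # deleted above; the regexes match that training image's log format.
    metric_definitions = [
        {'Name': 'test:msd',
         'Regex': '#quality_metric: host=\\S+, test msd <loss>=(\\S+)'},
        {'Name': 'train:throughput',
         'Regex': '#throughput_metric: train throughput in records/second: (\\S+)'},
    ]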

src/sagemaker/tuner.py

Lines changed: 7 additions & 6 deletions
@@ -89,8 +89,9 @@ class HyperparameterTuner(object):
     DEFAULT_ESTIMATOR_MODULE = 'sagemaker.estimator'
     DEFAULT_ESTIMATOR_CLS_NAME = 'Estimator'
 
-    def __init__(self, estimator, objective_metric_name, hyperparameter_ranges, metric_definitions, strategy='Bayesian',
-                 objective_type='Maximize', max_jobs=1, max_parallel_jobs=1, base_tuning_job_name=None):
+    def __init__(self, estimator, objective_metric_name, hyperparameter_ranges, metric_definitions=None,
+                 strategy='Bayesian', objective_type='Maximize', max_jobs=1, max_parallel_jobs=1,
+                 base_tuning_job_name=None):
         self._hyperparameter_ranges = hyperparameter_ranges
         if self._hyperparameter_ranges is None or len(self._hyperparameter_ranges) == 0:
             raise ValueError('Need to specify hyperparameter ranges')
@@ -102,7 +103,6 @@ def __init__(self, estimator, objective_metric_name, hyperparameter_ranges, metr
 
         self.strategy = strategy
         self.objective_type = objective_type
-
         self.max_jobs = max_jobs
         self.max_parallel_jobs = max_parallel_jobs
 
@@ -124,7 +124,8 @@ def prepare_for_training(self, job_name=None):
         # For attach() to know what estimator to use for non-1P algorithms
         # (1P algorithms don't accept extra hyperparameters)
         if not isinstance(self.estimator, AmazonAlgorithmEstimatorBase):
-            self.static_hyperparameters[self.SAGEMAKER_ESTIMATOR_CLASS_NAME] = json.dumps(self.estimator.__class__.__name__)
+            self.static_hyperparameters[self.SAGEMAKER_ESTIMATOR_CLASS_NAME] = json.dumps(
+                self.estimator.__class__.__name__)
         self.static_hyperparameters[self.SAGEMAKER_ESTIMATOR_MODULE] = json.dumps(self.estimator.__module__)
 
     def fit(self, inputs, job_name=None, **kwargs):
@@ -150,7 +151,7 @@ def attach(cls, tuning_job_name, sagemaker_session=None, job_details=None, estim
         sagemaker_session = sagemaker_session or Session()
 
         if job_details is None:
-            job_details = sagemaker_session.sagemaker_client\
+            job_details = sagemaker_session.sagemaker_client \
                 .describe_hyper_parameter_tuning_job(HyperParameterTuningJobName=tuning_job_name)
 
         estimator_cls = cls._prepare_estimator_cls(estimator_cls, job_details['TrainingJobDefinition'])
@@ -249,7 +250,7 @@ def _prepare_estimator_cls(cls, estimator_cls, training_details):
 
         # Then try to derive the estimator from the image name for 1P algorithms
         image_name = training_details['AlgorithmSpecification']['TrainingImage']
-        algorithm = image_name[image_name.find('/')+1:image_name.find(':')]
+        algorithm = image_name[image_name.find('/') + 1:image_name.find(':')]
         if algorithm in AMAZON_ESTIMATOR_CLS_NAMES:
             cls_name = AMAZON_ESTIMATOR_CLS_NAMES[algorithm]
             return getattr(importlib.import_module(AMAZON_ESTIMATOR_MODULE), cls_name)
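
Since metric_definitions now defaults to None, tuners for 1P algorithms can omit it entirely (the service resolves those metrics), while custom estimators would still pass it explicitly. A minimal sketch of the latter; my_estimator, the metric name, and the regex are hypothetical:

    from sagemaker.tuner import ContinuousParameter, HyperparameterTuner

    # A minimal sketch for a custom (non-1P) estimator, where metric
    # definitions are still supplied explicitly now that the parameter
    # defaults to None. `my_estimator` and the regex are hypothetical.
    tuner = HyperparameterTuner(estimator=my_estimator,
                                objective_metric_name='validation:loss',
                                hyperparameter_ranges={'learning_rate': ContinuousParameter(0.01, 0.2)},
                                metric_definitions=[{'Name': 'validation:loss',
                                                     'Regex': 'val_loss=(\\S+)'}],
                                objective_type='Minimize')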

tests/integ/test_tuner.py

Lines changed: 3 additions & 4 deletions
@@ -51,13 +51,12 @@ def test_fit_1p(sagemaker_session):
     # specify which hp you want to optimize over
     hyperparameter_ranges = {'extra_center_factor': IntegerParameter(1, 10),
                              'mini_batch_size': IntegerParameter(10, 100),
-                             'local_lloyd_tol': ContinuousParameter(0.5, 0.75),
-                             'local_lloyd_init_method': CategoricalParameter(['kmeans++', 'random'])}
+                             'epochs': IntegerParameter(1, 2),
+                             'init_method': CategoricalParameter(['kmeans++', 'random'])}
     objective_metric_name = 'test:msd'
 
     tuner = HyperparameterTuner(estimator=kmeans, objective_metric_name=objective_metric_name,
-                                hyperparameter_ranges=hyperparameter_ranges,
-                                metric_definitions=kmeans.metric_definitions, objective_type='Minimize', max_jobs=8,
+                                hyperparameter_ranges=hyperparameter_ranges, objective_type='Minimize', max_jobs=2,
                                 max_parallel_jobs=2)
 
     tuner.fit(records)
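
A tuner started this way can later be reconstructed by name via the attach() path touched in tuner.py above. A minimal sketch; 'my-tuning-job' is a hypothetical job name:

    from sagemaker.tuner import HyperparameterTuner

    # A minimal sketch of resuming an existing tuning job via attach(),
    # whose signature appears in the tuner.py diff above.
    tuner = HyperparameterTuner.attach('my-tuning-job')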
