aws
diff --git a/‎CHANGELOG.rst
Lines changed: 3 additions & 0 deletions b/‎CHANGELOG.rst
Lines changed: 3 additions & 0 deletions
diff --git a/‎README.rst
Lines changed: 20 additions & 1 deletion b/‎README.rst
Lines changed: 20 additions & 1 deletion
diff --git a/‎src/sagemaker/chainer/README.rst
Lines changed: 33 additions & 33 deletions b/‎src/sagemaker/chainer/README.rst
Lines changed: 33 additions & 33 deletions
diff --git a/‎src/sagemaker/chainer/estimator.py
Lines changed: 3 additions & 1 deletion b/‎src/sagemaker/chainer/estimator.py
Lines changed: 3 additions & 1 deletion
diff --git a/‎src/sagemaker/estimator.py
Lines changed: 18 additions & 4 deletions b/‎src/sagemaker/estimator.py
Lines changed: 18 additions & 4 deletions
diff --git a/‎src/sagemaker/fw_utils.py
Lines changed: 3 additions & 0 deletions b/‎src/sagemaker/fw_utils.py
Lines changed: 3 additions & 0 deletions
diff --git a/‎src/sagemaker/mxnet/estimator.py
Lines changed: 0 additions & 1 deletion b/‎src/sagemaker/mxnet/estimator.py
Lines changed: 0 additions & 1 deletion
diff --git a/‎src/sagemaker/session.py
Lines changed: 8 additions & 2 deletions b/‎src/sagemaker/session.py
Lines changed: 8 additions & 2 deletions
@@ -12,6 +12,8 @@ CHANGELOG
 * doc-fix: Add estimator base classes to API docs
 * feature: HyperparameterTuner: add support for Automatic Model Tuning's Warm Start Jobs
 * feature: HyperparameterTuner: Make input channels optional
+* feature: Add support for Chainer 5.0
+* feature: Estimator: add support for MetricDefinitions
 
 1.14.2
 ======
@@ -24,6 +26,7 @@ CHANGELOG
 * build: added pylint
 * build: upgrade docker-compose to 1.23
 * enhancement: Frameworks: update warning for not setting framework_version as we aren't planning a breaking change anymore
+* feature: Estimator: add script mode and Python 3 support for TensorFlow
 * enhancement: Session: remove hardcoded 'training' from job status error message
 * bug-fix: Updated Cloudwatch namespace for metrics in TrainingJobsAnalytics
 * bug-fix: Changes to use correct s3 bucket and time range for dataframes in TrainingJobAnalytics.
 
@@ -170,6 +170,25 @@ Here is an end to end example of how to use a SageMaker Estimator:
     # Tears down the SageMaker endpoint
     mxnet_estimator.delete_endpoint()
 
+Training Metrics
+~~~~~~~~~~~~~~~~
+The SageMaker Python SDK allows you to specify a name and a regular expression for metrics you want to track for training.
+A regular expression (regex) matches what is in the training algorithm logs, like a search function.
+Here is an example of how to define metrics:
+
+.. code:: python
+
+    # Configure a BYO Estimator with metric definitions (no training happens yet)
+    byo_estimator = Estimator(image_name=image_name,
+                              role='SageMakerRole', train_instance_count=1,
+                              train_instance_type='ml.c4.xlarge',
+                              sagemaker_session=sagemaker_session,
+                              metric_definitions=[{'Name': 'test:msd', 'Regex': r'#quality_metric: host=\S+, test msd <loss>=(\S+)'},
+                                                  {'Name': 'test:ssd', 'Regex': r'#quality_metric: host=\S+, test ssd <loss>=(\S+)'}])
+
+All Amazon SageMaker algorithms come with built-in support for metrics.
+You can go to `the AWS documentation <https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html>`__ for more details about built-in metrics of each Amazon SageMaker algorithm.
+
 Local Mode
 ~~~~~~~~~~
 
@@ -358,7 +377,7 @@ Chainer SageMaker Estimators
 
 By using Chainer SageMaker ``Estimators``, you can train and host Chainer models on Amazon SageMaker.
 
-Supported versions of Chainer: ``4.0.0``, ``4.1.0``.
+Supported versions of Chainer: ``4.0.0``, ``4.1.0``, ``5.0.0``.
 
 We recommend that you use the latest supported version, because that's where we focus most of our development efforts.
 
 
@@ -4,7 +4,7 @@ Chainer SageMaker Estimators and Models
 
 With Chainer Estimators, you can train and host Chainer models on Amazon SageMaker.
 
-Supported versions of Chainer: ``4.0.0``, ``4.1.0``
+Supported versions of Chainer: ``4.0.0``, ``4.1.0``, ``5.0.0``
 
 You can visit the Chainer repository at https://github.com/chainer/chainer.
 
@@ -32,7 +32,7 @@ Suppose that you already have an Chainer training script called
                                 role='SageMakerRole',
                                 train_instance_type='ml.p3.2xlarge',
                                 train_instance_count=1,
-                                framework_version='4.1.0')
+                                framework_version='5.0.0')
     chainer_estimator.fit('s3://bucket/path/to/training/data')
 
 Where the S3 URL is a path to your training data, within Amazon S3. The constructor keyword arguments define how
@@ -111,7 +111,7 @@ directories ('train' and 'test').
     chainer_estimator = Chainer('chainer-train.py',
                                 train_instance_type='ml.p3.2xlarge',
                                 train_instance_count=1,
-                                framework_version='4.1.0',
+                                framework_version='5.0.0',
                                 hyperparameters = {'epochs': 20, 'batch-size': 64, 'learning-rate': 0.1})
     chainer_estimator.fit({'train': 's3://my-data-bucket/path/to/my/training/data',
                            'test': 's3://my-data-bucket/path/to/my/test/data'})
@@ -285,7 +285,7 @@ operation.
     chainer_estimator = Chainer(entry_point='train_and_deploy.py',
                                 train_instance_type='ml.p3.2xlarge',
                                 train_instance_count=1,
-                                framework_version='4.1.0')
+                                framework_version='5.0.0')
     chainer_estimator.fit('s3://my_bucket/my_training_data/')
 
     # Deploy my estimator to a SageMaker Endpoint and get a Predictor
@@ -631,38 +631,38 @@ This Python version applies to both the Training Job, created by fit, and the En
 
 The Chainer Docker images have the following dependencies installed:
 
-+-----------------------------+-------------+
-| Dependencies                | chainer 4.0 |
-+-----------------------------+-------------+
-| chainer                     | 4.0.0       |
-+-----------------------------+-------------+
-| chainercv                   | 0.9.0       |
-+-----------------------------+-------------+
-| chainermn                   | 1.2.0       |
-+-----------------------------+-------------+
-| CUDA (GPU image only)       | 9.0         |
-+-----------------------------+-------------+
-| cupy                        | 4.0.0       |
-+-----------------------------+-------------+
-| matplotlib                  | 2.2.0       |
-+-----------------------------+-------------+
-| mpi4py                      | 3.0.0       |
-+-----------------------------+-------------+
-| numpy                       | 1.14.3      |
-+-----------------------------+-------------+
-| opencv-python               | 3.4.0.12    |
-+-----------------------------+-------------+
-| Pillow                      | 5.1.0       |
-+-----------------------------+-------------+
-| Python                      | 2.7 or 3.5  |
-+-----------------------------+-------------+
++-----------------------------+-------------+-------------+-------------+
+| Dependencies                | chainer 4.0 | chainer 4.1 | chainer 5.0 |
++-----------------------------+-------------+-------------+-------------+
+| chainer                     | 4.0.0       | 4.1.0       | 5.0.0       |
++-----------------------------+-------------+-------------+-------------+
+| chainercv                   | 0.9.0       | 0.10.0      | 0.10.0      |
++-----------------------------+-------------+-------------+-------------+
+| chainermn                   | 1.2.0       | 1.3.0       | N/A         |
++-----------------------------+-------------+-------------+-------------+
+| CUDA (GPU image only)       | 9.0         | 9.0         | 9.0         |
++-----------------------------+-------------+-------------+-------------+
+| cupy                        | 4.0.0       | 4.1.0       | 5.0.0       |
++-----------------------------+-------------+-------------+-------------+
+| matplotlib                  | 2.2.0       | 2.2.0       | 2.2.0       |
++-----------------------------+-------------+-------------+-------------+
+| mpi4py                      | 3.0.0       | 3.0.0       | 3.0.0       |
++-----------------------------+-------------+-------------+-------------+
+| numpy                       | 1.14.3      | 1.15.3      | 1.15.4      |
++-----------------------------+-------------+-------------+-------------+
+| opencv-python               | 3.4.0.12    | 3.4.0.12    | 3.4.0.12    |
++-----------------------------+-------------+-------------+-------------+
+| Pillow                      | 5.1.0       | 5.3.0       | 5.3.0       |
++-----------------------------+-------------+-------------+-------------+
+| Python                      | 2.7 or 3.5  | 2.7 or 3.5  | 2.7 or 3.5  |
++-----------------------------+-------------+-------------+-------------+
 
 The Docker images extend Ubuntu 16.04.
 
-You can select version of Chainer by passing a framework_version keyword arg to the Chainer Estimator constructor.
-Currently supported versions are listed in the above table. You can also set framework_version to only specify major and
-minor version, which will cause your training script to be run on the latest supported patch version of that minor
-version.
+You must select a version of Chainer by passing a ``framework_version`` keyword arg to the Chainer Estimator
+constructor. Currently supported versions are listed in the above table. You can also set ``framework_version`` to
+specify only the major and minor version, which will cause your training script to be run on the latest supported
+patch version of that minor version.
 
 Alternatively, you can build your own image by following the instructions in the SageMaker Chainer containers
 repository, and passing ``image_name`` to the Chainer Estimator constructor.
 
@@ -35,6 +35,8 @@ class Chainer(Framework):
     _process_slots_per_host = "sagemaker_process_slots_per_host"
     _additional_mpi_options = "sagemaker_additional_mpi_options"
 
+    LATEST_VERSION = '5.0.0'
+
     def __init__(self, entry_point, use_mpi=None, num_processes=None, process_slots_per_host=None,
                  additional_mpi_options=None, source_dir=None, hyperparameters=None, py_version='py3',
                  framework_version=None, image_name=None, **kwargs):
@@ -82,7 +84,7 @@ def __init__(self, entry_point, use_mpi=None, num_processes=None, process_slots_
             **kwargs: Additional kwargs passed to the :class:`~sagemaker.estimator.Framework` constructor.
         """
         if framework_version is None:
-            logger.warning(empty_framework_version_warning(CHAINER_VERSION, CHAINER_VERSION))
+            logger.warning(empty_framework_version_warning(CHAINER_VERSION, self.LATEST_VERSION))
         self.framework_version = framework_version or CHAINER_VERSION
 
         super(Chainer, self).__init__(entry_point, source_dir, hyperparameters,
 
@@ -50,7 +50,8 @@ class EstimatorBase(with_metaclass(ABCMeta, object)):
     def __init__(self, role, train_instance_count, train_instance_type,
                  train_volume_size=30, train_volume_kms_key=None, train_max_run=24 * 60 * 60, input_mode='File',
                  output_path=None, output_kms_key=None, base_job_name=None, sagemaker_session=None, tags=None,
-                 subnets=None, security_group_ids=None, model_uri=None, model_channel_name='model'):
+                 subnets=None, security_group_ids=None, model_uri=None, model_channel_name='model',
+                 metric_definitions=None):
         """Initialize an ``EstimatorBase`` instance.
 
         Args:
@@ -97,6 +98,10 @@ def __init__(self, role, train_instance_count, train_instance_type,
 
                 More information: https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html#td-deserialization
             model_channel_name (str): Name of the channel where 'model_uri' will be downloaded (default: 'model').
+            metric_definitions (list[dict]): A list of dictionaries that defines the metric(s) used to evaluate the
+                training jobs. Each dictionary contains two keys: 'Name' for the name of the metric, and 'Regex' for
+                the regular expression used to extract the metric from the logs. This should be defined only
+                for jobs that don't use an Amazon algorithm.
         """
         self.role = role
         self.train_instance_count = train_instance_count
@@ -106,6 +111,7 @@ def __init__(self, role, train_instance_count, train_instance_type,
         self.train_max_run = train_max_run
         self.input_mode = input_mode
         self.tags = tags
+        self.metric_definitions = metric_definitions
         self.model_uri = model_uri
         self.model_channel_name = model_channel_name
 
@@ -330,6 +336,9 @@ def _prepare_init_params_from_job_description(cls, job_details, model_channel_na
         init_params['hyperparameters'] = job_details['HyperParameters']
         init_params['image'] = job_details['AlgorithmSpecification']['TrainingImage']
 
+        if 'MetricDefinitions' in job_details['AlgorithmSpecification']:
+            init_params['metric_definitions'] = job_details['AlgorithmSpecification']['MetricDefinitions']
+
         subnets, security_group_ids = vpc_utils.from_dict(job_details.get(vpc_utils.VPC_CONFIG_KEY))
         if subnets:
             init_params['subnets'] = subnets
@@ -447,7 +456,7 @@ def start_new(cls, estimator, inputs):
                                           job_name=estimator._current_job_name, output_config=config['output_config'],
                                           resource_config=config['resource_config'], vpc_config=config['vpc_config'],
                                           hyperparameters=hyperparameters, stop_condition=config['stop_condition'],
-                                          tags=estimator.tags)
+                                          tags=estimator.tags, metric_definitions=estimator.metric_definitions)
 
         return cls(estimator.sagemaker_session, estimator._current_job_name)
 
@@ -472,7 +481,7 @@ def __init__(self, image_name, role, train_instance_count, train_instance_type,
                  train_volume_size=30, train_volume_kms_key=None, train_max_run=24 * 60 * 60,
                  input_mode='File', output_path=None, output_kms_key=None, base_job_name=None,
                  sagemaker_session=None, hyperparameters=None, tags=None, subnets=None, security_group_ids=None,
-                 model_uri=None, model_channel_name='model'):
+                 model_uri=None, model_channel_name='model', metric_definitions=None):
         """Initialize an ``Estimator`` instance.
 
         Args:
@@ -523,14 +532,18 @@ def __init__(self, image_name, role, train_instance_count, train_instance_type,
 
                 More information: https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html#td-deserialization
             model_channel_name (str): Name of the channel where 'model_uri' will be downloaded (default: 'model').
+            metric_definitions (list[dict]): A list of dictionaries that defines the metric(s) used to evaluate the
+                training jobs. Each dictionary contains two keys: 'Name' for the name of the metric, and 'Regex' for
+                the regular expression used to extract the metric from the logs. This should be defined only
+                for jobs that don't use an Amazon algorithm.
         """
         self.image_name = image_name
         self.hyperparam_dict = hyperparameters.copy() if hyperparameters else {}
         super(Estimator, self).__init__(role, train_instance_count, train_instance_type,
                                         train_volume_size, train_volume_kms_key, train_max_run, input_mode,
                                         output_path, output_kms_key, base_job_name, sagemaker_session,
                                         tags, subnets, security_group_ids, model_uri=model_uri,
-                                        model_channel_name=model_channel_name)
+                                        model_channel_name=model_channel_name, metric_definitions=metric_definitions)
 
     def train_image(self):
         """
@@ -616,6 +629,7 @@ class Framework(EstimatorBase):
     """
 
     __framework_name__ = None
+    LAUNCH_PS_ENV_NAME = 'sagemaker_parameter_server_enabled'
 
     def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cloudwatch_metrics=False,
                  container_log_level=logging.INFO, code_location=None, image_name=None, **kwargs):
 
@@ -31,6 +31,9 @@
                                   'If you would like to use version {latest}, ' \
                                   'please add framework_version={latest} to your constructor.'
 
+EMPTY_FRAMEWORK_VERSION_ERROR = 'framework_version is required for script mode estimator. ' \
+                                'Please add framework_version={} to your constructor to avoid this error.'
+
 VALID_PY_VERSIONS = ['py2', 'py3']
 
 
 
@@ -30,7 +30,6 @@ class MXNet(Framework):
     __framework_name__ = 'mxnet'
 
     _LOWEST_SCRIPT_MODE_VERSION = ['1', '3']
-    LAUNCH_PS_ENV_NAME = 'sagemaker_parameter_server_enabled'
     LATEST_VERSION = '1.3'
 
     def __init__(self, entry_point, source_dir=None, hyperparameters=None, py_version='py2',
 
@@ -203,7 +203,7 @@ def default_bucket(self):
         return self._default_bucket
 
     def train(self, image, input_mode, input_config, role, job_name, output_config,
-              resource_config, vpc_config, hyperparameters, stop_condition, tags):
+              resource_config, vpc_config, hyperparameters, stop_condition, tags, metric_definitions):
         """Create an Amazon SageMaker training job.
 
         Args:
@@ -243,6 +243,9 @@ def train(self, image, input_mode, input_config, role, job_name, output_config,
                 service like ``MaxRuntimeInSeconds``.
             tags (list[dict]): List of tags for labeling a training job. For more, see
                 https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html.
+            metric_definitions (list[dict]): A list of dictionaries that defines the metric(s) used to evaluate the
+                training jobs. Each dictionary contains two keys: 'Name' for the name of the metric, and 'Regex' for
+                the regular expression used to extract the metric from the logs.
 
         Returns:
             str: ARN of the training job, if it is created.
@@ -263,6 +266,9 @@ def train(self, image, input_mode, input_config, role, job_name, output_config,
         if input_config is not None:
             train_request['InputDataConfig'] = input_config
 
+        if metric_definitions is not None:
+            train_request['AlgorithmSpecification']['MetricDefinitions'] = metric_definitions
+
         if hyperparameters and len(hyperparameters) > 0:
             train_request['HyperParameters'] = hyperparameters
 
@@ -306,7 +312,7 @@ def tune(self, job_name, strategy, objective_type, objective_metric_name,
             metric_definitions (list[dict]): A list of dictionaries that defines the metric(s) used to evaluate the
                 training jobs. Each dictionary contains two keys: 'Name' for the name of the metric, and 'Regex' for
                 the regular expression used to extract the metric from the logs. This should be defined only for
-                hyperparameter tuning jobs that don't use an Amazon algorithm.
+                jobs that don't use an Amazon algorithm.
             role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs
                 that create Amazon SageMaker endpoints use this role to access training data and model artifacts.
                 You must grant sufficient permissions to this role.