aws · iquintero · Jun 25, 2018 · Jun 4, 2018 · Jun 14, 2018 · Jun 14, 2018
@@ -8,6 +8,7 @@ CHANGELOG
 * enhancement: Let Framework models reuse code uploaded by Framework estimators
 * enhancement: Unify generation of model uploaded code location
 * feature: Change minimum required scipy from 1.0.0 to 0.19.0
+* feature: Allow all Framework Estimators to use a custom docker image.
 
 1.5.0
 =====

@@ -175,6 +175,12 @@ The following are optional arguments. When you create a ``Chainer`` object, you
 -  ``job_name`` Name to assign for the training job that the fit()
    method launches. If not specified, the estimator generates a default
    job name, based on the training image name and current timestamp
+-  ``image_name`` An alternative docker image to use for training and
+   serving. If specified, the estimator will use this image for training and
+   hosting, instead of selecting the appropriate SageMaker official image based on
+   ``framework_version`` and ``py_version``. Refer to `SageMaker Chainer Docker Containers
+   <#sagemaker-chainer-docker-containers>`_ for details on what the official images support
+   and where to find the source code to build your custom image.
 
 
 Distributed Chainer Training
@@ -657,5 +663,8 @@ Currently supported versions are listed in the above table. You can also set fra
 minor version, which will cause your training script to be run on the latest supported patch version of that minor
 version.
 
+Alternatively, you can build your own image by following the instructions in the SageMaker Chainer containers
+repository, and passing ``image_name`` to the Chainer Estimator constructor.
+
 You can visit the SageMaker Chainer containers repository here: https://github.com/aws/sagemaker-chainer-containers/
 
@@ -13,7 +13,7 @@
 from __future__ import absolute_import
 
 from sagemaker.estimator import Framework
-from sagemaker.fw_utils import create_image_uri, framework_name_from_image, framework_version_from_tag
+from sagemaker.fw_utils import framework_name_from_image, framework_version_from_tag
 from sagemaker.chainer.defaults import CHAINER_VERSION
 from sagemaker.chainer.model import ChainerModel
 
@@ -31,7 +31,7 @@ class Chainer(Framework):
 
     def __init__(self, entry_point, use_mpi=None, num_processes=None, process_slots_per_host=None,
                  additional_mpi_options=None, source_dir=None, hyperparameters=None, py_version='py3',
-                 framework_version=CHAINER_VERSION, **kwargs):
+                 framework_version=CHAINER_VERSION, image_name=None, **kwargs):
         """
         This ``Estimator`` executes an Chainer script in a managed Chainer execution environment, within a SageMaker
         Training Job. The managed Chainer environment is an Amazon-built Docker container that executes functions
@@ -67,9 +67,16 @@ def __init__(self, entry_point, use_mpi=None, num_processes=None, process_slots_
                               One of 'py2' or 'py3'.
             framework_version (str): Chainer version you want to use for executing your model training code.
                 List of supported versions https://github.com/aws/sagemaker-python-sdk#chainer-sagemaker-estimators
+            image_name (str): If specified, the estimator will use this image for training and hosting, instead of
+                selecting the appropriate SageMaker official image based on framework_version and py_version. It can
+                be an ECR url or dockerhub image and tag.
+                Examples:
+                    123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0
+                    custom-image:latest.
             **kwargs: Additional kwargs passed to the :class:`~sagemaker.estimator.Framework` constructor.
         """
-        super(Chainer, self).__init__(entry_point, source_dir, hyperparameters, **kwargs)
+        super(Chainer, self).__init__(entry_point, source_dir, hyperparameters,
+                                      image_name=image_name, **kwargs)
         self.py_version = py_version
         self.framework_version = framework_version
         self.use_mpi = use_mpi
@@ -91,20 +98,6 @@ def hyperparameters(self):
         hyperparameters.update(Framework._json_encode_hyperparameters(additional_hyperparameters))
         return hyperparameters
 
-    def train_image(self):
-        """Return the Docker image to use for training.
-
-        The :meth:`~sagemaker.estimator.EstimatorBase.fit` method, which does the model training, calls this method to
-        find the image to use for model training.
-
-        Returns:
-            str: The URI of the Docker image.
-        """
-
-        return create_image_uri(self.sagemaker_session.boto_session.region_name, self.__framework_name__,
-                                self.train_instance_type, framework_version=self.framework_version,
-                                py_version=self.py_version)
-
     def create_model(self, model_server_workers=None):
         """Create a SageMaker ``ChainerModel`` object that can be deployed to an ``Endpoint``.
 
@@ -120,7 +113,8 @@ def create_model(self, model_server_workers=None):
                             enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, name=self._current_job_name,
                             container_log_level=self.container_log_level, code_location=self.code_location,
                             py_version=self.py_version, framework_version=self.framework_version,
-                            model_server_workers=model_server_workers, sagemaker_session=self.sagemaker_session)
+                            model_server_workers=model_server_workers, image=self.image_name,
+                            sagemaker_session=self.sagemaker_session)
 
     @classmethod
     def _prepare_init_params_from_job_description(cls, job_details):
@@ -142,7 +136,14 @@ def _prepare_init_params_from_job_description(cls, job_details):
             if value:
                 init_params[argument[len('sagemaker_'):]] = value
 
-        framework, py_version, tag = framework_name_from_image(init_params.pop('image'))
+        image_name = init_params.pop('image')
+        framework, py_version, tag = framework_name_from_image(image_name)
+
+        if not framework:
+            # If we were unable to parse the framework name from the image it is not one of our
+            # officially supported images, in this case just add the image to the init params.
+            init_params['image_name'] = image_name
+            return init_params
 
         init_params['py_version'] = py_version
         init_params['framework_version'] = framework_version_from_tag(tag)

@@ -20,7 +20,8 @@
 from six import with_metaclass
 
 from sagemaker.analytics import TrainingJobAnalytics
-from sagemaker.fw_utils import tar_and_upload_dir, parse_s3_url, UploadedCode, validate_source_dir
+from sagemaker.fw_utils import (create_image_uri, tar_and_upload_dir, parse_s3_url, UploadedCode,
+                                validate_source_dir)
 from sagemaker.job import _Job
 from sagemaker.local import LocalSession
 from sagemaker.model import Model
@@ -493,7 +494,7 @@ class Framework(EstimatorBase):
     """
 
     def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cloudwatch_metrics=False,
-                 container_log_level=logging.INFO, code_location=None, **kwargs):
+                 container_log_level=logging.INFO, code_location=None, image_name=None, **kwargs):
         """Base class initializer. Subclasses which override ``__init__`` should invoke ``super()``
 
         Args:
@@ -513,6 +514,9 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl
             code_location (str): Name of the S3 bucket where custom code is uploaded (default: None).
                 If not specified, default bucket created by ``sagemaker.session.Session`` is used.
             **kwargs: Additional kwargs passed to the ``EstimatorBase`` constructor.
+            image_name (str): An alternate image name to use instead of the official SageMaker image
+                for the framework. This is useful to run one of the SageMaker supported frameworks
+                with an image containing custom dependencies.
         """
         super(Framework, self).__init__(**kwargs)
         self.source_dir = source_dir
@@ -521,6 +525,7 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl
         self.container_log_level = container_log_level
         self._hyperparameters = hyperparameters or {}
         self.code_location = code_location
+        self.image_name = image_name
 
     def _prepare_for_training(self, job_name=None):
         """Set hyperparameters needed for training. This method will also validate ``source_dir``.
@@ -632,6 +637,21 @@ def _prepare_init_params_from_job_description(cls, job_details):
 
         return init_params
 
+    def train_image(self):
+        """Return the Docker image to use for training.
+
+        The  :meth:`~sagemaker.estimator.EstimatorBase.fit` method, which does the model training,
+        calls this method to find the image to use for model training.
+
+        Returns:
+            str: The URI of the Docker image.
+        """
+        if self.image_name:
+            return self.image_name
+        else:
+            return create_image_uri(self.sagemaker_session.boto_region_name, self.__framework_name__,
+                                    self.train_instance_type, self.framework_version, py_version=self.py_version)
+
     @classmethod
     def attach(cls, training_job_name, sagemaker_session=None):
         """Attach to an existing training job.

@@ -153,6 +153,12 @@ The following are optional arguments. When you create an ``MXNet`` object, you c
 -  ``job_name`` Name to assign for the training job that the fit()
    method launches. If not specified, the estimator generates a default
    job name, based on the training image name and current timestamp
+-  ``image_name`` An alternative docker image to use for training and
+   serving. If specified, the estimator will use this image for training and
+   hosting, instead of selecting the appropriate SageMaker official image based on
+   ``framework_version`` and ``py_version``. Refer to `SageMaker MXNet Docker Containers
+   <#sagemaker-mxnet-docker-containers>`_ for details on what the official images support
+   and where to find the source code to build your custom image.
 
 Calling fit
 ^^^^^^^^^^^
@@ -595,5 +601,6 @@ The Docker images have the following dependencies installed:
 The Docker images extend Ubuntu 16.04.
 
 You can select version of MXNet by passing a ``framework_version`` keyword arg to the MXNet Estimator constructor. Currently supported versions are listed in the above table. You can also set ``framework_version`` to only specify major and minor version, e.g ``1.1``, which will cause your training script to be run on the latest supported patch version of that minor version, which in this example would be 1.1.0.
+Alternatively, you can build your own image by following the instructions in the SageMaker MXNet containers repository, and passing ``image_name`` to the MXNet Estimator constructor.
 
 You can visit the SageMaker MXNet containers repository here: https://github.com/aws/sagemaker-mxnet-containers/
@@ -13,7 +13,7 @@
 from __future__ import absolute_import
 
 from sagemaker.estimator import Framework
-from sagemaker.fw_utils import create_image_uri, framework_name_from_image, framework_version_from_tag
+from sagemaker.fw_utils import framework_name_from_image, framework_version_from_tag
 from sagemaker.mxnet.defaults import MXNET_VERSION
 from sagemaker.mxnet.model import MXNetModel
 
@@ -24,7 +24,7 @@ class MXNet(Framework):
     __framework_name__ = "mxnet"
 
     def __init__(self, entry_point, source_dir=None, hyperparameters=None, py_version='py2',
-                 framework_version=MXNET_VERSION, **kwargs):
+                 framework_version=MXNET_VERSION, image_name=None, **kwargs):
         """
         This ``Estimator`` executes an MXNet script in a managed MXNet execution environment, within a SageMaker
         Training Job. The managed MXNet environment is an Amazon-built Docker container that executes functions
@@ -52,25 +52,19 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, py_versio
                               One of 'py2' or 'py3'.
             framework_version (str): MXNet version you want to use for executing your model training code.
                 List of supported versions https://github.com/aws/sagemaker-python-sdk#mxnet-sagemaker-estimators
+            image_name (str): If specified, the estimator will use this image for training and hosting, instead of
+                selecting the appropriate SageMaker official image based on framework_version and py_version. It can
+                be an ECR url or dockerhub image and tag.
+                    Examples:
+                        123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0
+                        custom-image:latest.
             **kwargs: Additional kwargs passed to the :class:`~sagemaker.estimator.Framework` constructor.
         """
-        super(MXNet, self).__init__(entry_point, source_dir, hyperparameters, **kwargs)
+        super(MXNet, self).__init__(entry_point, source_dir, hyperparameters,
+                                    image_name=image_name, **kwargs)
         self.py_version = py_version
         self.framework_version = framework_version
 
-    def train_image(self):
-        """Return the Docker image to use for training.
-
-        The :meth:`~sagemaker.estimator.EstimatorBase.fit` method, which does the model training, calls this method to
-        find the image to use for model training.
-
-        Returns:
-            str: The URI of the Docker image.
-        """
-        return create_image_uri(self.sagemaker_session.boto_region_name, self.__framework_name__,
-                                self.train_instance_type, framework_version=self.framework_version,
-                                py_version=self.py_version)
-
     def create_model(self, model_server_workers=None):
         """Create a SageMaker ``MXNetModel`` object that can be deployed to an ``Endpoint``.
 
@@ -85,7 +79,7 @@ def create_model(self, model_server_workers=None):
         return MXNetModel(self.model_data, self.role, self.entry_point, source_dir=self._model_source_dir(),
                           enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, name=self._current_job_name,
                           container_log_level=self.container_log_level, code_location=self.code_location,
-                          py_version=self.py_version, framework_version=self.framework_version,
+                          py_version=self.py_version, framework_version=self.framework_version, image=self.image_name,
                           model_server_workers=model_server_workers, sagemaker_session=self.sagemaker_session)
 
     @classmethod
@@ -100,7 +94,14 @@ def _prepare_init_params_from_job_description(cls, job_details):
 
         """
         init_params = super(MXNet, cls)._prepare_init_params_from_job_description(job_details)
-        framework, py_version, tag = framework_name_from_image(init_params.pop('image'))
+        image_name = init_params.pop('image')
+        framework, py_version, tag = framework_name_from_image(image_name)
+
+        if not framework:
+            # If we were unable to parse the framework name from the image it is not one of our
+            # officially supported images, in this case just add the image to the init params.
+            init_params['image_name'] = image_name
+            return init_params
 
         init_params['py_version'] = py_version
 

@@ -204,7 +204,12 @@ The following are optional arguments. When you create a ``PyTorch`` object, you
 -  ``job_name`` Name to assign for the training job that the ``fit```
    method launches. If not specified, the estimator generates a default
    job name, based on the training image name and current timestamp
-
+-  ``image_name`` An alternative docker image to use for training and
+   serving. If specified, the estimator will use this image for training and
+   hosting, instead of selecting the appropriate SageMaker official image based on
+   ``framework_version`` and ``py_version``. Refer to `SageMaker PyTorch Docker Containers
+   <#sagemaker-pytorch-docker-containers>`_ for details on what the official images support
+   and where to find the source code to build your custom image.
 
 Calling fit
 ~~~~~~~~~~~
@@ -705,4 +710,7 @@ Currently supported versions are listed in the above table. You can also set ``f
 minor version, which will cause your training script to be run on the latest supported patch version of that minor
 version.
 
+Alternatively, you can build your own image by following the instructions in the SageMaker PyTorch containers
+repository, and passing ``image_name`` to the PyTorch Estimator constructor.
+
 You can visit `the SageMaker PyTorch containers repository <https://github.com/aws/sagemaker-pytorch-containers>`_.