Skip to content

add chainer, bump to 1.3 #195

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 24, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
CHANGELOG
=========

1.3.0
=======

* feature: Add chainer

1.2.5
========
Expand Down
685 changes: 677 additions & 8 deletions README.rst

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def read(fname):


setup(name="sagemaker",
version="1.2.5",
version="1.3.0",
description="Open source library for training and deploying models on Amazon SageMaker.",
packages=find_packages('src'),
package_dir={'': 'src'},
Expand Down
18 changes: 18 additions & 0 deletions src/sagemaker/chainer/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import

from sagemaker.chainer.estimator import Chainer
from sagemaker.chainer.model import ChainerModel, ChainerPredictor

# ``__all__`` must list the public names as strings; putting the class objects here makes
# ``from sagemaker.chainer import *`` fail with a TypeError.
__all__ = ['Chainer', 'ChainerModel', 'ChainerPredictor']
15 changes: 15 additions & 0 deletions src/sagemaker/chainer/defaults.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import

# Default Chainer framework version, used when ``framework_version`` is not passed explicitly.
CHAINER_VERSION = '4.0.0'
154 changes: 154 additions & 0 deletions src/sagemaker/chainer/estimator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import

from sagemaker.estimator import Framework
from sagemaker.fw_utils import create_image_uri, framework_name_from_image, framework_version_from_tag
from sagemaker.chainer.defaults import CHAINER_VERSION
from sagemaker.chainer.model import ChainerModel


class Chainer(Framework):
    """Handle end-to-end training and deployment of custom Chainer code."""

    __framework_name__ = "chainer"

    # Names of the hyperparameters used to pass the MPI-related settings to the training job.
    _use_mpi = "sagemaker_use_mpi"
    _num_processes = "sagemaker_num_processes"
    _process_slots_per_host = "sagemaker_process_slots_per_host"
    _additional_mpi_options = "sagemaker_additional_mpi_options"

    def __init__(self, entry_point, use_mpi=None, num_processes=None, process_slots_per_host=None,
                 additional_mpi_options=None, source_dir=None, hyperparameters=None, py_version='py3',
                 framework_version=CHAINER_VERSION, **kwargs):
        """
        This ``Estimator`` executes a Chainer script in a managed Chainer execution environment, within a SageMaker
        Training Job. The managed Chainer environment is an Amazon-built Docker container that executes functions
        defined in the supplied ``entry_point`` Python script.

        Training is started by calling :meth:`~sagemaker.estimator.Framework.fit` on this Estimator.
        After training is complete, calling :meth:`~sagemaker.estimator.Framework.deploy` creates a
        hosted SageMaker endpoint and returns a :class:`~sagemaker.chainer.model.ChainerPredictor` instance
        that can be used to perform inference against the hosted model.

        Technical documentation on preparing Chainer scripts for SageMaker training and using the Chainer Estimator is
        available on the project home-page: https://github.com/aws/sagemaker-python-sdk

        Args:
            entry_point (str): Path (absolute or relative) to the Python source file which should be executed
                as the entry point to training. This should be compatible with either Python 2.7 or Python 3.5.
            use_mpi (bool): If true, entry point is run as an MPI script. By default, the Chainer Framework runs
                the entry point with 'mpirun' if more than one instance is used.
            num_processes (int): Total number of processes to run the entry point with. By default, the Chainer
                Framework runs one process per GPU (on GPU instances), or one process per host (on CPU instances).
            process_slots_per_host (int): The number of processes that can run on each instance. By default, this is
                set to the number of GPUs on the instance (on GPU instances), or one (on CPU instances).
            additional_mpi_options (str): String of options to the 'mpirun' command used to run the entry point.
                For example, '-X NCCL_DEBUG=WARN' will pass that option string to the mpirun command.
            source_dir (str): Path (absolute or relative) to a directory with any other training
                source code dependencies aside from the entry point file (default: None). Structure within this
                directory is preserved when training on Amazon SageMaker.
            hyperparameters (dict): Hyperparameters that will be used for training (default: None).
                The hyperparameters are made accessible as a dict[str, str] to the training code on SageMaker.
                For convenience, this accepts other types for keys and values, but ``str()`` will be called
                to convert them before training.
            py_version (str): Python version you want to use for executing your model training code (default: 'py3').
                One of 'py2' or 'py3'.
            framework_version (str): Chainer version you want to use for executing your model training code
                (default: ``CHAINER_VERSION``).
                List of supported versions https://github.com/aws/sagemaker-python-sdk#chainer-sagemaker-estimators
            **kwargs: Additional kwargs passed to the :class:`~sagemaker.estimator.Framework` constructor.
        """
        super(Chainer, self).__init__(entry_point, source_dir, hyperparameters, **kwargs)
        self.py_version = py_version
        self.framework_version = framework_version
        self.use_mpi = use_mpi
        self.num_processes = num_processes
        self.process_slots_per_host = process_slots_per_host
        self.additional_mpi_options = additional_mpi_options

    def hyperparameters(self):
        """Return hyperparameters used by your custom Chainer code during training.

        The hyperparameters of the base class are extended with the MPI-related settings
        (``use_mpi``, ``num_processes``, ``process_slots_per_host``, ``additional_mpi_options``)
        that were explicitly set on this estimator.

        Returns:
            dict: The hyperparameters, including any explicitly set MPI options.
        """
        hyperparameters = super(Chainer, self).hyperparameters()

        additional_hyperparameters = {Chainer._use_mpi: self.use_mpi,
                                      Chainer._num_processes: self.num_processes,
                                      Chainer._process_slots_per_host: self.process_slots_per_host,
                                      Chainer._additional_mpi_options: self.additional_mpi_options}

        # Remove unset keys only. Test against None rather than truthiness so that an explicit
        # ``use_mpi=False`` (opting out of the multi-instance MPI default) is not silently dropped.
        additional_hyperparameters = {k: v for k, v in additional_hyperparameters.items() if v is not None}
        hyperparameters.update(Framework._json_encode_hyperparameters(additional_hyperparameters))
        return hyperparameters

    def train_image(self):
        """Return the Docker image to use for training.

        The :meth:`~sagemaker.estimator.EstimatorBase.fit` method, which does the model training, calls this method to
        find the image to use for model training.

        Returns:
            str: The URI of the Docker image.
        """

        return create_image_uri(self.sagemaker_session.boto_session.region_name, self.__framework_name__,
                                self.train_instance_type, framework_version=self.framework_version,
                                py_version=self.py_version)

    def create_model(self, model_server_workers=None):
        """Create a SageMaker ``ChainerModel`` object that can be deployed to an ``Endpoint``.

        Args:
            model_server_workers (int): Optional. The number of worker processes used by the inference server.
                If None, server will use one worker per vCPU.

        Returns:
            sagemaker.chainer.model.ChainerModel: A SageMaker ``ChainerModel`` object.
                See :func:`~sagemaker.chainer.model.ChainerModel` for full details.
        """
        return ChainerModel(self.model_data, self.role, self.entry_point, source_dir=self.source_dir,
                            enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, name=self._current_job_name,
                            container_log_level=self.container_log_level, code_location=self.code_location,
                            py_version=self.py_version, framework_version=self.framework_version,
                            model_server_workers=model_server_workers, sagemaker_session=self.sagemaker_session)

    @classmethod
    def _prepare_init_params_from_job_description(cls, job_details):
        """Convert the job description to init params that can be handled by the class constructor

        Args:
            job_details: the returned job details from a describe_training_job API call.

        Returns:
            dictionary: The transformed init_params

        Raises:
            ValueError: If the training job's image does not belong to this framework.
        """
        init_params = super(Chainer, cls)._prepare_init_params_from_job_description(job_details)

        for argument in [Chainer._use_mpi, Chainer._num_processes, Chainer._process_slots_per_host,
                         Chainer._additional_mpi_options]:

            # Move each MPI hyperparameter out of the generic hyperparameters and into the matching
            # constructor argument (the hyperparameter name without its 'sagemaker_' prefix).
            value = init_params['hyperparameters'].pop(argument, None)
            # Only skip values that are absent; a present-but-falsy value is still an explicit setting.
            if value is not None:
                init_params[argument[len('sagemaker_'):]] = value

        framework, py_version, tag = framework_name_from_image(init_params.pop('image'))

        init_params['py_version'] = py_version
        init_params['framework_version'] = framework_version_from_tag(tag)

        training_job_name = init_params['base_job_name']

        if framework != cls.__framework_name__:
            raise ValueError("Training job: {} didn't use image for requested framework".format(training_job_name))
        return init_params
95 changes: 95 additions & 0 deletions src/sagemaker/chainer/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import

import sagemaker
from sagemaker.fw_utils import create_image_uri
from sagemaker.model import FrameworkModel, MODEL_SERVER_WORKERS_PARAM_NAME
from sagemaker.chainer.defaults import CHAINER_VERSION
from sagemaker.predictor import RealTimePredictor, npy_serializer, numpy_deserializer
from sagemaker.utils import name_from_image


class ChainerPredictor(RealTimePredictor):
    """A ``RealTimePredictor`` for running inference against Chainer endpoints.

    Requests are serialized with ``npy_serializer`` and responses are deserialized with
    ``numpy_deserializer``, which allows Python lists, dictionaries, and numpy arrays to be sent as
    multidimensional tensors for Chainer inference.
    """

    def __init__(self, endpoint_name, sagemaker_session=None):
        """Initialize a ``ChainerPredictor``.

        Args:
            endpoint_name (str): The name of the endpoint to perform inference on.
            sagemaker_session (sagemaker.session.Session): Session object which manages interactions with
                Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one
                using the default AWS configuration chain.
        """
        super(ChainerPredictor, self).__init__(
            endpoint_name,
            sagemaker_session,
            npy_serializer,
            numpy_deserializer,
        )


class ChainerModel(FrameworkModel):
    """A Chainer SageMaker ``Model`` that can be deployed to a SageMaker ``Endpoint``."""

    __framework_name__ = 'chainer'

    def __init__(self, model_data, role, entry_point, image=None, py_version='py3', framework_version=CHAINER_VERSION,
                 predictor_cls=ChainerPredictor, model_server_workers=None, **kwargs):
        """Initialize a ChainerModel.

        Args:
            model_data (str): The S3 location of a SageMaker model data ``.tar.gz`` file.
            role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs
                that create Amazon SageMaker endpoints use this role to access training data and model artifacts.
                After the endpoint is created, the inference code might use the IAM role,
                if it needs to access an AWS resource.
            entry_point (str): Path (absolute or relative) to the Python source file which should be executed
                as the entry point to model hosting. This should be compatible with either Python 2.7 or Python 3.5.
            image (str): A Docker image URI (default: None). If not specified, a default image for Chainer will be used.
            py_version (str): Python version you want to use for executing your model hosting code (default: 'py3').
                Used to select the default image when ``image`` is not specified.
            framework_version (str): Chainer version you want to use for executing your model hosting code
                (default: ``CHAINER_VERSION``). Used to select the default image when ``image`` is not specified.
            predictor_cls (callable[str, sagemaker.session.Session]): A function to call to create a predictor
                with an endpoint name and SageMaker ``Session``. If specified, ``deploy()`` returns the result of
                invoking this function on the created endpoint name.
            model_server_workers (int): Optional. The number of worker processes used by the inference server.
                If None, server will use one worker per vCPU.
            **kwargs: Keyword arguments passed to the ``FrameworkModel`` initializer.
        """
        super(ChainerModel, self).__init__(model_data, image, role, entry_point, predictor_cls=predictor_cls,
                                           **kwargs)
        self.py_version = py_version
        self.framework_version = framework_version
        self.model_server_workers = model_server_workers

    def prepare_container_def(self, instance_type):
        """Return a container definition with framework configuration set in model environment variables.

        Args:
            instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.

        Returns:
            dict[str, str]: A container definition object usable with the CreateModel API.
        """
        deploy_image = self.image
        if not deploy_image:
            # No explicit image was given: derive the default Chainer image for this region,
            # instance type, framework version and Python version.
            region_name = self.sagemaker_session.boto_session.region_name
            deploy_image = create_image_uri(region_name, self.__framework_name__, instance_type,
                                            self.framework_version, self.py_version)
        deploy_key_prefix = self.key_prefix or self.name or name_from_image(deploy_image)
        self._upload_code(deploy_key_prefix)
        # Copy the user-supplied environment so the model's own ``env`` is not mutated.
        deploy_env = dict(self.env)
        deploy_env.update(self._framework_env_vars())

        if self.model_server_workers:
            deploy_env[MODEL_SERVER_WORKERS_PARAM_NAME.upper()] = str(self.model_server_workers)
        return sagemaker.container_def(deploy_image, self.model_data, deploy_env)
1 change: 1 addition & 0 deletions src/sagemaker/content_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@
CONTENT_TYPE_JSON = 'application/json'
CONTENT_TYPE_CSV = 'text/csv'
CONTENT_TYPE_OCTET_STREAM = 'application/octet-stream'
CONTENT_TYPE_NPY = 'application/x-npy'
2 changes: 1 addition & 1 deletion src/sagemaker/fw_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def framework_name_from_image(image_name):
else:
# extract framework, python version and image tag
# We must support both the legacy and current image name format.
name_pattern = re.compile('^sagemaker-(tensorflow|mxnet):(.*?)-(.*?)-(py2|py3)$')
name_pattern = re.compile('^sagemaker-(tensorflow|mxnet|chainer):(.*?)-(.*?)-(py2|py3)$')
legacy_name_pattern = re.compile('^sagemaker-(tensorflow|mxnet)-(py2|py3)-(cpu|gpu):(.*)$')
name_match = name_pattern.match(sagemaker_match.group(8))
legacy_match = legacy_name_pattern.match(sagemaker_match.group(8))
Expand Down
Loading