Skip to content

Add Chainer 4.1.0 #278

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 11, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
CHANGELOG
=========

1.6.0
=====

* feature: Add Chainer 4.1.0 support
* bug-fix: Use chainer_full_version fixture in Chainer integration tests
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we add test-related changes to the changelog.


1.5.4
=====

Expand Down
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ You can install from source by cloning this repository and issuing a pip install

git clone https://github.com/aws/sagemaker-python-sdk.git
python setup.py sdist
pip install dist/sagemaker-1.5.4.tar.gz
pip install dist/sagemaker-1.6.0.tar.gz

Supported Python versions
~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -236,7 +236,7 @@ Chainer SageMaker Estimators

With Chainer Estimators, you can train and host Chainer models on Amazon SageMaker.

Supported versions of Chainer: ``4.0.0``
Supported versions of Chainer: ``4.0.0``, ``4.1.0``.

You can visit the Chainer repository at https://github.com/chainer/chainer.

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def read(fname):


setup(name="sagemaker",
version="1.5.4",
version="1.6.0",
description="Open source library for training and deploying models on Amazon SageMaker.",
packages=find_packages('src'),
package_dir={'': 'src'},
Expand Down
2 changes: 1 addition & 1 deletion src/sagemaker/chainer/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@
# language governing permissions and limitations under the License.
from __future__ import absolute_import

CHAINER_VERSION = '4.0.0'
CHAINER_VERSION = '4.1.0'
4 changes: 2 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def pytorch_version(request):
return request.param


@pytest.fixture(scope='module', params=['4.0', '4.0.0'])
@pytest.fixture(scope='module', params=['4.0', '4.0.0', '4.1', '4.1.0'])
def chainer_version(request):
return request.param

Expand All @@ -106,6 +106,6 @@ def pytorch_full_version(request):
return request.param


@pytest.fixture(scope='module', params=['4.0.0'])
@pytest.fixture(scope='module', params=['4.0.0', '4.1.0'])
def chainer_full_version(request):
return request.param
29 changes: 17 additions & 12 deletions tests/integ/test_chainer_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import pytest
import numpy

from sagemaker.chainer.defaults import CHAINER_VERSION
from sagemaker.chainer.estimator import Chainer
from sagemaker.chainer.model import ChainerModel
from sagemaker.utils import sagemaker_timestamp
Expand All @@ -26,25 +27,26 @@


@pytest.fixture(scope='module')
def chainer_training_job(sagemaker_session):
return _run_mnist_training_job(sagemaker_session, "ml.c4.xlarge", 1)
def chainer_training_job(sagemaker_session, chainer_full_version):
return _run_mnist_training_job(sagemaker_session, "ml.c4.xlarge", 1, chainer_full_version)


def test_distributed_cpu_training(sagemaker_session):
_run_mnist_training_job(sagemaker_session, "ml.c4.xlarge", 2)
def test_distributed_cpu_training(sagemaker_session, chainer_full_version):
_run_mnist_training_job(sagemaker_session, "ml.c4.xlarge", 2, chainer_full_version)


def test_distributed_gpu_training(sagemaker_session):
_run_mnist_training_job(sagemaker_session, "ml.p2.xlarge", 2)
def test_distributed_gpu_training(sagemaker_session, chainer_full_version):
_run_mnist_training_job(sagemaker_session, "ml.p2.xlarge", 2, chainer_full_version)


def test_training_with_additional_hyperparameters(sagemaker_session):
def test_training_with_additional_hyperparameters(sagemaker_session, chainer_full_version):
with timeout(minutes=15):
script_path = os.path.join(DATA_DIR, 'chainer_mnist', 'mnist.py')
data_path = os.path.join(DATA_DIR, 'chainer_mnist')

chainer = Chainer(entry_point=script_path, role='SageMakerRole',
train_instance_count=1, train_instance_type="ml.c4.xlarge",
framework_version=chainer_full_version,
sagemaker_session=sagemaker_session, hyperparameters={'epochs': 1},
use_mpi=True,
num_processes=2,
Expand Down Expand Up @@ -75,8 +77,7 @@ def test_deploy_model(chainer_training_job, sagemaker_session):
desc = sagemaker_session.sagemaker_client.describe_training_job(TrainingJobName=chainer_training_job)
model_data = desc['ModelArtifacts']['S3ModelArtifacts']
script_path = os.path.join(DATA_DIR, 'chainer_mnist', 'mnist.py')
model = ChainerModel(model_data, 'SageMakerRole', entry_point=script_path,
sagemaker_session=sagemaker_session)
model = ChainerModel(model_data, 'SageMakerRole', entry_point=script_path, sagemaker_session=sagemaker_session)
predictor = model.deploy(1, "ml.m4.xlarge", endpoint_name=endpoint_name)
_predict_and_assert(predictor)

Expand All @@ -85,7 +86,8 @@ def test_async_fit(sagemaker_session):
endpoint_name = 'test-chainer-attach-deploy-{}'.format(sagemaker_timestamp())

with timeout(minutes=5):
training_job_name = _run_mnist_training_job(sagemaker_session, "ml.c4.xlarge", 1, wait=False)
training_job_name = _run_mnist_training_job(sagemaker_session, "ml.c4.xlarge", 1,
chainer_full_version=CHAINER_VERSION, wait=False)

print("Waiting to re-attach to the training job: %s" % training_job_name)
time.sleep(20)
Expand All @@ -97,12 +99,13 @@ def test_async_fit(sagemaker_session):
_predict_and_assert(predictor)


def test_failed_training_job(sagemaker_session):
def test_failed_training_job(sagemaker_session, chainer_full_version):
with timeout(minutes=15):
script_path = os.path.join(DATA_DIR, 'chainer_mnist', 'failure_script.py')
data_path = os.path.join(DATA_DIR, 'chainer_mnist')

chainer = Chainer(entry_point=script_path, role='SageMakerRole',
framework_version=chainer_full_version,
train_instance_count=1, train_instance_type='ml.c4.xlarge',
sagemaker_session=sagemaker_session)

Expand All @@ -113,7 +116,8 @@ def test_failed_training_job(sagemaker_session):
chainer.fit(train_input)


def _run_mnist_training_job(sagemaker_session, instance_type, instance_count, wait=True):
def _run_mnist_training_job(sagemaker_session, instance_type, instance_count,
chainer_full_version, wait=True):
with timeout(minutes=15):

script_path = os.path.join(DATA_DIR, 'chainer_mnist', 'mnist.py') if instance_type == 1 else \
Expand All @@ -122,6 +126,7 @@ def _run_mnist_training_job(sagemaker_session, instance_type, instance_count, wa
data_path = os.path.join(DATA_DIR, 'chainer_mnist')

chainer = Chainer(entry_point=script_path, role='SageMakerRole',
framework_version=chainer_full_version,
train_instance_count=instance_count, train_instance_type=instance_type,
sagemaker_session=sagemaker_session, hyperparameters={'epochs': 1})

Expand Down