Merge remote-tracking branch 'origin/master' into mvs-tfs

mvsusp · mvsusp · commit ada8b2a79e9f · 2019-05-15T13:55:16.000-07:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,39 @@
 # Changelog
 
+## v1.20.3 (2019-05-15)
+
+### Bug fixes and other changes
+
+ * run tests if buildspec.yml has been modified
+ * skip local file check for TF requirements file when source_dir is an S3 URI
+
+### Documentation changes
+
+ * fix docs regarding transform_fn for mxnet
+
+## v1.20.2 (2019-05-13)
+
+### Bug fixes and other changes
+
+ * pin pytest version to 4.4.1 to avoid pluggy version conflict
+
+## v1.20.1 (2019-05-09)
+
+### Bug fixes and other changes
+
+ * update TrainingInputMode with s3_input InputMode
+
+## v1.20.0 (2019-05-08)
+
+### Features
+
+ * add RL Ray 0.6.5 support
+
+### Bug fixes and other changes
+
+ * prevent false positive PR test results
+ * adjust Ray test script for Ray 0.6.5
+
 ## v1.19.1 (2019-05-06)
 
 ### Bug fixes and other changes
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-1.19.2.dev0
+1.20.4.dev0
diff --git a/buildspec.yml b/buildspec.yml
@@ -17,36 +17,21 @@ phases:
       # run unit tests
       - AWS_ACCESS_KEY_ID= AWS_SECRET_ACCESS_KEY= AWS_SESSION_TOKEN=
         AWS_CONTAINER_CREDENTIALS_RELATIVE_URI= AWS_DEFAULT_REGION=
-        tox -e py27,py36 -- tests/unit
+        tox -e py36,py27 -- tests/unit
 
       # run notebook test
       - |
-        if has-matching-changes "src/*.py" "setup.py" "setup.cfg"; then
+        if has-matching-changes "src/*.py" "setup.py" "setup.cfg" "buildspec.yml"; then
           echo "running notebook test"
-          python setup.py sdist
-          aws s3 --region us-west-2 cp ./dist/sagemaker-*.tar.gz s3://sagemaker-python-sdk-pr/sagemaker.tar.gz
-          aws s3 cp s3://sagemaker-mead-cli/mead-nb-test.tar.gz mead-nb-test.tar.gz
-          tar -xzf mead-nb-test.tar.gz
-          git clone --depth 1 https://github.com/awslabs/amazon-sagemaker-examples.git
-          JAVA_HOME=$(get-java-home)
-          echo "set JAVA_HOME=$JAVA_HOME"
-          SAGEMAKER_ROLE_ARN=$(get-sagemaker-role-arn)
-          echo "set SAGEMAKER_ROLE_ARN=$SAGEMAKER_ROLE_ARN"
-          ./runtime/bin/mead-run-nb-test \
-            --instance-type ml.c4.8xlarge \
-            --region us-west-2 \
-            --lifecycle-config-name install-python-sdk \
-            --notebook-instance-role-arn $SAGEMAKER_ROLE_ARN \
-            ./amazon-sagemaker-examples/sagemaker-python-sdk/tensorflow_distributed_mnist/tensorflow_batch_transform_mnist.ipynb
+          ./tests/scripts/run-notebook-test.sh
          else
            echo "skipping notebook test"
          fi
 
       # run integration tests
       - |
-        if has-matching-changes "tests/" "src/*.py" "setup.py" "setup.cfg"; then
-          IGNORE_COVERAGE=- tox -e py36 -- tests/integ -n 24 --boxed --reruns 2
-          IGNORE_COVERAGE=- tox -e py27 -- tests/integ -n 24 --boxed --reruns 2
+        if has-matching-changes "tests/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml"; then
+          IGNORE_COVERAGE=- tox -e py36,py27 -- tests/integ -n 24 --boxed --reruns 2
         else
           echo "skipping integration tests"
         fi
diff --git a/doc/using_mxnet.rst b/doc/using_mxnet.rst
@@ -688,8 +688,7 @@ The default implementation expects ``prediction`` to be an ``NDArray`` and can s
 Using ``transform_fn``
 ''''''''''''''''''''''
 
-If you would rather not structure your code around the three methods described above, you can instead define your own ``transform_fn`` to handle inference requests.
-This will override any implementation of ``input_fn``, ``predict_fn``, or ``output_fn``.
+If you would rather not structure your code around the three methods described above, you can instead define your own ``transform_fn`` to handle inference requests. An error is raised if ``transform_fn`` is defined together with any of ``input_fn``, ``predict_fn``, or ``output_fn``.
 ``transform_fn`` has the following signature:
 
 .. code:: python
diff --git a/setup.py b/setup.py
@@ -60,7 +60,7 @@ def read_version():
       install_requires=required_packages,
 
       extras_require={
-          'test': ['tox', 'flake8', 'pytest', 'pytest-cov', 'pytest-rerunfailures',
+          'test': ['tox', 'flake8', 'pytest==4.4.1', 'pytest-cov', 'pytest-rerunfailures',
                    'pytest-xdist', 'mock', 'tensorflow>=1.3.0', 'contextlib2',
                    'awslogs', 'pandas']},
 
diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
@@ -569,6 +569,12 @@ def start_new(cls, estimator, inputs):
         train_args['tags'] = estimator.tags
         train_args['metric_definitions'] = estimator.metric_definitions
 
+        if isinstance(inputs, s3_input):
+            if 'InputMode' in inputs.config:
+                logging.debug('Selecting s3_input\'s input_mode ({}) for TrainingInputMode.'
+                              .format(inputs.config['InputMode']))
+                train_args['input_mode'] = inputs.config['InputMode']
+
         if estimator.enable_network_isolation():
             train_args['enable_network_isolation'] = True
 
diff --git a/src/sagemaker/rl/README.rst b/src/sagemaker/rl/README.rst
@@ -7,7 +7,7 @@ With Reinforcement Learning (RL) Estimators, you can train reinforcement learnin
 Supported versions of Coach: ``0.11.1``, ``0.10.1`` with TensorFlow, ``0.11.0`` with TensorFlow or MXNet.
 For more information about Coach, see https://github.com/NervanaSystems/coach
 
-Supported versions of Ray: ``0.5.3`` with TensorFlow.
+Supported versions of Ray: ``0.6.5``, ``0.5.3`` with TensorFlow.
 For more information about Ray, see https://github.com/ray-project/ray
 
 For information about using RL with the SageMaker Python SDK, see https://sagemaker.readthedocs.io/en/stable/using_rl.html.
@@ -23,19 +23,19 @@ SageMaker runs RL Estimator scripts in either Python 3.5 for MXNet or Python 3.6
 
 The Docker images have the following dependencies installed:
 
-+-------------------------+-------------------+-------------------+-------------------+
-| Dependencies            |      Coach 0.10.1 |      Coach 0.11.0 |         Ray 0.5.3 |
-+-------------------------+-------------------+-------------------+-------------------+
-| Python                  |               3.6 |     3.5(MXNet) or |               3.6 |
-|                         |                   |   3.6(TensorFlow) |                   |
-+-------------------------+-------------------+-------------------+-------------------+
-| CUDA (GPU image only)   |               9.0 |               9.0 |               9.0 |
-+-------------------------+-------------------+-------------------+-------------------+
-| DL Framework            | TensorFlow-1.11.0 |    MXNet-1.3.0 or | TensorFlow-1.11.0 |
-|                         |                   | TensorFlow-1.11.0 |                   |
-+-------------------------+-------------------+-------------------+-------------------+
-| gym                     |            0.10.5 |            0.10.5 |            0.10.5 |
-+-------------------------+-------------------+-------------------+-------------------+
++-------------------------+-------------------+-------------------+-------------------+-------------------+-------------------+
+| Dependencies            |      Coach 0.10.1 |      Coach 0.11.0 |      Coach 0.11.1 |         Ray 0.5.3 |         Ray 0.6.5 |
++-------------------------+-------------------+-------------------+-------------------+-------------------+-------------------+
+| Python                  |               3.6 |  3.5 (MXNet) or   |               3.6 |               3.6 |               3.6 |
+|                         |                   |  3.6 (TensorFlow) |                   |                   |                   |
++-------------------------+-------------------+-------------------+-------------------+-------------------+-------------------+
+| CUDA (GPU image only)   |               9.0 |               9.0 |               9.0 |               9.0 |               9.0 |
++-------------------------+-------------------+-------------------+-------------------+-------------------+-------------------+
+| DL Framework            | TensorFlow-1.11.0 | MXNet-1.3.0 or    | TensorFlow-1.12.0 | TensorFlow-1.11.0 | TensorFlow-1.12.0 |
+|                         |                   | TensorFlow-1.11.0 |                   |                   |                   |
++-------------------------+-------------------+-------------------+-------------------+-------------------+-------------------+
+| gym                     |            0.10.5 |            0.10.5 |            0.11.0 |            0.10.5 |            0.12.1 |
++-------------------------+-------------------+-------------------+-------------------+-------------------+-------------------+
 
 The Docker images extend Ubuntu 16.04.
 
diff --git a/src/sagemaker/rl/estimator.py b/src/sagemaker/rl/estimator.py
@@ -53,7 +53,13 @@
         },
         '0.5': {
             'tensorflow': '1.11'
-        }
+        },
+        '0.6.5': {
+            'tensorflow': '1.12'
+        },
+        '0.6': {
+            'tensorflow': '1.12'
+        },
     }
 }
 
@@ -73,7 +79,7 @@ class RLEstimator(Framework):
 
     COACH_LATEST_VERSION_TF = '0.11.1'
     COACH_LATEST_VERSION_MXNET = '0.11.0'
-    RAY_LATEST_VERSION = '0.5.3'
+    RAY_LATEST_VERSION = '0.6.5'
 
     def __init__(self, entry_point, toolkit=None, toolkit_version=None, framework=None,
                  source_dir=None, hyperparameters=None, image_name=None,
diff --git a/src/sagemaker/tensorflow/estimator.py b/src/sagemaker/tensorflow/estimator.py
@@ -283,6 +283,9 @@ def _validate_requirements_file(self, requirements_file):
         if not self.source_dir:
             raise ValueError('Must specify source_dir along with a requirements file.')
 
+        if self.source_dir.lower().startswith('s3://'):
+            return
+
         if os.path.isabs(requirements_file):
             raise ValueError('Requirements file {} is not a path relative to source_dir.'.format(
                 requirements_file))
diff --git a/src/sagemaker/tuner.py b/src/sagemaker/tuner.py
@@ -15,6 +15,7 @@
 import importlib
 import inspect
 import json
+import logging
 from enum import Enum
 
 import sagemaker
@@ -26,6 +27,7 @@
 from sagemaker.parameter import (CategoricalParameter, ContinuousParameter,
                                  IntegerParameter, ParameterRange)
 from sagemaker.session import Session
+from sagemaker.session import s3_input
 from sagemaker.utils import base_name_from_image, name_from_base, to_str
 
 AMAZON_ESTIMATOR_MODULE = 'sagemaker'
@@ -640,6 +642,12 @@ def start_new(cls, tuner, inputs):
         tuner_args['warm_start_config'] = warm_start_config_req
         tuner_args['early_stopping_type'] = tuner.early_stopping_type
 
+        if isinstance(inputs, s3_input):
+            if 'InputMode' in inputs.config:
+                logging.debug('Selecting s3_input\'s input_mode ({}) for TrainingInputMode.'
+                              .format(inputs.config['InputMode']))
+                tuner_args['input_mode'] = inputs.config['InputMode']
+
         if isinstance(tuner.estimator, sagemaker.algorithm.AlgorithmEstimator):
             tuner_args['algorithm_arn'] = tuner.estimator.algorithm_arn
         else:
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -140,7 +140,7 @@ def rl_coach_mxnet_version(request):
     return request.param
 
 
-@pytest.fixture(scope='module', params=['0.5', '0.5.3'])
+@pytest.fixture(scope='module', params=['0.5', '0.5.3', '0.6', '0.6.5'])
 def rl_ray_version(request):
     return request.param
 
diff --git a/tests/data/ray_cartpole/train_ray.py b/tests/data/ray_cartpole/train_ray.py
@@ -5,7 +5,7 @@
 from ray.tune.logger import pretty_print
 
 # Based on https://github.com/ray-project/ray/blob/master/doc/source/rllib-training.rst#python-api
-ray.init(redirect_output=False, redirect_worker_output=False)
+ray.init(log_to_driver=False)
 config = ppo.DEFAULT_CONFIG.copy()
 config["num_gpus"] = int(os.environ.get("SM_NUM_GPUS", 0))
 checkpoint_dir = os.environ.get("SM_MODEL_DIR", '/Users/nadzeya/gym')
diff --git a/tests/integ/test_chainer_train.py b/tests/integ/test_chainer_train.py
@@ -1,4 +1,4 @@
-# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
@@ -21,7 +21,7 @@
 from sagemaker.chainer.defaults import CHAINER_VERSION
 from sagemaker.chainer.estimator import Chainer
 from sagemaker.chainer.model import ChainerModel
-from sagemaker.utils import sagemaker_timestamp
+from sagemaker.utils import unique_name_from_base
 import tests.integ
 from tests.integ import DATA_DIR, PYTHON_VERSION, TRAINING_DEFAULT_TIMEOUT_MINUTES
 from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name
@@ -62,14 +62,15 @@ def test_training_with_additional_hyperparameters(sagemaker_session, chainer_ful
         test_input = chainer.sagemaker_session.upload_data(path=os.path.join(data_path, 'test'),
                                                            key_prefix='integ-test-data/chainer_mnist/test')
 
-        chainer.fit({'train': train_input, 'test': test_input})
+        job_name = unique_name_from_base('test-chainer-training')
+        chainer.fit({'train': train_input, 'test': test_input}, job_name=job_name)
         return chainer.latest_training_job.name
 
 
 @pytest.mark.canary_quick
 @pytest.mark.regional_testing
 def test_attach_deploy(chainer_training_job, sagemaker_session):
-    endpoint_name = 'test-chainer-attach-deploy-{}'.format(sagemaker_timestamp())
+    endpoint_name = unique_name_from_base('test-chainer-attach-deploy')
 
     with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
         estimator = Chainer.attach(chainer_training_job, sagemaker_session=sagemaker_session)
@@ -78,7 +79,7 @@ def test_attach_deploy(chainer_training_job, sagemaker_session):
 
 
 def test_deploy_model(chainer_training_job, sagemaker_session):
-    endpoint_name = 'test-chainer-deploy-model-{}'.format(sagemaker_timestamp())
+    endpoint_name = unique_name_from_base('test-chainer-deploy-model')
     with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
         desc = sagemaker_session.sagemaker_client.describe_training_job(TrainingJobName=chainer_training_job)
         model_data = desc['ModelArtifacts']['S3ModelArtifacts']
@@ -89,15 +90,14 @@ def test_deploy_model(chainer_training_job, sagemaker_session):
 
 
 def test_async_fit(sagemaker_session):
-    endpoint_name = 'test-chainer-attach-deploy-{}'.format(sagemaker_timestamp())
-
     with timeout(minutes=5):
         training_job_name = _run_mnist_training_job(sagemaker_session, "ml.c4.xlarge", 1,
                                                     chainer_full_version=CHAINER_VERSION, wait=False)
 
         print("Waiting to re-attach to the training job: %s" % training_job_name)
         time.sleep(20)
 
+    endpoint_name = unique_name_from_base('test-chainer-async-fit')
     with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
         print("Re-attaching now to: %s" % training_job_name)
         estimator = Chainer.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session)
@@ -115,7 +115,7 @@ def test_failed_training_job(sagemaker_session, chainer_full_version):
                           sagemaker_session=sagemaker_session)
 
         with pytest.raises(ValueError) as e:
-            chainer.fit()
+            chainer.fit(job_name=unique_name_from_base('test-chainer-training'))
         assert 'ExecuteUserScriptError' in str(e.value)
 
 
@@ -138,7 +138,8 @@ def _run_mnist_training_job(sagemaker_session, instance_type, instance_count,
         test_input = chainer.sagemaker_session.upload_data(path=os.path.join(data_path, 'test'),
                                                            key_prefix='integ-test-data/chainer_mnist/test')
 
-        chainer.fit({'train': train_input, 'test': test_input}, wait=wait)
+        job_name = unique_name_from_base('test-chainer-training')
+        chainer.fit({'train': train_input, 'test': test_input}, wait=wait, job_name=job_name)
         return chainer.latest_training_job.name
 
 
diff --git a/tests/scripts/run-notebook-test.sh b/tests/scripts/run-notebook-test.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# Run a test against a SageMaker notebook
+# Only runs within the SDK's CI/CD environment
+
+set -euo pipefail
+
+python setup.py sdist
+aws s3 --region us-west-2 cp ./dist/sagemaker-*.tar.gz s3://sagemaker-python-sdk-pr/sagemaker.tar.gz
+aws s3 cp s3://sagemaker-mead-cli/mead-nb-test.tar.gz mead-nb-test.tar.gz
+tar -xzf mead-nb-test.tar.gz
+git clone --depth 1 https://github.com/awslabs/amazon-sagemaker-examples.git
+JAVA_HOME=$(get-java-home); export JAVA_HOME  # assign before export so 'set -e' sees a failing helper
+echo "set JAVA_HOME=$JAVA_HOME"
+SAGEMAKER_ROLE_ARN=$(get-sagemaker-role-arn); export SAGEMAKER_ROLE_ARN  # assign before export, as for JAVA_HOME
+echo "set SAGEMAKER_ROLE_ARN=$SAGEMAKER_ROLE_ARN"
+./runtime/bin/mead-run-nb-test \
+--instance-type ml.c4.8xlarge \
+--region us-west-2 \
+--lifecycle-config-name install-python-sdk \
+--notebook-instance-role-arn "$SAGEMAKER_ROLE_ARN" \
+./amazon-sagemaker-examples/sagemaker-python-sdk/tensorflow_distributed_mnist/tensorflow_batch_transform_mnist.ipynb
diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py
@@ -315,6 +315,17 @@ def test_augmented_manifest(sagemaker_session):
     assert s3_data_source['AttributeNames'] == ['foo', 'bar']
 
 
+def test_s3_input_mode(sagemaker_session):
+    expected_input_mode = 'Pipe'
+    fw = DummyFramework(entry_point=SCRIPT_PATH, role='DummyRole', sagemaker_session=sagemaker_session,
+                        train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
+                        enable_cloudwatch_metrics=True)
+    fw.fit(inputs=s3_input('s3://mybucket/train_manifest', input_mode=expected_input_mode))
+
+    actual_input_mode = sagemaker_session.method_calls[1][2]['input_mode']
+    assert actual_input_mode == expected_input_mode
+
+
 def test_shuffle_config(sagemaker_session):
     fw = DummyFramework(entry_point=SCRIPT_PATH, role='DummyRole', sagemaker_session=sagemaker_session,
                         train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
diff --git a/tests/unit/test_tuner.py b/tests/unit/test_tuner.py
@@ -28,6 +28,7 @@
 from sagemaker.tuner import (_TuningJob, create_identical_dataset_and_algorithm_tuner,
                              create_transfer_learning_tuner, HyperparameterTuner, WarmStartConfig,
                              WarmStartTypes)
+from sagemaker.session import s3_input
 
 DATA_DIR = os.path.join(os.path.dirname(__file__), '..', 'data')
 MODEL_DATA = "s3://bucket/model.tar.gz"
@@ -286,6 +287,31 @@ def test_fit_mxnet_with_vpc_config(sagemaker_session, tuner):
     assert tune_kwargs['vpc_config'] == {'Subnets': subnets, 'SecurityGroupIds': security_group_ids}
 
 
+def test_s3_input_mode(sagemaker_session, tuner):
+    expected_input_mode = 'Pipe'
+
+    script_path = os.path.join(DATA_DIR, 'mxnet_mnist', 'failure_script.py')
+    mxnet = MXNet(entry_point=script_path,
+                  role=ROLE,
+                  framework_version=FRAMEWORK_VERSION,
+                  train_instance_count=TRAIN_INSTANCE_COUNT,
+                  train_instance_type=TRAIN_INSTANCE_TYPE,
+                  sagemaker_session=sagemaker_session)
+    tuner.estimator = mxnet
+
+    tags = [{'Name': 'some-tag-without-a-value'}]
+    tuner.tags = tags
+
+    hyperparameter_ranges = {'num_components': IntegerParameter(2, 4),
+                             'algorithm_mode': CategoricalParameter(['regular', 'randomized'])}
+    tuner._hyperparameter_ranges = hyperparameter_ranges
+
+    tuner.fit(inputs=s3_input('s3://mybucket/train_manifest', input_mode=expected_input_mode))
+
+    actual_input_mode = sagemaker_session.method_calls[1][2]['input_mode']
+    assert actual_input_mode == expected_input_mode
+
+
 def test_fit_pca_with_inter_container_traffic_encryption_flag(sagemaker_session, tuner):
     pca = PCA(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, NUM_COMPONENTS,
               base_job_name='pca', sagemaker_session=sagemaker_session,