Make InputDataConfig optional for training. (#459)

nadiaya · jesterhazy · commit 163bffdf08c7 · 2018-11-06T15:39:47.000-08:00
* Make InputDataConfig optional for training.

* Update boto3 dependency to make sure boto3 supports omitting InputDataConfig.

* Update changelog.

* Add missing assertion for chainer failure script test.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -2,6 +2,12 @@
 CHANGELOG
 =========
 
+1.13.1.dev
+==========
+
+* feature: Estimator: make input channels optional
+
+
 1.13.0
 ======
 
diff --git a/setup.py b/setup.py
@@ -53,7 +53,7 @@ def read(fname):
       ],
 
       # Declare minimal set for installation
-      install_requires=['boto3>=1.4.8', 'numpy>=1.9.0', 'protobuf>=3.1', 'scipy>=0.19.0',
+      install_requires=['boto3>=1.9.38', 'numpy>=1.9.0', 'protobuf>=3.1', 'scipy>=0.19.0',
                         'urllib3 >=1.21, <1.23',
                         'PyYAML>=3.2', 'protobuf3-to-dict>=0.1.5', 'docker-compose>=1.21.0'],
 
diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
@@ -176,7 +176,7 @@ def _prepare_for_training(self, job_name=None):
             else:
                 self.output_path = 's3://{}/'.format(self.sagemaker_session.default_bucket())
 
-    def fit(self, inputs, wait=True, logs=True, job_name=None):
+    def fit(self, inputs=None, wait=True, logs=True, job_name=None):
         """Train a model using the input training dataset.
 
         The API calls the Amazon SageMaker CreateTrainingJob API to start model training.
diff --git a/src/sagemaker/job.py b/src/sagemaker/job.py
@@ -64,6 +64,7 @@ def _load_config(inputs, estimator):
 
         model_channel = _Job._prepare_model_channel(input_config, estimator.model_uri, estimator.model_channel_name)
         if model_channel:
+            input_config = [] if input_config is None else input_config
             input_config.append(model_channel)
 
         return {'input_config': input_config,
@@ -75,6 +76,9 @@ def _load_config(inputs, estimator):
 
     @staticmethod
     def _format_inputs_to_input_config(inputs):
+        if inputs is None:
+            return None
+
         # Deferred import due to circular dependency
         from sagemaker.amazon.amazon_estimator import RecordSet
         if isinstance(inputs, RecordSet):
@@ -130,9 +134,10 @@ def _prepare_model_channel(input_config, model_uri=None, model_channel_name=None
         elif not model_channel_name:
             raise ValueError('Expected a pre-trained model channel name if a model URL is specified.')
 
-        for channel in input_config:
-            if channel['ChannelName'] == model_channel_name:
-                raise ValueError('Duplicate channels not allowed.')
+        if input_config:
+            for channel in input_config:
+                if channel['ChannelName'] == model_channel_name:
+                    raise ValueError('Duplicate channels not allowed.')
 
         model_input = _Job._format_model_uri_input(model_uri)
         model_channel = _Job._convert_input_to_channel(model_channel_name, model_input)
diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py
@@ -257,14 +257,16 @@ def train(self, image, input_mode, input_config, role, job_name, output_config,
                 'TrainingImage': image,
                 'TrainingInputMode': input_mode
             },
-            'InputDataConfig': input_config,
             'OutputDataConfig': output_config,
             'TrainingJobName': job_name,
             'StoppingCondition': stop_condition,
             'ResourceConfig': resource_config,
             'RoleArn': role,
         }
 
+        if input_config is not None:
+            train_request['InputDataConfig'] = input_config
+
         if hyperparameters and len(hyperparameters) > 0:
             train_request['HyperParameters'] = hyperparameters
 
diff --git a/src/sagemaker/tensorflow/estimator.py b/src/sagemaker/tensorflow/estimator.py
@@ -207,7 +207,7 @@ def _validate_requirements_file(self, requirements_file):
         if not os.path.exists(os.path.join(self.source_dir, requirements_file)):
             raise ValueError('Requirements file {} does not exist.'.format(requirements_file))
 
-    def fit(self, inputs, wait=True, logs=True, job_name=None, run_tensorboard_locally=False):
+    def fit(self, inputs=None, wait=True, logs=True, job_name=None, run_tensorboard_locally=False):
         """Train a model using the input training dataset.
 
         See :func:`~sagemaker.estimator.EstimatorBase.fit` for more details.
diff --git a/tests/integ/test_chainer_train.py b/tests/integ/test_chainer_train.py
@@ -105,18 +105,15 @@ def test_async_fit(sagemaker_session):
 def test_failed_training_job(sagemaker_session, chainer_full_version):
     with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
         script_path = os.path.join(DATA_DIR, 'chainer_mnist', 'failure_script.py')
-        data_path = os.path.join(DATA_DIR, 'chainer_mnist')
 
         chainer = Chainer(entry_point=script_path, role='SageMakerRole',
                           framework_version=chainer_full_version, py_version=PYTHON_VERSION,
                           train_instance_count=1, train_instance_type='ml.c4.xlarge',
                           sagemaker_session=sagemaker_session)
 
-        train_input = chainer.sagemaker_session.upload_data(path=os.path.join(data_path, 'train'),
-                                                            key_prefix='integ-test-data/chainer_mnist/train')
-
-        with pytest.raises(ValueError):
-            chainer.fit(train_input)
+        with pytest.raises(ValueError) as e:
+            chainer.fit()
+        assert 'This failure is expected' in str(e.value)
 
 
 def _run_mnist_training_job(sagemaker_session, instance_type, instance_count,
diff --git a/tests/integ/test_mxnet_train.py b/tests/integ/test_mxnet_train.py
@@ -105,15 +105,11 @@ def test_async_fit(sagemaker_session, mxnet_full_version):
 def test_failed_training_job(sagemaker_session, mxnet_full_version):
     with timeout():
         script_path = os.path.join(DATA_DIR, 'mxnet_mnist', 'failure_script.py')
-        data_path = os.path.join(DATA_DIR, 'mxnet_mnist')
 
         mx = MXNet(entry_point=script_path, role='SageMakerRole', framework_version=mxnet_full_version,
                    py_version=PYTHON_VERSION, train_instance_count=1, train_instance_type='ml.c4.xlarge',
                    sagemaker_session=sagemaker_session)
 
-        train_input = mx.sagemaker_session.upload_data(path=os.path.join(data_path, 'train'),
-                                                       key_prefix='integ-test-data/mxnet_mnist/train-failure')
-
         with pytest.raises(ValueError) as e:
-            mx.fit(train_input)
+            mx.fit()
         assert 'This failure is expected' in str(e.value)
diff --git a/tests/integ/test_pytorch_train.py b/tests/integ/test_pytorch_train.py
@@ -106,7 +106,7 @@ def test_failed_training_job(sagemaker_session, pytorch_full_version):
         pytorch = _get_pytorch_estimator(sagemaker_session, pytorch_full_version, entry_point=script_path)
 
         with pytest.raises(ValueError) as e:
-            pytorch.fit(_upload_training_data(pytorch))
+            pytorch.fit()
         assert 'This failure is expected' in str(e.value)
 
 
diff --git a/tests/integ/test_tf.py b/tests/integ/test_tf.py
@@ -160,8 +160,6 @@ def test_failed_tf_training(sagemaker_session, tf_full_version):
                                train_instance_type='ml.c4.xlarge',
                                sagemaker_session=sagemaker_session)
 
-        inputs = estimator.sagemaker_session.upload_data(path=DATA_PATH, key_prefix='integ-test-data/tf-failure')
-
         with pytest.raises(ValueError) as e:
-            estimator.fit(inputs)
+            estimator.fit()
         assert 'This failure is expected' in str(e.value)
diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py
@@ -706,19 +706,10 @@ def test_unsupported_type_in_dict():
 #################################################################################
 # Tests for the generic Estimator class
 
-BASE_TRAIN_CALL = {
+NO_INPUT_TRAIN_CALL = {
     'hyperparameters': {},
     'image': IMAGE_NAME,
-    'input_config': [{
-        'DataSource': {
-            'S3DataSource': {
-                'S3DataDistributionType': 'FullyReplicated',
-                'S3DataType': 'S3Prefix',
-                'S3Uri': 's3://bucket/training-prefix'
-            }
-        },
-        'ChannelName': 'train'
-    }],
+    'input_config': None,
     'input_mode': 'File',
     'output_config': {'S3OutputPath': OUTPUT_PATH},
     'resource_config': {
@@ -731,12 +722,43 @@ def test_unsupported_type_in_dict():
     'vpc_config': None
 }
 
+INPUT_CONFIG = [{
+    'DataSource': {
+        'S3DataSource': {
+            'S3DataDistributionType': 'FullyReplicated',
+            'S3DataType': 'S3Prefix',
+            'S3Uri': 's3://bucket/training-prefix'
+        }
+    },
+    'ChannelName': 'train'
+}]
+
+BASE_TRAIN_CALL = dict(NO_INPUT_TRAIN_CALL)
+BASE_TRAIN_CALL.update({'input_config': INPUT_CONFIG})
+
 HYPERPARAMS = {'x': 1, 'y': 'hello'}
 STRINGIFIED_HYPERPARAMS = dict([(x, str(y)) for x, y in HYPERPARAMS.items()])
 HP_TRAIN_CALL = dict(BASE_TRAIN_CALL)
 HP_TRAIN_CALL.update({'hyperparameters': STRINGIFIED_HYPERPARAMS})
 
 
+def test_generic_to_fit_no_input(sagemaker_session):
+    e = Estimator(IMAGE_NAME, ROLE, INSTANCE_COUNT, INSTANCE_TYPE, output_path=OUTPUT_PATH,
+                  sagemaker_session=sagemaker_session)
+
+    e.fit()
+
+    sagemaker_session.train.assert_called_once()
+    assert len(sagemaker_session.train.call_args[0]) == 0
+    args = sagemaker_session.train.call_args[1]
+    assert args['job_name'].startswith(IMAGE_NAME)
+
+    args.pop('job_name')
+    args.pop('role')
+
+    assert args == NO_INPUT_TRAIN_CALL
+
+
 def test_generic_to_fit_no_hps(sagemaker_session):
     e = Estimator(IMAGE_NAME, ROLE, INSTANCE_COUNT, INSTANCE_TYPE, output_path=OUTPUT_PATH,
                   sagemaker_session=sagemaker_session)
diff --git a/tests/unit/test_job.py b/tests/unit/test_job.py
@@ -86,6 +86,29 @@ def test_load_config_with_model_channel(estimator):
     assert config['stop_condition']['MaxRuntimeInSeconds'] == MAX_RUNTIME
 
 
+def test_load_config_with_model_channel_no_inputs(estimator):
+    estimator.model_uri = MODEL_URI
+    estimator.model_channel_name = CHANNEL_NAME
+
+    config = _Job._load_config(inputs=None, estimator=estimator)
+
+    assert config['input_config'][0]['DataSource']['S3DataSource']['S3Uri'] == MODEL_URI
+    assert config['input_config'][0]['ChannelName'] == CHANNEL_NAME
+    assert config['role'] == ROLE
+    assert config['output_config']['S3OutputPath'] == S3_OUTPUT_PATH
+    assert 'KmsKeyId' not in config['output_config']
+    assert config['resource_config']['InstanceCount'] == INSTANCE_COUNT
+    assert config['resource_config']['InstanceType'] == INSTANCE_TYPE
+    assert config['resource_config']['VolumeSizeInGB'] == VOLUME_SIZE
+    assert config['stop_condition']['MaxRuntimeInSeconds'] == MAX_RUNTIME
+
+
+def test_format_inputs_none():
+    channels = _Job._format_inputs_to_input_config(inputs=None)
+
+    assert channels is None
+
+
 def test_format_inputs_to_input_config_string():
     inputs = BUCKET_NAME