
Commit b236877

Merge branch 'master' into trt
2 parents 99fa6cd + 486cf44

File tree: 11 files changed (+92, -36 lines)

CHANGELOG.md

Lines changed: 16 additions & 0 deletions
@@ -1,5 +1,21 @@
 # Changelog

+## v2.32.0 (2021-03-26)
+
+### Features
+
+* upgrade neo mxnet to 1.8
+* Enable Profiler in China Regions
+
+### Bug Fixes and Other Changes
+
+* use workflow parameters in training hyperparameters (#2114) (#2115)
+* skip HuggingFace tests in regions without p2 instances
+
+### Documentation Changes
+
+* add Feature Store methods docs
+
 ## v2.31.1 (2021-03-23)

 ### Bug Fixes and Other Changes

VERSION

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-2.31.2.dev0
+2.32.1.dev0

src/sagemaker/estimator.py

Lines changed: 7 additions & 1 deletion
@@ -52,6 +52,9 @@
     _region_supports_profiler,
     get_mp_parameters,
 )
+from sagemaker.workflow.properties import Properties
+from sagemaker.workflow.parameters import Parameter
+from sagemaker.workflow.entities import Expression
 from sagemaker.inputs import TrainingInput
 from sagemaker.job import _Job
 from sagemaker.local import LocalSession
@@ -1456,7 +1459,10 @@ def _get_train_args(cls, estimator, inputs, experiment_config):

         current_hyperparameters = estimator.hyperparameters()
         if current_hyperparameters is not None:
-            hyperparameters = {str(k): str(v) for (k, v) in current_hyperparameters.items()}
+            hyperparameters = {
+                str(k): (v if isinstance(v, (Parameter, Expression, Properties)) else str(v))
+                for (k, v) in current_hyperparameters.items()
+            }

         train_args = config.copy()
         train_args["input_mode"] = estimator.input_mode

src/sagemaker/image_uri_config/neo-mxnet.json

Lines changed: 13 additions & 12 deletions
@@ -2,20 +2,21 @@
     "processors": ["cpu", "gpu"],
     "scope": ["inference"],
     "version_aliases": {
-        "0.12.1": "1.7",
-        "1.0.0": "1.7",
-        "1.1.0": "1.7",
-        "1.2": "1.7",
-        "1.2.0": "1.7",
-        "1.2.1": "1.7",
-        "1.3": "1.7",
-        "1.3.0": "1.7",
-        "1.4": "1.7",
-        "1.4.0": "1.7",
-        "1.4.1": "1.7"
+        "0.12.1": "1.8",
+        "1.0.0": "1.8",
+        "1.1.0": "1.8",
+        "1.2": "1.8",
+        "1.2.0": "1.8",
+        "1.2.1": "1.8",
+        "1.3": "1.8",
+        "1.3.0": "1.8",
+        "1.4": "1.8",
+        "1.4.0": "1.8",
+        "1.4.1": "1.8",
+        "1.7": "1.8"
     },
     "versions": {
-        "1.7": {
+        "1.8": {
             "py_versions": ["py3"],
             "registries": {
                 "af-south-1": "774647643957",

src/sagemaker/processing.py

Lines changed: 6 additions & 4 deletions
@@ -32,6 +32,7 @@
 from sagemaker.session import Session
 from sagemaker.network import NetworkConfig  # noqa: F401 # pylint: disable=unused-import
 from sagemaker.workflow.properties import Properties
+from sagemaker.workflow.parameters import Parameter
 from sagemaker.workflow.entities import Expression
 from sagemaker.dataset_definition.inputs import S3Input, DatasetDefinition
 from sagemaker.apiutils._base_types import ApiObject
@@ -292,7 +293,9 @@ def _normalize_inputs(self, inputs=None, kms_key=None):
             if isinstance(file_input.source, Properties) or file_input.dataset_definition:
                 normalized_inputs.append(file_input)
                 continue
-
+            if isinstance(file_input.s3_input.s3_uri, (Parameter, Expression, Properties)):
+                normalized_inputs.append(file_input)
+                continue
             # If the source is a local path, upload it to S3
             # and save the S3 uri in the ProcessingInput source.
             parse_result = urlparse(file_input.s3_input.s3_uri)
@@ -340,8 +343,7 @@ def _normalize_outputs(self, outputs=None):
             # Generate a name for the ProcessingOutput if it doesn't have one.
             if output.output_name is None:
                 output.output_name = "output-{}".format(count)
-            # if the output's destination is a workflow expression, do no normalization
-            if isinstance(output.destination, Expression):
+            if isinstance(output.destination, (Parameter, Expression, Properties)):
                 normalized_outputs.append(output)
                 continue
             # If the output's destination is not an s3_uri, create one.
@@ -1099,7 +1101,7 @@ def _create_s3_input(self):
             self.s3_data_type = self.s3_input.s3_data_type
             self.s3_input_mode = self.s3_input.s3_input_mode
             self.s3_data_distribution_type = self.s3_input.s3_data_distribution_type
-        elif self.source and self.destination:
+        elif self.source is not None and self.destination is not None:
             self.s3_input = S3Input(
                 s3_uri=self.source,
                 local_path=self.destination,
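Note: taken together, these hunks let pipeline entities used as input sources or output destinations flow through normalization untouched. A minimal sketch under that assumption (bucket, parameter names, and container paths are illustrative):

from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.parameters import ParameterString

input_uri = ParameterString(
    name="ProcessingInputDataUri", default_value="s3://my-bucket/processing_manifest"
)
output_uri = ParameterString(name="ProcessingOutputUri")

# With this change, _normalize_inputs/_normalize_outputs append these objects
# as-is instead of parsing or uploading them, so the S3 URIs are resolved only
# when the pipeline executes.
processing_input = ProcessingInput(source=input_uri, destination="/opt/ml/processing/input")
processing_output = ProcessingOutput(output_name="train", destination=output_uri)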

src/sagemaker/workflow/pipeline.py

Lines changed: 1 addition & 1 deletion
@@ -83,7 +83,7 @@ def create(

         Args:
             role_arn (str): The role arn that is assumed by the pipeline to create step artifacts.
-            pipeline_description (str): A description of the pipeline.
+            description (str): A description of the pipeline.
             experiment_name (str): The name of the experiment.
             tags (List[Dict[str, str]]): A list of {"Key": "string", "Value": "string"} dicts as
                 tags.
tests/data/mxnet_mnist/mnist_neo.py

Lines changed: 2 additions & 2 deletions
@@ -105,7 +105,7 @@ def train(


 def model_fn(path_to_model_files):
-    import neomxnet  # noqa: F401
+    import neomx  # noqa: F401

     ctx = mx.cpu()
     sym, arg_params, aux_params = mx.model.load_checkpoint(
@@ -120,7 +120,7 @@ def model_fn(path_to_model_files):


 def transform_fn(mod, payload, input_content_type, requested_output_content_type):
-    import neomxnet  # noqa: F401
+    import neomx  # noqa: F401

     if input_content_type != "application/vnd+python.numpy+binary":
         raise RuntimeError("Input content type must be application/vnd+python.numpy+binary")

tests/integ/test_huggingface.py

Lines changed: 4 additions & 0 deletions
@@ -17,11 +17,15 @@
 import pytest

 from sagemaker.huggingface import HuggingFace
+from tests import integ
 from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES
 from tests.integ.timeout import timeout


 @pytest.mark.release
+@pytest.mark.skipif(
+    integ.test_region() in integ.TRAINING_NO_P2_REGIONS, reason="no ml.p2 instances in this region"
+)
 def test_huggingface_training(
     sagemaker_session,
     gpu_instance_type,

tests/integ/test_neo_mxnet.py

Lines changed: 3 additions & 3 deletions
@@ -71,7 +71,7 @@ def test_attach_deploy(

     estimator.compile_model(
         target_instance_family=cpu_instance_family,
-        input_shape={"data": [1, 1, 28, 28], "softmax_label": [1]},
+        input_shape={"data": [1, 1, 28, 28]},
         output_path=estimator.output_path,
     )

@@ -121,7 +121,7 @@ def test_deploy_model(

     model.compile(
         target_instance_family=cpu_instance_family,
-        input_shape={"data": [1, 1, 28, 28], "softmax_label": [1]},
+        input_shape={"data": [1, 1, 28, 28]},
         role=role,
         job_name=unique_name_from_base("test-deploy-model-compilation-job"),
         output_path="/".join(model_data.split("/")[:-1]),
@@ -163,7 +163,7 @@ def test_inferentia_deploy_model(

     model.compile(
         target_instance_family=inf_instance_family,
-        input_shape={"data": [1, 1, 28, 28], "softmax_label": [1]},
+        input_shape={"data": [1, 1, 28, 28]},
         role=role,
         job_name=unique_name_from_base("test-deploy-model-compilation-job"),
         output_path="/".join(model_data.split("/")[:-1]),

tests/integ/test_tfs.py

Lines changed: 6 additions & 0 deletions
@@ -161,6 +161,9 @@ def test_predict_with_accelerator(tfs_predictor_with_accelerator):


 @pytest.mark.local_mode
+@pytest.mark.skip(
+    reason="This test is broken due to a regression." "This test should be reenabled later."
+)
 def test_predict_with_entry_point(tfs_predictor_with_model_and_entry_point_same_tar):
     input_data = {"instances": [1.0, 2.0, 5.0]}
     expected_result = {"predictions": [4.0, 4.5, 6.0]}
@@ -170,6 +173,9 @@ def test_predict_with_entry_point(tfs_predictor_with_model_and_entry_point_same_


 @pytest.mark.local_mode
+@pytest.mark.skip(
+    reason="This test is broken due to a regression." "This test should be reenabled later."
+)
 def test_predict_with_model_and_entry_point_and_dependencies_separated(
     tfs_predictor_with_model_and_entry_point_and_dependencies,
 ):

tests/unit/sagemaker/workflow/test_steps.py

Lines changed: 33 additions & 12 deletions
@@ -35,6 +35,7 @@
 from sagemaker.network import NetworkConfig
 from sagemaker.transformer import Transformer
 from sagemaker.workflow.properties import Properties
+from sagemaker.workflow.parameters import ParameterString, ParameterInteger
 from sagemaker.workflow.steps import (
     ProcessingStep,
     Step,
@@ -112,16 +113,27 @@ def test_custom_step():


 def test_training_step(sagemaker_session):
+    instance_type_parameter = ParameterString(name="InstanceType", default_value="c4.4xlarge")
+    instance_count_parameter = ParameterInteger(name="InstanceCount", default_value=1)
+    data_source_uri_parameter = ParameterString(
+        name="DataSourceS3Uri", default_value=f"s3://{BUCKET}/train_manifest"
+    )
+    training_epochs_parameter = ParameterInteger(name="TrainingEpochs", default_value=5)
+    training_batch_size_parameter = ParameterInteger(name="TrainingBatchSize", default_value=500)
     estimator = Estimator(
         image_uri=IMAGE_URI,
         role=ROLE,
-        instance_count=1,
-        instance_type="c4.4xlarge",
+        instance_count=instance_count_parameter,
+        instance_type=instance_type_parameter,
         profiler_config=ProfilerConfig(system_monitor_interval_millis=500),
+        hyperparameters={
+            "batch-size": training_batch_size_parameter,
+            "epochs": training_epochs_parameter,
+        },
         rules=[],
         sagemaker_session=sagemaker_session,
     )
-    inputs = TrainingInput(f"s3://{BUCKET}/train_manifest")
+    inputs = TrainingInput(s3_data=data_source_uri_parameter)
     cache_config = CacheConfig(enable_caching=True, expire_after="PT1H")
     step = TrainingStep(
         name="MyTrainingStep", estimator=estimator, inputs=inputs, cache_config=cache_config
@@ -131,22 +143,26 @@ def test_training_step(sagemaker_session):
         "Type": "Training",
         "Arguments": {
             "AlgorithmSpecification": {"TrainingImage": IMAGE_URI, "TrainingInputMode": "File"},
+            "HyperParameters": {
+                "batch-size": training_batch_size_parameter,
+                "epochs": training_epochs_parameter,
+            },
             "InputDataConfig": [
                 {
                     "ChannelName": "training",
                     "DataSource": {
                         "S3DataSource": {
                             "S3DataDistributionType": "FullyReplicated",
                             "S3DataType": "S3Prefix",
-                            "S3Uri": f"s3://{BUCKET}/train_manifest",
+                            "S3Uri": data_source_uri_parameter,
                         }
                     },
                 }
             ],
             "OutputDataConfig": {"S3OutputPath": f"s3://{BUCKET}/"},
             "ResourceConfig": {
-                "InstanceCount": 1,
-                "InstanceType": "c4.4xlarge",
+                "InstanceCount": instance_count_parameter,
+                "InstanceType": instance_type_parameter,
                 "VolumeSizeInGB": 30,
             },
             "RoleArn": ROLE,
@@ -162,16 +178,21 @@ def test_training_step(sagemaker_session):


 def test_processing_step(sagemaker_session):
+    processing_input_data_uri_parameter = ParameterString(
+        name="ProcessingInputDataUri", default_value=f"s3://{BUCKET}/processing_manifest"
+    )
+    instance_type_parameter = ParameterString(name="InstanceType", default_value="ml.m4.4xlarge")
+    instance_count_parameter = ParameterInteger(name="InstanceCount", default_value=1)
     processor = Processor(
         image_uri=IMAGE_URI,
         role=ROLE,
-        instance_count=1,
-        instance_type="ml.m4.4xlarge",
+        instance_count=instance_count_parameter,
+        instance_type=instance_type_parameter,
         sagemaker_session=sagemaker_session,
     )
     inputs = [
         ProcessingInput(
-            source=f"s3://{BUCKET}/processing_manifest",
+            source=processing_input_data_uri_parameter,
             destination="processing_manifest",
         )
     ]
@@ -198,14 +219,14 @@ def test_processing_step(sagemaker_session):
                         "S3DataDistributionType": "FullyReplicated",
                         "S3DataType": "S3Prefix",
                         "S3InputMode": "File",
-                        "S3Uri": "s3://my-bucket/processing_manifest",
+                        "S3Uri": processing_input_data_uri_parameter,
                     },
                 }
             ],
             "ProcessingResources": {
                 "ClusterConfig": {
-                    "InstanceCount": 1,
-                    "InstanceType": "ml.m4.4xlarge",
+                    "InstanceCount": instance_count_parameter,
+                    "InstanceType": instance_type_parameter,
                     "VolumeSizeInGB": 30,
                 }
             },
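Note: the updated tests assert that the parameter objects survive into each step's request dict. A hedged end-to-end sketch of how such parameters are typically defined, attached to a pipeline, and overridden per execution (the image URI, role ARN, bucket, and names below are placeholders, not part of this commit):

from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.parameters import ParameterInteger, ParameterString
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.steps import TrainingStep

instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
instance_count = ParameterInteger(name="InstanceCount", default_value=1)
epochs = ParameterInteger(name="TrainingEpochs", default_value=5)

estimator = Estimator(
    image_uri="<training-image-uri>",
    role="<execution-role-arn>",
    instance_count=instance_count,
    instance_type=instance_type,
    hyperparameters={"epochs": epochs},  # passed through thanks to the estimator.py change
)
step = TrainingStep(
    name="MyTrainingStep",
    estimator=estimator,
    inputs=TrainingInput(s3_data="s3://my-bucket/train"),
)

pipeline = Pipeline(
    name="MyPipeline",
    parameters=[instance_type, instance_count, epochs],
    steps=[step],
)
pipeline.upsert(role_arn="<execution-role-arn>")

# Parameter defaults can be overridden per execution without redefining the pipeline.
execution = pipeline.start(parameters={"TrainingEpochs": 10})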

0 commit comments
