feature: Neo: Add Granular Target Description support for compilation (#1752)

apivovarov · web-flow · commit 20751c9f3f4b · 2020-07-28T16:47:46.000-07:00
diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
@@ -512,6 +512,10 @@ def compile_model(
         framework_version=None,
         compile_max_run=15 * 60,
         tags=None,
+        target_platform_os=None,
+        target_platform_arch=None,
+        target_platform_accelerator=None,
+        compiler_options=None,
         **kwargs
     ):
         """Compile a Neo model using the input model.
@@ -536,6 +540,21 @@ def compile_model(
             tags (list[dict]): List of tags for labeling a compilation job. For
                 more, see
                 https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html.
+            target_platform_os (str): Target Platform OS, for example: 'LINUX'.
+                For allowed strings see
+                https://docs.aws.amazon.com/sagemaker/latest/dg/API_OutputConfig.html.
+                It can be used instead of target_instance_family.
+            target_platform_arch (str): Target Platform Architecture, for example: 'X86_64'.
+                For allowed strings see
+                https://docs.aws.amazon.com/sagemaker/latest/dg/API_OutputConfig.html.
+                It can be used instead of target_instance_family.
+            target_platform_accelerator (str, optional): Target Platform Accelerator,
+                for example: 'NVIDIA'. For allowed strings see
+                https://docs.aws.amazon.com/sagemaker/latest/dg/API_OutputConfig.html.
+                It can be used instead of target_instance_family.
+            compiler_options (dict, optional): Additional parameters for compiler.
+                Compiler Options are TargetPlatform / target_instance_family specific. See
+                https://docs.aws.amazon.com/sagemaker/latest/dg/API_OutputConfig.html for details.
             **kwargs: Passed to invocation of ``create_model()``.
                 Implementations may customize ``create_model()`` to accept
                 ``**kwargs`` to customize model creation during deploy. For
@@ -565,6 +584,10 @@ def compile_model(
             compile_max_run,
             framework=framework,
             framework_version=framework_version,
+            target_platform_os=target_platform_os,
+            target_platform_arch=target_platform_arch,
+            target_platform_accelerator=target_platform_accelerator,
+            compiler_options=compiler_options,
         )
         return self._compiled_models[target_instance_family]
 
diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py
@@ -215,6 +215,10 @@ def _compilation_job_config(
         job_name,
         framework,
         tags,
+        target_platform_os=None,
+        target_platform_arch=None,
+        target_platform_accelerator=None,
+        compiler_options=None,
     ):
         """
         Args:
@@ -226,20 +230,46 @@ def _compilation_job_config(
             job_name:
             framework:
             tags:
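+            target_platform_os: Target platform OS; used when target_instance_type is None.
+            target_platform_arch: Target platform architecture; used with target_platform_os.
+            target_platform_accelerator: Optional accelerator added to the target platform.
+            compiler_options: Compiler options; a dict is serialized to a JSON string.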
         """
         input_model_config = {
             "S3Uri": self.model_data,
-            "DataInputConfig": input_shape
-            if not isinstance(input_shape, dict)
-            else json.dumps(input_shape),
+            "DataInputConfig": json.dumps(input_shape)
+            if isinstance(input_shape, dict)
+            else input_shape,
             "Framework": framework,
         }
         role = self.sagemaker_session.expand_role(role)
         output_model_config = {
-            "TargetDevice": target_instance_type,
             "S3OutputLocation": output_path,
         }
 
+        if target_instance_type is not None:
+            output_model_config["TargetDevice"] = target_instance_type
+        else:
+            if target_platform_os is None and target_platform_arch is None:
+                raise ValueError(
+                    "target_instance_type or (target_platform_os and target_platform_arch) "
+                    "should be provided"
+                )
+            target_platform = {
+                "Os": target_platform_os,
+                "Arch": target_platform_arch,
+            }
+            if target_platform_accelerator is not None:
+                target_platform["Accelerator"] = target_platform_accelerator
+            output_model_config["TargetPlatform"] = target_platform
+
+        if compiler_options is not None:
+            output_model_config["CompilerOptions"] = (
+                json.dumps(compiler_options)
+                if isinstance(compiler_options, dict)
+                else compiler_options
+            )
+
         return {
             "input_model_config": input_model_config,
             "output_model_config": output_model_config,
@@ -320,6 +350,10 @@ def compile(
         compile_max_run=5 * 60,
         framework=None,
         framework_version=None,
+        target_platform_os=None,
+        target_platform_arch=None,
+        target_platform_accelerator=None,
+        compiler_options=None,
     ):
         """Compile this ``Model`` with SageMaker Neo.
 
@@ -328,6 +362,9 @@ def compile(
                 run your model after compilation, for example: ml_c5. For allowed
                 strings see
                 https://docs.aws.amazon.com/sagemaker/latest/dg/API_OutputConfig.html.
+                Alternatively, you can select an OS, Architecture and Accelerator using
+                ``target_platform_os``, ``target_platform_arch``,
+                and ``target_platform_accelerator``.
             input_shape (dict): Specifies the name and shape of the expected
                 inputs for your trained model in json dictionary form, for
                 example: {'data': [1,3,1024,1024]}, or {'var1': [1,1,28,28],
@@ -345,6 +382,21 @@ def compile(
                 model. Allowed values: 'mxnet', 'tensorflow', 'keras', 'pytorch',
                 'onnx', 'xgboost'
             framework_version (str):
+            target_platform_os (str): Target Platform OS, for example: 'LINUX'.
+                For allowed strings see
+                https://docs.aws.amazon.com/sagemaker/latest/dg/API_OutputConfig.html.
+                It can be used instead of target_instance_family.
+            target_platform_arch (str): Target Platform Architecture, for example: 'X86_64'.
+                For allowed strings see
+                https://docs.aws.amazon.com/sagemaker/latest/dg/API_OutputConfig.html.
+                It can be used instead of target_instance_family.
+            target_platform_accelerator (str, optional): Target Platform Accelerator,
+                for example: 'NVIDIA'. For allowed strings see
+                https://docs.aws.amazon.com/sagemaker/latest/dg/API_OutputConfig.html.
+                It can be used instead of target_instance_family.
+            compiler_options (dict, optional): Additional parameters for compiler.
+                Compiler Options are TargetPlatform / target_instance_family specific. See
+                https://docs.aws.amazon.com/sagemaker/latest/dg/API_OutputConfig.html for details.
 
         Returns:
             sagemaker.model.Model: A SageMaker ``Model`` object. See
@@ -375,31 +427,41 @@ def compile(
             job_name,
             framework,
             tags,
+            target_platform_os,
+            target_platform_arch,
+            target_platform_accelerator,
+            compiler_options,
         )
         self.sagemaker_session.compile_model(**config)
         job_status = self.sagemaker_session.wait_for_compilation_job(job_name)
         self.model_data = job_status["ModelArtifacts"]["S3ModelArtifacts"]
-        if target_instance_family.startswith("ml_"):
-            self.image = self._neo_image(
-                self.sagemaker_session.boto_region_name,
-                target_instance_family,
-                framework,
-                framework_version,
-            )
-            self._is_compiled_model = True
-        elif target_instance_family.startswith(INFERENTIA_INSTANCE_PREFIX):
-            self.image = self._inferentia_image(
-                self.sagemaker_session.boto_region_name,
-                target_instance_family,
-                framework,
-                framework_version,
-            )
-            self._is_compiled_model = True
+        if target_instance_family is not None:
+            if target_instance_family.startswith("ml_"):
+                self.image = self._neo_image(
+                    self.sagemaker_session.boto_region_name,
+                    target_instance_family,
+                    framework,
+                    framework_version,
+                )
+                self._is_compiled_model = True
+            elif target_instance_family.startswith(INFERENTIA_INSTANCE_PREFIX):
+                self.image = self._inferentia_image(
+                    self.sagemaker_session.boto_region_name,
+                    target_instance_family,
+                    framework,
+                    framework_version,
+                )
+                self._is_compiled_model = True
+            else:
+                LOGGER.warning(
+                    "The instance type %s is not supported for deployment via SageMaker. "
+                    "Please deploy the model manually.",
+                    target_instance_family,
+                )
         else:
             LOGGER.warning(
-                "The instance type %s is not supported to deploy via SageMaker,"
-                "please deploy the model manually.",
-                target_instance_family,
+                "Devices described by Target Platform OS, Architecture and Accelerator are not "
+                "supported for deployment via SageMaker. Please deploy the model manually."
             )
         return self
 
diff --git a/tests/unit/sagemaker/model/test_neo.py b/tests/unit/sagemaker/model/test_neo.py
@@ -95,6 +95,45 @@ def test_compile_model_for_edge_device_tflite(sagemaker_session):
     assert model._is_compiled_model is False
 
 
+def test_compile_model_linux_arm64_nvidia(sagemaker_session):
+    sagemaker_session.wait_for_compilation_job = Mock(
+        return_value=DESCRIBE_COMPILATION_JOB_RESPONSE
+    )
+    model = _create_model(sagemaker_session)
+    model.compile(
+        target_instance_family=None,
+        input_shape={"data": [1, 3, 1024, 1024]},
+        output_path="s3://output",
+        role="role",
+        framework="tensorflow",
+        job_name="compile-model",
+        target_platform_os="LINUX",
+        target_platform_arch="ARM64",
+        target_platform_accelerator="NVIDIA",
+        compiler_options={"gpu-code": "sm_72", "trt-ver": "6.0.1", "cuda-ver": "10.1"},
+    )
+    assert model._is_compiled_model is False
+
+
+def test_compile_model_android_armv7(sagemaker_session):
+    sagemaker_session.wait_for_compilation_job = Mock(
+        return_value=DESCRIBE_COMPILATION_JOB_RESPONSE
+    )
+    model = _create_model(sagemaker_session)
+    model.compile(
+        target_instance_family=None,
+        input_shape={"data": [1, 3, 1024, 1024]},
+        output_path="s3://output",
+        role="role",
+        framework="tensorflow",
+        job_name="compile-model",
+        target_platform_os="ANDROID",
+        target_platform_arch="ARM_EABI",
+        compiler_options={"ANDROID_PLATFORM": 25, "mattr": ["+neon"]},
+    )
+    assert model._is_compiled_model is False
+
+
 def test_compile_model_for_cloud(sagemaker_session):
     sagemaker_session.wait_for_compilation_job = Mock(
         return_value=DESCRIBE_COMPILATION_JOB_RESPONSE