aws · ahsan-z-khan · Feb 25, 2021 · Feb 10, 2021 · Feb 15, 2021 · Feb 16, 2021
@@ -2,14 +2,73 @@
     "processors": ["cpu", "gpu"],
     "scope": ["inference"],
     "version_aliases": {
-        "0.4.0": "1.4.0",
-        "1.0.0": "1.4.0",
-        "1.1.0": "1.4.0",
-        "1.2.0": "1.4.0",
-        "1.3.0": "1.4.0"
+        "0.4.0": "1.4",
+        "1.0.0": "1.4",
+        "1.1.0": "1.4",
+        "1.2.0": "1.4",
+        "1.3.0": "1.4",
+        "1.4.0": "1.4"
     },
     "versions": {
-        "1.4.0": {
+        "1.4": {
+            "py_versions": ["py3"],
+            "registries": {
+                "af-south-1": "774647643957",
+                "ap-east-1": "110948597952",
+                "ap-northeast-1": "941853720454",
+                "ap-northeast-2": "151534178276",
+                "ap-south-1": "763008648453",
+                "ap-southeast-1": "324986816169",
+                "ap-southeast-2": "355873309152",
+                "ca-central-1": "464438896020",
+                "cn-north-1": "472730292857",
+                "cn-northwest-1": "474822919863",
+                "eu-central-1": "746233611703",
+                "eu-north-1": "601324751636",
+                "eu-south-1": "966458181534",
+                "eu-west-1": "802834080501",
+                "eu-west-2": "205493899709",
+                "eu-west-3": "254080097072",
+                "me-south-1": "836785723513",
+                "sa-east-1": "756306329178",
+                "us-east-1": "785573368785",
+                "us-east-2": "007439368137",
+                "us-gov-west-1": "263933020539",
+                "us-west-1": "710691900526",
+                "us-west-2": "301217895009"
+            },
+            "repository": "sagemaker-inference-pytorch"
+        },
+        "1.5": {
+            "py_versions": ["py3"],
+            "registries": {
+                "af-south-1": "774647643957",
+                "ap-east-1": "110948597952",
+                "ap-northeast-1": "941853720454",
+                "ap-northeast-2": "151534178276",
+                "ap-south-1": "763008648453",
+                "ap-southeast-1": "324986816169",
+                "ap-southeast-2": "355873309152",
+                "ca-central-1": "464438896020",
+                "cn-north-1": "472730292857",
+                "cn-northwest-1": "474822919863",
+                "eu-central-1": "746233611703",
+                "eu-north-1": "601324751636",
+                "eu-south-1": "966458181534",
+                "eu-west-1": "802834080501",
+                "eu-west-2": "205493899709",
+                "eu-west-3": "254080097072",
+                "me-south-1": "836785723513",
+                "sa-east-1": "756306329178",
+                "us-east-1": "785573368785",
+                "us-east-2": "007439368137",
+                "us-gov-west-1": "263933020539",
+                "us-west-1": "710691900526",
+                "us-west-2": "301217895009"
+            },
+            "repository": "sagemaker-inference-pytorch"
+        },
+        "1.6": {
             "py_versions": ["py3"],
             "registries": {
                 "af-south-1": "774647643957",

@@ -16,6 +16,7 @@
 import json
 import logging
 import os
+import re
 
 import sagemaker
 from sagemaker import (
@@ -398,6 +399,7 @@ def _compilation_job_config(
         target_platform_arch=None,
         target_platform_accelerator=None,
         compiler_options=None,
+        framework_version=None,
     ):
         """Placeholder Docstring"""
         input_model_config = {
@@ -407,6 +409,14 @@ def _compilation_job_config(
             else input_shape,
             "Framework": framework.upper(),
         }
+
+        if (
+            framework.lower() == "pytorch"
+            and re.match("(?=^ml_)(?!ml_inf)", target_instance_type) is not None
+            and framework_version is not None
+        ):
+            input_model_config["FrameworkVersion"] = utils.get_short_version(framework_version)
+
         role = self.sagemaker_session.expand_role(role)
         output_model_config = {
             "S3OutputLocation": output_path,
@@ -572,7 +582,8 @@ def compile(
             framework (str): The framework that is used to train the original
                 model. Allowed values: 'mxnet', 'tensorflow', 'keras', 'pytorch',
                 'onnx', 'xgboost'
-            framework_version (str):
+            framework_version (str): The version of framework, for example:
+                '1.5' for PyTorch
             target_platform_os (str): Target Platform OS, for example: 'LINUX'.
                 For allowed strings see
                 https://docs.aws.amazon.com/sagemaker/latest/dg/API_OutputConfig.html.
@@ -626,11 +637,11 @@ def compile(
             target_platform_arch,
             target_platform_accelerator,
             compiler_options,
+            framework_version,
         )
         self.sagemaker_session.compile_model(**config)
         job_status = self.sagemaker_session.wait_for_compilation_job(job_name)
         self.model_data = job_status["ModelArtifacts"]["S3ModelArtifacts"]
-
         if target_instance_family is not None:
             if target_instance_family.startswith("ml_"):
                 self.image_uri = self._compilation_image_uri(

@@ -182,6 +182,26 @@ def pytorch_eia_py_version():
     return "py3"
 
 
+@pytest.fixture(scope="module")
+def neo_pytorch_latest_py_version():
+    return "py3"
+
+
+@pytest.fixture(scope="module")
+def neo_pytorch_compilation_job_name():
+    return utils.name_from_base("pytorch-neo-model")
+
+
+@pytest.fixture(scope="module")
+def neo_pytorch_target_device():
+    return "ml_c5"
+
+
+@pytest.fixture(scope="module")
+def neo_pytorch_cpu_instance_type():
+    return "ml.c5.xlarge"
+
+
 @pytest.fixture(scope="module")
 def xgboost_framework_version(xgboost_version):
     if xgboost_version in ("1", "latest"):

@@ -0,0 +1,90 @@
+# Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import io
+import json
+import logging
+import os
+import pickle
+
+import numpy as np
+import torch
+import neopytorch
+import torchvision.transforms as transforms
+from PIL import Image  # Training container doesn't have this package
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+
+
+def transform_fn(model, payload, request_content_type, response_content_type):
+
+    logger.info("Invoking user-defined transform function")
+
+    if request_content_type != "application/octet-stream":
+        raise RuntimeError(
+            "Content type must be application/octet-stream. Provided: {0}".format(
+                request_content_type
+            )
+        )
+
+    # preprocess image
+    decoded = Image.open(io.BytesIO(payload))
+    preprocess = transforms.Compose(
+        [
+            transforms.Resize(256),
+            transforms.CenterCrop(224),
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+        ]
+    )
+    normalized = preprocess(decoded)
+    batchified = normalized.unsqueeze(0)
+
+    # predict
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    batchified = batchified.to(device)
+    result = model.forward(batchified)
+
+    # Softmax (assumes batch size 1)
+    result = np.squeeze(result.cpu().numpy())
+    result_exp = np.exp(result - np.max(result))
+    result = result_exp / np.sum(result_exp)
+
+    response_body = json.dumps(result.tolist())
+    content_type = "application/json"
+
+    return response_body, content_type
+
+
+def model_fn(model_dir):
+
+    logger.info("model_fn")
+    neopytorch.config(model_dir=model_dir, neo_runtime=True)
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    # The compiled model is saved as "compiled.pt"
+    model = torch.jit.load(os.path.join(model_dir, "compiled.pt"), map_location=device)
+
+    # It is recommended to run warm-up inference during model load
+    sample_input_path = os.path.join(model_dir, "sample_input.pkl")
+    with open(sample_input_path, "rb") as input_file:
+        model_input = pickle.load(input_file)
+    if torch.is_tensor(model_input):
+        model_input = model_input.to(device)
+        model(model_input)
+    elif isinstance(model_input, tuple):
+        model_input = (inp.to(device) for inp in model_input if torch.is_tensor(inp))
+        model(*model_input)
+    else:
+        print("Only supports a torch tensor or a tuple of torch tensors")
+
+    return model