aws
diff --git a/‎sagemaker_neo_compilation_jobs/pytorch_torchvision/code/resnet18.py
Lines changed: 78 additions & 0 deletions b/‎sagemaker_neo_compilation_jobs/pytorch_torchvision/code/resnet18.py
Lines changed: 78 additions & 0 deletions
diff --git a/‎sagemaker_neo_compilation_jobs/pytorch_torchvision/pytorch_torchvision_neo.ipynb
Lines changed: 50 additions & 157 deletions b/‎sagemaker_neo_compilation_jobs/pytorch_torchvision/pytorch_torchvision_neo.ipynb
Lines changed: 50 additions & 157 deletions
@@ -0,0 +1,78 @@
+import io
+import json
+import logging
+import os
+import pickle
+
+import numpy as np
+import torch
+import torchvision.transforms as transforms
+from PIL import Image  # Training container doesn't have this package
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+
+
+def transform_fn(model, payload, request_content_type,
+                 response_content_type):
+
+    logger.info('Invoking user-defined transform function')
+
+    if request_content_type != 'application/octet-stream':
+        raise RuntimeError(
+            'Content type must be application/octet-stream. Provided: {0}'.format(request_content_type))
+
+    # preprocess
+    decoded = Image.open(io.BytesIO(payload))
+    preprocess = transforms.Compose([
+        transforms.Resize(256),
+        transforms.CenterCrop(224),
+        transforms.ToTensor(),
+        transforms.Normalize(
+            mean=[
+                0.485, 0.456, 0.406], std=[
+                0.229, 0.224, 0.225]),
+    ])
+    normalized = preprocess(decoded)
+    batchified = normalized.unsqueeze(0)
+
+    # predict
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    batchified = batchified.to(device)
+    result = model.forward(batchified)
+
+    # Softmax (assumes batch size 1)
+    result = np.squeeze(result.cpu().numpy())
+    result_exp = np.exp(result - np.max(result))
+    result = result_exp / np.sum(result_exp)
+
+    response_body = json.dumps(result.tolist())
+    content_type = 'application/json'
+
+    return response_body, content_type
+
+
+def model_fn(model_dir):
+
+    logger.info('model_fn')
+    with torch.neo.config(model_dir=model_dir, neo_runtime=True):
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        # The compiled model is saved as "compiled.pt"
+        model = torch.jit.load(os.path.join(model_dir, 'compiled.pt'))
+        model = model.to(device)
+
+        # It is recommended to run warm-up inference during model load
+        sample_input_path = os.path.join(model_dir, 'sample_input.pkl')
+        with open(sample_input_path, 'rb') as input_file:
+            model_input = pickle.load(input_file)
+        if torch.is_tensor(model_input):
+            model_input = model_input.to(device)
+            model(model_input)
+        elif isinstance(model_input, tuple):
+            model_input = (inp.to(device)
+                           for inp in model_input if torch.is_tensor(inp))
+            model(*model_input)
+        else:
+            print("Only supports a torch tensor or a tuple of torch tensors")
+
+        return model
@@ -11,7 +11,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Amazon SageMaker Neo is API to compile machine learning models to optimize them for our choice of hardward targets. Currently, Neo supports pre-trained PyTorch models from [TorchVision](https://pytorch.org/docs/stable/torchvision/models.html). General support for other PyTorch models is forthcoming."
+    "Amazon SageMaker Neo is an API to compile machine learning models to optimize them for our choice of hardware targets. Currently, Neo supports pre-trained PyTorch models from [TorchVision](https://pytorch.org/docs/stable/torchvision/models.html). General support for other PyTorch models is forthcoming."
    ]
   },
   {
@@ -20,7 +20,16 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!~/anaconda3/envs/pytorch_p36/bin/pip install torch==1.2.0 torchvision==0.4.0"
+    "!~/anaconda3/envs/pytorch_p36/bin/pip install torch==1.4.0 torchvision==0.5.0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!~/anaconda3/envs/pytorch_p36/bin/pip install --upgrade sagemaker"
    ]
   },
   {
@@ -34,7 +43,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We'll import [ResNet18](https://arxiv.org/abs/1512.03385) model from TorchVision and create a model artifact `model.tar.gz`:"
+    "We'll import the [ResNet18](https://arxiv.org/abs/1512.03385) model from TorchVision and create a model artifact `model.tar.gz`."
    ]
   },
   {
@@ -60,14 +69,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Invoke Neo Compilation API"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We then forward the model artifact to Neo Compilation API:"
+    "### Upload the model archive to S3"
    ]
   },
   {
@@ -87,111 +89,29 @@
     "bucket = sess.default_bucket()\n",
     "\n",
     "compilation_job_name = name_from_base('TorchVision-ResNet18-Neo')\n",
+    "prefix = compilation_job_name+'/model'\n",
     "\n",
-    "model_key = '{}/model/model.tar.gz'.format(compilation_job_name)\n",
-    "model_path = 's3://{}/{}'.format(bucket, model_key)\n",
-    "boto3.resource('s3').Bucket(bucket).upload_file('model.tar.gz', model_key)\n",
+    "model_path = sess.upload_data(path='model.tar.gz', key_prefix=prefix)\n",
     "\n",
-    "sm_client = boto3.client('sagemaker')\n",
     "data_shape = '{\"input0\":[1,3,224,224]}'\n",
     "target_device = 'ml_c5'\n",
     "framework = 'PYTORCH'\n",
-    "framework_version = '1.2.0'\n",
+    "framework_version = '1.4.0'\n",
     "compiled_model_path = 's3://{}/{}/output'.format(bucket, compilation_job_name)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "response = sm_client.create_compilation_job(\n",
-    "    CompilationJobName=compilation_job_name,\n",
-    "    RoleArn=role,\n",
-    "    InputConfig={\n",
-    "        'S3Uri': model_path,\n",
-    "        'DataInputConfig': data_shape,\n",
-    "        'Framework': framework\n",
-    "    },\n",
-    "    OutputConfig={\n",
-    "        'S3OutputLocation': compiled_model_path,\n",
-    "        'TargetDevice': target_device\n",
-    "    },\n",
-    "    StoppingCondition={\n",
-    "        'MaxRuntimeInSeconds': 300\n",
-    "    }\n",
-    ")\n",
-    "print(response)\n",
-    "\n",
-    "# Poll every 30 sec\n",
-    "while True:\n",
-    "    response = sm_client.describe_compilation_job(CompilationJobName=compilation_job_name)\n",
-    "    if response['CompilationJobStatus'] == 'COMPLETED':\n",
-    "        break\n",
-    "    elif response['CompilationJobStatus'] == 'FAILED':\n",
-    "        raise RuntimeError('Compilation failed')\n",
-    "    print('Compiling ...')\n",
-    "    time.sleep(30)\n",
-    "print('Done!')\n",
-    "\n",
-    "# Extract compiled model artifact\n",
-    "compiled_model_path = response['ModelArtifacts']['S3ModelArtifacts']"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Create prediction endpoint"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "To create a prediction endpoint, we first specify two additional functions, to be used with Neo Deep Learning Runtime:\n",
-    "\n",
-    "* `neo_preprocess(payload, content_type)`: Function that takes in the payload and Content-Type of each incoming request and returns a NumPy array. Here, the payload is byte-encoded NumPy array, so the function simply decodes the bytes to obtain the NumPy array.\n",
-    "* `neo_postprocess(result)`: Function that takes the prediction results produced by Deep Learining Runtime and returns the response body"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!pygmentize resnet18.py"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Upload the Python script containing the two functions to S3:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "source_key = '{}/source/sourcedir.tar.gz'.format(compilation_job_name)\n",
-    "source_path = 's3://{}/{}'.format(bucket, source_key)\n",
-    "\n",
-    "with tarfile.open('sourcedir.tar.gz', 'w:gz') as f:\n",
-    "    f.add('resnet18.py')\n",
-    "\n",
-    "boto3.resource('s3').Bucket(bucket).upload_file('sourcedir.tar.gz', source_key)"
+    "## Invoke Neo Compilation API"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We then create a SageMaker model record:"
+    "### Create a PyTorch SageMaker model"
    ]
   },
   {
@@ -200,31 +120,26 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from sagemaker.model import NEO_IMAGE_ACCOUNT\n",
-    "from sagemaker.fw_utils import create_image_uri\n",
-    "\n",
-    "model_name = name_from_base('TorchVision-ResNet18-Neo')\n",
+    "from sagemaker.pytorch.model import PyTorchModel\n",
+    "from sagemaker.predictor import Predictor\n",
     "\n",
-    "image_uri = create_image_uri(region, 'neo-' + framework.lower(), target_device.replace('_', '.'),\n",
-    "                             framework_version, py_version='py3', account=NEO_IMAGE_ACCOUNT[region])\n",
-    "\n",
-    "response = sm_client.create_model(\n",
-    "    ModelName=model_name,\n",
-    "    PrimaryContainer={\n",
-    "        'Image': image_uri,\n",
-    "        'ModelDataUrl': compiled_model_path,\n",
-    "        'Environment': { 'SAGEMAKER_SUBMIT_DIRECTORY': source_path }\n",
-    "    },\n",
-    "    ExecutionRoleArn=role\n",
-    ")\n",
-    "print(response)"
+    "sagemaker_model = PyTorchModel(model_data=model_path,\n",
+    "                               predictor_cls=Predictor,\n",
+    "                               framework_version = framework_version,\n",
+    "                               role=role,\n",
+    "                               sagemaker_session=sess,\n",
+    "                               entry_point='resnet18.py',\n",
+    "                               source_dir='code',\n",
+    "                               py_version='py3',\n",
+    "                               env={'MMS_DEFAULT_RESPONSE_TIMEOUT': '500'}\n",
+    "                              )"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Then we create an Endpoint Configuration:"
+    "### Use Neo compiler to compile the model"
    ]
   },
   {
@@ -233,28 +148,21 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "config_name = model_name\n",
-    "\n",
-    "response = sm_client.create_endpoint_config(\n",
-    "    EndpointConfigName=config_name,\n",
-    "    ProductionVariants=[\n",
-    "        {\n",
-    "            'VariantName': 'default-variant-name',\n",
-    "            'ModelName': model_name,\n",
-    "            'InitialInstanceCount': 1,\n",
-    "            'InstanceType': 'ml.c5.xlarge',\n",
-    "            'InitialVariantWeight': 1.0\n",
-    "        },\n",
-    "    ],\n",
-    ")\n",
-    "print(response)"
+    "compiled_model = sagemaker_model.compile(target_instance_family=target_device, \n",
+    "                                         input_shape=data_shape,\n",
+    "                                         job_name=compilation_job_name,\n",
+    "                                         role=role,\n",
+    "                                         framework=framework.lower(),\n",
+    "                                         framework_version=framework_version,\n",
+    "                                         output_path=compiled_model_path\n",
+    "                                        )"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Finally, we create an Endpoint:"
+    "## Deploy the model"
    ]
   },
   {
@@ -263,20 +171,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "endpoint_name = model_name + '-Endpoint'\n",
-    "\n",
-    "response = sm_client.create_endpoint(\n",
-    "    EndpointName=endpoint_name,\n",
-    "    EndpointConfigName=config_name,\n",
-    ")\n",
-    "print(response)\n",
-    "\n",
-    "print('Creating endpoint ...')\n",
-    "waiter = sm_client.get_waiter('endpoint_in_service')\n",
-    "waiter.wait(EndpointName=endpoint_name)\n",
-    "\n",
-    "response = sm_client.describe_endpoint(EndpointName=endpoint_name)\n",
-    "print(response)"
+    "predictor = compiled_model.deploy(initial_instance_count = 1,\n",
+    "                                  instance_type = 'ml.c5.9xlarge'\n",
+    "                                 )"
    ]
   },
   {
@@ -301,19 +198,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import json\n",
     "import numpy as np\n",
-    "\n",
-    "sm_runtime = boto3.Session().client('sagemaker-runtime')\n",
+    "import json\n",
     "\n",
     "with open('cat.jpg', 'rb') as f:\n",
     "    payload = f.read()\n",
+    "    payload = bytearray(payload) \n",
     "\n",
-    "response = sm_runtime.invoke_endpoint(EndpointName=endpoint_name,\n",
-    "                                      ContentType='application/x-image',\n",
-    "                                      Body=payload)\n",
-    "print(response)\n",
-    "result = json.loads(response['Body'].read().decode())\n",
+    "response = predictor.predict(payload)\n",
+    "result = json.loads(response.decode())\n",
     "print('Most likely class: {}'.format(np.argmax(result)))"
    ]
   },
@@ -346,7 +239,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "sess.delete_endpoint(endpoint_name)"
+    "sess.delete_endpoint(predictor.endpoint_name)"
    ]
   }
  ],
@@ -366,9 +259,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.5"
+   "version": "3.6.10"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }