Skip to content

Commit a907008

Browse files
kkoppolu1EC2 Default User
andauthored
Kkoppolu inference examples (#1587)
* Compilation examples changes for new inference containers Update examples for PyTorch - to use the new inference containers - Use SageMaker 2.x * Clear outputs Clear outputs in the notebook * Fix typo Fix typo in text box * Undo change to iterations in old way Undo change to iterations in old way * Code Review feedback Organize imports Code Review feedback * CR Use new inference containers for both uncompiled and compiled flows. * CR Remove incorrect code comments * Update versions of torch and torchvision Co-authored-by: EC2 Default User <[email protected]>
1 parent 55d26f5 commit a907008

File tree

7 files changed

+327
-347
lines changed

7 files changed

+327
-347
lines changed
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import io
2+
import json
3+
import logging
4+
import os
5+
import pickle
6+
7+
import numpy as np
8+
import torch
9+
import torchvision.transforms as transforms
10+
from PIL import Image # Training container doesn't have this package
11+
12+
# Module-level logger used by transform_fn and model_fn; its level is set to DEBUG.
logger = logging.getLogger(__name__)
13+
logger.setLevel(logging.DEBUG)
14+
15+
16+
def transform_fn(model, payload, request_content_type,
                 response_content_type):
    """Classify one encoded image and return softmax probabilities as JSON.

    The payload is decoded with PIL, resized/cropped to 224x224, normalized
    with the mean/std constants below, and passed to ``model`` as a batch of
    one.

    Args:
        model: Callable model (as returned by ``model_fn``); it is invoked
            with a float tensor of shape ``(1, 3, 224, 224)``.
        payload: Raw bytes of an encoded image that PIL can open.
        request_content_type: Content type of the request; must be
            ``'application/octet-stream'``.
        response_content_type: Requested response content type. It is not
            consulted: the response is always JSON.

    Returns:
        A ``(response_body, content_type)`` tuple where ``response_body`` is
        a JSON-encoded list of softmax probabilities and ``content_type`` is
        ``'application/json'``.

    Raises:
        RuntimeError: If ``request_content_type`` is not
            ``'application/octet-stream'``.
    """
    logger.info('Invoking user-defined transform function')

    if request_content_type != 'application/octet-stream':
        raise RuntimeError(
            'Content type must be application/octet-stream. Provided: {0}'.format(request_content_type))

    # preprocess
    # Force 3-channel RGB: grayscale, palette and RGBA images would otherwise
    # not match the 3-channel mean/std passed to Normalize.
    decoded = Image.open(io.BytesIO(payload)).convert('RGB')
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]),
    ])
    normalized = preprocess(decoded)
    batchified = normalized.unsqueeze(0)

    # predict
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    batchified = batchified.to(device)
    # Inference only: skip autograd bookkeeping so the output never requires
    # grad (Tensor.numpy() refuses tensors that do).
    with torch.no_grad():
        result = model(batchified)

    # Softmax (assumes batch size 1)
    result = np.squeeze(result.detach().cpu().numpy())
    # Subtract the max before exponentiating for numerical stability.
    result_exp = np.exp(result - np.max(result))
    result = result_exp / np.sum(result_exp)

    response_body = json.dumps(result.tolist())
    content_type = 'application/json'

    return response_body, content_type
53+
54+
55+
def model_fn(model_dir):
    """Load the compiled TorchScript model and run one warm-up inference.

    Args:
        model_dir: Directory containing the compiled model ``compiled.pt``
            and the pickled warm-up input ``sample_input.pkl``.

    Returns:
        The loaded model, moved to CUDA when available and to CPU otherwise.
    """
    logger.info('model_fn')
    # NOTE(review): torch.neo is not part of stock PyTorch; presumably it is
    # provided by the SageMaker Neo inference container -- confirm.
    with torch.neo.config(model_dir=model_dir, neo_runtime=True):
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # The compiled model is saved as "compiled.pt"
        model = torch.jit.load(os.path.join(model_dir, 'compiled.pt'))
        model = model.to(device)

        # It is recommended to run warm-up inference during model load
        sample_input_path = os.path.join(model_dir, 'sample_input.pkl')
        # NOTE(review): pickle.load can execute arbitrary code; this assumes
        # model_dir only ever holds trusted artifacts -- confirm.
        with open(sample_input_path, 'rb') as input_file:
            model_input = pickle.load(input_file)

        if torch.is_tensor(model_input):
            warmup_args = (model_input.to(device),)
        elif isinstance(model_input, tuple):
            # Non-tensor tuple members are dropped, as before.
            warmup_args = tuple(inp.to(device)
                                for inp in model_input if torch.is_tensor(inp))
        else:
            warmup_args = None
            logger.warning(
                "Only supports a torch tensor or a tuple of torch tensors")

        if warmup_args is not None:
            # Warm-up is inference only; no gradients are needed.
            with torch.no_grad():
                model(*warmup_args)

        return model

sagemaker_neo_compilation_jobs/pytorch_torchvision/pytorch_torchvision_neo.ipynb

Lines changed: 50 additions & 157 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
"cell_type": "markdown",
1212
"metadata": {},
1313
"source": [
14-
"Amazon SageMaker Neo is API to compile machine learning models to optimize them for our choice of hardward targets. Currently, Neo supports pre-trained PyTorch models from [TorchVision](https://pytorch.org/docs/stable/torchvision/models.html). General support for other PyTorch models is forthcoming."
14+
"Amazon SageMaker Neo is an API to compile machine learning models to optimize them for our choice of hardware targets. Currently, Neo supports pre-trained PyTorch models from [TorchVision](https://pytorch.org/docs/stable/torchvision/models.html). General support for other PyTorch models is forthcoming."
1515
]
1616
},
1717
{
@@ -20,7 +20,16 @@
2020
"metadata": {},
2121
"outputs": [],
2222
"source": [
23-
"!~/anaconda3/envs/pytorch_p36/bin/pip install torch==1.2.0 torchvision==0.4.0"
23+
"!~/anaconda3/envs/pytorch_p36/bin/pip install torch==1.4.0 torchvision==0.5.0"
24+
]
25+
},
26+
{
27+
"cell_type": "code",
28+
"execution_count": null,
29+
"metadata": {},
30+
"outputs": [],
31+
"source": [
32+
"!~/anaconda3/envs/pytorch_p36/bin/pip install --upgrade sagemaker"
2433
]
2534
},
2635
{
@@ -34,7 +43,7 @@
3443
"cell_type": "markdown",
3544
"metadata": {},
3645
"source": [
37-
"We'll import [ResNet18](https://arxiv.org/abs/1512.03385) model from TorchVision and create a model artifact `model.tar.gz`:"
46+
"We'll import [ResNet18](https://arxiv.org/abs/1512.03385) model from TorchVision and create a model artifact `model.tar.gz`."
3847
]
3948
},
4049
{
@@ -60,14 +69,7 @@
6069
"cell_type": "markdown",
6170
"metadata": {},
6271
"source": [
63-
"## Invoke Neo Compilation API"
64-
]
65-
},
66-
{
67-
"cell_type": "markdown",
68-
"metadata": {},
69-
"source": [
70-
"We then forward the model artifact to Neo Compilation API:"
72+
"### Upload the model archive to S3"
7173
]
7274
},
7375
{
@@ -87,111 +89,29 @@
8789
"bucket = sess.default_bucket()\n",
8890
"\n",
8991
"compilation_job_name = name_from_base('TorchVision-ResNet18-Neo')\n",
92+
"prefix = compilation_job_name+'/model'\n",
9093
"\n",
91-
"model_key = '{}/model/model.tar.gz'.format(compilation_job_name)\n",
92-
"model_path = 's3://{}/{}'.format(bucket, model_key)\n",
93-
"boto3.resource('s3').Bucket(bucket).upload_file('model.tar.gz', model_key)\n",
94+
"model_path = sess.upload_data(path='model.tar.gz', key_prefix=prefix)\n",
9495
"\n",
95-
"sm_client = boto3.client('sagemaker')\n",
9696
"data_shape = '{\"input0\":[1,3,224,224]}'\n",
9797
"target_device = 'ml_c5'\n",
9898
"framework = 'PYTORCH'\n",
99-
"framework_version = '1.2.0'\n",
99+
"framework_version = '1.4.0'\n",
100100
"compiled_model_path = 's3://{}/{}/output'.format(bucket, compilation_job_name)"
101101
]
102102
},
103-
{
104-
"cell_type": "code",
105-
"execution_count": null,
106-
"metadata": {},
107-
"outputs": [],
108-
"source": [
109-
"response = sm_client.create_compilation_job(\n",
110-
" CompilationJobName=compilation_job_name,\n",
111-
" RoleArn=role,\n",
112-
" InputConfig={\n",
113-
" 'S3Uri': model_path,\n",
114-
" 'DataInputConfig': data_shape,\n",
115-
" 'Framework': framework\n",
116-
" },\n",
117-
" OutputConfig={\n",
118-
" 'S3OutputLocation': compiled_model_path,\n",
119-
" 'TargetDevice': target_device\n",
120-
" },\n",
121-
" StoppingCondition={\n",
122-
" 'MaxRuntimeInSeconds': 300\n",
123-
" }\n",
124-
")\n",
125-
"print(response)\n",
126-
"\n",
127-
"# Poll every 30 sec\n",
128-
"while True:\n",
129-
" response = sm_client.describe_compilation_job(CompilationJobName=compilation_job_name)\n",
130-
" if response['CompilationJobStatus'] == 'COMPLETED':\n",
131-
" break\n",
132-
" elif response['CompilationJobStatus'] == 'FAILED':\n",
133-
" raise RuntimeError('Compilation failed')\n",
134-
" print('Compiling ...')\n",
135-
" time.sleep(30)\n",
136-
"print('Done!')\n",
137-
"\n",
138-
"# Extract compiled model artifact\n",
139-
"compiled_model_path = response['ModelArtifacts']['S3ModelArtifacts']"
140-
]
141-
},
142-
{
143-
"cell_type": "markdown",
144-
"metadata": {},
145-
"source": [
146-
"## Create prediction endpoint"
147-
]
148-
},
149-
{
150-
"cell_type": "markdown",
151-
"metadata": {},
152-
"source": [
153-
"To create a prediction endpoint, we first specify two additional functions, to be used with Neo Deep Learning Runtime:\n",
154-
"\n",
155-
"* `neo_preprocess(payload, content_type)`: Function that takes in the payload and Content-Type of each incoming request and returns a NumPy array. Here, the payload is byte-encoded NumPy array, so the function simply decodes the bytes to obtain the NumPy array.\n",
156-
"* `neo_postprocess(result)`: Function that takes the prediction results produced by Deep Learining Runtime and returns the response body"
157-
]
158-
},
159-
{
160-
"cell_type": "code",
161-
"execution_count": null,
162-
"metadata": {},
163-
"outputs": [],
164-
"source": [
165-
"!pygmentize resnet18.py"
166-
]
167-
},
168103
{
169104
"cell_type": "markdown",
170105
"metadata": {},
171106
"source": [
172-
"Upload the Python script containing the two functions to S3:"
173-
]
174-
},
175-
{
176-
"cell_type": "code",
177-
"execution_count": null,
178-
"metadata": {},
179-
"outputs": [],
180-
"source": [
181-
"source_key = '{}/source/sourcedir.tar.gz'.format(compilation_job_name)\n",
182-
"source_path = 's3://{}/{}'.format(bucket, source_key)\n",
183-
"\n",
184-
"with tarfile.open('sourcedir.tar.gz', 'w:gz') as f:\n",
185-
" f.add('resnet18.py')\n",
186-
"\n",
187-
"boto3.resource('s3').Bucket(bucket).upload_file('sourcedir.tar.gz', source_key)"
107+
"## Invoke Neo Compilation API"
188108
]
189109
},
190110
{
191111
"cell_type": "markdown",
192112
"metadata": {},
193113
"source": [
194-
"We then create a SageMaker model record:"
114+
"### Create a PyTorch SageMaker model"
195115
]
196116
},
197117
{
@@ -200,31 +120,26 @@
200120
"metadata": {},
201121
"outputs": [],
202122
"source": [
203-
"from sagemaker.model import NEO_IMAGE_ACCOUNT\n",
204-
"from sagemaker.fw_utils import create_image_uri\n",
205-
"\n",
206-
"model_name = name_from_base('TorchVision-ResNet18-Neo')\n",
123+
"from sagemaker.pytorch.model import PyTorchModel\n",
124+
"from sagemaker.predictor import Predictor\n",
207125
"\n",
208-
"image_uri = create_image_uri(region, 'neo-' + framework.lower(), target_device.replace('_', '.'),\n",
209-
" framework_version, py_version='py3', account=NEO_IMAGE_ACCOUNT[region])\n",
210-
"\n",
211-
"response = sm_client.create_model(\n",
212-
" ModelName=model_name,\n",
213-
" PrimaryContainer={\n",
214-
" 'Image': image_uri,\n",
215-
" 'ModelDataUrl': compiled_model_path,\n",
216-
" 'Environment': { 'SAGEMAKER_SUBMIT_DIRECTORY': source_path }\n",
217-
" },\n",
218-
" ExecutionRoleArn=role\n",
219-
")\n",
220-
"print(response)"
126+
"sagemaker_model = PyTorchModel(model_data=model_path,\n",
127+
" predictor_cls=Predictor,\n",
128+
" framework_version = framework_version,\n",
129+
" role=role,\n",
130+
" sagemaker_session=sess,\n",
131+
" entry_point='resnet18.py',\n",
132+
" source_dir='code',\n",
133+
" py_version='py3',\n",
134+
" env={'MMS_DEFAULT_RESPONSE_TIMEOUT': '500'}\n",
135+
" )"
221136
]
222137
},
223138
{
224139
"cell_type": "markdown",
225140
"metadata": {},
226141
"source": [
227-
"Then we create an Endpoint Configuration:"
142+
"### Use Neo compiler to compile the model"
228143
]
229144
},
230145
{
@@ -233,28 +148,21 @@
233148
"metadata": {},
234149
"outputs": [],
235150
"source": [
236-
"config_name = model_name\n",
237-
"\n",
238-
"response = sm_client.create_endpoint_config(\n",
239-
" EndpointConfigName=config_name,\n",
240-
" ProductionVariants=[\n",
241-
" {\n",
242-
" 'VariantName': 'default-variant-name',\n",
243-
" 'ModelName': model_name,\n",
244-
" 'InitialInstanceCount': 1,\n",
245-
" 'InstanceType': 'ml.c5.xlarge',\n",
246-
" 'InitialVariantWeight': 1.0\n",
247-
" },\n",
248-
" ],\n",
249-
")\n",
250-
"print(response)"
151+
"compiled_model = sagemaker_model.compile(target_instance_family=target_device, \n",
152+
" input_shape=data_shape,\n",
153+
" job_name=compilation_job_name,\n",
154+
" role=role,\n",
155+
" framework=framework.lower(),\n",
156+
" framework_version=framework_version,\n",
157+
" output_path=compiled_model_path\n",
158+
" )"
251159
]
252160
},
253161
{
254162
"cell_type": "markdown",
255163
"metadata": {},
256164
"source": [
257-
"Finally, we create an Endpoint:"
165+
"## Deploy the model"
258166
]
259167
},
260168
{
@@ -263,20 +171,9 @@
263171
"metadata": {},
264172
"outputs": [],
265173
"source": [
266-
"endpoint_name = model_name + '-Endpoint'\n",
267-
"\n",
268-
"response = sm_client.create_endpoint(\n",
269-
" EndpointName=endpoint_name,\n",
270-
" EndpointConfigName=config_name,\n",
271-
")\n",
272-
"print(response)\n",
273-
"\n",
274-
"print('Creating endpoint ...')\n",
275-
"waiter = sm_client.get_waiter('endpoint_in_service')\n",
276-
"waiter.wait(EndpointName=endpoint_name)\n",
277-
"\n",
278-
"response = sm_client.describe_endpoint(EndpointName=endpoint_name)\n",
279-
"print(response)"
174+
"predictor = compiled_model.deploy(initial_instance_count = 1,\n",
175+
" instance_type = 'ml.c5.9xlarge'\n",
176+
" )"
280177
]
281178
},
282179
{
@@ -301,19 +198,15 @@
301198
"metadata": {},
302199
"outputs": [],
303200
"source": [
304-
"import json\n",
305201
"import numpy as np\n",
306-
"\n",
307-
"sm_runtime = boto3.Session().client('sagemaker-runtime')\n",
202+
"import json\n",
308203
"\n",
309204
"with open('cat.jpg', 'rb') as f:\n",
310205
" payload = f.read()\n",
206+
" payload = bytearray(payload) \n",
311207
"\n",
312-
"response = sm_runtime.invoke_endpoint(EndpointName=endpoint_name,\n",
313-
" ContentType='application/x-image',\n",
314-
" Body=payload)\n",
315-
"print(response)\n",
316-
"result = json.loads(response['Body'].read().decode())\n",
208+
"response = predictor.predict(payload)\n",
209+
"result = json.loads(response.decode())\n",
317210
"print('Most likely class: {}'.format(np.argmax(result)))"
318211
]
319212
},
@@ -346,7 +239,7 @@
346239
"metadata": {},
347240
"outputs": [],
348241
"source": [
349-
"sess.delete_endpoint(endpoint_name)"
242+
"sess.delete_endpoint(predictor.endpoint_name)"
350243
]
351244
}
352245
],
@@ -366,9 +259,9 @@
366259
"name": "python",
367260
"nbconvert_exporter": "python",
368261
"pygments_lexer": "ipython3",
369-
"version": "3.6.5"
262+
"version": "3.6.10"
370263
}
371264
},
372265
"nbformat": 4,
373-
"nbformat_minor": 2
266+
"nbformat_minor": 4
374267
}

0 commit comments

Comments
 (0)