Skip to content

Commit 3017651

Browse files
vivekgangasani and Vivek authored
Badges vivek (#292)
* [adding badges for the lmi v7 notebooks] * [adding badges for the lmi v7 notebooks] * [adding badges for the lmi v7 notebooks] * [adding badges for the lmi v7 notebooks] * [adding badges for the lmi v7 notebooks] * [adding badges for the lmi v7 notebooks] * [adding badges for the lmi v7 notebooks] * [adding badges for the lmi v7 notebooks] --------- Co-authored-by: Vivek <[email protected]>
1 parent 76bdd34 commit 3017651

File tree

2 files changed

+142
-45
lines changed

2 files changed

+142
-45
lines changed

inference/generativeai/llm-workshop/deploy-V7-lmi/llama2_70b-lmi-trtllm.ipynb

Lines changed: 69 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,21 @@
55
"id": "2c31f83d-be31-47b9-af3b-a4285576b4db",
66
"metadata": {},
77
"source": [
8-
"Host Llama2-70B on Amazon SageMaker using LMI V7 container"
8+
"# Host Llama2-70B on Amazon SageMaker using TRT-LLM LMI container"
9+
]
10+
},
11+
{
12+
"cell_type": "markdown",
13+
"id": "5d63a9c2",
14+
"metadata": {},
15+
"source": [
16+
"---\n",
17+
"\n",
18+
"This notebook's CI test result for us-west-2 is as follows. CI test results in other regions can be found at the end of the notebook.\n",
19+
"\n",
20+
"![This us-west-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-west-2/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b-lmi-trtllm.ipynb)\n",
21+
"\n",
22+
"---"
923
]
1024
},
1125
{
@@ -35,7 +49,7 @@
3549
},
3650
"outputs": [],
3751
"source": [
38-
"%pip install sagemaker boto3 --upgrade"
52+
"%pip install sagemaker boto3 awscli huggingface_hub --upgrade --quiet"
3953
]
4054
},
4155
{
@@ -79,8 +93,7 @@
7993
"sess = sagemaker.session.Session() # sagemaker session for interacting with different AWS APIs\n",
8094
"bucket = sess.default_bucket() # bucket to house artifacts\n",
8195
"model_bucket = sess.default_bucket() # bucket to house artifacts\n",
82-
"s3_code_prefix = \"hf-large-model-djl/code_llama2\" # folder within bucket where code artifact will go\n",
83-
"\n",
96+
"s3_code_prefix = \"hf-large-model-djl/code_llama2\"\n",
8497
"region = sess._region_name\n",
8598
"account_id = sess.account_id()\n",
8699
"\n",
@@ -169,7 +182,7 @@
169182
"tags": []
170183
},
171184
"source": [
172-
"## Create serving.properties file, upload model to S3 and provid ethe inference container\n",
185+
"## Create serving.properties file, upload model to S3 and provide the inference container\n",
173186
"\n",
174187
"SageMaker Large Model Inference containers can be used to host models without any additional inference code. You also have the option to provide your inference script if you need any custom pre-processing of the input data or postprocessing of the model's predictions.\n",
175188
"\n",
@@ -202,7 +215,6 @@
202215
"- `engine`: The engine for DJL to use. \n",
203216
"- `option.model_id`: This can be the S3 uri of the pre-trained model or the model id of a pretrained model hosted inside a model repository on huggingface.co (https://huggingface.co/models).\n",
204217
"- `option.tensor_parallel_degree`: Set to the number of GPU devices over which Accelerate needs to partition the model. This parameter also controls the no of workers per model which will be started up when DJL serving runs. As an example if we have a 8 GPU machine and we are creating 8 partitions then we will have 1 worker per model to serve the requests.\n",
205-
"- `option.rolling_batch` : This parameter enables rolling batch of inputs\n",
206218
"- `option.max_rolling_batch_size`: Sets the max batch size \n",
207219
"- `option.model_loading_timeout` : Sets the timeout value for downloading and loading the model to serve inference\n",
208220
"\n",
@@ -227,13 +239,13 @@
227239
"outputs": [],
228240
"source": [
229241
"%%writefile ./code_llama2/serving.properties\n",
230-
"engine=MPI\n",
231-
"option.model_id={{s3url}}\n",
232-
"option.tensor_parallel_degree=8\n",
233-
"option.use_custom_all_reduce=true\n",
234-
"option.output_formatter=json\n",
235-
"option.max_rolling_batch_size=64\n",
236-
"option.model_loading_timeout=3600"
242+
"engine = MPI\n",
243+
"option.model_id = {{s3url}}\n",
244+
"option.tensor_parallel_degree = 8\n",
245+
"option.use_custom_all_reduce = true\n",
246+
"option.output_formatter = json\n",
247+
"option.max_rolling_batch_size = 64\n",
248+
"option.model_loading_timeout = 3600"
237249
]
238250
},
239251
{
@@ -270,11 +282,10 @@
270282
},
271283
"outputs": [],
272284
"source": [
273-
"\n",
274285
"inference_image_uri = sagemaker.image_uris.retrieve(\n",
275286
" framework=\"djl-tensorrtllm\", region=region, version=\"0.25.0\"\n",
276287
")\n",
277-
"print(f\"Image going to be used is ---- > {inference_image_uri}\")\n"
288+
"print(f\"Image going to be used is ---- > {inference_image_uri}\")"
278289
]
279290
},
280291
{
@@ -360,10 +371,11 @@
360371
"create_model_response = sm_client.create_model(\n",
361372
" ModelName=model_name,\n",
362373
" ExecutionRoleArn=role,\n",
363-
" PrimaryContainer={\"Image\": inference_image_uri,\n",
364-
" \"ModelDataUrl\": s3_code_artifact,\n",
365-
" 'Environment': {'MODEL_LOADING_TIMEOUT': '3600'}\n",
366-
" }\n",
374+
" PrimaryContainer={\n",
375+
" \"Image\": inference_image_uri,\n",
376+
" \"ModelDataUrl\": s3_code_artifact,\n",
377+
" \"Environment\": {\"MODEL_LOADING_TIMEOUT\": \"3600\"},\n",
378+
" },\n",
367379
")\n",
368380
"model_arn = create_model_response[\"ModelArn\"]\n",
369381
"\n",
@@ -475,7 +487,7 @@
475487
" \"parameters\": {\n",
476488
" \"do_sample\": True,\n",
477489
" \"max_new_tokens\": 256,\n",
478-
" \"top_k\" : 5,\n",
490+
" \"top_k\": 5,\n",
479491
" },\n",
480492
" }\n",
481493
" ),\n",
@@ -530,12 +542,44 @@
530542
]
531543
},
532544
{
533-
"cell_type": "code",
534-
"execution_count": null,
535-
"id": "b0c4f2f8-0474-473b-9e75-ff55ddcfffdf",
545+
"cell_type": "markdown",
546+
"id": "bc98aaf3",
536547
"metadata": {},
537-
"outputs": [],
538-
"source": []
548+
"source": [
549+
"## Notebook CI Test Results\n",
550+
"\n",
551+
"This notebook was tested in multiple regions. The test results are as follows, except for us-west-2 which is shown at the top of the notebook.\n",
552+
"\n",
553+
"![This us-east-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-east-1/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b-lmi-trtllm.ipynb)\n",
554+
"\n",
555+
"![This us-east-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-east-2/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b-lmi-trtllm.ipynb)\n",
556+
"\n",
557+
"![This us-west-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-west-1/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b-lmi-trtllm.ipynb)\n",
558+
"\n",
559+
"![This ca-central-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ca-central-1/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b-lmi-trtllm.ipynb)\n",
560+
"\n",
561+
"![This sa-east-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/sa-east-1/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b-lmi-trtllm.ipynb)\n",
562+
"\n",
563+
"![This eu-west-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-west-1/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b-lmi-trtllm.ipynb)\n",
564+
"\n",
565+
"![This eu-west-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-west-2/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b-lmi-trtllm.ipynb)\n",
566+
"\n",
567+
"![This eu-west-3 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-west-3/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b-lmi-trtllm.ipynb)\n",
568+
"\n",
569+
"![This eu-central-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-central-1/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b-lmi-trtllm.ipynb)\n",
570+
"\n",
571+
"![This eu-north-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-north-1/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b-lmi-trtllm.ipynb)\n",
572+
"\n",
573+
"![This ap-southeast-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-southeast-1/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b-lmi-trtllm.ipynb)\n",
574+
"\n",
575+
"![This ap-southeast-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-southeast-2/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b-lmi-trtllm.ipynb)\n",
576+
"\n",
577+
"![This ap-northeast-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-northeast-1/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b-lmi-trtllm.ipynb)\n",
578+
"\n",
579+
"![This ap-northeast-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-northeast-2/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b-lmi-trtllm.ipynb)\n",
580+
"\n",
581+
"![This ap-south-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-south-1/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b-lmi-trtllm.ipynb)\n"
582+
]
539583
}
540584
],
541585
"metadata": {

inference/generativeai/llm-workshop/deploy-V7-lmi/llama2_70b_lmi_v7.ipynb

Lines changed: 73 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,21 @@
55
"id": "2c31f83d-be31-47b9-af3b-a4285576b4db",
66
"metadata": {},
77
"source": [
8-
"Host Llama2-70B on Amazon SageMaker using LMI V7 container"
8+
"# Host Llama2-70B on Amazon SageMaker using LMI V7 container"
9+
]
10+
},
11+
{
12+
"cell_type": "markdown",
13+
"id": "57299e11",
14+
"metadata": {},
15+
"source": [
16+
"---\n",
17+
"\n",
18+
"This notebook's CI test result for us-west-2 is as follows. CI test results in other regions can be found at the end of the notebook.\n",
19+
"\n",
20+
"![This us-west-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-west-2/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b_lmi_v7.ipynb)\n",
21+
"\n",
22+
"---"
923
]
1024
},
1125
{
@@ -35,7 +49,7 @@
3549
},
3650
"outputs": [],
3751
"source": [
38-
"%pip install sagemaker boto3 --upgrade"
52+
"%pip install sagemaker boto3 awscli huggingface_hub --upgrade --quiet"
3953
]
4054
},
4155
{
@@ -79,8 +93,7 @@
7993
"sess = sagemaker.session.Session() # sagemaker session for interacting with different AWS APIs\n",
8094
"bucket = sess.default_bucket() # bucket to house artifacts\n",
8195
"model_bucket = sess.default_bucket() # bucket to house artifacts\n",
82-
"s3_code_prefix = \"hf-large-model-djl/code_llama2\" # folder within bucket where code artifact will go\n",
83-
"\n",
96+
"s3_code_prefix = \"hf-large-model-djl/code_llama2\"\n",
8497
"region = sess._region_name\n",
8598
"account_id = sess.account_id()\n",
8699
"\n",
@@ -169,7 +182,7 @@
169182
"tags": []
170183
},
171184
"source": [
172-
"## Create serving.properties file, upload model to S3 and provid ethe inference container\n",
185+
"## Create serving.properties file, upload model to S3 and provide the inference container\n",
173186
"\n",
174187
"SageMaker Large Model Inference containers can be used to host models without any additional inference code. You also have the option to provide your inference script if you need any custom pre-processing of the input data or postprocessing of the model's predictions.\n",
175188
"\n",
@@ -227,14 +240,14 @@
227240
"outputs": [],
228241
"source": [
229242
"%%writefile ./code_llama2/serving.properties\n",
230-
"engine=MPI\n",
231-
"option.model_id={{s3url}}\n",
232-
"option.task=text-generation\n",
233-
"option.tensor_parallel_degree=8\n",
234-
"option.rolling_batch=lmi-dist\n",
235-
"option.output_formatter=json\n",
236-
"option.max_rolling_batch_size=64\n",
237-
"option.model_loading_timeout=3600"
243+
"engine = MPI\n",
244+
"option.model_id = {{s3url}}\n",
245+
"option.task = text - generation\n",
246+
"option.tensor_parallel_degree = 8\n",
247+
"option.rolling_batch = lmi - dist\n",
248+
"option.output_formatter = json\n",
249+
"option.max_rolling_batch_size = 64\n",
250+
"option.model_loading_timeout = 3600"
238251
]
239252
},
240253
{
@@ -271,11 +284,10 @@
271284
},
272285
"outputs": [],
273286
"source": [
274-
"\n",
275287
"inference_image_uri = sagemaker.image_uris.retrieve(\n",
276288
" framework=\"djl-deepspeed\", region=region, version=\"0.25.0\"\n",
277289
")\n",
278-
"print(f\"Image going to be used is ---- > {inference_image_uri}\")\n"
290+
"print(f\"Image going to be used is ---- > {inference_image_uri}\")"
279291
]
280292
},
281293
{
@@ -361,10 +373,11 @@
361373
"create_model_response = sm_client.create_model(\n",
362374
" ModelName=model_name,\n",
363375
" ExecutionRoleArn=role,\n",
364-
" PrimaryContainer={\"Image\": inference_image_uri,\n",
365-
" \"ModelDataUrl\": s3_code_artifact,\n",
366-
" 'Environment': {'MODEL_LOADING_TIMEOUT': '3600'}\n",
367-
" }\n",
376+
" PrimaryContainer={\n",
377+
" \"Image\": inference_image_uri,\n",
378+
" \"ModelDataUrl\": s3_code_artifact,\n",
379+
" \"Environment\": {\"MODEL_LOADING_TIMEOUT\": \"3600\"},\n",
380+
" },\n",
368381
")\n",
369382
"model_arn = create_model_response[\"ModelArn\"]\n",
370383
"\n",
@@ -476,7 +489,7 @@
476489
" \"parameters\": {\n",
477490
" \"do_sample\": True,\n",
478491
" \"max_new_tokens\": 256,\n",
479-
" \"top_k\" : 5,\n",
492+
" \"top_k\": 5,\n",
480493
" },\n",
481494
" }\n",
482495
" ),\n",
@@ -529,6 +542,46 @@
529542
"sm_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)\n",
530543
"sm_client.delete_model(ModelName=model_name)"
531544
]
545+
},
546+
{
547+
"cell_type": "markdown",
548+
"id": "db1e98bd",
549+
"metadata": {},
550+
"source": [
551+
"## Notebook CI Test Results\n",
552+
"\n",
553+
"This notebook was tested in multiple regions. The test results are as follows, except for us-west-2 which is shown at the top of the notebook.\n",
554+
"\n",
555+
"![This us-east-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-east-1/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b_lmi_v7.ipynb)\n",
556+
"\n",
557+
"![This us-east-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-east-2/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b_lmi_v7.ipynb)\n",
558+
"\n",
559+
"![This us-west-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-west-1/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b_lmi_v7.ipynb)\n",
560+
"\n",
561+
"![This ca-central-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ca-central-1/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b_lmi_v7.ipynb)\n",
562+
"\n",
563+
"![This sa-east-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/sa-east-1/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b_lmi_v7.ipynb)\n",
564+
"\n",
565+
"![This eu-west-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-west-1/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b_lmi_v7.ipynb)\n",
566+
"\n",
567+
"![This eu-west-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-west-2/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b_lmi_v7.ipynb)\n",
568+
"\n",
569+
"![This eu-west-3 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-west-3/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b_lmi_v7.ipynb)\n",
570+
"\n",
571+
"![This eu-central-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-central-1/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b_lmi_v7.ipynb)\n",
572+
"\n",
573+
"![This eu-north-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-north-1/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b_lmi_v7.ipynb)\n",
574+
"\n",
575+
"![This ap-southeast-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-southeast-1/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b_lmi_v7.ipynb)\n",
576+
"\n",
577+
"![This ap-southeast-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-southeast-2/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b_lmi_v7.ipynb)\n",
578+
"\n",
579+
"![This ap-northeast-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-northeast-1/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b_lmi_v7.ipynb)\n",
580+
"\n",
581+
"![This ap-northeast-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-northeast-2/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b_lmi_v7.ipynb)\n",
582+
"\n",
583+
"![This ap-south-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-south-1/inference|generativeai|llm-workshop|deploy-V7-lmi|llama2_70b_lmi_v7.ipynb)\n"
584+
]
532585
}
533586
],
534587
"metadata": {

0 commit comments

Comments
 (0)