
Commit 79fa1f8

Serve using MMS and inference toolkit (#4)

* Update Dockerfiles to use PyTorch 1.2.0
* Add MMS changes
* Serve inference using MMS

1 parent a1b8ebb · commit 79fa1f8

30 files changed: 1,653 additions & 311 deletions

buildspec.yml

Lines changed: 26 additions & 37 deletions
@@ -2,10 +2,12 @@ version: 0.2
 env:
   variables:
-    FRAMEWORK_VERSION: '1.1.0'
-    CPU_PY_VERSION: '2'
+    FRAMEWORK_VERSION: '1.2.0'
+    CPU_PY2_VERSION: '2'
+    CPU_PY3_VERSION: '3'
     CPU_INSTANCE_TYPE: 'ml.c4.xlarge'
-    GPU_PY_VERSION: '3'
+    GPU_PY2_VERSION: '2'
+    GPU_PY3_VERSION: '3'
     GPU_INSTANCE_TYPE: 'ml.p2.xlarge'
     LOCAL_BASE_REPO: 'pytorch-base'
     ECR_REPO: 'sagemaker-test'
@@ -33,39 +35,26 @@ phases:
     # run unit tests
     - pytest test/unit

-    # build cpu base image
-    - base_dir="docker/$FRAMEWORK_VERSION/base"
-    - cpu_py2_base_tag="$FRAMEWORK_VERSION-cpu-py2"
-    - cpu_py3_base_tag="$FRAMEWORK_VERSION-cpu-py3"
+    # create wheel in dist/
+    - python3 setup.py bdist_wheel
+    - whl_name=$(ls dist/sagemaker_pytorch_serving_container*.whl)
+    - cp $whl_name sagemaker_pytorch_serving_container-1.2-py2.py3-none-any.whl
+
     - cpu_dockerfile="Dockerfile.cpu"
-    - cd $base_dir
-    - docker build -t $LOCAL_BASE_REPO:$cpu_py2_base_tag -f $cpu_dockerfile --build-arg py_version=2 .
-    - docker build -t $LOCAL_BASE_REPO:$cpu_py3_base_tag -f $cpu_dockerfile --build-arg py_version=3 .
-    - cd ../../../
-
-    # build gpu base image
-    - gpu_py2_base_tag="$FRAMEWORK_VERSION-gpu-py2"
-    - gpu_py3_base_tag="$FRAMEWORK_VERSION-gpu-py3"
     - gpu_dockerfile="Dockerfile.gpu"
-    - cd $base_dir
-    - docker build -t $LOCAL_BASE_REPO:$gpu_py2_base_tag -f $gpu_dockerfile --build-arg py_version=2 .
-    - docker build -t $LOCAL_BASE_REPO:$gpu_py3_base_tag -f $gpu_dockerfile --build-arg py_version=3 .
-    - cd ../../../

-    # create wheel
-    - python3 setup.py bdist_wheel
-
-    # build cpu image
-    - build_dir="docker/$FRAMEWORK_VERSION/final"
+    # build py2 images
+    - build_dir="docker/$FRAMEWORK_VERSION/py$CPU_PY2_VERSION"
     - CPU_PY2_TAG="$FRAMEWORK_VERSION-cpu-py2-$BUILD_ID"
-    - CPU_PY3_TAG="$FRAMEWORK_VERSION-cpu-py3-$BUILD_ID"
+    - GPU_PY2_TAG="$FRAMEWORK_VERSION-gpu-py2-$BUILD_ID"
     - docker build -f "$build_dir/$cpu_dockerfile" --build-arg py_version=2 -t $PREPROD_IMAGE:$CPU_PY2_TAG .
-    - docker build -f "$build_dir/$cpu_dockerfile" --build-arg py_version=3 -t $PREPROD_IMAGE:$CPU_PY3_TAG .
+    - docker build -f "$build_dir/$gpu_dockerfile" --build-arg py_version=2 -t $PREPROD_IMAGE:$GPU_PY2_TAG .

-    # build gpu image
-    - GPU_PY2_TAG="$FRAMEWORK_VERSION-gpu-py2-$BUILD_ID"
+    # build py3 images
+    - build_dir="docker/$FRAMEWORK_VERSION/py$GPU_PY3_VERSION"
+    - CPU_PY3_TAG="$FRAMEWORK_VERSION-cpu-py3-$BUILD_ID"
     - GPU_PY3_TAG="$FRAMEWORK_VERSION-gpu-py3-$BUILD_ID"
-    - docker build -f "$build_dir/$gpu_dockerfile" --build-arg py_version=2 -t $PREPROD_IMAGE:$GPU_PY2_TAG .
+    - docker build -f "$build_dir/$cpu_dockerfile" --build-arg py_version=3 -t $PREPROD_IMAGE:$CPU_PY3_TAG .
     - docker build -f "$build_dir/$gpu_dockerfile" --build-arg py_version=3 -t $PREPROD_IMAGE:$GPU_PY3_TAG .

     # push images to ecr
@@ -84,8 +73,8 @@ phases:
     # run cpu integration tests
     - |
       if has-matching-changes "test/" "tests/" "src/*.py" "docker/*" "buildspec.yml"; then
-        pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY_VERSION --processor cpu --tag $CPU_PY3_TAG
-        pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY_VERSION --processor cpu --tag $CPU_PY2_TAG
+        pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY3_VERSION --processor cpu --tag $CPU_PY3_TAG
+        pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY2_VERSION --processor cpu --tag $CPU_PY2_TAG
       else
         echo "skipping cpu integration tests"
       fi
@@ -94,8 +83,8 @@ phases:
     - |
       if has-matching-changes "test/" "tests/" "src/*.py" "docker/*" "buildspec.yml"; then
         printf "$SETUP_CMDS" > $SETUP_FILE
-        py3_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --framework-version $FRAMEWORK_VERSION --py-version $GPU_PY_VERSION --processor gpu --tag $GPU_PY3_TAG"
-        py3_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --framework-version $FRAMEWORK_VERSION --py-version $GPU_PY_VERSION --processor gpu --tag $GPU_PY2_TAG"
+        py3_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --framework-version $FRAMEWORK_VERSION --py-version $GPU_PY3_VERSION --processor gpu --tag $GPU_PY3_TAG"
+        py3_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --framework-version $FRAMEWORK_VERSION --py-version $GPU_PY2_VERSION --processor gpu --tag $GPU_PY2_TAG"
         remote-test --github-repo $GITHUB_REPO --test-cmd "$py3_cmd" --setup-file $SETUP_FILE --pr-number "$PR_NUM"
       else
         echo "skipping gpu integration tests"
@@ -104,17 +93,17 @@ phases:
     # run cpu sagemaker tests
     - |
       if has-matching-changes "test/" "tests/" "src/*.py" "docker/*" "buildspec.yml"; then
-        pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $CPU_PY3_TAG
-        pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $CPU_PY2_TAG
+        pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY3_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $CPU_PY3_TAG
+        pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY2_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $CPU_PY2_TAG
       else
         echo "skipping cpu sagemaker tests"
       fi

     # run gpu sagemaker tests
     - |
       if has-matching-changes "test/" "tests/" "src/*.py" "docker/*" "buildspec.yml"; then
-        pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --py-version $GPU_PY_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GPU_PY3_TAG
-        pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $GPU_PY2_TAG
+        pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --py-version $GPU_PY3_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GPU_PY3_TAG
+        pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --py-version $GPU_PY2_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GPU_PY2_TAG
       else
         echo "skipping gpu sagemaker tests"
       fi
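
Because setup.py bdist_wheel stamps the package version into the wheel filename, the buildspec copies the wheel to a fixed name that the Dockerfiles below can COPY without a wildcard. A minimal Python sketch of that staging step (the stage_wheel helper is illustrative only, not part of this commit):

import glob
import shutil

def stage_wheel(fixed_name='sagemaker_pytorch_serving_container-1.2-py2.py3-none-any.whl'):
    # 'python3 setup.py bdist_wheel' writes a version-stamped wheel into dist/
    whl_name = glob.glob('dist/sagemaker_pytorch_serving_container*.whl')[0]
    # copy it to a stable filename so COPY in the Dockerfiles needs no wildcard
    shutil.copy(whl_name, fixed_name)
    return fixed_name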

docker/1.2.0/py2/Dockerfile.cpu

Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,78 @@
FROM ubuntu:16.04

LABEL maintainer="Amazon AI"

RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    cmake \
    curl \
    git \
    wget \
    vim \
    jq \
    libsm6 \
    libxext6 \
    libxrender-dev \
    build-essential \
    zlib1g-dev \
    libglib2.0-0 \
    libgl1-mesa-glx \
    openjdk-8-jdk-headless

# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8

# Build argument selecting the Python version to install
ARG PYTHON_VERSION=2.7

RUN curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
    chmod +x ~/miniconda.sh && \
    ~/miniconda.sh -b -p /opt/conda && \
    rm ~/miniconda.sh && \
    /opt/conda/bin/conda update conda && \
    /opt/conda/bin/conda install -y python=$PYTHON_VERSION \
        numpy==1.16.4 \
        scipy==1.2.1 \
        ipython==5.8.0 \
        mkl==2019.4 \
        mkl-include==2019.4 \
        cython==0.29.12 \
        typing==3.7.4 && \
    /opt/conda/bin/conda clean -ya
ENV PATH /opt/conda/bin:$PATH

ARG PYTORCH_VERSION=1.2.0
ARG TORCHVISION_VERSION=0.4.0
ARG MMS_VERSION=1.0.5
RUN conda install -c conda-forge awscli==1.16.210 opencv==4.0.1 && \
    conda install -y scikit-learn==0.20.3 \
        pandas==0.24.2 \
        pillow==6.1.0 \
        h5py==2.9.0 \
        requests==2.22.0 && \
    conda install pytorch==$PYTORCH_VERSION torchvision==$TORCHVISION_VERSION cpuonly -c pytorch && \
    conda clean -ya && \
    pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org && \
    pip install --pre -U mxnet-model-server

RUN useradd -m model-server \
    && mkdir -p /home/model-server/tmp \
    && chown -R model-server /home/model-server

COPY docker/$PYTORCH_VERSION/py2/mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
COPY docker/$PYTORCH_VERSION/py2/config.properties /home/model-server
COPY src/sagemaker_pytorch_serving_container/deep_learning_container.py /usr/local/bin/deep_learning_container.py

RUN chmod +x /usr/local/bin/dockerd-entrypoint.py && \
    chmod +x /usr/local/bin/deep_learning_container.py

COPY dist/sagemaker_pytorch_serving_container-1.2-py2.py3-none-any.whl /sagemaker_pytorch_serving_container-1.2-py2.py3-none-any.whl
RUN pip install --no-cache-dir /sagemaker_pytorch_serving_container-1.2-py2.py3-none-any.whl && \
    rm /sagemaker_pytorch_serving_container-1.2-py2.py3-none-any.whl

ENV SAGEMAKER_SERVING_MODULE sagemaker_pytorch_serving_container.serving:main

EXPOSE 8080 8081
ENV TEMP=/home/model-server/tmp
ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
CMD ["mxnet-model-server", "--start", "--mms-config", "/home/model-server/config.properties"]

docker/1.2.0/py2/Dockerfile.gpu

Lines changed: 105 additions & 0 deletions
@@ -0,0 +1,105 @@
FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu16.04
# NCCL_VERSION=2.4.7, CUDNN_VERSION=7.6.2.24
LABEL maintainer="Amazon AI"

RUN apt-get update && apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \
    build-essential \
    ca-certificates \
    cmake \
    cuda-command-line-tools-10-0 \
    cuda-cublas-10-0 \
    cuda-cufft-10-0 \
    cuda-curand-10-0 \
    cuda-cusolver-10-0 \
    cuda-cusparse-10-0 \
    libgomp1 \
    libibverbs-dev \
    curl \
    git \
    wget \
    vim \
    jq \
    libsm6 \
    libxext6 \
    libxrender-dev \
    zlib1g-dev \
    libglib2.0-0 \
    libgl1-mesa-glx \
    openjdk-8-jdk-headless

# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8

# Build argument selecting the Python version to install
ARG PYTHON_VERSION=2.7

# Install OpenSSH; allow OpenSSH to talk to containers without asking for confirmation
RUN apt-get install -y --no-install-recommends openssh-client openssh-server && \
    mkdir -p /var/run/sshd && \
    cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \
    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config

RUN curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
    chmod +x ~/miniconda.sh && \
    ~/miniconda.sh -b -p /opt/conda && \
    rm ~/miniconda.sh && \
    /opt/conda/bin/conda update conda && \
    /opt/conda/bin/conda install -y python=$PYTHON_VERSION \
        numpy==1.16.4 \
        scipy==1.2.1 \
        ipython==5.8.0 \
        mkl==2019.4 \
        mkl-include==2019.4 \
        cython==0.29.12 \
        typing==3.7.4 && \
    /opt/conda/bin/conda clean -ya
ENV PATH /opt/conda/bin:$PATH

ARG PYTORCH_VERSION=1.2.0
ARG TORCHVISION_VERSION=0.4.0
ARG MMS_VERSION=1.0.5
RUN conda install -c pytorch magma-cuda100 && \
    conda install -c conda-forge awscli==1.16.210 opencv==4.0.1 && \
    conda install -y scikit-learn==0.20.3 \
        pandas==0.24.2 \
        pillow==6.1.0 \
        h5py==2.9.0 \
        requests==2.22.0 && \
    conda install pytorch==$PYTORCH_VERSION torchvision==$TORCHVISION_VERSION cudatoolkit=10.0 -c pytorch && \
    conda clean -ya && \
    /opt/conda/bin/conda config --set ssl_verify False && \
    pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org && \
    pip install --pre -U mxnet-model-server

RUN useradd -m model-server \
    && mkdir -p /home/model-server/tmp \
    && chown -R model-server /home/model-server

COPY docker/$PYTORCH_VERSION/py2/mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
COPY docker/$PYTORCH_VERSION/py2/config.properties /home/model-server
COPY src/sagemaker_pytorch_serving_container/deep_learning_container.py /usr/local/bin/deep_learning_container.py

RUN chmod +x /usr/local/bin/dockerd-entrypoint.py && \
    chmod +x /usr/local/bin/deep_learning_container.py

# Install OpenSSH for MPI to communicate between containers; allow OpenSSH to talk to containers without asking for confirmation
RUN apt-get install -y --no-install-recommends openssh-client openssh-server && \
    mkdir -p /var/run/sshd && \
    cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \
    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config

# RUN pip install --no-cache-dir 'opencv-python>=4.0,<4.1'

COPY dist/sagemaker_pytorch_serving_container-1.2-py2.py3-none-any.whl /sagemaker_pytorch_serving_container-1.2-py2.py3-none-any.whl
RUN pip install --no-cache-dir /sagemaker_pytorch_serving_container-1.2-py2.py3-none-any.whl && \
    rm /sagemaker_pytorch_serving_container-1.2-py2.py3-none-any.whl

ENV SAGEMAKER_SERVING_MODULE sagemaker_pytorch_serving_container.serving:main

EXPOSE 8080 8081
ENV TEMP=/home/model-server/tmp
ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
CMD ["mxnet-model-server", "--start", "--mms-config", "/home/model-server/config.properties"]

docker/1.2.0/py2/config.properties

Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
vmargs=-Xmx128m -XX:-UseLargePages -XX:+UseG1GC -XX:MaxMetaspaceSize=32M -XX:MaxDirectMemorySize=10m -XX:+ExitOnOutOfMemoryError
model_store=/opt/ml/model
load_models=ALL
inference_address=http://0.0.0.0:8080
management_address=http://0.0.0.0:8081
# management_address=unix:/tmp/management.sock
# number_of_netty_threads=0
# netty_client_threads=0
# default_response_timeout=120
# default_workers_per_model=0
# job_queue_size=100
# async_logging=false
# number_of_gpu=1
# cors_allowed_origin
# cors_allowed_methods
# cors_allowed_headers
# keystore=src/test/resources/keystore.p12
# keystore_pass=changeit
# keystore_type=PKCS12
# private_key_file=src/test/resources/key.pem
# certificate_file=src/test/resources/certs.pem
# max_response_size=6553500
# max_request_size=6553500
# blacklist_env_vars=
# decode_input_request=false
# enable_envvars_config=false
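
With this configuration MMS serves inference on port 8080 and its management API on port 8081; under the SageMaker contract, liveness checks arrive as GET /ping and traffic as POST /invocations. A hedged sketch of exercising a locally running container, assuming both ports are published and the default handler accepts JSON:

import json
import requests

INFERENCE = 'http://localhost:8080'   # inference_address
MANAGEMENT = 'http://localhost:8081'  # management_address

# SageMaker's liveness probe
assert requests.get(INFERENCE + '/ping').status_code == 200

# MMS management API: list models loaded from model_store (/opt/ml/model)
print(requests.get(MANAGEMENT + '/models').json())

# real traffic arrives as POST /invocations
resp = requests.post(INFERENCE + '/invocations',
                     data=json.dumps([[0.0, 1.0, 2.0]]),  # hypothetical input
                     headers={'Content-Type': 'application/json'})
print(resp.status_code, resp.content)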

docker/1.2.0/py2/mms-entrypoint.py

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
import os.path
import shlex
import subprocess
import sys

from sagemaker_pytorch_serving_container import serving

# Outside of SageMaker (no /opt/ml/input/config), launch the telemetry script
# in the background with its output discarded. Shell tokens such as
# '&>/dev/null' and '&' are not interpreted by subprocess, so Popen is used
# to detach instead.
if not os.path.exists('/opt/ml/input/config'):
    with open(os.devnull, 'wb') as devnull:
        subprocess.Popen(['python', '/usr/local/bin/deep_learning_container.py'],
                         stdout=devnull, stderr=devnull)

# 'serve' (the argument SageMaker hosting passes) starts the model server;
# any other command line is re-executed verbatim.
if sys.argv[1] == 'serve':
    serving.main()
else:
    subprocess.check_call(shlex.split(' '.join(sys.argv[1:])))

# prevent docker exit
subprocess.call(['tail', '-f', '/dev/null'])
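
SageMaker hosting starts the image as docker run <image> serve, which takes the serving.main() branch above, while the image's default CMD (mxnet-model-server --start ...) flows through the else branch. A sketch of a local smoke run; the image tag and model path are hypothetical:

import subprocess

subprocess.check_call([
    'docker', 'run',
    '-p', '8080:8080', '-p', '8081:8081',
    '-v', '/path/to/model:/opt/ml/model',  # hypothetical host model directory
    'preprod-pytorch:1.2.0-cpu-py2-build',  # hypothetical image tag
    'serve',                                # dispatches to serving.main()
])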
