Skip to content
This repository was archived by the owner on May 23, 2024. It is now read-only.

Commit a27e1f9

Browse files
jesterhazy (Jonathan Esterhazy)
authored and committed
add support for TensorFlow Serving 1.12
1 parent a4b647b commit a27e1f9

File tree

8 files changed

+127
-11
lines changed

8 files changed

+127
-11
lines changed

container/sagemaker/nginx.conf.template

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ http {
2727
client_body_buffer_size 100m;
2828
subrequest_output_buffer_size 100m;
2929

30+
set $tfs_version %TFS_VERSION%;
3031
set $default_tfs_model %TFS_DEFAULT_MODEL_NAME%;
3132

3233
location /tfs {

container/sagemaker/serve.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ def __init__(self):
2626
self._state = 'initializing'
2727
self._nginx = None
2828
self._tfs = None
29+
self._tfs_version = os.environ.get('SAGEMAKER_TFS_VERSION', '1.12')
2930
self._nginx_http_port = os.environ.get('SAGEMAKER_BIND_TO_PORT', '8080')
3031
self._nginx_loglevel = os.environ.get('SAGEMAKER_TFS_NGINX_LOGLEVEL', 'error')
31-
3232
self._tfs_default_model_name = os.environ.get('SAGEMAKER_TFS_DEFAULT_MODEL_NAME', None)
3333

3434
if 'SAGEMAKER_SAFE_PORT_RANGE' in os.environ:
@@ -94,6 +94,7 @@ def _create_nginx_config(self):
9494
template = self._read_nginx_template()
9595
pattern = re.compile(r'%(\w+)%')
9696
template_values = {
97+
'TFS_VERSION': self._tfs_version,
9798
'TFS_REST_PORT': self._tfs_rest_port,
9899
'TFS_DEFAULT_MODEL_NAME': self._tfs_default_model_name,
99100
'NGINX_HTTP_PORT': self._nginx_http_port,
@@ -115,6 +116,7 @@ def _read_nginx_template(self):
115116
return template
116117

117118
def _start_tfs(self):
119+
self._log_version('tensorflow_model_server --version', 'tensorflow version info:')
118120
tfs_config_path = '/sagemaker/model-config.cfg'
119121
cmd = "tensorflow_model_server --port={} --rest_api_port={} --model_config_file={}".format(
120122
self._tfs_grpc_port, self._tfs_rest_port, tfs_config_path)
@@ -124,10 +126,20 @@ def _start_tfs(self):
124126
self._tfs = p
125127

126128
def _start_nginx(self):
129+
self._log_version('/usr/sbin/nginx -V', 'nginx version info:')
127130
p = subprocess.Popen('/usr/sbin/nginx -c /sagemaker/nginx.conf'.split())
128131
log.info('started nginx (pid: %d)', p.pid)
129132
self._nginx = p
130133

134+
def _log_version(self, command, message):
135+
try:
136+
output = subprocess.check_output(
137+
command.split(),
138+
stderr=subprocess.STDOUT).decode('utf-8', 'backslashreplace').strip()
139+
log.info('{}\n{}'.format(message, output))
140+
except subprocess.CalledProcessError:
141+
log.warning('failed to run command: %s', command)
142+
131143
def _stop(self, *args):
132144
self._state = 'stopping'
133145
log.info('stopping services')
@@ -148,7 +160,6 @@ def start(self):
148160
self._state = 'starting'
149161
signal.signal(signal.SIGTERM, self._stop)
150162

151-
# TODO set env vars for ports etc
152163
self._create_tfs_config()
153164
self._create_nginx_config()
154165

container/sagemaker/tensorflow-serving.js

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,31 @@ function invocations(r) {
1414
}
1515

1616
function ping(r) {
17-
// TODO replace with call to Model Status API when Tensorflow Serving 1.12 is available
17+
if ('1.11' == r.variables.tfs_version) {
18+
return ping_tfs_1_11(r)
19+
}
20+
21+
var uri = make_tfs_uri(r, false)
22+
23+
function callback (reply) {
24+
if (reply.status == 200 && reply.responseBody.includes('"AVAILABLE"')) {
25+
r.return(200)
26+
} else {
27+
r.error('failed ping' + reply.responseBody)
28+
r.return(502)
29+
}
30+
}
31+
32+
r.subrequest(uri, callback)
33+
}
34+
35+
function ping_tfs_1_11(r) {
1836
// hack for TF 1.11
1937
// send an arbitrary fixed request to the default model.
2038
// if response is 400, the model is ok (but input was bad), so return 200
2139
// also return 200 in unlikely case our request was really valid
22-
var uri = make_tfs_uri(r)
40+
41+
var uri = make_tfs_uri(r, true)
2342
var options = {
2443
method: 'POST',
2544
body: '{"instances": "invalid"}'
@@ -46,7 +65,7 @@ function return_error(r, code, message) {
4665
}
4766

4867
function tfs_json_request(r, json) {
49-
var uri = make_tfs_uri(r)
68+
var uri = make_tfs_uri(r, true)
5069
var options = {
5170
method: 'POST',
5271
body: json
@@ -65,14 +84,18 @@ function tfs_json_request(r, json) {
6584
r.subrequest(uri, options, callback)
6685
}
6786

68-
function make_tfs_uri(r) {
87+
function make_tfs_uri(r, with_method) {
6988
var attributes = parse_custom_attributes(r)
7089

7190
var uri = tfs_base_uri + (attributes['tfs-model-name'] || r.variables.default_tfs_model)
7291
if ('tfs-model-version' in attributes) {
7392
uri += '/versions/' + attributes['tfs-model-version']
7493
}
75-
uri += ':' + (attributes['tfs-method'] || 'predict')
94+
95+
if (with_method) {
96+
uri += ':' + (attributes['tfs-method'] || 'predict')
97+
}
98+
7699
return uri
77100
}
78101

docker/1.11/Dockerfile.cpu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,5 @@ RUN \
1616

1717
COPY ./ /
1818

19+
ENV SAGEMAKER_TFS_VERSION "1.11"
1920
ENV PATH "$PATH:/sagemaker"

docker/1.11/Dockerfile.gpu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,4 +55,5 @@ RUN \
5555

5656
COPY ./ /
5757

58+
ENV SAGEMAKER_TFS_VERSION "1.11"
5859
ENV PATH "$PATH:/sagemaker"

docker/1.12/Dockerfile.cpu

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
FROM tensorflow/serving:1.12.0 as tfs
2+
FROM ubuntu:16.04
3+
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
4+
5+
COPY --from=tfs /usr/bin/tensorflow_model_server /usr/bin/tensorflow_model_server
6+
7+
# nginx + njs
8+
RUN \
9+
apt-get update && \
10+
apt-get -y install --no-install-recommends curl && \
11+
curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - && \
12+
echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list && \
13+
apt-get update && \
14+
apt-get -y install --no-install-recommends nginx nginx-module-njs python3 python3-pip && \
15+
apt-get clean
16+
17+
COPY ./ /
18+
19+
ENV SAGEMAKER_TFS_VERSION "1.12"
20+
ENV PATH "$PATH:/sagemaker"

docker/1.12/Dockerfile.gpu

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
FROM tensorflow/serving:1.12.0-gpu as tfs
2+
FROM nvidia/cuda:9.0-base-ubuntu16.04
3+
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
4+
5+
COPY --from=tfs /usr/bin/tensorflow_model_server /usr/bin/tensorflow_model_server
6+
7+
# https://github.com/tensorflow/serving/blob/1.12.0/tensorflow_serving/tools/docker/Dockerfile.gpu
8+
ENV NCCL_VERSION=2.2.13
9+
ENV CUDNN_VERSION=7.2.1.38
10+
ENV TF_TENSORRT_VERSION=4.1.2
11+
12+
RUN \
13+
apt-get update && apt-get install -y --no-install-recommends \
14+
ca-certificates \
15+
cuda-command-line-tools-9-0 \
16+
cuda-command-line-tools-9-0 \
17+
cuda-cublas-9-0 \
18+
cuda-cufft-9-0 \
19+
cuda-curand-9-0 \
20+
cuda-cusolver-9-0 \
21+
cuda-cusparse-9-0 \
22+
libcudnn7=${CUDNN_VERSION}-1+cuda9.0 \
23+
libnccl2=${NCCL_VERSION}-1+cuda9.0 \
24+
libgomp1 && \
25+
apt-get clean && \
26+
rm -rf /var/lib/apt/lists/*
27+
28+
# The 'apt-get install' of nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0
29+
# adds a new list which contains libnvinfer library, so it needs another
30+
# 'apt-get update' to retrieve that list before it can actually install the
31+
# library.
32+
# We don't install libnvinfer-dev since we don't need to build against TensorRT,
33+
# and libnvinfer4 doesn't contain libnvinfer.a static library.
34+
RUN apt-get update && \
35+
apt-get install --no-install-recommends \
36+
nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0 && \
37+
apt-get update && \
38+
apt-get install --no-install-recommends \
39+
libnvinfer4=${TF_TENSORRT_VERSION}-1+cuda9.0 && \
40+
apt-get clean && \
41+
rm -rf /var/lib/apt/lists/* && \
42+
rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* && \
43+
rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* && \
44+
rm /usr/lib/x86_64-linux-gnu/libnvparsers*
45+
46+
# nginx + njs
47+
RUN \
48+
apt-get update && \
49+
apt-get -y install --no-install-recommends curl && \
50+
curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - && \
51+
echo 'deb http://nginx.org/packages/ubuntu/ xenial nginx' >> /etc/apt/sources.list && \
52+
apt-get update && \
53+
apt-get -y install --no-install-recommends nginx nginx-module-njs python3 python3-pip && \
54+
apt-get clean
55+
56+
COPY ./ /
57+
58+
ENV SAGEMAKER_TFS_VERSION "1.12"
59+
ENV PATH "$PATH:/sagemaker"

test/integration/test_container.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,16 @@
2323
BASE_URL = 'http://localhost:8080/invocations'
2424

2525

26-
@pytest.fixture(scope='module', autouse=True)
27-
def container():
26+
@pytest.fixture(scope='module', autouse=True, params=['1.11', '1.12'])
27+
def container(request):
2828
model_dir = os.path.abspath('test/resources/models')
2929
command = 'docker run --name sagemaker-tensorflow-serving-test -v {}:/opt/ml/model:ro -p 8080:8080'.format(
3030
model_dir)
3131
command += ' -e SAGEMAKER_TFS_DEFAULT_MODEL_NAME=half_plus_three'
3232
command += ' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info'
3333
command += ' -e SAGEMAKER_BIND_TO_PORT=8080'
3434
command += ' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999'
35-
command += ' sagemaker-tensorflow-serving:1.11.1-cpu serve'
35+
command += ' sagemaker-tensorflow-serving:{}-cpu serve'.format(request.param)
3636
proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT)
3737

3838
attempts = 0
@@ -56,7 +56,7 @@ def make_request(data, content_type='application/json', method='predict'):
5656
'tfs-model-name=half_plus_three,tfs-method=%s' % method
5757
}
5858
response = requests.post(BASE_URL, data=data, headers=headers)
59-
return json.loads(response.content)
59+
return json.loads(response.content.decode('utf-8'))
6060

6161

6262
def test_predict():

0 commit comments

Comments (0)