Skip to content
This repository was archived by the owner on May 23, 2024. It is now read-only.

Commit 1bd309b

Browse files
authored
Nginx timeouts (#221)
* Add support for changing nginx proxy_read_timeout. * Fix space in log message. * Fix flake8 issue with string construction. * Fix dockerfiles for build all script. * Fix dockerfiles for build all script. * Update log message to include prior Nginx timeout. * Trigger build. * Remove old log message. * Trigger Build again.
1 parent a58583d commit 1bd309b

File tree

6 files changed

+202
-39
lines changed

6 files changed

+202
-39
lines changed

README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -629,6 +629,15 @@ how long a Gunicorn worker may be silent before it is killed and restarted.
629629
# Defaults to 30.
630630
SAGEMAKER_GUNICORN_TIMEOUT_SECONDS="60"
631631
```
632+
[Configures](http://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_read_timeout)
633+
the timeout for reading a response from the proxied server.
634+
Note: If SAGEMAKER_GUNICORN_TIMEOUT_SECONDS is greater,
635+
SAGEMAKER_NGINX_PROXY_READ_TIMEOUT_SECONDS will be set to the
636+
value of SAGEMAKER_GUNICORN_TIMEOUT_SECONDS.
637+
```bash
638+
# Defaults to 60.
639+
SAGEMAKER_NGINX_PROXY_READ_TIMEOUT_SECONDS="120"
640+
```
632641

633642
## Deploying to Multi-Model Endpoint
634643

docker/1.15/Dockerfile.gpu

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ ENV LANG=C.UTF-8
1616
ENV NCCL_VERSION=2.4.7-1+cuda10.0
1717
ENV CUDNN_VERSION=7.5.1.10-1+cuda10.0
1818
ENV TF_TENSORRT_VERSION=5.0.2
19+
ENV TF_TENSORRT_LIB_VERSION=5.1.2
1920
ENV PYTHONDONTWRITEBYTECODE=1
2021
# Python won’t try to write .pyc or .pyo files on the import of source modules
2122
ENV PYTHONUNBUFFERED=1
@@ -27,6 +28,21 @@ ENV MODEL_NAME=model
2728
# Prevent docker build from getting stopped by request for user interaction
2829
ENV DEBIAN_FRONTEND=noninteractive
2930

31+
# https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212771
32+
# Fix cuda repo's GPG key. Nvidia is no longer updating the machine-learning repo.
33+
# Need to manually pull and install necessary debs to continue using these versions.
34+
RUN rm /etc/apt/sources.list.d/cuda.list \
35+
&& rm /etc/apt/sources.list.d/nvidia-ml.list \
36+
&& apt-key del 7fa2af80 \
37+
&& apt-get update && apt-get install -y --no-install-recommends wget \
38+
&& wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb \
39+
&& dpkg -i cuda-keyring_1.0-1_all.deb \
40+
&& wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/libcudnn7_${CUDNN_VERSION}_amd64.deb \
41+
&& dpkg -i libcudnn7_${CUDNN_VERSION}_amd64.deb \
42+
&& wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/libnccl2_${NCCL_VERSION}_amd64.deb \
43+
&& dpkg -i libnccl2_${NCCL_VERSION}_amd64.deb \
44+
&& rm *.deb
45+
3046
RUN apt-get update \
3147
&& apt-get install -y --no-install-recommends \
3248
ca-certificates \
@@ -36,8 +52,6 @@ RUN apt-get update \
3652
cuda-curand-10-0 \
3753
cuda-cusolver-10-0 \
3854
cuda-cusparse-10-0 \
39-
libcudnn7=${CUDNN_VERSION} \
40-
libnccl2=${NCCL_VERSION} \
4155
libgomp1 \
4256
curl \
4357
git \
@@ -49,25 +63,6 @@ RUN apt-get update \
4963
&& apt-get clean \
5064
&& rm -rf /var/lib/apt/lists/*
5165

52-
# The 'apt-get install' of nvinfer-runtime-trt-repo-ubuntu1804-4.0.1-ga-cuda10.0
53-
# adds a new list which contains libnvinfer library, so it needs another
54-
# 'apt-get update' to retrieve that list before it can actually install the
55-
# library.
56-
# We don't install libnvinfer-dev since we don't need to build against TensorRT,
57-
# and libnvinfer4 doesn't contain libnvinfer.a static library.
58-
# Nvidia does not publish a TensorRT Runtime library for Ubuntu 18.04 with Cuda 10.1 support, so we stick with cuda 10.0.
59-
RUN apt-get update \
60-
&& apt-get install -y --no-install-recommends \
61-
nvinfer-runtime-trt-repo-ubuntu1804-${TF_TENSORRT_VERSION}-ga-cuda10.0 \
62-
&& apt-get update \
63-
&& apt-get install -y --no-install-recommends \
64-
libnvinfer5=${TF_TENSORRT_VERSION}-1+cuda10.0 \
65-
&& apt-get clean \
66-
&& rm -rf /var/lib/apt/lists/* \
67-
&& rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* \
68-
&& rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* \
69-
&& rm /usr/lib/x86_64-linux-gnu/libnvparsers*
70-
7166
RUN ${PIP} --no-cache-dir install --upgrade \
7267
pip \
7368
setuptools
@@ -106,6 +101,19 @@ RUN ${PIP} install -U --no-cache-dir \
106101
&& ${PIP} install --no-dependencies --no-cache-dir \
107102
tensorflow-serving-api-gpu==1.15.0
108103

104+
# https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212771
105+
# Fix cuda repo's GPG key. Nvidia is no longer updating the machine-learning repo.
106+
# Need to manually pull and install necessary debs to continue using these versions.
107+
RUN wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvinfer-runtime-trt-repo-ubuntu1804-${TF_TENSORRT_VERSION}-ga-cuda10.0_1-1_amd64.deb \
108+
&& dpkg -i nvinfer-runtime-trt-repo-ubuntu1804-${TF_TENSORRT_VERSION}-ga-cuda10.0_1-1_amd64.deb \
109+
&& wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/libnvinfer5_${TF_TENSORRT_LIB_VERSION}-1+cuda10.0_amd64.deb \
110+
&& dpkg -i libnvinfer5_${TF_TENSORRT_LIB_VERSION}-1+cuda10.0_amd64.deb \
111+
&& rm *.deb \
112+
&& rm -rf /var/lib/apt/lists/* \
113+
&& rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* \
114+
&& rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* \
115+
&& rm /usr/lib/x86_64-linux-gnu/libnvparsers*
116+
109117
COPY sagemaker /sagemaker
110118

111119
RUN curl ${TF_MODEL_SERVER_SOURCE} -o /usr/bin/tensorflow_model_server \

docker/2.1/Dockerfile.gpu

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ ARG TFS_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/2.1/Serving/GPU/te
1010

1111
ENV NCCL_VERSION=2.4.7-1+cuda10.1
1212
ENV CUDNN_VERSION=7.6.2.24-1+cuda10.1
13-
ENV TF_TENSORRT_VERSION=6.0.1
13+
ENV TF_TENSORRT_VERSION=5.0.2
14+
ENV TF_TENSORRT_LIB_VERSION=6.0.1
1415

1516
# See http://bugs.python.org/issue19846
1617
ENV LANG=C.UTF-8
@@ -25,6 +26,21 @@ ENV MODEL_NAME=model
2526
# Fix for the interactive mode during an install in step 21
2627
ENV DEBIAN_FRONTEND=noninteractive
2728

29+
# https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212771
30+
# Fix cuda repo's GPG key. Nvidia is no longer updating the machine-learning repo.
31+
# Need to manually pull and install necessary debs to continue using these versions.
32+
RUN rm /etc/apt/sources.list.d/cuda.list \
33+
&& rm /etc/apt/sources.list.d/nvidia-ml.list \
34+
&& apt-key del 7fa2af80 \
35+
&& apt-get update && apt-get install -y --no-install-recommends wget \
36+
&& wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb \
37+
&& dpkg -i cuda-keyring_1.0-1_all.deb \
38+
&& wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/libcudnn7_${CUDNN_VERSION}_amd64.deb \
39+
&& dpkg -i libcudnn7_${CUDNN_VERSION}_amd64.deb \
40+
&& wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/libnccl2_${NCCL_VERSION}_amd64.deb \
41+
&& dpkg -i libnccl2_${NCCL_VERSION}_amd64.deb \
42+
&& rm *.deb
43+
2844
# allow unauthenticated and allow downgrades for special libcublas library
2945
RUN apt-get update \
3046
&& apt-get install -y --no-install-recommends --allow-unauthenticated --allow-downgrades\
@@ -37,8 +53,6 @@ RUN apt-get update \
3753
#cuda-cublas-dev not available with 10-1, install libcublas instead
3854
libcublas10=10.1.0.105-1 \
3955
libcublas-dev=10.1.0.105-1 \
40-
libcudnn7=${CUDNN_VERSION} \
41-
libnccl2=${NCCL_VERSION} \
4256
libgomp1 \
4357
curl \
4458
git \
@@ -52,21 +66,6 @@ RUN apt-get update \
5266
&& apt-get clean \
5367
&& rm -rf /var/lib/apt/lists/*
5468

55-
# The 'apt-get install' of nvinfer-runtime-trt-repo-ubuntu1804-4.0.1-ga-cuda10.0
56-
# adds a new list which contains libnvinfer library, so it needs another
57-
# 'apt-get update' to retrieve that list before it can actually install the
58-
# library.
59-
# We don't install libnvinfer-dev since we don't need to build against TensorRT,
60-
# and libnvinfer4 doesn't contain libnvinfer.a static library.
61-
RUN apt-get update \
62-
# nvinfer-runtime-trt-repo doesn't have a 1804-cuda10.1 version yet. see:
63-
# https://developer.download.nvidia.cn/compute/machine-learning/repos/ubuntu1804/x86_64/
64-
&& apt-get install -y --no-install-recommends nvinfer-runtime-trt-repo-ubuntu1804-5.0.2-ga-cuda10.0 \
65-
&& apt-get update \
66-
&& apt-get install -y --no-install-recommends libnvinfer6=${TF_TENSORRT_VERSION}-1+cuda10.1 \
67-
&& apt-get clean \
68-
&& rm -rf /var/lib/apt/lists/*
69-
7069
RUN ${PIP} --no-cache-dir install --upgrade \
7170
pip \
7271
setuptools
@@ -88,6 +87,17 @@ RUN apt-get update \
8887
&& apt-get clean \
8988
&& rm -rf /var/lib/apt/lists/*
9089

90+
# https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212771
91+
# Nvidia is no longer updating the machine-learning repo.
92+
# Need to manually pull and install necessary debs to continue using these versions.
93+
# nvinfer-runtime-trt-repo doesn't have a 1804-cuda10.1 version.
94+
RUN wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvinfer-runtime-trt-repo-ubuntu1804-${TF_TENSORRT_VERSION}-ga-cuda10.0_1-1_amd64.deb \
95+
&& dpkg -i nvinfer-runtime-trt-repo-ubuntu1804-${TF_TENSORRT_VERSION}-ga-cuda10.0_1-1_amd64.deb \
96+
&& wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/libnvinfer6_${TF_TENSORRT_LIB_VERSION}-1+cuda10.1_amd64.deb \
97+
&& dpkg -i libnvinfer6_${TF_TENSORRT_LIB_VERSION}-1+cuda10.1_amd64.deb \
98+
&& rm *.deb \
99+
&& rm -rf /var/lib/apt/lists/*
100+
91101
# cython, falcon, gunicorn, grpc
92102
RUN ${PIP} install -U --no-cache-dir \
93103
boto3 \

docker/build_artifacts/sagemaker/nginx.conf.template

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ http {
1717
access_log /dev/stdout combined;
1818
js_import tensorflowServing.js;
1919

20+
proxy_read_timeout %PROXY_READ_TIMEOUT%;
21+
2022
upstream tfs_upstream {
2123
%TFS_UPSTREAM%;
2224
}

docker/build_artifacts/sagemaker/serve.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,20 @@ def __init__(self):
6767
self._gunicorn_timeout_seconds = int(
6868
os.environ.get("SAGEMAKER_GUNICORN_TIMEOUT_SECONDS", 30)
6969
)
70+
self._nginx_proxy_read_timeout_seconds = int(
71+
os.environ.get("SAGEMAKER_NGINX_PROXY_READ_TIMEOUT_SECONDS", 60))
72+
73+
# Nginx proxy read timeout should not be less than the GUnicorn timeout. If it is, this
74+
# can result in upstream time out errors.
75+
if self._gunicorn_timeout_seconds > self._nginx_proxy_read_timeout_seconds:
76+
log.info(
77+
"GUnicorn timeout was higher than Nginx proxy read timeout."
78+
" Setting Nginx proxy read timeout from {} seconds to {} seconds"
79+
" to match GUnicorn timeout.".format(
80+
self._nginx_proxy_read_timeout_seconds, self._gunicorn_timeout_seconds
81+
)
82+
)
83+
self._nginx_proxy_read_timeout_seconds = self._gunicorn_timeout_seconds
7084

7185
if os.environ.get("OMP_NUM_THREADS") is None:
7286
os.environ["OMP_NUM_THREADS"] = "1"
@@ -270,6 +284,7 @@ def _create_nginx_config(self):
270284
"FORWARD_INVOCATION_REQUESTS": GUNICORN_INVOCATIONS
271285
if self._use_gunicorn
272286
else JS_INVOCATIONS,
287+
"PROXY_READ_TIMEOUT": str(self._nginx_proxy_read_timeout_seconds),
273288
}
274289

275290
config = pattern.sub(lambda x: template_values[x.group(1)], template)
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
# Copyright 2019-2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"). You
4+
# may not use this file except in compliance with the License. A copy of
5+
# the License is located at
6+
#
7+
# http://aws.amazon.com/apache2.0/
8+
#
9+
# or in the "license" file accompanying this file. This file is
10+
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific
12+
# language governing permissions and limitations under the License.
13+
14+
import os
15+
import subprocess
16+
17+
import pytest
18+
19+
20+
@pytest.fixture(scope="session", autouse=True)
21+
def volume():
22+
try:
23+
model_dir = os.path.abspath("test/resources/models")
24+
subprocess.check_call(
25+
"docker volume create --name nginx_model_volume --opt type=none "
26+
"--opt device={} --opt o=bind".format(model_dir).split()
27+
)
28+
yield model_dir
29+
finally:
30+
subprocess.check_call("docker volume rm nginx_model_volume".split())
31+
32+
33+
def test_run_nginx_with_default_parameters(docker_base_name, tag, runtime_config):
34+
try:
35+
command = (
36+
"docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080"
37+
" --mount type=volume,source=nginx_model_volume,target=/opt/ml/model,readonly"
38+
" {}:{} serve"
39+
).format(runtime_config, docker_base_name, tag)
40+
41+
proc = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
42+
43+
lines_seen = {
44+
"error_log /dev/stderr error;": 0,
45+
"proxy_read_timeout 60;": 0,
46+
}
47+
48+
for stdout_line in iter(proc.stdout.readline, ""):
49+
stdout_line = str(stdout_line)
50+
for line in lines_seen.keys():
51+
if line in stdout_line:
52+
lines_seen[line] += 1
53+
if "started nginx" in stdout_line:
54+
for value in lines_seen.values():
55+
assert value == 1
56+
break
57+
58+
finally:
59+
subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split())
60+
61+
62+
def test_run_nginx_with_env_var_parameters(docker_base_name, tag, runtime_config):
63+
try:
64+
command = (
65+
"docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080"
66+
" --mount type=volume,source=nginx_model_volume,target=/opt/ml/model,readonly"
67+
" -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info"
68+
" -e SAGEMAKER_NGINX_PROXY_READ_TIMEOUT_SECONDS=63"
69+
" {}:{} serve"
70+
).format(runtime_config, docker_base_name, tag)
71+
72+
proc = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
73+
74+
lines_seen = {
75+
"error_log /dev/stderr info;": 0,
76+
"proxy_read_timeout 63;": 0,
77+
}
78+
79+
for stdout_line in iter(proc.stdout.readline, ""):
80+
stdout_line = str(stdout_line)
81+
for line in lines_seen.keys():
82+
if line in stdout_line:
83+
lines_seen[line] += 1
84+
if "started nginx" in stdout_line:
85+
for value in lines_seen.values():
86+
assert value == 1
87+
break
88+
89+
finally:
90+
subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split())
91+
92+
def test_run_nginx_with_higher_gunicorn_parameter(docker_base_name, tag, runtime_config):
93+
try:
94+
command = (
95+
"docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080"
96+
" --mount type=volume,source=nginx_model_volume,target=/opt/ml/model,readonly"
97+
" -e SAGEMAKER_NGINX_PROXY_READ_TIMEOUT_SECONDS=60"
98+
" -e SAGEMAKER_GUNICORN_TIMEOUT_SECONDS=120"
99+
" {}:{} serve"
100+
).format(runtime_config, docker_base_name, tag)
101+
102+
proc = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
103+
104+
lines_seen = {
105+
"proxy_read_timeout 120;": 0, # When GUnicorn is higher, set timeout to match.
106+
}
107+
108+
for stdout_line in iter(proc.stdout.readline, ""):
109+
stdout_line = str(stdout_line)
110+
for line in lines_seen.keys():
111+
if line in stdout_line:
112+
lines_seen[line] += 1
113+
if "started nginx" in stdout_line:
114+
for value in lines_seen.values():
115+
assert value == 1
116+
break
117+
118+
finally:
119+
subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split())

0 commit comments

Comments (0)