Add gunicorn timeout (#220)

matherit · mseth10 · web-flow · commit a58583d6007e · 2022-03-08T09:22:29.000-06:00
* Add env var that will manage Gunicorn setup timeout. * Ensure env var is treated as int. * Update readme to include new env var section and update TOC. * Add SAGEMAKER_GUNICORN_LOGLEVEL description to readme. * expose gunicorn logging (#219) * Add in SAGEMAKER_GUNICORN_TIMEOUT_SECONDS. * Update readme with PR feedback. * Remove extra argument in format. * Remove setup timeout. * Make readme links clickable. * Remove boilerplate statements and instead link in descriptive comment of var. * Update capitalization of Gunicorn. Co-authored-by: Manu Seth <22492939+mseth10@users.noreply.github.com>
diff --git a/README.md b/README.md
@@ -42,7 +42,9 @@ For notebook examples, see: [Amazon SageMaker Examples](https://github.com/awsla
 3. [Running the tests](#running-the-tests)
 4. [Pre/Post-Processing](#pre/post-processing)
 5. [Deploying a TensorFlow Serving Model](#deploying-a-tensorflow-serving-model)
-6. [Deploying to Multi-Model Endpoint](#deploying-to-multi-model-endpoint)
+6. [Enabling Batching](#enabling-batching)
+7. [Configurable SageMaker Environment Variables](#configurable-sagemaker-environment-variables)
+8. [Deploying to Multi-Model Endpoint](#deploying-to-multi-model-endpoint)
 
 ## Getting Started
 
@@ -612,6 +614,22 @@ SAGEMAKER_TFS_NUM_BATCH_THREADS="16"
 SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES="10000"
 ```
 
+## Configurable SageMaker Environment Variables
+The following environment variables can be set on a SageMaker Model or Transform Job if further configuration is required:
+
+[Configures](https://docs.gunicorn.org/en/stable/settings.html#loglevel)
+the logging level for Gunicorn.
+```bash
+# Defaults to "info"
+SAGEMAKER_GUNICORN_LOGLEVEL="debug"
+```
+[Configures](https://docs.gunicorn.org/en/stable/settings.html#timeout)
+how long a Gunicorn worker may be silent before it is killed and restarted. The same value also bounds how long the container waits for Gunicorn to come up at startup.
+```bash
+# Defaults to 30 (seconds).
+SAGEMAKER_GUNICORN_TIMEOUT_SECONDS="60"
+```
+
 ## Deploying to Multi-Model Endpoint
 
 SageMaker TensorFlow Serving container (version 1.5.0 and 2.1.0, CPU) now supports Multi-Model Endpoint. With this feature, you can deploy different models (not just different versions of a model) to a single endpoint.
diff --git a/docker/build_artifacts/sagemaker/serve.py b/docker/build_artifacts/sagemaker/serve.py
@@ -64,6 +64,9 @@ def __init__(self):
         self._tfs_inter_op_parallelism = os.environ.get("SAGEMAKER_TFS_INTER_OP_PARALLELISM", 0)
         self._tfs_intra_op_parallelism = os.environ.get("SAGEMAKER_TFS_INTRA_OP_PARALLELISM", 0)
         self._gunicorn_worker_class = os.environ.get("SAGEMAKER_GUNICORN_WORKER_CLASS", "gevent")
+        self._gunicorn_timeout_seconds = int(
+            os.environ.get("SAGEMAKER_GUNICORN_TIMEOUT_SECONDS", 30)
+        )
 
         if os.environ.get("OMP_NUM_THREADS") is None:
             os.environ["OMP_NUM_THREADS"] = "1"
@@ -202,7 +205,7 @@ def _setup_gunicorn(self):
 
         gunicorn_command = (
             "gunicorn -b unix:/tmp/gunicorn.sock -k {} --chdir /sagemaker "
-            "--workers {} --threads {} --log-level {} "
+            "--workers {} --threads {} --log-level {} --timeout {} "
             "{}{} -e TFS_GRPC_PORTS={} -e TFS_REST_PORTS={} "
             "-e SAGEMAKER_MULTI_MODEL={} -e SAGEMAKER_SAFE_PORT_RANGE={} "
             "-e SAGEMAKER_TFS_WAIT_TIME_SECONDS={} "
@@ -212,6 +215,7 @@ def _setup_gunicorn(self):
             self._gunicorn_workers,
             self._gunicorn_threads,
             self._gunicorn_loglevel,
+            self._gunicorn_timeout_seconds,
             python_path_option,
             ",".join(python_path_content),
             self._tfs_grpc_concat_ports,
@@ -451,7 +455,7 @@ def start(self):
             self._setup_gunicorn()
             self._start_gunicorn()
             # make sure gunicorn is up
-            with self._timeout(seconds=30):
+            with self._timeout(seconds=self._gunicorn_timeout_seconds):
                 self._wait_for_gunicorn()
 
         self._start_nginx()