aws · matherit · Mar 8, 2022 · Mar 3, 2022 · Mar 4, 2022 · Mar 4, 2022
diff --git a/README.md b/README.md
@@ -42,7 +42,9 @@ For notebook examples, see: [Amazon SageMaker Examples](https://github.com/awsla
 3. [Running the tests](#running-the-tests)
 4. [Pre/Post-Processing](#pre/post-processing)
 5. [Deploying a TensorFlow Serving Model](#deploying-a-tensorflow-serving-model)
-6. [Deploying to Multi-Model Endpoint](#deploying-to-multi-model-endpoint)
+6. [Enabling Batching](#enabling-batching)
+7. [Configurable SageMaker Environment Variables](#configurable-sagemaker-environment-variables)
+8. [Deploying to Multi-Model Endpoint](#deploying-to-multi-model-endpoint)
 
 ## Getting Started
 
@@ -612,6 +614,23 @@ SAGEMAKER_TFS_NUM_BATCH_THREADS="16"
 SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES="10000"
 ```
 
+## Configurable SageMaker Environment Variables
+The following environment variables can be set on a SageMaker Model or Transform Job if further configuration is required:
+
+```bash
+# Configures the logging level for Gunicorn.
+# For the accepted values, see:
+# https://docs.gunicorn.org/en/stable/settings.html#loglevel
+# Defaults to "info".
+SAGEMAKER_GUNICORN_LOGLEVEL="debug"
+
+# Configures how long (in seconds) a Gunicorn worker may be silent before it is killed and restarted.
+# The same value also bounds how long startup waits for Gunicorn to come up. For details, see:
+# https://docs.gunicorn.org/en/stable/settings.html#timeout
+# Defaults to 30.
+SAGEMAKER_GUNICORN_TIMEOUT_SECONDS="60"
+```
+
 ## Deploying to Multi-Model Endpoint
 
 SageMaker TensorFlow Serving container (version 1.5.0 and 2.1.0, CPU) now supports Multi-Model Endpoint. With this feature, you can deploy different models (not just different versions of a model) to a single endpoint.

diff --git a/docker/build_artifacts/sagemaker/serve.py b/docker/build_artifacts/sagemaker/serve.py
@@ -64,6 +64,9 @@ def __init__(self):
         self._tfs_inter_op_parallelism = os.environ.get("SAGEMAKER_TFS_INTER_OP_PARALLELISM", 0)
         self._tfs_intra_op_parallelism = os.environ.get("SAGEMAKER_TFS_INTRA_OP_PARALLELISM", 0)
         self._gunicorn_worker_class = os.environ.get("SAGEMAKER_GUNICORN_WORKER_CLASS", "gevent")
+        self._gunicorn_timeout_seconds = int(
+            os.environ.get("SAGEMAKER_GUNICORN_TIMEOUT_SECONDS", 30)
+        )
 
         if os.environ.get("OMP_NUM_THREADS") is None:
             os.environ["OMP_NUM_THREADS"] = "1"
@@ -202,7 +205,7 @@ def _setup_gunicorn(self):
 
         gunicorn_command = (
             "gunicorn -b unix:/tmp/gunicorn.sock -k {} --chdir /sagemaker "
-            "--workers {} --threads {} --log-level {} "
+            "--workers {} --threads {} --log-level {} --timeout {} "
             "{}{} -e TFS_GRPC_PORTS={} -e TFS_REST_PORTS={} "
             "-e SAGEMAKER_MULTI_MODEL={} -e SAGEMAKER_SAFE_PORT_RANGE={} "
             "-e SAGEMAKER_TFS_WAIT_TIME_SECONDS={} "
@@ -212,6 +215,7 @@ def _setup_gunicorn(self):
             self._gunicorn_workers,
             self._gunicorn_threads,
             self._gunicorn_loglevel,
+            self._gunicorn_timeout_seconds,
             python_path_option,
             ",".join(python_path_content),
             self._tfs_grpc_concat_ports,
@@ -451,7 +455,7 @@ def start(self):
             self._setup_gunicorn()
             self._start_gunicorn()
             # make sure gunicorn is up
-            with self._timeout(seconds=30):
+            with self._timeout(seconds=self._gunicorn_timeout_seconds):
                 self._wait_for_gunicorn()
 
         self._start_nginx()