diff --git a/README.md b/README.md index d4729851..bedb1bea 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,9 @@ For notebook examples, see: [Amazon SageMaker Examples](https://github.com/awsla 3. [Running the tests](#running-the-tests) 4. [Pre/Post-Processing](#pre/post-processing) 5. [Deploying a TensorFlow Serving Model](#deploying-a-tensorflow-serving-model) -6. [Deploying to Multi-Model Endpoint](#deploying-to-multi-model-endpoint) +6. [Enabling Batching](#enabling-batching) +7. [Configurable SageMaker Environment Variables](#configurable-sagemaker-environment-variables) +8. [Deploying to Multi-Model Endpoint](#deploying-to-multi-model-endpoint) ## Getting Started @@ -612,6 +614,22 @@ SAGEMAKER_TFS_NUM_BATCH_THREADS="16" SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES="10000" ``` +## Configurable SageMaker Environment Variables +The following environment variables can be set on a SageMaker Model or Transform Job if further configuration is required: + +[Configures](https://docs.gunicorn.org/en/stable/settings.html#loglevel) +the logging level for Gunicorn. +```bash +# Defaults to "info" +SAGEMAKER_GUNICORN_LOGLEVEL="debug" +``` +[Configures](https://docs.gunicorn.org/en/stable/settings.html#timeout) +how long a Gunicorn worker may be silent before it is killed and restarted. +```bash +# Defaults to 30. +SAGEMAKER_GUNICORN_TIMEOUT_SECONDS="60" +``` + +## Deploying to Multi-Model Endpoint SageMaker TensorFlow Serving container (version 1.5.0 and 2.1.0, CPU) now supports Multi-Model Endpoint. With this feature, you can deploy different models (not just different versions of a model) to a single endpoint. 
diff --git a/docker/build_artifacts/sagemaker/serve.py b/docker/build_artifacts/sagemaker/serve.py index 96d532a0..a31e3e62 100644 --- a/docker/build_artifacts/sagemaker/serve.py +++ b/docker/build_artifacts/sagemaker/serve.py @@ -64,6 +64,9 @@ def __init__(self): self._tfs_inter_op_parallelism = os.environ.get("SAGEMAKER_TFS_INTER_OP_PARALLELISM", 0) self._tfs_intra_op_parallelism = os.environ.get("SAGEMAKER_TFS_INTRA_OP_PARALLELISM", 0) self._gunicorn_worker_class = os.environ.get("SAGEMAKER_GUNICORN_WORKER_CLASS", "gevent") + self._gunicorn_timeout_seconds = int( + os.environ.get("SAGEMAKER_GUNICORN_TIMEOUT_SECONDS", 30) + ) if os.environ.get("OMP_NUM_THREADS") is None: os.environ["OMP_NUM_THREADS"] = "1" @@ -202,7 +205,7 @@ def _setup_gunicorn(self): gunicorn_command = ( "gunicorn -b unix:/tmp/gunicorn.sock -k {} --chdir /sagemaker " - "--workers {} --threads {} --log-level {} " + "--workers {} --threads {} --log-level {} --timeout {} " "{}{} -e TFS_GRPC_PORTS={} -e TFS_REST_PORTS={} " "-e SAGEMAKER_MULTI_MODEL={} -e SAGEMAKER_SAFE_PORT_RANGE={} " "-e SAGEMAKER_TFS_WAIT_TIME_SECONDS={} " @@ -212,6 +215,7 @@ def _setup_gunicorn(self): self._gunicorn_workers, self._gunicorn_threads, self._gunicorn_loglevel, + self._gunicorn_timeout_seconds, python_path_option, ",".join(python_path_content), self._tfs_grpc_concat_ports, @@ -451,7 +455,7 @@ def start(self): self._setup_gunicorn() self._start_gunicorn() # make sure gunicorn is up - with self._timeout(seconds=30): + with self._timeout(seconds=self._gunicorn_timeout_seconds): self._wait_for_gunicorn() self._start_nginx()