From 6ca91a237547530fcc46d6a36e4175d12863fa9a Mon Sep 17 00:00:00 2001
From: Chuyang Deng
Date: Tue, 30 Jun 2020 16:14:09 -0700
Subject: [PATCH 1/5] Make quotes consistent

---
 docker/2.1/__init__.py | 0
 docker/2.1/deep_learning_container.py | 109 +++++
 docker/2.1/dockerd-entrypoint.py | 22 +
 docker/2.1/sagemaker/__init__.py | 12 +
 docker/2.1/sagemaker/multi_model_utils.py | 52 +++
 docker/2.1/sagemaker/nginx.conf.template | 64 +++
 docker/2.1/sagemaker/python_service.py | 397 ++++++++++++++++++
 docker/2.1/sagemaker/serve | 3 +
 docker/2.1/sagemaker/serve.py | 308 ++++++++++++++
 docker/2.1/sagemaker/tensorflow-serving.js | 231 ++++++++++
 docker/2.1/sagemaker/tfs_utils.py | 209 +++++++++
 .../deep_learning_container.py | 2 +-
 docker/build_artifacts/dockerd-entrypoint.py | 2 +-
 .../sagemaker/multi_model_utils.py | 8 +-
 .../sagemaker/python_service.py | 142 +++---
 docker/build_artifacts/sagemaker/serve.py | 198 ++++-----
 .../sagemaker/tensorflow-serving.js | 84 ++--
 docker/build_artifacts/sagemaker/tfs_utils.py | 104 ++---
 scripts/shared.sh | 2 +-
 test/integration/local/conftest.py | 38 +-
 .../local/multi_model_endpoint_test_utils.py | 24 +-
 test/integration/local/test_container.py | 170 ++++----
 .../local/test_multi_model_endpoint.py | 112 ++---
 .../local/test_pre_post_processing.py | 78 ++--
 .../local/test_pre_post_processing_mme.py | 76 ++--
 test/integration/local/test_tfs_batching.py | 44 +-
 test/integration/sagemaker/conftest.py | 114 ++---
 test/integration/sagemaker/test_ei.py | 28 +-
 test/integration/sagemaker/test_tfs.py | 26 +-
 test/integration/sagemaker/util.py | 126 +++---
 30 files changed, 2096 insertions(+), 689 deletions(-)
 create mode 100644 docker/2.1/__init__.py
 create mode 100644 docker/2.1/deep_learning_container.py
 create mode 100644 docker/2.1/dockerd-entrypoint.py
 create mode 100644 docker/2.1/sagemaker/__init__.py
 create mode 100644 docker/2.1/sagemaker/multi_model_utils.py
 create mode 100644 docker/2.1/sagemaker/nginx.conf.template
 create mode 100644 docker/2.1/sagemaker/python_service.py
 create mode 100755 docker/2.1/sagemaker/serve
 create mode 100644 docker/2.1/sagemaker/serve.py
 create mode 100644 docker/2.1/sagemaker/tensorflow-serving.js
 create mode 100644 docker/2.1/sagemaker/tfs_utils.py

diff --git a/docker/2.1/__init__.py b/docker/2.1/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/docker/2.1/deep_learning_container.py b/docker/2.1/deep_learning_container.py
new file mode 100644
index 00000000..1e82e61e
--- /dev/null
+++ b/docker/2.1/deep_learning_container.py
@@ -0,0 +1,109 @@
+# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import re +import json +import logging +import requests + + +def _validate_instance_id(instance_id): + """ + Validate instance ID + """ + instance_id_regex = r"^(i-\S{17})" + compiled_regex = re.compile(instance_id_regex) + match = compiled_regex.match(instance_id) + + if not match: + return None + + return match.group(1) + + +def _retrieve_instance_id(): + """ + Retrieve instance ID from instance metadata service + """ + instance_id = None + url = "http://169.254.169.254/latest/meta-data/instance-id" + response = requests_helper(url, timeout=0.1) + + if response is not None: + instance_id = _validate_instance_id(response.text) + + return instance_id + + +def _retrieve_instance_region(): + """ + Retrieve instance region from instance metadata service + """ + region = None + valid_regions = ['ap-northeast-1', 'ap-northeast-2', 'ap-southeast-1', 'ap-southeast-2', + 'ap-south-1', 'ca-central-1', 'eu-central-1', 'eu-north-1', + 'eu-west-1', 'eu-west-2', 'eu-west-3', 'sa-east-1', + 'us-east-1', 'us-east-2', 'us-west-1', 'us-west-2'] + + url = "http://169.254.169.254/latest/dynamic/instance-identity/document" + response = requests_helper(url, timeout=0.1) + + if response is not None: + response_json = json.loads(response.text) + + if response_json['region'] in valid_regions: + region = response_json['region'] + + return region + + +def query_bucket(): + """ + GET request on an empty object from an Amazon S3 bucket + """ + response = None + instance_id = _retrieve_instance_id() + region = _retrieve_instance_region() + + if instance_id is not None and region is not None: + url = ("https://aws-deep-learning-containers-{0}.s3.{0}.amazonaws.com" + "/dlc-containers.txt?x-instance-id={1}".format(region, instance_id)) + response = requests_helper(url, timeout=0.2) + + logging.debug("Query bucket finished: {}".format(response)) + + return response + + +def requests_helper(url, timeout): + response = None + try: + response = requests.get(url, timeout=timeout) + except requests.exceptions.RequestException as e: + logging.error("Request exception: {}".format(e)) + + return response + + +def main(): + """ + Invoke bucket query + """ + # Logs are not necessary for normal run. Remove this line while debugging. + logging.getLogger().disabled = True + + logging.basicConfig(level=logging.ERROR) + query_bucket() + + +if __name__ == '__main__': + main() diff --git a/docker/2.1/dockerd-entrypoint.py b/docker/2.1/dockerd-entrypoint.py new file mode 100644 index 00000000..fc4ce388 --- /dev/null +++ b/docker/2.1/dockerd-entrypoint.py @@ -0,0 +1,22 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+ +import os.path +import subprocess +import shlex +import sys + +if not os.path.exists("/opt/ml/input/config"): + subprocess.call(["python", "/usr/local/bin/deep_learning_container.py", "&>/dev/null", "&"]) + +subprocess.check_call(shlex.split(' '.join(sys.argv[1:]))) diff --git a/docker/2.1/sagemaker/__init__.py b/docker/2.1/sagemaker/__init__.py new file mode 100644 index 00000000..04fbf5d9 --- /dev/null +++ b/docker/2.1/sagemaker/__init__.py @@ -0,0 +1,12 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. diff --git a/docker/2.1/sagemaker/multi_model_utils.py b/docker/2.1/sagemaker/multi_model_utils.py new file mode 100644 index 00000000..5d2c47f4 --- /dev/null +++ b/docker/2.1/sagemaker/multi_model_utils.py @@ -0,0 +1,52 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+import fcntl +import signal +import time +from contextlib import contextmanager + +MODEL_CONFIG_FILE = "/sagemaker/model-config.cfg" +DEFAULT_LOCK_FILE = "/sagemaker/lock-file.lock" + + +@contextmanager +def lock(path=DEFAULT_LOCK_FILE): + f = open(path, "w") + fd = f.fileno() + fcntl.lockf(fd, fcntl.LOCK_EX) + + try: + yield + finally: + time.sleep(1) + fcntl.lockf(fd, fcntl.LOCK_UN) + + +@contextmanager +def timeout(seconds=60): + def _raise_timeout_error(signum, frame): + raise Exception(408, "Timed out after {} seconds".format(seconds)) + + try: + signal.signal(signal.SIGALRM, _raise_timeout_error) + signal.alarm(seconds) + yield + finally: + signal.alarm(0) + + +class MultiModelException(Exception): + def __init__(self, code, msg): + Exception.__init__(self, code, msg) + self.code = code + self.msg = msg diff --git a/docker/2.1/sagemaker/nginx.conf.template b/docker/2.1/sagemaker/nginx.conf.template new file mode 100644 index 00000000..5ccfed3d --- /dev/null +++ b/docker/2.1/sagemaker/nginx.conf.template @@ -0,0 +1,64 @@ +load_module modules/ngx_http_js_module.so; + +worker_processes auto; +daemon off; +pid /tmp/nginx.pid; +error_log /dev/stderr %NGINX_LOG_LEVEL%; + +worker_rlimit_nofile 4096; + +events { + worker_connections 2048; +} + +http { + include /etc/nginx/mime.types; + default_type application/json; + access_log /dev/stdout combined; + js_include tensorflow-serving.js; + + upstream tfs_upstream { + server localhost:%TFS_REST_PORT%; + } + + upstream gunicorn_upstream { + server unix:/tmp/gunicorn.sock fail_timeout=1; + } + + server { + listen %NGINX_HTTP_PORT% deferred; + client_max_body_size 0; + client_body_buffer_size 100m; + subrequest_output_buffer_size 100m; + + set $tfs_version %TFS_VERSION%; + set $default_tfs_model %TFS_DEFAULT_MODEL_NAME%; + + location /tfs { + rewrite ^/tfs/(.*) /$1 break; + proxy_redirect off; + proxy_pass_request_headers off; + proxy_set_header Content-Type 'application/json'; + proxy_set_header Accept 'application/json'; + proxy_pass http://tfs_upstream; + } + + location /ping { + %FORWARD_PING_REQUESTS%; + } + + location /invocations { + %FORWARD_INVOCATION_REQUESTS%; + } + + location /models { + proxy_pass http://gunicorn_upstream/models; + } + + location / { + return 404 '{"error": "Not Found"}'; + } + + keepalive_timeout 3; + } +} diff --git a/docker/2.1/sagemaker/python_service.py b/docker/2.1/sagemaker/python_service.py new file mode 100644 index 00000000..0014b6dd --- /dev/null +++ b/docker/2.1/sagemaker/python_service.py @@ -0,0 +1,397 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+import bisect +import importlib.util +import json +import logging +import os +import subprocess +import time + +import falcon +import requests + +from multi_model_utils import lock, timeout, MultiModelException +import tfs_utils + +SAGEMAKER_MULTI_MODEL_ENABLED = os.environ.get("SAGEMAKER_MULTI_MODEL", "false").lower() == "true" +INFERENCE_SCRIPT_PATH = "/opt/ml/{}/code/inference.py".format("models" + if SAGEMAKER_MULTI_MODEL_ENABLED + else "model") +PYTHON_PROCESSING_ENABLED = os.path.exists(INFERENCE_SCRIPT_PATH) +SAGEMAKER_BATCHING_ENABLED = os.environ.get("SAGEMAKER_TFS_ENABLE_BATCHING", "false").lower() +MODEL_CONFIG_FILE_PATH = "/sagemaker/model-config.cfg" +TFS_GRPC_PORT = os.environ.get("TFS_GRPC_PORT") +TFS_REST_PORT = os.environ.get("TFS_REST_PORT") +SAGEMAKER_TFS_PORT_RANGE = os.environ.get("SAGEMAKER_SAFE_PORT_RANGE") + + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + +CUSTOM_ATTRIBUTES_HEADER = "X-Amzn-SageMaker-Custom-Attributes" + + +def default_handler(data, context): + """A default inference request handler that directly send post request to TFS rest port with + un-processed data and return un-processed response + + :param data: input data + :param context: context instance that contains tfs_rest_uri + :return: inference response from TFS model server + """ + response = requests.post(context.rest_uri, data=data) + return response.content, context.accept_header + + +class PythonServiceResource: + + def __init__(self): + if SAGEMAKER_MULTI_MODEL_ENABLED: + self._model_tfs_rest_port = {} + self._model_tfs_grpc_port = {} + self._model_tfs_pid = {} + self._tfs_ports = self._parse_sagemaker_port_range(SAGEMAKER_TFS_PORT_RANGE) + else: + self._tfs_grpc_port = TFS_GRPC_PORT + self._tfs_rest_port = TFS_REST_PORT + + self._tfs_enable_batching = SAGEMAKER_BATCHING_ENABLED == "true" + self._tfs_default_model_name = os.environ.get("TFS_DEFAULT_MODEL_NAME", "None") + + if PYTHON_PROCESSING_ENABLED: + self._handler, self._input_handler, self._output_handler = self._import_handlers() + self._handlers = self._make_handler(self._handler, + self._input_handler, + self._output_handler) + else: + self._handlers = default_handler + + def on_post(self, req, res, model_name=None): + log.info(req.uri) + if model_name or "invocations" in req.uri: + self._handle_invocation_post(req, res, model_name) + else: + data = json.loads(req.stream.read().decode("utf-8")) + self._handle_load_model_post(res, data) + + def _parse_sagemaker_port_range(self, port_range): + lower, upper = port_range.split('-') + lower = int(lower) + upper = lower + int((int(upper) - lower) * 0.9) # only utilizing 90% of the ports + rest_port = lower + grpc_port = (lower + upper) // 2 + tfs_ports = { + "rest_port": [port for port in range(rest_port, grpc_port)], + "grpc_port": [port for port in range(grpc_port, upper)], + } + return tfs_ports + + def _ports_available(self): + with lock(): + rest_ports = self._tfs_ports["rest_port"] + grpc_ports = self._tfs_ports["grpc_port"] + return len(rest_ports) > 0 and len(grpc_ports) > 0 + + def _handle_load_model_post(self, res, data): # noqa: C901 + model_name = data["model_name"] + base_path = data["url"] + + # model is already loaded + if model_name in self._model_tfs_pid: + res.status = falcon.HTTP_409 + res.body = json.dumps({ + "error": "Model {} is already loaded.".format(model_name) + }) + + # check if there are available ports + if not self._ports_available(): + res.status = falcon.HTTP_507 + res.body = json.dumps({ + "error": "Memory exhausted: no 
available ports to load the model." + }) + with lock(): + self._model_tfs_rest_port[model_name] = self._tfs_ports["rest_port"].pop() + self._model_tfs_grpc_port[model_name] = self._tfs_ports["grpc_port"].pop() + + # validate model files are in the specified base_path + if self.validate_model_dir(base_path): + try: + tfs_config = tfs_utils.create_tfs_config_individual_model(model_name, base_path) + tfs_config_file = "/sagemaker/tfs-config/{}/model-config.cfg".format(model_name) + log.info("tensorflow serving model config: \n%s\n", tfs_config) + os.makedirs(os.path.dirname(tfs_config_file)) + with open(tfs_config_file, "w") as f: + f.write(tfs_config) + + batching_config_file = "/sagemaker/batching/{}/batching-config.cfg".format( + model_name) + if self._tfs_enable_batching: + tfs_utils.create_batching_config(batching_config_file) + + cmd = tfs_utils.tfs_command( + self._model_tfs_grpc_port[model_name], + self._model_tfs_rest_port[model_name], + tfs_config_file, + self._tfs_enable_batching, + batching_config_file, + ) + p = subprocess.Popen(cmd.split()) + self._wait_for_model(model_name) + + log.info("started tensorflow serving (pid: %d)", p.pid) + # update model name <-> tfs pid map + self._model_tfs_pid[model_name] = p + + res.status = falcon.HTTP_200 + res.body = json.dumps({ + "success": + "Successfully loaded model {}, " + "listening on rest port {} " + "and grpc port {}.".format(model_name, + self._model_tfs_rest_port, + self._model_tfs_grpc_port,) + }) + except MultiModelException as multi_model_exception: + self._cleanup_config_file(tfs_config_file) + self._cleanup_config_file(batching_config_file) + if multi_model_exception.code == 409: + res.status = falcon.HTTP_409 + res.body = multi_model_exception.msg + elif multi_model_exception.code == 408: + res.status = falcon.HTTP_408 + res.body = multi_model_exception.msg + else: + raise MultiModelException(falcon.HTTP_500, multi_model_exception.msg) + except FileExistsError as e: + res.status = falcon.HTTP_409 + res.body = json.dumps({ + "error": "Model {} is already loaded. 
{}".format(model_name, str(e)) + }) + except OSError as os_error: + self._cleanup_config_file(tfs_config_file) + self._cleanup_config_file(batching_config_file) + if os_error.errno == 12: + raise MultiModelException(falcon.HTTP_507, + "Memory exhausted: " + "not enough memory to start TFS instance") + else: + raise MultiModelException(falcon.HTTP_500, os_error.strerror) + else: + res.status = falcon.HTTP_404 + res.body = json.dumps({ + "error": + "Could not find valid base path {} for servable {}".format(base_path, + model_name) + }) + + def _cleanup_config_file(self, config_file): + if os.path.exists(config_file): + os.remove(config_file) + + def _wait_for_model(self, model_name): + url = "http://localhost:{}/v1/models/{}".format(self._model_tfs_rest_port[model_name], + model_name) + with timeout(): + while True: + time.sleep(0.5) + try: + response = requests.get(url) + if response.status_code == 200: + versions = json.loads(response.content)["model_version_status"] + if all(version["state"] == "AVAILABLE" for version in versions): + break + except ConnectionError: + log.exception("Failed to load models.") + + def _handle_invocation_post(self, req, res, model_name=None): + if SAGEMAKER_MULTI_MODEL_ENABLED: + if model_name: + if model_name not in self._model_tfs_rest_port: + res.status = falcon.HTTP_404 + res.body = json.dumps({ + "error": "Model {} is not loaded yet.".format(model_name) + }) + return + else: + log.info("model name: {}".format(model_name)) + rest_port = self._model_tfs_rest_port[model_name] + log.info("rest port: {}".format(str(self._model_tfs_rest_port[model_name]))) + grpc_port = self._model_tfs_grpc_port[model_name] + log.info("grpc port: {}".format(str(self._model_tfs_grpc_port[model_name]))) + data, context = tfs_utils.parse_request(req, rest_port, grpc_port, + self._tfs_default_model_name, + model_name) + else: + res.status = falcon.HTTP_400 + res.body = json.dumps({ + "error": "Invocation request does not contain model name." 
+ }) + else: + data, context = tfs_utils.parse_request(req, self._tfs_rest_port, self._tfs_grpc_port, + self._tfs_default_model_name) + + try: + res.status = falcon.HTTP_200 + res.body, res.content_type = self._handlers(data, context) + except Exception as e: # pylint: disable=broad-except + log.exception("exception handling request: {}".format(e)) + res.status = falcon.HTTP_500 + res.body = json.dumps({ + "error": str(e) + }).encode("utf-8") # pylint: disable=E1101 + + def _import_handlers(self): + spec = importlib.util.spec_from_file_location("inference", INFERENCE_SCRIPT_PATH) + inference = importlib.util.module_from_spec(spec) + spec.loader.exec_module(inference) + + _custom_handler, _custom_input_handler, _custom_output_handler = None, None, None + if hasattr(inference, "handler"): + _custom_handler = inference.handler + elif hasattr(inference, "input_handler") and hasattr(inference, "output_handler"): + _custom_input_handler = inference.input_handler + _custom_output_handler = inference.output_handler + else: + raise NotImplementedError("Handlers are not implemented correctly in user script.") + + return _custom_handler, _custom_input_handler, _custom_output_handler + + def _make_handler(self, custom_handler, custom_input_handler, custom_output_handler): + if custom_handler: + return custom_handler + + def handler(data, context): + processed_input = custom_input_handler(data, context) + response = requests.post(context.rest_uri, data=processed_input) + return custom_output_handler(response, context) + + return handler + + def on_get(self, req, res, model_name=None): # pylint: disable=W0613 + if model_name is None: + models_info = {} + uri = "http://localhost:{}/v1/models/{}" + for model, port in self._model_tfs_rest_port.items(): + try: + info = json.loads(requests.get(uri.format(port, model)).content) + models_info[model] = info + except ValueError as e: + log.exception("exception handling request: {}".format(e)) + res.status = falcon.HTTP_500 + res.body = json.dumps({ + "error": str(e) + }).encode("utf-8") + res.status = falcon.HTTP_200 + res.body = json.dumps(models_info) + else: + if model_name not in self._model_tfs_rest_port: + res.status = falcon.HTTP_404 + res.body = json.dumps({ + "error": "Model {} is loaded yet.".format(model_name) + }).encode("utf-8") + else: + port = self._model_tfs_rest_port[model_name] + uri = "http://localhost:{}/v1/models/{}".format(port, model_name) + try: + info = requests.get(uri) + res.status = falcon.HTTP_200 + res.body = json.dumps({ + "model": info + }).encode("utf-8") + except ValueError as e: + log.exception("exception handling GET models request.") + res.status = falcon.HTTP_500 + res.body = json.dumps({ + "error": str(e) + }).encode("utf-8") + + def on_delete(self, req, res, model_name): # pylint: disable=W0613 + if model_name not in self._model_tfs_pid: + res.status = falcon.HTTP_404 + res.body = json.dumps({ + "error": "Model {} is not loaded yet".format(model_name) + }) + else: + try: + self._model_tfs_pid[model_name].kill() + os.remove("/sagemaker/tfs-config/{}/model-config.cfg".format(model_name)) + os.rmdir("/sagemaker/tfs-config/{}".format(model_name)) + release_rest_port = self._model_tfs_rest_port[model_name] + release_grpc_port = self._model_tfs_grpc_port[model_name] + with lock(): + bisect.insort(self._tfs_ports["rest_port"], release_rest_port) + bisect.insort(self._tfs_ports["grpc_port"], release_grpc_port) + del self._model_tfs_rest_port[model_name] + del self._model_tfs_grpc_port[model_name] + del 
self._model_tfs_pid[model_name] + res.status = falcon.HTTP_200 + res.body = json.dumps({ + "success": "Successfully unloaded model {}.".format(model_name) + }) + except OSError as error: + res.status = falcon.HTTP_500 + res.body = json.dumps({ + "error": str(error) + }).encode("utf-8") + + def validate_model_dir(self, model_path): + # model base path doesn't exits + if not os.path.exists(model_path): + return False + versions = [] + for _, dirs, _ in os.walk(model_path): + for dirname in dirs: + log.info("dirname: {}".format(dirname)) + if dirname.isdigit(): + versions.append(dirname) + return self.validate_model_versions(versions) + + def validate_model_versions(self, versions): + log.info(versions) + if not versions: + return False + for v in versions: + if v.isdigit(): + # TensorFlow model server will succeed with any versions found + # even if there are directories that's not a valid model version, + # the loading will succeed. + return True + return False + + +class PingResource: + def on_get(self, req, res): # pylint: disable=W0613 + res.status = falcon.HTTP_200 + + +class ServiceResources: + def __init__(self): + self._enable_python_processing = PYTHON_PROCESSING_ENABLED + self._enable_model_manager = SAGEMAKER_MULTI_MODEL_ENABLED + self._python_service_resource = PythonServiceResource() + self._ping_resource = PingResource() + + def add_routes(self, application): + application.add_route("/ping", self._ping_resource) + application.add_route("/invocations", self._python_service_resource) + + if self._enable_model_manager: + application.add_route("/models", self._python_service_resource) + application.add_route("/models/{model_name}", self._python_service_resource) + application.add_route("/models/{model_name}/invoke", self._python_service_resource) + + +app = falcon.API() +resources = ServiceResources() +resources.add_routes(app) diff --git a/docker/2.1/sagemaker/serve b/docker/2.1/sagemaker/serve new file mode 100755 index 00000000..9fac6a93 --- /dev/null +++ b/docker/2.1/sagemaker/serve @@ -0,0 +1,3 @@ +#!/bin/bash + +python3 /sagemaker/serve.py diff --git a/docker/2.1/sagemaker/serve.py b/docker/2.1/sagemaker/serve.py new file mode 100644 index 00000000..7a539fe6 --- /dev/null +++ b/docker/2.1/sagemaker/serve.py @@ -0,0 +1,308 @@ +# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+ +import logging +import os +import re +import signal +import subprocess +import tfs_utils + +from contextlib import contextmanager + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + +JS_PING = "js_content ping" +JS_INVOCATIONS = "js_content invocations" +GUNICORN_PING = "proxy_pass http://gunicorn_upstream/ping" +GUNICORN_INVOCATIONS = "proxy_pass http://gunicorn_upstream/invocations" + +PYTHON_LIB_PATH = "/opt/ml/model/code/lib" +REQUIREMENTS_PATH = "/opt/ml/model/code/requirements.txt" +INFERENCE_PATH = "/opt/ml/model/code/inference.py" + + +class ServiceManager(object): + def __init__(self): + self._state = "initializing" + self._nginx = None + self._tfs = None + self._gunicorn = None + self._gunicorn_command = None + self._enable_python_service = os.path.exists(INFERENCE_PATH) + self._tfs_version = os.environ.get("SAGEMAKER_TFS_VERSION", "1.13") + self._nginx_http_port = os.environ.get("SAGEMAKER_BIND_TO_PORT", "8080") + self._nginx_loglevel = os.environ.get("SAGEMAKER_TFS_NGINX_LOGLEVEL", "error") + self._tfs_default_model_name = os.environ.get("SAGEMAKER_TFS_DEFAULT_MODEL_NAME", "None") + self._sagemaker_port_range = os.environ.get("SAGEMAKER_SAFE_PORT_RANGE", None) + self._tfs_config_path = "/sagemaker/model-config.cfg" + self._tfs_batching_config_path = "/sagemaker/batching-config.cfg" + + _enable_batching = os.environ.get("SAGEMAKER_TFS_ENABLE_BATCHING", "false").lower() + _enable_multi_model_endpoint = os.environ.get("SAGEMAKER_MULTI_MODEL", + "false").lower() + + if _enable_batching not in ["true", "false"]: + raise ValueError("SAGEMAKER_TFS_ENABLE_BATCHING must be 'true' or 'false'") + self._tfs_enable_batching = _enable_batching == "true" + + if _enable_multi_model_endpoint not in ["true", "false"]: + raise ValueError("SAGEMAKER_MULTI_MODEL must be 'true' or 'false'") + self._tfs_enable_multi_model_endpoint = _enable_multi_model_endpoint == "true" + + self._use_gunicorn = self._enable_python_service or self._tfs_enable_multi_model_endpoint + + if self._sagemaker_port_range is not None: + parts = self._sagemaker_port_range.split("-") + low = int(parts[0]) + hi = int(parts[1]) + if low + 2 > hi: + raise ValueError("not enough ports available in SAGEMAKER_SAFE_PORT_RANGE ({})" + .format(self._sagemaker_port_range)) + self._tfs_grpc_port = str(low) + self._tfs_rest_port = str(low + 1) + else: + # just use the standard default ports + self._tfs_grpc_port = "9000" + self._tfs_rest_port = "8501" + + # set environment variable for python service + os.environ["TFS_GRPC_PORT"] = self._tfs_grpc_port + os.environ["TFS_REST_PORT"] = self._tfs_rest_port + + def _create_tfs_config(self): + models = tfs_utils.find_models() + if not models: + raise ValueError("no SavedModel bundles found!") + + if self._tfs_default_model_name == "None": + default_model = os.path.basename(models[0]) + if default_model: + self._tfs_default_model_name = default_model + log.info("using default model name: {}".format(self._tfs_default_model_name)) + else: + log.info("no default model detected") + + # config (may) include duplicate 'config' keys, so we can't just dump a dict + config = "model_config_list: {\n" + for m in models: + config += " config: {\n" + config += " name: '{}',\n".format(os.path.basename(m)) + config += " base_path: '{}',\n".format(m) + config += " model_platform: 'tensorflow'\n" + config += " }\n" + config += "}\n" + + log.info("tensorflow serving model config: \n%s\n", config) + + with open("/sagemaker/model-config.cfg", "w") as f: + f.write(config) + + def 
_setup_gunicorn(self):
+        python_path_content = []
+        python_path_option = ""
+
+        if self._enable_python_service:
+            lib_path_exists = os.path.exists(PYTHON_LIB_PATH)
+            requirements_exists = os.path.exists(REQUIREMENTS_PATH)
+            python_path_content = ["/opt/ml/model/code"]
+            python_path_option = "--pythonpath "
+
+            if lib_path_exists:
+                python_path_content.append(PYTHON_LIB_PATH)
+
+            if requirements_exists:
+                if lib_path_exists:
+                    log.warning("loading modules in '{}', ignoring requirements.txt"
+                                .format(PYTHON_LIB_PATH))
+                else:
+                    log.info("installing packages from requirements.txt...")
+                    pip_install_cmd = "pip3 install -r {}".format(REQUIREMENTS_PATH)
+                    try:
+                        subprocess.check_call(pip_install_cmd.split())
+                    except subprocess.CalledProcessError:
+                        log.error("failed to install required packages, exiting.")
+                        self._stop()
+                        raise ChildProcessError("failed to install required packages.")
+
+        gunicorn_command = (
+            "gunicorn -b unix:/tmp/gunicorn.sock -k gevent --chdir /sagemaker "
+            "{}{} -e TFS_GRPC_PORT={} -e SAGEMAKER_MULTI_MODEL={} -e SAGEMAKER_SAFE_PORT_RANGE={} "
+            "python_service:app").format(python_path_option, ",".join(python_path_content),
+                                         self._tfs_grpc_port, self._tfs_enable_multi_model_endpoint,
+                                         self._sagemaker_port_range)
+
+        log.info("gunicorn command: {}".format(gunicorn_command))
+        self._gunicorn_command = gunicorn_command
+
+    def _create_nginx_config(self):
+        template = self._read_nginx_template()
+        pattern = re.compile(r"%(\w+)%")
+
+        template_values = {
+            "TFS_VERSION": self._tfs_version,
+            "TFS_REST_PORT": self._tfs_rest_port,
+            "TFS_DEFAULT_MODEL_NAME": self._tfs_default_model_name,
+            "NGINX_HTTP_PORT": self._nginx_http_port,
+            "NGINX_LOG_LEVEL": self._nginx_loglevel,
+            "FORWARD_PING_REQUESTS": GUNICORN_PING if self._use_gunicorn else JS_PING,
+            "FORWARD_INVOCATION_REQUESTS": GUNICORN_INVOCATIONS if self._use_gunicorn
+            else JS_INVOCATIONS,
+        }
+
+        config = pattern.sub(lambda x: template_values[x.group(1)], template)
+        log.info("nginx config: \n%s\n", config)
+
+        with open("/sagemaker/nginx.conf", "w") as f:
+            f.write(config)
+
+    def _read_nginx_template(self):
+        with open("/sagemaker/nginx.conf.template", "r") as f:
+            template = f.read()
+            if not template:
+                raise ValueError("failed to read nginx.conf.template")
+
+            return template
+
+    def _start_tfs(self):
+        self._log_version("tensorflow_model_server --version", "tensorflow version info:")
+        cmd = tfs_utils.tfs_command(
+            self._tfs_grpc_port,
+            self._tfs_rest_port,
+            self._tfs_config_path,
+            self._tfs_enable_batching,
+            self._tfs_batching_config_path,
+        )
+        log.info("tensorflow serving command: {}".format(cmd))
+        p = subprocess.Popen(cmd.split())
+        log.info("started tensorflow serving (pid: %d)", p.pid)
+        self._tfs = p
+
+    def _start_gunicorn(self):
+        self._log_version("gunicorn --version", "gunicorn version info:")
+        env = os.environ.copy()
+        env["TFS_DEFAULT_MODEL_NAME"] = self._tfs_default_model_name
+        p = subprocess.Popen(self._gunicorn_command.split(), env=env)
+        log.info("started gunicorn (pid: %d)", p.pid)
+        self._gunicorn = p
+
+    def _start_nginx(self):
+        self._log_version("/usr/sbin/nginx -V", "nginx version info:")
+        p = subprocess.Popen("/usr/sbin/nginx -c /sagemaker/nginx.conf".split())
+        log.info("started nginx (pid: %d)", p.pid)
+        self._nginx = p
+
+    def _log_version(self, command, message):
+        try:
+            output = subprocess.check_output(
+                command.split(),
+                stderr=subprocess.STDOUT).decode("utf-8", "backslashreplace").strip()
+            log.info("{}\n{}".format(message, output))
+        except
subprocess.CalledProcessError: + log.warning("failed to run command: %s", command) + + def _stop(self, *args): # pylint: disable=W0613 + self._state = "stopping" + log.info("stopping services") + try: + os.kill(self._nginx.pid, signal.SIGQUIT) + except OSError: + pass + try: + if self._gunicorn: + os.kill(self._gunicorn.pid, signal.SIGTERM) + except OSError: + pass + try: + os.kill(self._tfs.pid, signal.SIGTERM) + except OSError: + pass + + self._state = "stopped" + log.info("stopped") + + def _wait_for_gunicorn(self): + while True: + if os.path.exists("/tmp/gunicorn.sock"): + log.info("gunicorn server is ready!") + return + + @contextmanager + def _timeout(self, seconds): + def _raise_timeout_error(signum, frame): + raise TimeoutError("time out after {} seconds".format(seconds)) + + try: + signal.signal(signal.SIGALRM, _raise_timeout_error) + signal.alarm(seconds) + yield + finally: + signal.alarm(0) + + def start(self): + log.info("starting services") + self._state = "starting" + signal.signal(signal.SIGTERM, self._stop) + + self._create_nginx_config() + + if self._tfs_enable_batching: + log.info("batching is enabled") + tfs_utils.create_batching_config(self._tfs_batching_config_path) + + if self._tfs_enable_multi_model_endpoint: + log.info("multi-model endpoint is enabled, TFS model servers will be started later") + else: + tfs_utils.create_tfs_config( + self._tfs_default_model_name, + self._tfs_config_path + ) + self._create_tfs_config() + self._start_tfs() + + if self._use_gunicorn: + self._setup_gunicorn() + self._start_gunicorn() + # make sure gunicorn is up + with self._timeout(seconds=30): + self._wait_for_gunicorn() + + self._start_nginx() + self._state = "started" + + while True: + pid, status = os.wait() + + if self._state != "started": + break + + if pid == self._nginx.pid: + log.warning("unexpected nginx exit (status: {}). restarting.".format(status)) + self._start_nginx() + + elif pid == self._tfs.pid: + log.warning( + "unexpected tensorflow serving exit (status: {}). restarting.".format(status)) + self._start_tfs() + + elif self._gunicorn and pid == self._gunicorn.pid: + log.warning("unexpected gunicorn exit (status: {}). restarting." + .format(status)) + self._start_gunicorn() + + self._stop() + + +if __name__ == "__main__": + ServiceManager().start() diff --git a/docker/2.1/sagemaker/tensorflow-serving.js b/docker/2.1/sagemaker/tensorflow-serving.js new file mode 100644 index 00000000..fdce4472 --- /dev/null +++ b/docker/2.1/sagemaker/tensorflow-serving.js @@ -0,0 +1,231 @@ +var tfs_base_uri = "/tfs/v1/models/" +var custom_attributes_header = "X-Amzn-SageMaker-Custom-Attributes" + +function invocations(r) { + var ct = r.headersIn["Content-Type"] + + if ("application/json" == ct || "application/jsonlines" == ct || "application/jsons" == ct) { + json_request(r) + } else if ("text/csv" == ct) { + csv_request(r) + } else { + return_error(r, 415, "Unsupported Media Type: " + (ct || "Unknown")) + } +} + +function ping(r) { + var uri = make_tfs_uri(r, false) + + function callback (reply) { + if (reply.status == 200 && reply.responseBody.includes('"AVAILABLE"')) { + r.return(200) + } else { + r.error("failed ping" + reply.responseBody) + r.return(502) + } + } + + r.subrequest(uri, callback) +} + +function ping_without_model(r) { + // hack for TF 1.11 and MME + // for TF 1.11, send an arbitrary fixed request to the default model. 
+ // if response is 400, the model is ok (but input was bad), so return 200 + // for MME, the default model name is None and does not exist + // also return 200 in unlikely case our request was really valid + + var uri = make_tfs_uri(r, true) + var options = { + method: "POST", + body: "{'instances': 'invalid'}" + } + + function callback (reply) { + if (reply.status == 200 || reply.status == 400 || + reply.responseBody.includes("Servable not found for request: Latest(None)")) { + r.return(200) + } else { + r.error("failed ping" + reply.responseBody) + r.return(502) + } + } + + r.subrequest(uri, options, callback) +} + +function return_error(r, code, message) { + if (message) { + r.return(code, "{'error': " + message + "'}'") + } else { + r.return(code) + } +} + +function tfs_json_request(r, json) { + var uri = make_tfs_uri(r, true) + var options = { + method: "POST", + body: json + } + + var accept = r.headersIn.Accept + function callback (reply) { + var body = reply.responseBody + if (reply.status == 400) { + // "fix" broken json escaping in \'instances\' message + body = body.replace("\\'instances\\'", "'instances'") + } + + if ("application/jsonlines" == accept || "application/jsons" == accept) { + body = body.replace(/\n/g, "") + r.headersOut["Content-Type"] = accept + } + r.return(reply.status, body) + } + + r.subrequest(uri, options, callback) + +} + +function make_tfs_uri(r, with_method) { + var attributes = parse_custom_attributes(r) + + var uri = tfs_base_uri + attributes["tfs-model-name"] + if ("tfs-model-version" in attributes) { + uri += "/versions/" + attributes["tfs-model-version"] + } + + if (with_method) { + uri += ":" + (attributes["tfs-method"] || "predict") + } + + return uri +} + +function parse_custom_attributes(r) { + var attributes = {} + var kv_pattern = /tfs-[a-z\-]+=[^,]+/g + var header = r.headersIn[custom_attributes_header] + if (header) { + var matches = header.match(kv_pattern) + if (matches) { + for (var i = 0; i < matches.length; i++) { + var kv = matches[i].split("=") + if (kv.length === 2) { + attributes[kv[0]] = kv[1] + } + } + } + } + + // for MME invocations, tfs-model-name is in the uri, or use default_tfs_model + if (!attributes["tfs-model-name"]) { + var uri_pattern = /\/models\/[^,]+\/invoke/g + var model_name = r.uri.match(uri_pattern) + if (model_name[0]) { + model_name = r.uri.replace("/models/", "").replace("/invoke", "") + attributes["tfs-model-name"] = model_name + } else { + attributes["tfs-model-name"] = r.variables.default_tfs_model + } + } + + return attributes +} + +function json_request(r) { + var data = r.requestBody + + if (is_json_lines(data)) { + json_lines_request(r, data) + } else if (is_tfs_json(data)) { + tfs_json_request(r, data) + } else { + generic_json_request(r, data) + } +} + +function is_tfs_json(data) { + return /"(instances|inputs|examples)"\s*:/.test(data) +} + +function is_json_lines(data) { + // objects separated only by (optional) whitespace means jsons/json-lines + return /[}\]]\s*[\[{]/.test(data) +} + +function generic_json_request(r, data) { + if (! /^\s*\[\s*\[/.test(data)) { + data = "[" + data + "]" + } + + var json = "{'instances':" + data + "}" + tfs_json_request(r, json) +} + +function json_lines_request(r, data) { + var lines = data.trim().split(/\r?\n/) + var builder = [] + builder.push("{'instances':") + if (lines.length != 1) { + builder.push("[") + } + + for (var i = 0; i < lines.length; i++) { + var line = lines[i].trim() + if (line) { + var instance = (i == 0) ? 
"" : "," + instance += line + builder.push(instance) + } + } + + builder.push(lines.length == 1 ? "}" : "]}") + tfs_json_request(r, builder.join('')) +} + +function csv_request(r) { + var data = r.requestBody + // look for initial quote or numeric-only data in 1st field + var needs_quotes = data.search(/^\s*("|[\d.Ee+\-]+.*)/) != 0 + var lines = data.trim().split(/\r?\n/) + var builder = [] + builder.push("{'nstances':[") + + for (var i = 0; i < lines.length; i++) { + var line = lines[i].trim() + if (line) { + var line_builder = [] + // Only wrap line in brackets if there are multiple columns. + // If there's only one column and it has a string with a comma, + // the input will be wrapped in an extra set of brackets. + var has_multiple_columns = line.search(",") != -1 + + if (has_multiple_columns) { + line_builder.push("[") + } + + if (needs_quotes) { + line_builder.push("'") + line_builder.push(line.replace("'", "\\'").replace(",", "','")) + line_builder.push("'") + } else { + line_builder.push(line) + } + + if (has_multiple_columns) { + line_builder.push("]") + } + + var json_line = line_builder.join("") + builder.push(json_line) + + if (i != lines.length - 1) + builder.push(",") + } + } + + builder.push("]}") + tfs_json_request(r, builder.join("")) +} diff --git a/docker/2.1/sagemaker/tfs_utils.py b/docker/2.1/sagemaker/tfs_utils.py new file mode 100644 index 00000000..f3ca0cb7 --- /dev/null +++ b/docker/2.1/sagemaker/tfs_utils.py @@ -0,0 +1,209 @@ +# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+ +import logging +import multiprocessing +import os +import re + +from collections import namedtuple + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + +DEFAULT_CONTENT_TYPE = "application/json" +DEFAULT_ACCEPT_HEADER = "application/json" +CUSTOM_ATTRIBUTES_HEADER = "X-Amzn-SageMaker-Custom-Attributes" + +Context = namedtuple("Context", + "model_name, model_version, method, rest_uri, grpc_port, " + "custom_attributes, request_content_type, accept_header, content_length") + + +def parse_request(req, rest_port, grpc_port, default_model_name, model_name=None): + tfs_attributes = parse_tfs_custom_attributes(req) + tfs_uri = make_tfs_uri(rest_port, tfs_attributes, default_model_name, model_name) + + if not model_name: + model_name = tfs_attributes.get("tfs-model-name") + + context = Context(model_name, + tfs_attributes.get("tfs-model-version"), + tfs_attributes.get("tfs-method"), + tfs_uri, + grpc_port, + req.get_header(CUSTOM_ATTRIBUTES_HEADER), + req.get_header("Content-Type") or DEFAULT_CONTENT_TYPE, + req.get_header("Accept") or DEFAULT_ACCEPT_HEADER, + req.content_length) + + data = req.stream + return data, context + + +def make_tfs_uri(port, attributes, default_model_name, model_name=None): + log.info("sagemaker tfs attributes: \n{}".format(attributes)) + + tfs_model_name = model_name or attributes.get("tfs-model-name", default_model_name) + tfs_model_version = attributes.get("tfs-model-version") + tfs_method = attributes.get("tfs-method", "predict") + + uri = "http://localhost:{}/v1/models/{}".format(port, tfs_model_name) + if tfs_model_version: + uri += "/versions/" + tfs_model_version + uri += ":" + tfs_method + return uri + + +def parse_tfs_custom_attributes(req): + attributes = {} + header = req.get_header(CUSTOM_ATTRIBUTES_HEADER) + if header: + matches = re.findall(r"(tfs-[a-z\-]+=[^,]+)", header) + attributes = dict(attribute.split("=") for attribute in matches) + return attributes + + +def create_tfs_config_individual_model(model_name, base_path): + config = "model_config_list: {\n" + config += " config: {\n" + config += " name: '{}',\n".format(model_name) + config += " base_path: '{}',\n".format(base_path) + config += " model_platform: 'tensorflow'\n" + config += " }\n" + config += "}\n" + return config + + +def create_tfs_config( + tfs_default_model_name, + tfs_config_path, +): + models = find_models() + if not models: + raise ValueError("no SavedModel bundles found!") + + if tfs_default_model_name == "None": + default_model = os.path.basename(models[0]) + if default_model: + tfs_default_model_name = default_model + log.info("using default model name: {}".format(tfs_default_model_name)) + else: + log.info("no default model detected") + + # config (may) include duplicate 'config' keys, so we can't just dump a dict + config = "model_config_list: {\n" + for m in models: + config += " config: {\n" + config += " name: '{}',\n".format(os.path.basename(m)) + config += " base_path: '{}',\n".format(m) + config += " model_platform: 'tensorflow'\n" + config += " }\n" + config += "}\n" + + log.info("tensorflow serving model config: \n%s\n", config) + + with open(tfs_config_path, "w") as f: + f.write(config) + + +def tfs_command(tfs_grpc_port, + tfs_rest_port, + tfs_config_path, + tfs_enable_batching, + tfs_batching_config_file): + cmd = "tensorflow_model_server " \ + "--port={} " \ + "--rest_api_port={} " \ + "--model_config_file={} " \ + "--max_num_load_retries=0 {}" \ + .format(tfs_grpc_port, tfs_rest_port, tfs_config_path, + 
get_tfs_batching_args(tfs_enable_batching, tfs_batching_config_file)) + return cmd + + +def find_models(): + base_path = "/opt/ml/model" + models = [] + for f in _find_saved_model_files(base_path): + parts = f.split("/") + if len(parts) >= 6 and re.match(r"^\d+$", parts[-2]): + model_path = "/".join(parts[0:-2]) + if model_path not in models: + models.append(model_path) + return models + + +def _find_saved_model_files(path): + for e in os.scandir(path): + if e.is_dir(): + yield from _find_saved_model_files(os.path.join(path, e.name)) + else: + if e.name == "saved_model.pb": + yield os.path.join(path, e.name) + + +def get_tfs_batching_args(enable_batching, tfs_batching_config): + if enable_batching: + return "--enable_batching=true " \ + "--batching_parameters_file={}".format(tfs_batching_config) + else: + return "" + + +def create_batching_config(batching_config_file): + class _BatchingParameter: + def __init__(self, key, env_var, value, defaulted_message): + self.key = key + self.env_var = env_var + self.value = value + self.defaulted_message = defaulted_message + + cpu_count = multiprocessing.cpu_count() + batching_parameters = [ + _BatchingParameter("max_batch_size", "SAGEMAKER_TFS_MAX_BATCH_SIZE", 8, + "max_batch_size defaulted to {}. Set {} to override default. " + "Tuning this parameter may yield better performance."), + _BatchingParameter("batch_timeout_micros", "SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS", 1000, + "batch_timeout_micros defaulted to {}. Set {} to override " + "default. Tuning this parameter may yield better performance."), + _BatchingParameter("num_batch_threads", "SAGEMAKER_TFS_NUM_BATCH_THREADS", + cpu_count, "num_batch_threads defaulted to {}," + "the number of CPUs. Set {} to override default."), + _BatchingParameter("max_enqueued_batches", "SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES", + # Batch limits number of concurrent requests, which limits number + # of enqueued batches, so this can be set high for Batch + 100000000 if "SAGEMAKER_BATCH" in os.environ else cpu_count, + "max_enqueued_batches defaulted to {}. Set {} to override default. 
" + "Tuning this parameter may be necessary to tune out-of-memory " + "errors occur."), + ] + + warning_message = "" + for batching_parameter in batching_parameters: + if batching_parameter.env_var in os.environ: + batching_parameter.value = os.environ[batching_parameter.env_var] + else: + warning_message += batching_parameter.defaulted_message.format( + batching_parameter.value, batching_parameter.env_var) + warning_message += "\n" + if warning_message: + log.warning(warning_message) + + config = "" + for batching_parameter in batching_parameters: + config += "%s { value: %s }\n" % (batching_parameter.key, batching_parameter.value) + + log.info("batching config: \n%s\n", config) + with open(batching_config_file, "w") as f: + f.write(config) diff --git a/docker/build_artifacts/deep_learning_container.py b/docker/build_artifacts/deep_learning_container.py index b60ea9f2..1e82e61e 100644 --- a/docker/build_artifacts/deep_learning_container.py +++ b/docker/build_artifacts/deep_learning_container.py @@ -20,7 +20,7 @@ def _validate_instance_id(instance_id): """ Validate instance ID """ - instance_id_regex = r'^(i-\S{17})' + instance_id_regex = r"^(i-\S{17})" compiled_regex = re.compile(instance_id_regex) match = compiled_regex.match(instance_id) diff --git a/docker/build_artifacts/dockerd-entrypoint.py b/docker/build_artifacts/dockerd-entrypoint.py index 68e1e966..fc4ce388 100644 --- a/docker/build_artifacts/dockerd-entrypoint.py +++ b/docker/build_artifacts/dockerd-entrypoint.py @@ -17,6 +17,6 @@ import sys if not os.path.exists("/opt/ml/input/config"): - subprocess.call(['python', '/usr/local/bin/deep_learning_container.py', '&>/dev/null', '&']) + subprocess.call(["python", "/usr/local/bin/deep_learning_container.py", "&>/dev/null", "&"]) subprocess.check_call(shlex.split(' '.join(sys.argv[1:]))) diff --git a/docker/build_artifacts/sagemaker/multi_model_utils.py b/docker/build_artifacts/sagemaker/multi_model_utils.py index 6267a067..5d2c47f4 100644 --- a/docker/build_artifacts/sagemaker/multi_model_utils.py +++ b/docker/build_artifacts/sagemaker/multi_model_utils.py @@ -15,13 +15,13 @@ import time from contextlib import contextmanager -MODEL_CONFIG_FILE = '/sagemaker/model-config.cfg' -DEFAULT_LOCK_FILE = '/sagemaker/lock-file.lock' +MODEL_CONFIG_FILE = "/sagemaker/model-config.cfg" +DEFAULT_LOCK_FILE = "/sagemaker/lock-file.lock" @contextmanager def lock(path=DEFAULT_LOCK_FILE): - f = open(path, 'w') + f = open(path, "w") fd = f.fileno() fcntl.lockf(fd, fcntl.LOCK_EX) @@ -35,7 +35,7 @@ def lock(path=DEFAULT_LOCK_FILE): @contextmanager def timeout(seconds=60): def _raise_timeout_error(signum, frame): - raise Exception(408, 'Timed out after {} seconds'.format(seconds)) + raise Exception(408, "Timed out after {} seconds".format(seconds)) try: signal.signal(signal.SIGALRM, _raise_timeout_error) diff --git a/docker/build_artifacts/sagemaker/python_service.py b/docker/build_artifacts/sagemaker/python_service.py index 2bb81bc0..0014b6dd 100644 --- a/docker/build_artifacts/sagemaker/python_service.py +++ b/docker/build_artifacts/sagemaker/python_service.py @@ -24,22 +24,22 @@ from multi_model_utils import lock, timeout, MultiModelException import tfs_utils -SAGEMAKER_MULTI_MODEL_ENABLED = os.environ.get('SAGEMAKER_MULTI_MODEL', 'false').lower() == 'true' -INFERENCE_SCRIPT_PATH = '/opt/ml/{}/code/inference.py'.format('models' +SAGEMAKER_MULTI_MODEL_ENABLED = os.environ.get("SAGEMAKER_MULTI_MODEL", "false").lower() == "true" +INFERENCE_SCRIPT_PATH = "/opt/ml/{}/code/inference.py".format("models" if 
SAGEMAKER_MULTI_MODEL_ENABLED - else 'model') + else "model") PYTHON_PROCESSING_ENABLED = os.path.exists(INFERENCE_SCRIPT_PATH) -SAGEMAKER_BATCHING_ENABLED = os.environ.get('SAGEMAKER_TFS_ENABLE_BATCHING', 'false').lower() -MODEL_CONFIG_FILE_PATH = '/sagemaker/model-config.cfg' -TFS_GRPC_PORT = os.environ.get('TFS_GRPC_PORT') -TFS_REST_PORT = os.environ.get('TFS_REST_PORT') -SAGEMAKER_TFS_PORT_RANGE = os.environ.get('SAGEMAKER_SAFE_PORT_RANGE') +SAGEMAKER_BATCHING_ENABLED = os.environ.get("SAGEMAKER_TFS_ENABLE_BATCHING", "false").lower() +MODEL_CONFIG_FILE_PATH = "/sagemaker/model-config.cfg" +TFS_GRPC_PORT = os.environ.get("TFS_GRPC_PORT") +TFS_REST_PORT = os.environ.get("TFS_REST_PORT") +SAGEMAKER_TFS_PORT_RANGE = os.environ.get("SAGEMAKER_SAFE_PORT_RANGE") logging.basicConfig(level=logging.INFO) log = logging.getLogger(__name__) -CUSTOM_ATTRIBUTES_HEADER = 'X-Amzn-SageMaker-Custom-Attributes' +CUSTOM_ATTRIBUTES_HEADER = "X-Amzn-SageMaker-Custom-Attributes" def default_handler(data, context): @@ -66,8 +66,8 @@ def __init__(self): self._tfs_grpc_port = TFS_GRPC_PORT self._tfs_rest_port = TFS_REST_PORT - self._tfs_enable_batching = SAGEMAKER_BATCHING_ENABLED == 'true' - self._tfs_default_model_name = os.environ.get('TFS_DEFAULT_MODEL_NAME', "None") + self._tfs_enable_batching = SAGEMAKER_BATCHING_ENABLED == "true" + self._tfs_default_model_name = os.environ.get("TFS_DEFAULT_MODEL_NAME", "None") if PYTHON_PROCESSING_ENABLED: self._handler, self._input_handler, self._output_handler = self._import_handlers() @@ -82,7 +82,7 @@ def on_post(self, req, res, model_name=None): if model_name or "invocations" in req.uri: self._handle_invocation_post(req, res, model_name) else: - data = json.loads(req.stream.read().decode('utf-8')) + data = json.loads(req.stream.read().decode("utf-8")) self._handle_load_model_post(res, data) def _parse_sagemaker_port_range(self, port_range): @@ -92,49 +92,49 @@ def _parse_sagemaker_port_range(self, port_range): rest_port = lower grpc_port = (lower + upper) // 2 tfs_ports = { - 'rest_port': [port for port in range(rest_port, grpc_port)], - 'grpc_port': [port for port in range(grpc_port, upper)], + "rest_port": [port for port in range(rest_port, grpc_port)], + "grpc_port": [port for port in range(grpc_port, upper)], } return tfs_ports def _ports_available(self): with lock(): - rest_ports = self._tfs_ports['rest_port'] - grpc_ports = self._tfs_ports['grpc_port'] + rest_ports = self._tfs_ports["rest_port"] + grpc_ports = self._tfs_ports["grpc_port"] return len(rest_ports) > 0 and len(grpc_ports) > 0 def _handle_load_model_post(self, res, data): # noqa: C901 - model_name = data['model_name'] - base_path = data['url'] + model_name = data["model_name"] + base_path = data["url"] # model is already loaded if model_name in self._model_tfs_pid: res.status = falcon.HTTP_409 res.body = json.dumps({ - 'error': 'Model {} is already loaded.'.format(model_name) + "error": "Model {} is already loaded.".format(model_name) }) # check if there are available ports if not self._ports_available(): res.status = falcon.HTTP_507 res.body = json.dumps({ - 'error': 'Memory exhausted: no available ports to load the model.' + "error": "Memory exhausted: no available ports to load the model." 
}) with lock(): - self._model_tfs_rest_port[model_name] = self._tfs_ports['rest_port'].pop() - self._model_tfs_grpc_port[model_name] = self._tfs_ports['grpc_port'].pop() + self._model_tfs_rest_port[model_name] = self._tfs_ports["rest_port"].pop() + self._model_tfs_grpc_port[model_name] = self._tfs_ports["grpc_port"].pop() # validate model files are in the specified base_path if self.validate_model_dir(base_path): try: tfs_config = tfs_utils.create_tfs_config_individual_model(model_name, base_path) - tfs_config_file = '/sagemaker/tfs-config/{}/model-config.cfg'.format(model_name) - log.info('tensorflow serving model config: \n%s\n', tfs_config) + tfs_config_file = "/sagemaker/tfs-config/{}/model-config.cfg".format(model_name) + log.info("tensorflow serving model config: \n%s\n", tfs_config) os.makedirs(os.path.dirname(tfs_config_file)) - with open(tfs_config_file, 'w') as f: + with open(tfs_config_file, "w") as f: f.write(tfs_config) - batching_config_file = '/sagemaker/batching/{}/batching-config.cfg'.format( + batching_config_file = "/sagemaker/batching/{}/batching-config.cfg".format( model_name) if self._tfs_enable_batching: tfs_utils.create_batching_config(batching_config_file) @@ -149,16 +149,16 @@ def _handle_load_model_post(self, res, data): # noqa: C901 p = subprocess.Popen(cmd.split()) self._wait_for_model(model_name) - log.info('started tensorflow serving (pid: %d)', p.pid) + log.info("started tensorflow serving (pid: %d)", p.pid) # update model name <-> tfs pid map self._model_tfs_pid[model_name] = p res.status = falcon.HTTP_200 res.body = json.dumps({ - 'success': - 'Successfully loaded model {}, ' - 'listening on rest port {} ' - 'and grpc port {}.'.format(model_name, + "success": + "Successfully loaded model {}, " + "listening on rest port {} " + "and grpc port {}.".format(model_name, self._model_tfs_rest_port, self._model_tfs_grpc_port,) }) @@ -176,22 +176,22 @@ def _handle_load_model_post(self, res, data): # noqa: C901 except FileExistsError as e: res.status = falcon.HTTP_409 res.body = json.dumps({ - 'error': 'Model {} is already loaded. {}'.format(model_name, str(e)) + "error": "Model {} is already loaded. 
{}".format(model_name, str(e)) }) except OSError as os_error: self._cleanup_config_file(tfs_config_file) self._cleanup_config_file(batching_config_file) if os_error.errno == 12: raise MultiModelException(falcon.HTTP_507, - 'Memory exhausted: ' - 'not enough memory to start TFS instance') + "Memory exhausted: " + "not enough memory to start TFS instance") else: raise MultiModelException(falcon.HTTP_500, os_error.strerror) else: res.status = falcon.HTTP_404 res.body = json.dumps({ - 'error': - 'Could not find valid base path {} for servable {}'.format(base_path, + "error": + "Could not find valid base path {} for servable {}".format(base_path, model_name) }) @@ -208,7 +208,7 @@ def _wait_for_model(self, model_name): try: response = requests.get(url) if response.status_code == 200: - versions = json.loads(response.content)['model_version_status'] + versions = json.loads(response.content)["model_version_status"] if all(version["state"] == "AVAILABLE" for version in versions): break except ConnectionError: @@ -220,7 +220,7 @@ def _handle_invocation_post(self, req, res, model_name=None): if model_name not in self._model_tfs_rest_port: res.status = falcon.HTTP_404 res.body = json.dumps({ - 'error': "Model {} is not loaded yet.".format(model_name) + "error": "Model {} is not loaded yet.".format(model_name) }) return else: @@ -235,7 +235,7 @@ def _handle_invocation_post(self, req, res, model_name=None): else: res.status = falcon.HTTP_400 res.body = json.dumps({ - 'error': 'Invocation request does not contain model name.' + "error": "Invocation request does not contain model name." }) else: data, context = tfs_utils.parse_request(req, self._tfs_rest_port, self._tfs_grpc_port, @@ -245,25 +245,25 @@ def _handle_invocation_post(self, req, res, model_name=None): res.status = falcon.HTTP_200 res.body, res.content_type = self._handlers(data, context) except Exception as e: # pylint: disable=broad-except - log.exception('exception handling request: {}'.format(e)) + log.exception("exception handling request: {}".format(e)) res.status = falcon.HTTP_500 res.body = json.dumps({ - 'error': str(e) - }).encode('utf-8') # pylint: disable=E1101 + "error": str(e) + }).encode("utf-8") # pylint: disable=E1101 def _import_handlers(self): - spec = importlib.util.spec_from_file_location('inference', INFERENCE_SCRIPT_PATH) + spec = importlib.util.spec_from_file_location("inference", INFERENCE_SCRIPT_PATH) inference = importlib.util.module_from_spec(spec) spec.loader.exec_module(inference) _custom_handler, _custom_input_handler, _custom_output_handler = None, None, None - if hasattr(inference, 'handler'): + if hasattr(inference, "handler"): _custom_handler = inference.handler - elif hasattr(inference, 'input_handler') and hasattr(inference, 'output_handler'): + elif hasattr(inference, "input_handler") and hasattr(inference, "output_handler"): _custom_input_handler = inference.input_handler _custom_output_handler = inference.output_handler else: - raise NotImplementedError('Handlers are not implemented correctly in user script.') + raise NotImplementedError("Handlers are not implemented correctly in user script.") return _custom_handler, _custom_input_handler, _custom_output_handler @@ -281,69 +281,69 @@ def handler(data, context): def on_get(self, req, res, model_name=None): # pylint: disable=W0613 if model_name is None: models_info = {} - uri = 'http://localhost:{}/v1/models/{}' + uri = "http://localhost:{}/v1/models/{}" for model, port in self._model_tfs_rest_port.items(): try: info = 
json.loads(requests.get(uri.format(port, model)).content) models_info[model] = info except ValueError as e: - log.exception('exception handling request: {}'.format(e)) + log.exception("exception handling request: {}".format(e)) res.status = falcon.HTTP_500 res.body = json.dumps({ - 'error': str(e) - }).encode('utf-8') + "error": str(e) + }).encode("utf-8") res.status = falcon.HTTP_200 res.body = json.dumps(models_info) else: if model_name not in self._model_tfs_rest_port: res.status = falcon.HTTP_404 res.body = json.dumps({ - 'error': 'Model {} is loaded yet.'.format(model_name) - }).encode('utf-8') + "error": "Model {} is loaded yet.".format(model_name) + }).encode("utf-8") else: port = self._model_tfs_rest_port[model_name] - uri = 'http://localhost:{}/v1/models/{}'.format(port, model_name) + uri = "http://localhost:{}/v1/models/{}".format(port, model_name) try: info = requests.get(uri) res.status = falcon.HTTP_200 res.body = json.dumps({ - 'model': info - }).encode('utf-8') + "model": info + }).encode("utf-8") except ValueError as e: - log.exception('exception handling GET models request.') + log.exception("exception handling GET models request.") res.status = falcon.HTTP_500 res.body = json.dumps({ - 'error': str(e) - }).encode('utf-8') + "error": str(e) + }).encode("utf-8") def on_delete(self, req, res, model_name): # pylint: disable=W0613 if model_name not in self._model_tfs_pid: res.status = falcon.HTTP_404 res.body = json.dumps({ - 'error': 'Model {} is not loaded yet'.format(model_name) + "error": "Model {} is not loaded yet".format(model_name) }) else: try: self._model_tfs_pid[model_name].kill() - os.remove('/sagemaker/tfs-config/{}/model-config.cfg'.format(model_name)) - os.rmdir('/sagemaker/tfs-config/{}'.format(model_name)) + os.remove("/sagemaker/tfs-config/{}/model-config.cfg".format(model_name)) + os.rmdir("/sagemaker/tfs-config/{}".format(model_name)) release_rest_port = self._model_tfs_rest_port[model_name] release_grpc_port = self._model_tfs_grpc_port[model_name] with lock(): - bisect.insort(self._tfs_ports['rest_port'], release_rest_port) - bisect.insort(self._tfs_ports['grpc_port'], release_grpc_port) + bisect.insort(self._tfs_ports["rest_port"], release_rest_port) + bisect.insort(self._tfs_ports["grpc_port"], release_grpc_port) del self._model_tfs_rest_port[model_name] del self._model_tfs_grpc_port[model_name] del self._model_tfs_pid[model_name] res.status = falcon.HTTP_200 res.body = json.dumps({ - 'success': 'Successfully unloaded model {}.'.format(model_name) + "success": "Successfully unloaded model {}.".format(model_name) }) except OSError as error: res.status = falcon.HTTP_500 res.body = json.dumps({ - 'error': str(error) - }).encode('utf-8') + "error": str(error) + }).encode("utf-8") def validate_model_dir(self, model_path): # model base path doesn't exits @@ -383,13 +383,13 @@ def __init__(self): self._ping_resource = PingResource() def add_routes(self, application): - application.add_route('/ping', self._ping_resource) - application.add_route('/invocations', self._python_service_resource) + application.add_route("/ping", self._ping_resource) + application.add_route("/invocations", self._python_service_resource) if self._enable_model_manager: - application.add_route('/models', self._python_service_resource) - application.add_route('/models/{model_name}', self._python_service_resource) - application.add_route('/models/{model_name}/invoke', self._python_service_resource) + application.add_route("/models", self._python_service_resource) + 
application.add_route("/models/{model_name}", self._python_service_resource) + application.add_route("/models/{model_name}/invoke", self._python_service_resource) app = falcon.API() diff --git a/docker/build_artifacts/sagemaker/serve.py b/docker/build_artifacts/sagemaker/serve.py index ee084d51..7a539fe6 100644 --- a/docker/build_artifacts/sagemaker/serve.py +++ b/docker/build_artifacts/sagemaker/serve.py @@ -23,160 +23,160 @@ logging.basicConfig(level=logging.INFO) log = logging.getLogger(__name__) -JS_PING = 'js_content ping' -JS_INVOCATIONS = 'js_content invocations' -GUNICORN_PING = 'proxy_pass http://gunicorn_upstream/ping' -GUNICORN_INVOCATIONS = 'proxy_pass http://gunicorn_upstream/invocations' +JS_PING = "js_content ping" +JS_INVOCATIONS = "js_content invocations" +GUNICORN_PING = "proxy_pass http://gunicorn_upstream/ping" +GUNICORN_INVOCATIONS = "proxy_pass http://gunicorn_upstream/invocations" -PYTHON_LIB_PATH = '/opt/ml/model/code/lib' -REQUIREMENTS_PATH = '/opt/ml/model/code/requirements.txt' -INFERENCE_PATH = '/opt/ml/model/code/inference.py' +PYTHON_LIB_PATH = "/opt/ml/model/code/lib" +REQUIREMENTS_PATH = "/opt/ml/model/code/requirements.txt" +INFERENCE_PATH = "/opt/ml/model/code/inference.py" class ServiceManager(object): def __init__(self): - self._state = 'initializing' + self._state = "initializing" self._nginx = None self._tfs = None self._gunicorn = None self._gunicorn_command = None self._enable_python_service = os.path.exists(INFERENCE_PATH) - self._tfs_version = os.environ.get('SAGEMAKER_TFS_VERSION', '1.13') - self._nginx_http_port = os.environ.get('SAGEMAKER_BIND_TO_PORT', '8080') - self._nginx_loglevel = os.environ.get('SAGEMAKER_TFS_NGINX_LOGLEVEL', 'error') - self._tfs_default_model_name = os.environ.get('SAGEMAKER_TFS_DEFAULT_MODEL_NAME', 'None') - self._sagemaker_port_range = os.environ.get('SAGEMAKER_SAFE_PORT_RANGE', None) - self._tfs_config_path = '/sagemaker/model-config.cfg' - self._tfs_batching_config_path = '/sagemaker/batching-config.cfg' - - _enable_batching = os.environ.get('SAGEMAKER_TFS_ENABLE_BATCHING', 'false').lower() - _enable_multi_model_endpoint = os.environ.get('SAGEMAKER_MULTI_MODEL', - 'false').lower() - - if _enable_batching not in ['true', 'false']: - raise ValueError('SAGEMAKER_TFS_ENABLE_BATCHING must be "true" or "false"') - self._tfs_enable_batching = _enable_batching == 'true' - - if _enable_multi_model_endpoint not in ['true', 'false']: - raise ValueError('SAGEMAKER_MULTI_MODEL must be "true" or "false"') - self._tfs_enable_multi_model_endpoint = _enable_multi_model_endpoint == 'true' + self._tfs_version = os.environ.get("SAGEMAKER_TFS_VERSION", "1.13") + self._nginx_http_port = os.environ.get("SAGEMAKER_BIND_TO_PORT", "8080") + self._nginx_loglevel = os.environ.get("SAGEMAKER_TFS_NGINX_LOGLEVEL", "error") + self._tfs_default_model_name = os.environ.get("SAGEMAKER_TFS_DEFAULT_MODEL_NAME", "None") + self._sagemaker_port_range = os.environ.get("SAGEMAKER_SAFE_PORT_RANGE", None) + self._tfs_config_path = "/sagemaker/model-config.cfg" + self._tfs_batching_config_path = "/sagemaker/batching-config.cfg" + + _enable_batching = os.environ.get("SAGEMAKER_TFS_ENABLE_BATCHING", "false").lower() + _enable_multi_model_endpoint = os.environ.get("SAGEMAKER_MULTI_MODEL", + "false").lower() + + if _enable_batching not in ["true", "false"]: + raise ValueError("SAGEMAKER_TFS_ENABLE_BATCHING must be 'true' or 'false'") + self._tfs_enable_batching = _enable_batching == "true" + + if _enable_multi_model_endpoint not in ["true", "false"]: + raise 
ValueError("SAGEMAKER_MULTI_MODEL must be 'true' or 'false'") + self._tfs_enable_multi_model_endpoint = _enable_multi_model_endpoint == "true" self._use_gunicorn = self._enable_python_service or self._tfs_enable_multi_model_endpoint if self._sagemaker_port_range is not None: - parts = self._sagemaker_port_range.split('-') + parts = self._sagemaker_port_range.split("-") low = int(parts[0]) hi = int(parts[1]) if low + 2 > hi: - raise ValueError('not enough ports available in SAGEMAKER_SAFE_PORT_RANGE ({})' + raise ValueError("not enough ports available in SAGEMAKER_SAFE_PORT_RANGE ({})" .format(self._sagemaker_port_range)) self._tfs_grpc_port = str(low) self._tfs_rest_port = str(low + 1) else: # just use the standard default ports - self._tfs_grpc_port = '9000' - self._tfs_rest_port = '8501' + self._tfs_grpc_port = "9000" + self._tfs_rest_port = "8501" # set environment variable for python service - os.environ['TFS_GRPC_PORT'] = self._tfs_grpc_port - os.environ['TFS_REST_PORT'] = self._tfs_rest_port + os.environ["TFS_GRPC_PORT"] = self._tfs_grpc_port + os.environ["TFS_REST_PORT"] = self._tfs_rest_port def _create_tfs_config(self): models = tfs_utils.find_models() if not models: - raise ValueError('no SavedModel bundles found!') + raise ValueError("no SavedModel bundles found!") - if self._tfs_default_model_name == 'None': + if self._tfs_default_model_name == "None": default_model = os.path.basename(models[0]) if default_model: self._tfs_default_model_name = default_model - log.info('using default model name: {}'.format(self._tfs_default_model_name)) + log.info("using default model name: {}".format(self._tfs_default_model_name)) else: - log.info('no default model detected') + log.info("no default model detected") # config (may) include duplicate 'config' keys, so we can't just dump a dict - config = 'model_config_list: {\n' + config = "model_config_list: {\n" for m in models: - config += ' config: {\n' - config += ' name: "{}",\n'.format(os.path.basename(m)) - config += ' base_path: "{}",\n'.format(m) - config += ' model_platform: "tensorflow"\n' - config += ' }\n' - config += '}\n' + config += " config: {\n" + config += " name: '{}',\n".format(os.path.basename(m)) + config += " base_path: '{}',\n".format(m) + config += " model_platform: 'tensorflow'\n" + config += " }\n" + config += "}\n" - log.info('tensorflow serving model config: \n%s\n', config) + log.info("tensorflow serving model config: \n%s\n", config) - with open('/sagemaker/model-config.cfg', 'w') as f: + with open("/sagemaker/model-config.cfg", "w") as f: f.write(config) def _setup_gunicorn(self): python_path_content = [] - python_path_option = '' + python_path_option = "" if self._enable_python_service: lib_path_exists = os.path.exists(PYTHON_LIB_PATH) requirements_exists = os.path.exists(REQUIREMENTS_PATH) - python_path_content = ['/opt/ml/model/code'] - python_path_option = '--pythonpath ' + python_path_content = ["/opt/ml/model/code"] + python_path_option = "--pythonpath " if lib_path_exists: python_path_content.append(PYTHON_LIB_PATH) if requirements_exists: if lib_path_exists: - log.warning('loading modules in "{}", ignoring requirements.txt' + log.warning("loading modules in '{}', ignoring requirements.txt" .format(PYTHON_LIB_PATH)) else: - log.info('installing packages from requirements.txt...') - pip_install_cmd = 'pip3 install -r {}'.format(REQUIREMENTS_PATH) + log.info("installing packages from requirements.txt...") + pip_install_cmd = "pip3 install -r {}".format(REQUIREMENTS_PATH) try: 
subprocess.check_call(pip_install_cmd.split()) except subprocess.CalledProcessError: - log.error('failed to install required packages, exiting.') + log.error("failed to install required packages, exiting.") self._stop() - raise ChildProcessError('failed to install required packages.') + raise ChildProcessError("failed to install required packages.") gunicorn_command = ( - 'gunicorn -b unix:/tmp/gunicorn.sock -k gevent --chdir /sagemaker ' - '{}{} -e TFS_GRPC_PORT={} -e SAGEMAKER_MULTI_MODEL={} -e SAGEMAKER_SAFE_PORT_RANGE={} ' - 'python_service:app').format(python_path_option, ','.join(python_path_content), + "gunicorn -b unix:/tmp/gunicorn.sock -k gevent --chdir /sagemaker " + "{}{} -e TFS_GRPC_PORT={} -e SAGEMAKER_MULTI_MODEL={} -e SAGEMAKER_SAFE_PORT_RANGE={} " + "python_service:app").format(python_path_option, ",".join(python_path_content), self._tfs_grpc_port, self._tfs_enable_multi_model_endpoint, self._sagemaker_port_range) - log.info('gunicorn command: {}'.format(gunicorn_command)) + log.info("gunicorn command: {}".format(gunicorn_command)) self._gunicorn_command = gunicorn_command def _create_nginx_config(self): template = self._read_nginx_template() - pattern = re.compile(r'%(\w+)%') + pattern = re.compile(r"%(\w+)%") template_values = { - 'TFS_VERSION': self._tfs_version, - 'TFS_REST_PORT': self._tfs_rest_port, - 'TFS_DEFAULT_MODEL_NAME': self._tfs_default_model_name, - 'NGINX_HTTP_PORT': self._nginx_http_port, - 'NGINX_LOG_LEVEL': self._nginx_loglevel, - 'FORWARD_PING_REQUESTS': GUNICORN_PING if self._use_gunicorn else JS_PING, - 'FORWARD_INVOCATION_REQUESTS': GUNICORN_INVOCATIONS if self._use_gunicorn + "TFS_VERSION": self._tfs_version, + "TFS_REST_PORT": self._tfs_rest_port, + "TFS_DEFAULT_MODEL_NAME": self._tfs_default_model_name, + "NGINX_HTTP_PORT": self._nginx_http_port, + "NGINX_LOG_LEVEL": self._nginx_loglevel, + "FORWARD_PING_REQUESTS": GUNICORN_PING if self._use_gunicorn else JS_PING, + "FORWARD_INVOCATION_REQUESTS": GUNICORN_INVOCATIONS if self._use_gunicorn else JS_INVOCATIONS, } config = pattern.sub(lambda x: template_values[x.group(1)], template) - log.info('nginx config: \n%s\n', config) + log.info("nginx config: \n%s\n", config) - with open('/sagemaker/nginx.conf', 'w') as f: + with open("/sagemaker/nginx.conf", "w") as f: f.write(config) def _read_nginx_template(self): - with open('/sagemaker/nginx.conf.template', 'r') as f: + with open("/sagemaker/nginx.conf.template", "r") as f: template = f.read() if not template: - raise ValueError('failed to read nginx.conf.template') + raise ValueError("failed to read nginx.conf.template") return template def _start_tfs(self): - self._log_version('tensorflow_model_server --version', 'tensorflow version info:') + self._log_version("tensorflow_model_server --version", "tensorflow version info:") cmd = tfs_utils.tfs_command(
self._tfs_default_model_name p = subprocess.Popen(self._gunicorn_command.split(), env=env) - log.info('started gunicorn (pid: %d)', p.pid) + log.info("started gunicorn (pid: %d)", p.pid) self._gunicorn = p def _start_nginx(self): - self._log_version('/usr/sbin/nginx -V', 'nginx version info:') - p = subprocess.Popen('/usr/sbin/nginx -c /sagemaker/nginx.conf'.split()) - log.info('started nginx (pid: %d)', p.pid) + self._log_version("/usr/sbin/nginx -V", "nginx version info:") + p = subprocess.Popen("/usr/sbin/nginx -c /sagemaker/nginx.conf".split()) + log.info("started nginx (pid: %d)", p.pid) self._nginx = p def _log_version(self, command, message): try: output = subprocess.check_output( command.split(), - stderr=subprocess.STDOUT).decode('utf-8', 'backslashreplace').strip() - log.info('{}\n{}'.format(message, output)) + stderr=subprocess.STDOUT).decode("utf-8", "backslashreplace").strip() + log.info("{}\n{}".format(message, output)) except subprocess.CalledProcessError: - log.warning('failed to run command: %s', command) + log.warning("failed to run command: %s", command) def _stop(self, *args): # pylint: disable=W0613 - self._state = 'stopping' - log.info('stopping services') + self._state = "stopping" + log.info("stopping services") try: os.kill(self._nginx.pid, signal.SIGQUIT) except OSError: @@ -229,19 +229,19 @@ def _stop(self, *args): # pylint: disable=W0613 except OSError: pass - self._state = 'stopped' - log.info('stopped') + self._state = "stopped" + log.info("stopped") def _wait_for_gunicorn(self): while True: - if os.path.exists('/tmp/gunicorn.sock'): - log.info('gunicorn server is ready!') + if os.path.exists("/tmp/gunicorn.sock"): + log.info("gunicorn server is ready!") return @contextmanager def _timeout(self, seconds): def _raise_timeout_error(signum, frame): - raise TimeoutError('time out after {} seconds'.format(seconds)) + raise TimeoutError("time out after {} seconds".format(seconds)) try: signal.signal(signal.SIGALRM, _raise_timeout_error) @@ -251,18 +251,18 @@ def _raise_timeout_error(signum, frame): signal.alarm(0) def start(self): - log.info('starting services') - self._state = 'starting' + log.info("starting services") + self._state = "starting" signal.signal(signal.SIGTERM, self._stop) self._create_nginx_config() if self._tfs_enable_batching: - log.info('batching is enabled') + log.info("batching is enabled") tfs_utils.create_batching_config(self._tfs_batching_config_path) if self._tfs_enable_multi_model_endpoint: - log.info('multi-model endpoint is enabled, TFS model servers will be started later') + log.info("multi-model endpoint is enabled, TFS model servers will be started later") else: tfs_utils.create_tfs_config( self._tfs_default_model_name, @@ -279,30 +279,30 @@ def start(self): self._wait_for_gunicorn() self._start_nginx() - self._state = 'started' + self._state = "started" while True: pid, status = os.wait() - if self._state != 'started': + if self._state != "started": break if pid == self._nginx.pid: - log.warning('unexpected nginx exit (status: {}). restarting.'.format(status)) + log.warning("unexpected nginx exit (status: {}). restarting.".format(status)) self._start_nginx() elif pid == self._tfs.pid: log.warning( - 'unexpected tensorflow serving exit (status: {}). restarting.'.format(status)) + "unexpected tensorflow serving exit (status: {}). restarting.".format(status)) self._start_tfs() elif self._gunicorn and pid == self._gunicorn.pid: - log.warning('unexpected gunicorn exit (status: {}). restarting.' 
+ log.warning("unexpected gunicorn exit (status: {}). restarting." .format(status)) self._start_gunicorn() self._stop() -if __name__ == '__main__': +if __name__ == "__main__": ServiceManager().start() diff --git a/docker/build_artifacts/sagemaker/tensorflow-serving.js b/docker/build_artifacts/sagemaker/tensorflow-serving.js index 1c040b0a..fdce4472 100644 --- a/docker/build_artifacts/sagemaker/tensorflow-serving.js +++ b/docker/build_artifacts/sagemaker/tensorflow-serving.js @@ -1,15 +1,15 @@ -var tfs_base_uri = '/tfs/v1/models/' -var custom_attributes_header = 'X-Amzn-SageMaker-Custom-Attributes' +var tfs_base_uri = "/tfs/v1/models/" +var custom_attributes_header = "X-Amzn-SageMaker-Custom-Attributes" function invocations(r) { - var ct = r.headersIn['Content-Type'] + var ct = r.headersIn["Content-Type"] - if ('application/json' == ct || 'application/jsonlines' == ct || 'application/jsons' == ct) { + if ("application/json" == ct || "application/jsonlines" == ct || "application/jsons" == ct) { json_request(r) - } else if ('text/csv' == ct) { + } else if ("text/csv" == ct) { csv_request(r) } else { - return_error(r, 415, 'Unsupported Media Type: ' + (ct || 'Unknown')) + return_error(r, 415, "Unsupported Media Type: " + (ct || "Unknown")) } } @@ -20,7 +20,7 @@ function ping(r) { if (reply.status == 200 && reply.responseBody.includes('"AVAILABLE"')) { r.return(200) } else { - r.error('failed ping' + reply.responseBody) + r.error("failed ping" + reply.responseBody) r.return(502) } } @@ -37,16 +37,16 @@ function ping_without_model(r) { var uri = make_tfs_uri(r, true) var options = { - method: 'POST', - body: '{"instances": "invalid"}' + method: "POST", + body: "{'instances': 'invalid'}" } function callback (reply) { if (reply.status == 200 || reply.status == 400 || - reply.responseBody.includes('Servable not found for request: Latest(None)')) { + reply.responseBody.includes("Servable not found for request: Latest(None)")) { r.return(200) } else { - r.error('failed ping' + reply.responseBody) + r.error("failed ping" + reply.responseBody) r.return(502) } } @@ -56,7 +56,7 @@ function ping_without_model(r) { function return_error(r, code, message) { if (message) { - r.return(code, '{"error": "' + message + '"}') + r.return(code, "{'error': " + message + "'}'") } else { r.return(code) } @@ -65,7 +65,7 @@ function return_error(r, code, message) { function tfs_json_request(r, json) { var uri = make_tfs_uri(r, true) var options = { - method: 'POST', + method: "POST", body: json } @@ -77,9 +77,9 @@ function tfs_json_request(r, json) { body = body.replace("\\'instances\\'", "'instances'") } - if ('application/jsonlines' == accept || 'application/jsons' == accept) { - body = body.replace(/\n/g, '') - r.headersOut['Content-Type'] = accept + if ("application/jsonlines" == accept || "application/jsons" == accept) { + body = body.replace(/\n/g, "") + r.headersOut["Content-Type"] = accept } r.return(reply.status, body) } @@ -91,13 +91,13 @@ function tfs_json_request(r, json) { function make_tfs_uri(r, with_method) { var attributes = parse_custom_attributes(r) - var uri = tfs_base_uri + attributes['tfs-model-name'] - if ('tfs-model-version' in attributes) { - uri += '/versions/' + attributes['tfs-model-version'] + var uri = tfs_base_uri + attributes["tfs-model-name"] + if ("tfs-model-version" in attributes) { + uri += "/versions/" + attributes["tfs-model-version"] } if (with_method) { - uri += ':' + (attributes['tfs-method'] || 'predict') + uri += ":" + (attributes["tfs-method"] || "predict") } return uri @@ 
-111,7 +111,7 @@ function parse_custom_attributes(r) { var matches = header.match(kv_pattern) if (matches) { for (var i = 0; i < matches.length; i++) { - var kv = matches[i].split('=') + var kv = matches[i].split("=") if (kv.length === 2) { attributes[kv[0]] = kv[1] } @@ -120,14 +120,14 @@ } // for MME invocations, tfs-model-name is in the uri, or use default_tfs_model - if (!attributes['tfs-model-name']) { + if (!attributes["tfs-model-name"]) { var uri_pattern = /\/models\/[^,]+\/invoke/g var model_name = r.uri.match(uri_pattern) if (model_name[0]) { - model_name = r.uri.replace('/models/', '').replace('/invoke', '') - attributes['tfs-model-name'] = model_name + model_name = r.uri.replace("/models/", "").replace("/invoke", "") + attributes["tfs-model-name"] = model_name } else { - attributes['tfs-model-name'] = r.variables.default_tfs_model + attributes["tfs-model-name"] = r.variables.default_tfs_model } } @@ -157,31 +157,31 @@ function is_json_lines(data) { function generic_json_request(r, data) { if (! /^\s*\[\s*\[/.test(data)) { - data = '[' + data + ']' + data = "[" + data + "]" } - var json = '{"instances":' + data + '}' + var json = "{\"instances\":" + data + "}" tfs_json_request(r, json) } function json_lines_request(r, data) { var lines = data.trim().split(/\r?\n/) var builder = [] - builder.push('{"instances":') + builder.push("{\"instances\":") if (lines.length != 1) { - builder.push('[') + builder.push("[") } for (var i = 0; i < lines.length; i++) { var line = lines[i].trim() if (line) { - var instance = (i == 0) ? '' : ',' + var instance = (i == 0) ? "" : "," instance += line builder.push(instance) } } - builder.push(lines.length == 1 ? '}' : ']}') + builder.push(lines.length == 1 ? "}" : "]}") tfs_json_request(r, builder.join('')) } @@ -191,7 +191,7 @@ function csv_request(r) { var needs_quotes = data.search(/^\s*("|[\d.Ee+\-]+.*)/) != 0 var lines = data.trim().split(/\r?\n/) var builder = [] - builder.push('{"instances":[') + builder.push("{\"instances\":[") for (var i = 0; i < lines.length; i++) { var line = lines[i].trim() @@ -200,32 +200,32 @@ // Only wrap line in brackets if there are multiple columns. // If there's only one column and it has a string with a comma, // the input will be wrapped in an extra set of brackets.
- var has_multiple_columns = line.search(',') != -1 + var has_multiple_columns = line.search(",") != -1 if (has_multiple_columns) { - line_builder.push('[') + line_builder.push("[") } if (needs_quotes) { - line_builder.push('"') - line_builder.push(line.replace('"', '\\"').replace(',', '","')) - line_builder.push('"') + line_builder.push("\"") + line_builder.push(line.replace("\"", "\\\"").replace(",", "\",\"")) + line_builder.push("\"") } else { line_builder.push(line) } if (has_multiple_columns) { - line_builder.push(']') + line_builder.push("]") } - var json_line = line_builder.join('') + var json_line = line_builder.join("") builder.push(json_line) if (i != lines.length - 1) - builder.push(',') + builder.push(",") } } - builder.push(']}') - tfs_json_request(r, builder.join('')) + builder.push("]}") + tfs_json_request(r, builder.join("")) } diff --git a/docker/build_artifacts/sagemaker/tfs_utils.py b/docker/build_artifacts/sagemaker/tfs_utils.py index ac12856f..f3ca0cb7 100644 --- a/docker/build_artifacts/sagemaker/tfs_utils.py +++ b/docker/build_artifacts/sagemaker/tfs_utils.py @@ -21,13 +21,13 @@ logging.basicConfig(level=logging.INFO) log = logging.getLogger(__name__) -DEFAULT_CONTENT_TYPE = 'application/json' -DEFAULT_ACCEPT_HEADER = 'application/json' -CUSTOM_ATTRIBUTES_HEADER = 'X-Amzn-SageMaker-Custom-Attributes' +DEFAULT_CONTENT_TYPE = "application/json" +DEFAULT_ACCEPT_HEADER = "application/json" +CUSTOM_ATTRIBUTES_HEADER = "X-Amzn-SageMaker-Custom-Attributes" -Context = namedtuple('Context', - 'model_name, model_version, method, rest_uri, grpc_port, ' - 'custom_attributes, request_content_type, accept_header, content_length') +Context = namedtuple("Context", + "model_name, model_version, method, rest_uri, grpc_port, " + "custom_attributes, request_content_type, accept_header, content_length") def parse_request(req, rest_port, grpc_port, default_model_name, model_name=None): @@ -35,16 +35,16 @@ def parse_request(req, rest_port, grpc_port, default_model_name, model_name=None tfs_uri = make_tfs_uri(rest_port, tfs_attributes, default_model_name, model_name) if not model_name: - model_name = tfs_attributes.get('tfs-model-name') + model_name = tfs_attributes.get("tfs-model-name") context = Context(model_name, - tfs_attributes.get('tfs-model-version'), - tfs_attributes.get('tfs-method'), + tfs_attributes.get("tfs-model-version"), + tfs_attributes.get("tfs-method"), tfs_uri, grpc_port, req.get_header(CUSTOM_ATTRIBUTES_HEADER), - req.get_header('Content-Type') or DEFAULT_CONTENT_TYPE, - req.get_header('Accept') or DEFAULT_ACCEPT_HEADER, + req.get_header("Content-Type") or DEFAULT_CONTENT_TYPE, + req.get_header("Accept") or DEFAULT_ACCEPT_HEADER, req.content_length) data = req.stream @@ -55,13 +55,13 @@ def make_tfs_uri(port, attributes, default_model_name, model_name=None): log.info("sagemaker tfs attributes: \n{}".format(attributes)) tfs_model_name = model_name or attributes.get("tfs-model-name", default_model_name) - tfs_model_version = attributes.get('tfs-model-version') - tfs_method = attributes.get('tfs-method', 'predict') + tfs_model_version = attributes.get("tfs-model-version") + tfs_method = attributes.get("tfs-method", "predict") - uri = 'http://localhost:{}/v1/models/{}'.format(port, tfs_model_name) + uri = "http://localhost:{}/v1/models/{}".format(port, tfs_model_name) if tfs_model_version: - uri += '/versions/' + tfs_model_version - uri += ':' + tfs_method + uri += "/versions/" + tfs_model_version + uri += ":" + tfs_method return uri @@ -75,13 +75,13 @@ def
parse_tfs_custom_attributes(req): def create_tfs_config_individual_model(model_name, base_path): - config = 'model_config_list: {\n' - config += ' config: {\n' - config += ' name: "{}",\n'.format(model_name) - config += ' base_path: "{}",\n'.format(base_path) - config += ' model_platform: "tensorflow"\n' - config += ' }\n' - config += '}\n' + config = "model_config_list: {\n" + config += " config: {\n" + config += " name: '{}',\n".format(model_name) + config += " base_path: '{}',\n".format(base_path) + config += " model_platform: 'tensorflow'\n" + config += " }\n" + config += "}\n" return config @@ -91,29 +91,29 @@ def create_tfs_config( ): models = find_models() if not models: - raise ValueError('no SavedModel bundles found!') + raise ValueError("no SavedModel bundles found!") - if tfs_default_model_name == 'None': + if tfs_default_model_name == "None": default_model = os.path.basename(models[0]) if default_model: tfs_default_model_name = default_model - log.info('using default model name: {}'.format(tfs_default_model_name)) + log.info("using default model name: {}".format(tfs_default_model_name)) else: - log.info('no default model detected') + log.info("no default model detected") # config (may) include duplicate 'config' keys, so we can't just dump a dict - config = 'model_config_list: {\n' + config = "model_config_list: {\n" for m in models: - config += ' config: {\n' - config += ' name: "{}",\n'.format(os.path.basename(m)) - config += ' base_path: "{}",\n'.format(m) - config += ' model_platform: "tensorflow"\n' - config += ' }\n' - config += '}\n' + config += " config: {\n" + config += " name: '{}',\n".format(os.path.basename(m)) + config += " base_path: '{}',\n".format(m) + config += " model_platform: 'tensorflow'\n" + config += " }\n" + config += "}\n" - log.info('tensorflow serving model config: \n%s\n', config) + log.info("tensorflow serving model config: \n%s\n", config) - with open(tfs_config_path, 'w') as f: + with open(tfs_config_path, "w") as f: f.write(config) @@ -133,12 +133,12 @@ def tfs_command(tfs_grpc_port, def find_models(): - base_path = '/opt/ml/model' + base_path = "/opt/ml/model" models = [] for f in _find_saved_model_files(base_path): - parts = f.split('/') - if len(parts) >= 6 and re.match(r'^\d+$', parts[-2]): - model_path = '/'.join(parts[0:-2]) + parts = f.split("/") + if len(parts) >= 6 and re.match(r"^\d+$", parts[-2]): + model_path = "/".join(parts[0:-2]) if model_path not in models: models.append(model_path) return models @@ -149,7 +149,7 @@ def _find_saved_model_files(path): if e.is_dir(): yield from _find_saved_model_files(os.path.join(path, e.name)) else: - if e.name == 'saved_model.pb': + if e.name == "saved_model.pb": yield os.path.join(path, e.name) @@ -171,39 +171,39 @@ def __init__(self, key, env_var, value, defaulted_message): cpu_count = multiprocessing.cpu_count() batching_parameters = [ - _BatchingParameter('max_batch_size', 'SAGEMAKER_TFS_MAX_BATCH_SIZE', 8, + _BatchingParameter("max_batch_size", "SAGEMAKER_TFS_MAX_BATCH_SIZE", 8, "max_batch_size defaulted to {}. Set {} to override default. " "Tuning this parameter may yield better performance."), - _BatchingParameter('batch_timeout_micros', 'SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS', 1000, + _BatchingParameter("batch_timeout_micros", "SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS", 1000, "batch_timeout_micros defaulted to {}. Set {} to override " "default. 
Tuning this parameter may yield better performance."), - _BatchingParameter('num_batch_threads', 'SAGEMAKER_TFS_NUM_BATCH_THREADS', + _BatchingParameter("num_batch_threads", "SAGEMAKER_TFS_NUM_BATCH_THREADS", cpu_count, "num_batch_threads defaulted to {}," "the number of CPUs. Set {} to override default."), - _BatchingParameter('max_enqueued_batches', 'SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES', + _BatchingParameter("max_enqueued_batches", "SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES", # Batch limits number of concurrent requests, which limits number # of enqueued batches, so this can be set high for Batch - 100000000 if 'SAGEMAKER_BATCH' in os.environ else cpu_count, + 100000000 if "SAGEMAKER_BATCH" in os.environ else cpu_count, "max_enqueued_batches defaulted to {}. Set {} to override default. " "Tuning this parameter may be necessary to tune out-of-memory " "errors occur."), ] - warning_message = '' + warning_message = "" for batching_parameter in batching_parameters: if batching_parameter.env_var in os.environ: batching_parameter.value = os.environ[batching_parameter.env_var] else: warning_message += batching_parameter.defaulted_message.format( batching_parameter.value, batching_parameter.env_var) - warning_message += '\n' + warning_message += "\n" if warning_message: log.warning(warning_message) - config = '' + config = "" for batching_parameter in batching_parameters: - config += '%s { value: %s }\n' % (batching_parameter.key, batching_parameter.value) + config += "%s { value: %s }\n" % (batching_parameter.key, batching_parameter.value) - log.info('batching config: \n%s\n', config) - with open(batching_config_file, 'w') as f: + log.info("batching config: \n%s\n", config) + with open(batching_config_file, "w") as f: f.write(config) diff --git a/scripts/shared.sh b/scripts/shared.sh index 57be36dd..3a92382b 100755 --- a/scripts/shared.sh +++ b/scripts/shared.sh @@ -25,7 +25,7 @@ function get_short_version() { } function get_aws_account() { - aws --region $AWS_DEFAULT_REGION sts --endpoint-url https://sts.$AWS_DEFAULT_REGION.amazonaws.com get-caller-identity --query 'Account' --output text + aws --region us-west-2 sts --endpoint-url https://sts.us-west-2.amazonaws.com get-caller-identity --query 'Account' --output text } function get_ei_executable() { diff --git a/test/integration/local/conftest.py b/test/integration/local/conftest.py index 903885e2..86d97e2e 100644 --- a/test/integration/local/conftest.py +++ b/test/integration/local/conftest.py @@ -13,43 +13,43 @@ import pytest -FRAMEWORK_LATEST_VERSION = '1.13' -TFS_DOCKER_BASE_NAME = 'sagemaker-tensorflow-serving' +FRAMEWORK_LATEST_VERSION = "1.13" +TFS_DOCKER_BASE_NAME = "sagemaker-tensorflow-serving" def pytest_addoption(parser): - parser.addoption('--docker-base-name', default=TFS_DOCKER_BASE_NAME) - parser.addoption('--framework-version', default=FRAMEWORK_LATEST_VERSION, required=True) - parser.addoption('--processor', default='cpu', choices=['cpu', 'gpu']) - parser.addoption('--tag') + parser.addoption("--docker-base-name", default=TFS_DOCKER_BASE_NAME) + parser.addoption("--framework-version", default=FRAMEWORK_LATEST_VERSION, required=True) + parser.addoption("--processor", default="cpu", choices=["cpu", "gpu"]) + parser.addoption("--tag") -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def docker_base_name(request): - return request.config.getoption('--docker-base-name') + return request.config.getoption("--docker-base-name") -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def framework_version(request): 
- return request.config.getoption('--framework-version') + return request.config.getoption("--framework-version") -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def processor(request): - return request.config.getoption('--processor') + return request.config.getoption("--processor") -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def runtime_config(request, processor): - if processor == 'gpu': - return '--runtime=nvidia ' + if processor == "gpu": + return "--runtime=nvidia " else: - return '' + return "" -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def tag(request, framework_version, processor): - image_tag = request.config.getoption('--tag') + image_tag = request.config.getoption("--tag") if not image_tag: - image_tag = '{}-{}'.format(framework_version, processor) + image_tag = "{}-{}".format(framework_version, processor) return image_tag diff --git a/test/integration/local/multi_model_endpoint_test_utils.py b/test/integration/local/multi_model_endpoint_test_utils.py index bce63250..08802dd6 100644 --- a/test/integration/local/multi_model_endpoint_test_utils.py +++ b/test/integration/local/multi_model_endpoint_test_utils.py @@ -15,23 +15,23 @@ import json import requests -INVOCATION_URL = 'http://localhost:8080/models/{}/invoke' -MODELS_URL = 'http://localhost:8080/models' -DELETE_MODEL_URL = 'http://localhost:8080/models/{}' +INVOCATION_URL = "http://localhost:8080/models/{}/invoke" +MODELS_URL = "http://localhost:8080/models" +DELETE_MODEL_URL = "http://localhost:8080/models/{}" -def make_headers(content_type='application/json', method='predict'): +def make_headers(content_type="application/json", method="predict"): headers = { - 'Content-Type': content_type, - 'X-Amzn-SageMaker-Custom-Attributes': 'tfs-method=%s' % method + "Content-Type": content_type, + "X-Amzn-SageMaker-Custom-Attributes": "tfs-method=%s" % method } return headers -def make_invocation_request(data, model_name, content_type='application/json'): +def make_invocation_request(data, model_name, content_type="application/json"): headers = { - 'Content-Type': content_type, - 'X-Amzn-SageMaker-Custom-Attributes': 'tfs-method=predict' + "Content-Type": content_type, + "X-Amzn-SageMaker-Custom-Attributes": "tfs-method=predict" } response = requests.post(INVOCATION_URL.format(model_name), data=data, headers=headers) return response.status_code, response.content.decode(encodings.utf_8.getregentry().name) @@ -43,13 +43,13 @@ def make_list_model_request(): def make_get_model_request(model_name): - response = requests.get(MODELS_URL + '/{}'.format(model_name)) + response = requests.get(MODELS_URL + "/{}".format(model_name)) return response.status_code, response.content.decode(encodings.utf_8.getregentry().name) -def make_load_model_request(data, content_type='application/json'): +def make_load_model_request(data, content_type="application/json"): headers = { - 'Content-Type': content_type + "Content-Type": content_type } response = requests.post(MODELS_URL, data=data, headers=headers) return response.status_code, response.content.decode(encodings.utf_8.getregentry().name) diff --git a/test/integration/local/test_container.py b/test/integration/local/test_container.py index 32602939..00a82ecf 100644 --- a/test/integration/local/test_container.py +++ b/test/integration/local/test_container.py @@ -20,37 +20,37 @@ import pytest import requests -BASE_URL = 'http://localhost:8080/invocations' +BASE_URL = "http://localhost:8080/invocations" -@pytest.fixture(scope='session', 
autouse=True) +@pytest.fixture(scope="session", autouse=True) def volume(): try: - model_dir = os.path.abspath('test/resources/models') + model_dir = os.path.abspath("test/resources/models") subprocess.check_call( - 'docker volume create --name model_volume --opt type=none ' - '--opt device={} --opt o=bind'.format(model_dir).split()) + "docker volume create --name model_volume --opt type=none " + "--opt device={} --opt o=bind".format(model_dir).split()) yield model_dir finally: - subprocess.check_call('docker volume rm model_volume'.split()) + subprocess.check_call("docker volume rm model_volume".split()) -@pytest.fixture(scope='module', autouse=True, params=[True, False]) +@pytest.fixture(scope="module", autouse=True, params=[True, False]) def container(request, docker_base_name, tag, runtime_config): try: if request.param: - batching_config = ' -e SAGEMAKER_TFS_ENABLE_BATCHING=true' + batching_config = " -e SAGEMAKER_TFS_ENABLE_BATCHING=true" else: - batching_config = '' + batching_config = "" command = ( - 'docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080' - ' --mount type=volume,source=model_volume,target=/opt/ml/model,readonly' - ' -e SAGEMAKER_TFS_DEFAULT_MODEL_NAME=half_plus_three' - ' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info' - ' -e SAGEMAKER_BIND_TO_PORT=8080' - ' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999' - ' {}' - ' {}:{} serve' + "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" + " --mount type=volume,source=model_volume,target=/opt/ml/model,readonly" + " -e SAGEMAKER_TFS_DEFAULT_MODEL_NAME=half_plus_three" + " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" + " -e SAGEMAKER_BIND_TO_PORT=8080" + " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" + " {}" + " {}:{} serve" ).format(runtime_config, batching_config, docker_base_name, tag) proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) @@ -60,7 +60,7 @@ def container(request, docker_base_name, tag, runtime_config): while attempts < 40: time.sleep(3) try: - res_code = requests.get('http://localhost:8080/ping').status_code + res_code = requests.get("http://localhost:8080/ping").status_code if res_code == 200: break except: @@ -69,126 +69,126 @@ def container(request, docker_base_name, tag, runtime_config): yield proc.pid finally: - subprocess.check_call('docker rm -f sagemaker-tensorflow-serving-test'.split()) + subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) -def make_request(data, content_type='application/json', method='predict'): +def make_request(data, content_type="application/json", method="predict"): headers = { - 'Content-Type': content_type, - 'X-Amzn-SageMaker-Custom-Attributes': - 'tfs-model-name=half_plus_three,tfs-method=%s' % method + "Content-Type": content_type, + "X-Amzn-SageMaker-Custom-Attributes": + "tfs-model-name=half_plus_three,tfs-method=%s" % method } response = requests.post(BASE_URL, data=data, headers=headers) - return json.loads(response.content.decode('utf-8')) + return json.loads(response.content.decode("utf-8")) def test_predict(): x = { - 'instances': [1.0, 2.0, 5.0] + "instances": [1.0, 2.0, 5.0] } y = make_request(json.dumps(x)) - assert y == {'predictions': [3.5, 4.0, 5.5]} + assert y == {"predictions": [3.5, 4.0, 5.5]} def test_predict_twice(): x = { - 'instances': [1.0, 2.0, 5.0] + "instances": [1.0, 2.0, 5.0] } y = make_request(json.dumps(x)) z = make_request(json.dumps(x)) - assert y == {'predictions': [3.5, 4.0, 5.5]} - assert z == {'predictions': [3.5, 4.0, 5.5]} + assert y == {"predictions": [3.5, 4.0, 5.5]} + 
assert z == {"predictions": [3.5, 4.0, 5.5]} def test_predict_two_instances(): x = { - 'instances': [[1.0, 2.0, 5.0], [1.0, 2.0, 5.0]] + "instances": [[1.0, 2.0, 5.0], [1.0, 2.0, 5.0]] } y = make_request(json.dumps(x)) - assert y == {'predictions': [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} + assert y == {"predictions": [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} def test_predict_jsons_json_content_type(): - x = '[1.0, 2.0, 5.0]\n[1.0, 2.0, 5.0]' + x = "[1.0, 2.0, 5.0]\n[1.0, 2.0, 5.0]" y = make_request(x) - assert y == {'predictions': [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} + assert y == {"predictions": [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} def test_predict_jsonlines(): - x = '[1.0, 2.0, 5.0]\n[1.0, 2.0, 5.0]' - y = make_request(x, 'application/jsonlines') - assert y == {'predictions': [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} + x = "[1.0, 2.0, 5.0]\n[1.0, 2.0, 5.0]" + y = make_request(x, "application/jsonlines") + assert y == {"predictions": [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} def test_predict_jsons(): - x = '[1.0, 2.0, 5.0]\n[1.0, 2.0, 5.0]' - y = make_request(x, 'application/jsons') - assert y == {'predictions': [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} + x = "[1.0, 2.0, 5.0]\n[1.0, 2.0, 5.0]" + y = make_request(x, "application/jsons") + assert y == {"predictions": [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} def test_predict_jsons_2(): - x = '{"x": [1.0, 2.0, 5.0]}\n{"x": [1.0, 2.0, 5.0]}' + x = "{'x': [1.0, 2.0, 5.0]}\n{'x': [1.0, 2.0, 5.0]}" y = make_request(x) - assert y == {'predictions': [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} + assert y == {"predictions": [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} def test_predict_generic_json(): x = [1.0, 2.0, 5.0] y = make_request(json.dumps(x)) - assert y == {'predictions': [[3.5, 4.0, 5.5]]} + assert y == {"predictions": [[3.5, 4.0, 5.5]]} def test_predict_generic_json_two_instances(): x = [[1.0, 2.0, 5.0], [1.0, 2.0, 5.0]] y = make_request(json.dumps(x)) - assert y == {'predictions': [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} + assert y == {"predictions": [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} def test_predict_csv(): - x = '1.0' - y = make_request(x, 'text/csv') - assert y == {'predictions': [3.5]} + x = "1.0" + y = make_request(x, "text/csv") + assert y == {"predictions": [3.5]} def test_predict_csv_with_zero(): - x = '0.0' - y = make_request(x, 'text/csv') - assert y == {'predictions': [3.0]} + x = "0.0" + y = make_request(x, "text/csv") + assert y == {"predictions": [3.0]} def test_predict_csv_one_instance_three_values_with_zero(): - x = '0.0,2.0,5.0' - y = make_request(x, 'text/csv') - assert y == {'predictions': [[3.0, 4.0, 5.5]]} + x = "0.0,2.0,5.0" + y = make_request(x, "text/csv") + assert y == {"predictions": [[3.0, 4.0, 5.5]]} def test_predict_csv_one_instance_three_values(): - x = '1.0,2.0,5.0' - y = make_request(x, 'text/csv') - assert y == {'predictions': [[3.5, 4.0, 5.5]]} + x = "1.0,2.0,5.0" + y = make_request(x, "text/csv") + assert y == {"predictions": [[3.5, 4.0, 5.5]]} def test_predict_csv_two_instances_three_values(): - x = '1.0,2.0,5.0\n1.0,2.0,5.0' - y = make_request(x, 'text/csv') - assert y == {'predictions': [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} + x = "1.0,2.0,5.0\n1.0,2.0,5.0" + y = make_request(x, "text/csv") + assert y == {"predictions": [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} def test_predict_csv_three_instances(): - x = '1.0\n2.0\n5.0' - y = make_request(x, 'text/csv') - assert y == {'predictions': [3.5, 4.0, 5.5]} + x = "1.0\n2.0\n5.0" + y = make_request(x, "text/csv") + assert y == {"predictions": [3.5, 4.0, 5.5]} def test_predict_csv_wide_categorical_input(): - x = 
('0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0\n' # noqa - '0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,6.0,0.0\n') # noqa + x = ("0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0\n" # noqa + "0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,6.0,0.0\n") # noqa - y = make_request(x, 'text/csv') - predictions = y['predictions'] + y = make_request(x, "text/csv") + predictions = y["predictions"] assert 2 == len(predictions) assert 30 == len(predictions[0]) @@ -198,12 +198,12 @@ def test_predict_csv_wide_categorical_input(): def test_regress(): x = { - 'signature_name': 'tensorflow/serving/regress', - 'examples': [{'x': 1.0}, {'x': 2.0}] + "signature_name": "tensorflow/serving/regress", + "examples": [{"x": 1.0}, {"x": 2.0}] } - y = make_request(json.dumps(x), method='regress') - assert y == {'results': [3.5, 4.0]} + y = make_request(json.dumps(x), method="regress") + assert y == {"results": [3.5, 4.0]} def test_regress_one_instance(): @@ -211,48 +211,48 @@ def test_regress_one_instance(): # but it is actually 'results' # this test will catch if they change api to match docs (unlikely) x = { - 'signature_name': 'tensorflow/serving/regress', - 'examples': [{'x': 1.0}] + "signature_name": "tensorflow/serving/regress", + "examples": [{"x": 1.0}] } - y = make_request(json.dumps(x), method='regress') - assert y == {'results': [3.5]} + y = make_request(json.dumps(x), method="regress") + assert y == {"results": [3.5]} def test_predict_bad_input(): - y = make_request('whatever') - assert 'error' in y + y = make_request("whatever") + assert "error" in y def test_predict_bad_input_instances(): - x = json.dumps({'junk': 'data'}) + x = json.dumps({"junk": "data"}) y = make_request(x) - assert y['error'].startswith('Failed to process element: 0 key: junk of \'instances\' list.') + assert y["error"].startswith("Failed to process element: 0 key: junk of \'instances\' list.") def test_predict_no_custom_attributes_header(): x = { - 'instances': [1.0, 2.0, 5.0] + "instances": [1.0, 2.0, 5.0] } headers = { 'Content-Type': 'application/json' } response = requests.post(BASE_URL, data=json.dumps(x), headers=headers) - y = json.loads(response.content.decode('utf-8')) + y = json.loads(response.content.decode("utf-8")) - assert y == {'predictions': [3.5, 4.0, 5.5]} + assert y == {"predictions": [3.5, 4.0, 5.5]} def test_predict_with_jsonlines(): x = { - 'instances': [1.0, 2.0, 5.0] + "instances": [1.0, 2.0, 5.0] } headers = { - 'Content-Type': 'application/json', - 'Accept': 'application/jsonlines' + "Content-Type": "application/json", + "Accept": "application/jsonlines" } response = requests.post(BASE_URL, data=json.dumps(x), headers=headers) - assert response.headers['Content-Type'] == 'application/jsonlines' - assert response.content.decode('utf-8') == '{ "predictions": [3.5, 4.0, 5.5 ]}' + assert response.headers["Content-Type"] == "application/jsonlines" + assert response.content.decode("utf-8") == "{ \"predictions\": [3.5, 4.0, 5.5 ]}" diff --git a/test/integration/local/test_multi_model_endpoint.py b/test/integration/local/test_multi_model_endpoint.py index 811c531a..c1984bb6 100644 --- a/test/integration/local/test_multi_model_endpoint.py +++ b/test/integration/local/test_multi_model_endpoint.py @@ -23,32 +23,32 @@ from
multi_model_endpoint_test_utils import make_invocation_request, make_list_model_request, \ make_get_model_request, make_load_model_request, make_unload_model_request -PING_URL = 'http://localhost:8080/ping' +PING_URL = "http://localhost:8080/ping" -@pytest.fixture(scope='session', autouse=True) +@pytest.fixture(scope="session", autouse=True) def volume(): try: - model_dir = os.path.abspath('test/resources/mme') + model_dir = os.path.abspath("test/resources/mme") subprocess.check_call( - 'docker volume create --name dynamic_endpoint_model_volume --opt type=none ' - '--opt device={} --opt o=bind'.format(model_dir).split()) + "docker volume create --name dynamic_endpoint_model_volume --opt type=none " + "--opt device={} --opt o=bind".format(model_dir).split()) yield model_dir finally: - subprocess.check_call('docker volume rm dynamic_endpoint_model_volume'.split()) + subprocess.check_call("docker volume rm dynamic_endpoint_model_volume".split()) -@pytest.fixture(scope='module', autouse=True) +@pytest.fixture(scope="module", autouse=True) def container(request, docker_base_name, tag, runtime_config): try: command = ( - 'docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080' - ' --mount type=volume,source=dynamic_endpoint_model_volume,target=/opt/ml/models,readonly' - ' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info' - ' -e SAGEMAKER_BIND_TO_PORT=8080' - ' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999' - ' -e SAGEMAKER_MULTI_MODEL=true' - ' {}:{} serve' + "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" + " --mount type=volume,source=dynamic_endpoint_model_volume,target=/opt/ml/models,readonly" + " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" + " -e SAGEMAKER_BIND_TO_PORT=8080" + " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" + " -e SAGEMAKER_MULTI_MODEL=true" + " {}:{} serve" ).format(runtime_config, docker_base_name, tag) proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) @@ -57,7 +57,7 @@ def container(request, docker_base_name, tag, runtime_config): while attempts < 40: time.sleep(3) try: - res_code = requests.get('http://localhost:8080/ping').status_code + res_code = requests.get("http://localhost:8080/ping").status_code if res_code == 200: break except: @@ -66,7 +66,7 @@ def container(request, docker_base_name, tag, runtime_config): yield proc.pid finally: - subprocess.check_call('docker rm -f sagemaker-tensorflow-serving-test'.split()) + subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) def test_ping(): @@ -76,9 +76,9 @@ def test_ping(): def test_container_start_invocation_fail(): x = { - 'instances': [1.0, 2.0, 5.0] + "instances": [1.0, 2.0, 5.0] } - code, y = make_invocation_request(json.dumps(x), 'half_plus_three') + code, y = make_invocation_request(json.dumps(x), "half_plus_three") y = json.loads(y) assert code == 404 assert "Model half_plus_three is not loaded yet." 
in str(y) @@ -93,28 +93,28 @@ def test_list_models_empty(): def test_delete_unloaded_model(): # unloads the given model/version, no-op if not loaded - model_name = 'non-existing-model' + model_name = "non-existing-model" code, res = make_unload_model_request(model_name) assert code == 404 - assert 'Model {} is not loaded yet'.format(model_name) in res + assert "Model {} is not loaded yet".format(model_name) in res def test_delete_model(): - model_name = 'half_plus_three' + model_name = "half_plus_three" model_data = { - 'model_name': model_name, - 'url': '/opt/ml/models/half_plus_three' + "model_name": model_name, + "url": "/opt/ml/models/half_plus_three" } code, res = make_load_model_request(json.dumps(model_data)) assert code == 200 - assert 'Successfully loaded model {}'.format(model_name) in res + assert "Successfully loaded model {}".format(model_name) in res x = { - 'instances': [1.0, 2.0, 5.0] + "instances": [1.0, 2.0, 5.0] } _, y = make_invocation_request(json.dumps(x), model_name) y = json.loads(y) - assert y == {'predictions': [3.5, 4.0, 5.5]} + assert y == {"predictions": [3.5, 4.0, 5.5]} code_unload, res2 = make_unload_model_request(model_name) assert code_unload == 200 @@ -122,43 +122,43 @@ def test_delete_model(): code_invoke, y2 = make_invocation_request(json.dumps(x), model_name) y2 = json.loads(y2) assert code_invoke == 404 - assert 'Model {} is not loaded yet.'.format(model_name) in str(y2) + assert "Model {} is not loaded yet.".format(model_name) in str(y2) def test_load_two_models(): - model_name_1 = 'half_plus_two' + model_name_1 = "half_plus_two" model_data_1 = { - 'model_name': model_name_1, - 'url': '/opt/ml/models/half_plus_two' + "model_name": model_name_1, + "url": "/opt/ml/models/half_plus_two" } code1, res1 = make_load_model_request(json.dumps(model_data_1)) assert code1 == 200 - assert 'Successfully loaded model {}'.format(model_name_1) in res1 + assert "Successfully loaded model {}".format(model_name_1) in res1 # load second model - model_name_2 = 'half_plus_three' + model_name_2 = "half_plus_three" model_data_2 = { - 'model_name': model_name_2, - 'url': '/opt/ml/models/half_plus_three' + "model_name": model_name_2, + "url": "/opt/ml/models/half_plus_three" } code2, res2 = make_load_model_request(json.dumps(model_data_2)) assert code2 == 200 - assert 'Successfully loaded model {}'.format(model_name_2) in res2 + assert "Successfully loaded model {}".format(model_name_2) in res2 # make invocation request to the first model x = { - 'instances': [1.0, 2.0, 5.0] + "instances": [1.0, 2.0, 5.0] } code_invoke1, y1 = make_invocation_request(json.dumps(x), model_name_1) y1 = json.loads(y1) assert code_invoke1 == 200 - assert y1 == {'predictions': [2.5, 3.0, 4.5]} + assert y1 == {"predictions": [2.5, 3.0, 4.5]} # make invocation request to the second model - code_invoke2, y2 = make_invocation_request(json.dumps(x), 'half_plus_three') + code_invoke2, y2 = make_invocation_request(json.dumps(x), "half_plus_three") y2 = json.loads(y2) assert code_invoke2 == 200 - assert y2 == {'predictions': [3.5, 4.0, 5.5]} + assert y2 == {"predictions": [3.5, 4.0, 5.5]} code_list, res3 = make_list_model_request() res3 = json.loads(res3) @@ -166,48 +166,48 @@ def test_load_two_models(): def test_load_one_model_two_times(): - model_name = 'cifar' + model_name = "cifar" model_data = { - 'model_name': model_name, - 'url': '/opt/ml/models/cifar' + "model_name": model_name, + "url": "/opt/ml/models/cifar" } code_load, res = make_load_model_request(json.dumps(model_data)) assert code_load == 200 
- assert 'Successfully loaded model {}'.format(model_name) in res + assert "Successfully loaded model {}".format(model_name) in res code_load2, res2 = make_load_model_request(json.dumps(model_data)) assert code_load2 == 409 - assert'Model {} is already loaded'.format(model_name) in res2 + assert "Model {} is already loaded".format(model_name) in res2 def test_load_non_existing_model(): - model_name = 'non-existing' - base_path = '/opt/ml/models/non-existing' + model_name = "non-existing" + base_path = "/opt/ml/models/non-existing" model_data = { - 'model_name': model_name, - 'url': base_path + "model_name": model_name, + "url": base_path } code, res = make_load_model_request(json.dumps(model_data)) assert code == 404 - assert 'Could not find valid base path {} for servable {}'.format(base_path, model_name) in str(res) + assert "Could not find valid base path {} for servable {}".format(base_path, model_name) in str(res) def test_bad_model_reqeust(): bad_model_data = { - 'model_name': 'model_name', - 'uri': '/opt/ml/models/non-existing' + "model_name": "model_name", + "uri": "/opt/ml/models/non-existing" } code, _ = make_load_model_request(json.dumps(bad_model_data)) assert code == 500 def test_invalid_model_version(): - model_name = 'invalid_version' - base_path = '/opt/ml/models/invalid_version' + model_name = "invalid_version" + base_path = "/opt/ml/models/invalid_version" invalid_model_version_data = { - 'model_name': model_name, - 'url': base_path + "model_name": model_name, + "url": base_path } code, res = make_load_model_request(json.dumps(invalid_model_version_data)) assert code == 404 - assert 'Could not find valid base path {} for servable {}'.format(base_path, model_name) in str(res) + assert "Could not find valid base path {} for servable {}".format(base_path, model_name) in str(res) diff --git a/test/integration/local/test_pre_post_processing.py b/test/integration/local/test_pre_post_processing.py index 3d72b612..e2ec89cc 100644 --- a/test/integration/local/test_pre_post_processing.py +++ b/test/integration/local/test_pre_post_processing.py @@ -22,42 +22,42 @@ import requests -PING_URL = 'http://localhost:8080/ping' -INVOCATIONS_URL = 'http://localhost:8080/invocations' +PING_URL = "http://localhost:8080/ping" +INVOCATIONS_URL = "http://localhost:8080/invocations" -@pytest.fixture(scope='module', autouse=True, params=['1', '2', '3', '4', '5']) +@pytest.fixture(scope="module", autouse=True, params=["1", "2", "3", "4", "5"]) def volume(tmpdir_factory, request): try: print(str(tmpdir_factory)) - model_dir = os.path.join(tmpdir_factory.mktemp('test'), 'model') - code_dir = os.path.join(model_dir, 'code') - test_example = 'test/resources/examples/test{}'.format(request.param) + model_dir = os.path.join(tmpdir_factory.mktemp("test"), "model") + code_dir = os.path.join(model_dir, "code") + test_example = "test/resources/examples/test{}".format(request.param) - model_src_dir = 'test/resources/models' + model_src_dir = "test/resources/models" shutil.copytree(model_src_dir, model_dir) shutil.copytree(test_example, code_dir) - volume_name = f'model_volume_{request.param}' + volume_name = f"model_volume_{request.param}" subprocess.check_call( - 'docker volume create --name {} --opt type=none ' - '--opt device={} --opt o=bind'.format(volume_name, model_dir).split()) + "docker volume create --name {} --opt type=none " + "--opt device={} --opt o=bind".format(volume_name, model_dir).split()) yield volume_name finally: - subprocess.check_call(f'docker volume rm {volume_name}'.split()) + 
subprocess.check_call(f"docker volume rm {volume_name}".split()) -@pytest.fixture(scope='module', autouse=True) +@pytest.fixture(scope="module", autouse=True) def container(volume, docker_base_name, tag, runtime_config): try: command = ( - 'docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080' - ' --mount type=volume,source={},target=/opt/ml/model,readonly' - ' -e SAGEMAKER_TFS_DEFAULT_MODEL_NAME=half_plus_three' - ' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info' - ' -e SAGEMAKER_BIND_TO_PORT=8080' - ' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999' - ' {}:{} serve' + "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" + " --mount type=volume,source={},target=/opt/ml/model,readonly" + " -e SAGEMAKER_TFS_DEFAULT_MODEL_NAME=half_plus_three" + " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" + " -e SAGEMAKER_BIND_TO_PORT=8080" + " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" + " {}:{} serve" ).format(runtime_config, volume, docker_base_name, tag) proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) @@ -66,7 +66,7 @@ def container(volume, docker_base_name, tag, runtime_config): while attempts < 40: time.sleep(3) try: - res_code = requests.get('http://localhost:8080/ping').status_code + res_code = requests.get("http://localhost:8080/ping").status_code if res_code == 200: break except: @@ -75,56 +75,56 @@ def container(volume, docker_base_name, tag, runtime_config): yield proc.pid finally: - subprocess.check_call('docker rm -f sagemaker-tensorflow-serving-test'.split()) + subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) def make_headers(content_type, method): headers = { - 'Content-Type': content_type, - 'X-Amzn-SageMaker-Custom-Attributes': 'tfs-model-name=half_plus_three,tfs-method=%s' % method + "Content-Type": content_type, + "X-Amzn-SageMaker-Custom-Attributes": "tfs-model-name=half_plus_three,tfs-method=%s" % method } return headers def test_predict_json(): - headers = make_headers('application/json', 'predict') - data = '{"instances": [1.0, 2.0, 5.0]}' + headers = make_headers("application/json", "predict") + data = "{'instances': [1.0, 2.0, 5.0]}" response = requests.post(INVOCATIONS_URL, data=data, headers=headers).json() - assert response == {'predictions': [3.5, 4.0, 5.5]} + assert response == {"predictions": [3.5, 4.0, 5.5]} def test_zero_content(): - headers = make_headers('application/json', 'predict') - data = '' + headers = make_headers("application/json", "predict") + data = "" response = requests.post(INVOCATIONS_URL, data=data, headers=headers) assert 500 == response.status_code - assert 'document is empty' in response.text + assert "document is empty" in response.text def test_large_input(): - headers = make_headers('text/csv', 'predict') - data_file = 'test/resources/inputs/test-large.csv' + headers = make_headers("text/csv", "predict") + data_file = "test/resources/inputs/test-large.csv" - with open(data_file, 'r') as file: + with open(data_file, "r") as file: large_data = file.read() response = requests.post(INVOCATIONS_URL, data=large_data, headers=headers).json() - predictions = response['predictions'] + predictions = response["predictions"] assert len(predictions) == 753936 def test_csv_input(): - headers = make_headers('text/csv', 'predict') - data = '1.0,2.0,5.0' + headers = make_headers("text/csv", "predict") + data = "1.0,2.0,5.0" response = requests.post(INVOCATIONS_URL, data=data, headers=headers).json() - assert response == {'predictions': [3.5, 4.0, 5.5]} + assert response == {"predictions": [3.5, 
4.0, 5.5]} def test_unsupported_content_type(): - headers = make_headers('unsupported-type', 'predict') - data = 'aW1hZ2UgYnl0ZXM=' + headers = make_headers("unsupported-type", "predict") + data = "aW1hZ2UgYnl0ZXM=" response = requests.post(INVOCATIONS_URL, data=data, headers=headers) assert 500 == response.status_code - assert 'unsupported content type' in response.text + assert "unsupported content type" in response.text def test_ping_service(): diff --git a/test/integration/local/test_pre_post_processing_mme.py b/test/integration/local/test_pre_post_processing_mme.py index 2fce6063..f4c8730d 100644 --- a/test/integration/local/test_pre_post_processing_mme.py +++ b/test/integration/local/test_pre_post_processing_mme.py @@ -26,43 +26,43 @@ make_get_model_request, make_load_model_request, make_unload_model_request, make_headers -PING_URL = 'http://localhost:8080/ping' -INVOCATION_URL = 'http://localhost:8080/models/{}/invoke' -MODEL_NAME = 'half_plus_three' +PING_URL = "http://localhost:8080/ping" +INVOCATION_URL = "http://localhost:8080/models/{}/invoke" +MODEL_NAME = "half_plus_three" -@pytest.fixture(scope='module', autouse=True) +@pytest.fixture(scope="module", autouse=True) def volume(tmpdir_factory, request): try: print(str(tmpdir_factory)) - model_dir = os.path.join(tmpdir_factory.mktemp('test'), 'model') - code_dir = os.path.join(model_dir, 'code') - test_example = 'test/resources/examples/test1' + model_dir = os.path.join(tmpdir_factory.mktemp("test"), "model") + code_dir = os.path.join(model_dir, "code") + test_example = "test/resources/examples/test1" - model_src_dir = 'test/resources/models' + model_src_dir = "test/resources/models" shutil.copytree(model_src_dir, model_dir) shutil.copytree(test_example, code_dir) - volume_name = f'model_volume_1' + volume_name = f"model_volume_1" subprocess.check_call( - 'docker volume create --name {} --opt type=none ' - '--opt device={} --opt o=bind'.format(volume_name, model_dir).split()) + "docker volume create --name {} --opt type=none " + "--opt device={} --opt o=bind".format(volume_name, model_dir).split()) yield volume_name finally: - subprocess.check_call(f'docker volume rm {volume_name}'.split()) + subprocess.check_call(f"docker volume rm {volume_name}".split()) -@pytest.fixture(scope='module', autouse=True) +@pytest.fixture(scope="module", autouse=True) def container(volume, docker_base_name, tag, runtime_config): try: command = ( - 'docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080' - ' --mount type=volume,source={},target=/opt/ml/models,readonly' - ' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info' - ' -e SAGEMAKER_BIND_TO_PORT=8080' - ' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999' - ' -e SAGEMAKER_MULTI_MODEL=True' - ' {}:{} serve' + "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" + " --mount type=volume,source={},target=/opt/ml/models,readonly" + " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" + " -e SAGEMAKER_BIND_TO_PORT=8080" + " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" + " -e SAGEMAKER_MULTI_MODEL=True" + " {}:{} serve" ).format(runtime_config, volume, docker_base_name, tag) proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) @@ -71,7 +71,7 @@ def container(volume, docker_base_name, tag, runtime_config): while attempts < 40: time.sleep(3) try: - res_code = requests.get('http://localhost:8080/ping').status_code + res_code = requests.get("http://localhost:8080/ping").status_code if res_code == 200: break except: @@ -80,14 +80,14 @@ def container(volume, docker_base_name, tag, 
runtime_config): yield proc.pid finally: - subprocess.check_call('docker rm -f sagemaker-tensorflow-serving-test'.split()) + subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) @pytest.fixture def model(): model_data = { - 'model_name': MODEL_NAME, - 'url': '/opt/ml/models/half_plus_three' + "model_name": MODEL_NAME, + "url": "/opt/ml/models/half_plus_three" } make_load_model_request(json.dumps(model_data)) return MODEL_NAME @@ -100,40 +100,40 @@ def test_ping_service(): def test_predict_json(model): headers = make_headers() - data = '{"instances": [1.0, 2.0, 5.0]}' + data = "{'instances': [1.0, 2.0, 5.0]}" response = requests.post(INVOCATION_URL.format(model), data=data, headers=headers).json() - assert response == {'predictions': [3.5, 4.0, 5.5]} + assert response == {"predictions": [3.5, 4.0, 5.5]} def test_zero_content(): headers = make_headers() - x = '' + x = "" response = requests.post(INVOCATION_URL.format(MODEL_NAME), data=x, headers=headers) assert 500 == response.status_code - assert 'document is empty' in response.text + assert "document is empty" in response.text def test_large_input(): - data_file = 'test/resources/inputs/test-large.csv' + data_file = "test/resources/inputs/test-large.csv" - with open(data_file, 'r') as file: + with open(data_file, "r") as file: x = file.read() - headers = make_headers(content_type='text/csv') + headers = make_headers(content_type="text/csv") response = requests.post(INVOCATION_URL.format(MODEL_NAME), data=x, headers=headers).json() - predictions = response['predictions'] + predictions = response["predictions"] assert len(predictions) == 753936 def test_csv_input(): - headers = make_headers(content_type='text/csv') - data = '1.0,2.0,5.0' + headers = make_headers(content_type="text/csv") + data = "1.0,2.0,5.0" response = requests.post(INVOCATION_URL.format(MODEL_NAME), data=data, headers=headers).json() - assert response == {'predictions': [3.5, 4.0, 5.5]} + assert response == {"predictions": [3.5, 4.0, 5.5]} def test_unsupported_content_type(): - headers = make_headers('unsupported-type', 'predict') - data = 'aW1hZ2UgYnl0ZXM=' + headers = make_headers("unsupported-type", "predict") + data = "aW1hZ2UgYnl0ZXM=" response = requests.post(INVOCATION_URL.format(MODEL_NAME), data=data, headers=headers) assert 500 == response.status_code - assert 'unsupported content type' in response.text + assert "unsupported content type" in response.text diff --git a/test/integration/local/test_tfs_batching.py b/test/integration/local/test_tfs_batching.py index 3460e08f..6584848d 100644 --- a/test/integration/local/test_tfs_batching.py +++ b/test/integration/local/test_tfs_batching.py @@ -19,42 +19,42 @@ import pytest -@pytest.fixture(scope='session', autouse=True) +@pytest.fixture(scope="session", autouse=True) def volume(): try: - model_dir = os.path.abspath('test/resources/models') + model_dir = os.path.abspath("test/resources/models") subprocess.check_call( - 'docker volume create --name batching_model_volume --opt type=none ' - '--opt device={} --opt o=bind'.format(model_dir).split()) + "docker volume create --name batching_model_volume --opt type=none " + "--opt device={} --opt o=bind".format(model_dir).split()) yield model_dir finally: - subprocess.check_call('docker volume rm batching_model_volume'.split()) + subprocess.check_call("docker volume rm batching_model_volume".split()) def test_run_tfs_with_batching_parameters(docker_base_name, tag, runtime_config): try: command = ( - 'docker run {}--name 
sagemaker-tensorflow-serving-test -p 8080:8080' - ' --mount type=volume,source=batching_model_volume,target=/opt/ml/model,readonly' - ' -e SAGEMAKER_TFS_ENABLE_BATCHING=true' - ' -e SAGEMAKER_TFS_MAX_BATCH_SIZE=16' - ' -e SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS=500' - ' -e SAGEMAKER_TFS_NUM_BATCH_THREADS=100' - ' -e SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES=1' - ' -e SAGEMAKER_TFS_DEFAULT_MODEL_NAME=half_plus_three' - ' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info' - ' -e SAGEMAKER_BIND_TO_PORT=8080' - ' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999' - ' {}:{} serve' + "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" + " --mount type=volume,source=batching_model_volume,target=/opt/ml/model,readonly" + " -e SAGEMAKER_TFS_ENABLE_BATCHING=true" + " -e SAGEMAKER_TFS_MAX_BATCH_SIZE=16" + " -e SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS=500" + " -e SAGEMAKER_TFS_NUM_BATCH_THREADS=100" + " -e SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES=1" + " -e SAGEMAKER_TFS_DEFAULT_MODEL_NAME=half_plus_three" + " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" + " -e SAGEMAKER_BIND_TO_PORT=8080" + " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" + " {}:{} serve" ).format(runtime_config, docker_base_name, tag) proc = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT) lines_seen = { - 'max_batch_size { value: 16 }': 0, - 'batch_timeout_micros { value: 500 }': 0, - 'num_batch_threads { value: 100 }': 0, - 'max_enqueued_batches { value: 1 }': 0 + "max_batch_size { value: 16 }": 0, + "batch_timeout_micros { value: 500 }": 0, + "num_batch_threads { value: 100 }": 0, + "max_enqueued_batches { value: 1 }": 0 } for stdout_line in iter(proc.stdout.readline, ""): @@ -68,4 +68,4 @@ def test_run_tfs_with_batching_parameters(docker_base_name, tag, runtime_config) break finally: - subprocess.check_call('docker rm -f sagemaker-tensorflow-serving-test'.split()) + subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) diff --git a/test/integration/sagemaker/conftest.py b/test/integration/sagemaker/conftest.py index 24b264b9..d4864600 100644 --- a/test/integration/sagemaker/conftest.py +++ b/test/integration/sagemaker/conftest.py @@ -19,105 +19,105 @@ # these regions have some p2 and p3 instances, but not enough for automated testing NO_P2_REGIONS = [ - 'ca-central-1', - 'eu-central-1', - 'eu-west-2', - 'us-west-1', - 'eu-west-3', - 'eu-north-1', - 'sa-east-1', - 'ap-east-1', - 'me-south-1' + "ca-central-1", + "eu-central-1", + "eu-west-2", + "us-west-1", + "eu-west-3", + "eu-north-1", + "sa-east-1", + "ap-east-1", + "me-south-1" ] NO_P3_REGIONS = [ - 'ap-southeast-1', - 'ap-southeast-2', - 'ap-south-1', - 'ca-central-1', - 'eu-central-1', - 'eu-west-2', - 'us-west-1', - 'eu-west-3', - 'eu-north-1', - 'sa-east-1', - 'ap-east-1', - 'me-south-1' + "ap-southeast-1", + "ap-southeast-2", + "ap-south-1", + "a-central-1", + "eu-central-1", + "eu-west-2", + "us-west-1", + "eu-west-3", + "eu-north-1", + "sa-east-1", + "ap-east-1", + "me-south-1" ] def pytest_addoption(parser): - parser.addoption('--region', default='us-west-2') - parser.addoption('--registry') - parser.addoption('--repo') - parser.addoption('--versions') - parser.addoption('--instance-types') - parser.addoption('--accelerator-type') - parser.addoption('--tag') + parser.addoption("--region", default="us-west-2") + parser.addoption("--registry") + parser.addoption("--repo") + parser.addoption("--versions") + parser.addoption("--instance-types") + parser.addoption("--accelerator-type") + parser.addoption("--tag") def pytest_configure(config): - 
os.environ['TEST_REGION'] = config.getoption('--region') - os.environ['TEST_VERSIONS'] = config.getoption('--versions') or '1.11.1,1.12.0,1.13.0' - os.environ['TEST_INSTANCE_TYPES'] = (config.getoption('--instance-types') or - 'ml.m5.xlarge,ml.p2.xlarge') + os.environ["TEST_REGION"] = config.getoption("--region") + os.environ["TEST_VERSIONS"] = config.getoption("--versions") or "1.11.1,1.12.0,1.13.0" + os.environ["TEST_INSTANCE_TYPES"] = (config.getoption("--instance-types") or + "ml.m5.xlarge,ml.p2.xlarge") - os.environ['TEST_EI_VERSIONS'] = config.getoption('--versions') or '1.11,1.12' - os.environ['TEST_EI_INSTANCE_TYPES'] = (config.getoption('--instance-types') or - 'ml.m5.xlarge') + os.environ["TEST_EI_VERSIONS"] = config.getoption("--versions") or "1.11,1.12" + os.environ["TEST_EI_INSTANCE_TYPES"] = (config.getoption("--instance-types") or + "ml.m5.xlarge") - if config.getoption('--tag'): - os.environ['TEST_VERSIONS'] = config.getoption('--tag') - os.environ['TEST_EI_VERSIONS'] = config.getoption('--tag') + if config.getoption("--tag"): + os.environ["TEST_VERSIONS"] = config.getoption("--tag") + os.environ["TEST_EI_VERSIONS"] = config.getoption("--tag") -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def region(request): - return request.config.getoption('--region') + return request.config.getoption("--region") -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def registry(request, region): - if request.config.getoption('--registry'): - return request.config.getoption('--registry') + if request.config.getoption("--registry"): + return request.config.getoption("--registry") sts = boto3.client( - 'sts', + "sts", region_name=region, - endpoint_url='https://sts.{}.amazonaws.com'.format(region) + endpoint_url="https://sts.{}.amazonaws.com".format(region) ) - return sts.get_caller_identity()['Account'] + return sts.get_caller_identity()["Account"] -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def boto_session(region): return boto3.Session(region_name=region) -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def sagemaker_client(boto_session): - return boto_session.client('sagemaker') + return boto_session.client("sagemaker") -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def sagemaker_runtime_client(boto_session): - return boto_session.client('runtime.sagemaker') + return boto_session.client("runtime.sagemaker") def unique_name_from_base(base, max_length=63): - unique = '%04x' % random.randrange(16 ** 4) # 4-digit hex + unique = "%04x" % random.randrange(16 ** 4) # 4-digit hex ts = str(int(time.time())) available_length = max_length - 2 - len(ts) - len(unique) trimmed = base[:available_length] - return '{}-{}-{}'.format(trimmed, ts, unique) + return "{}-{}-{}".format(trimmed, ts, unique) @pytest.fixture def model_name(): - return unique_name_from_base('test-tfs') + return unique_name_from_base("test-tfs") @pytest.fixture(autouse=True) def skip_gpu_instance_restricted_regions(region, instance_type): - if (region in NO_P2_REGIONS and instance_type.startswith('ml.p2')) or \ - (region in NO_P3_REGIONS and instance_type.startswith('ml.p3')): - pytest.skip('Skipping GPU test in region {}'.format(region)) + if (region in NO_P2_REGIONS and instance_type.startswith("ml.p2")) or \ + (region in NO_P3_REGIONS and instance_type.startswith("ml.p3")): + pytest.skip("Skipping GPU test in region {}".format(region)) diff --git a/test/integration/sagemaker/test_ei.py b/test/integration/sagemaker/test_ei.py 
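For reference, the name helper above budgets for a Unix timestamp plus a 4-hex-digit suffix so the result always fits max_length; repeated here as a standalone snippet with an illustrative output (the timestamp and suffix vary per run):

    import random
    import time

    def unique_name_from_base(base, max_length=63):
        # Same logic as the conftest helper above: trim the base so that the epoch
        # timestamp and the 4-hex-digit random suffix always fit within max_length.
        unique = "%04x" % random.randrange(16 ** 4)
        ts = str(int(time.time()))
        available_length = max_length - 2 - len(ts) - len(unique)
        return "{}-{}-{}".format(base[:available_length], ts, unique)

    print(unique_name_from_base("test-tfs"))  # e.g. "test-tfs-1593561600-3fa2" (illustrative only)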
index e91bad11..c7244e80 100644 --- a/test/integration/sagemaker/test_ei.py +++ b/test/integration/sagemaker/test_ei.py @@ -17,23 +17,23 @@ import util -EI_SUPPORTED_REGIONS = ['us-east-1', 'us-east-2', 'us-west-2', - 'eu-west-1', 'ap-northeast-1', 'ap-northeast-2'] +EI_SUPPORTED_REGIONS = ["us-east-1", "us-east-2", "us-west-2", + "eu-west-1", "ap-northeast-1", "ap-northeast-2"] -@pytest.fixture(params=os.environ['TEST_EI_VERSIONS'].split(',')) +@pytest.fixture(params=os.environ["TEST_EI_VERSIONS"].split(",")) def version(request): return request.param @pytest.fixture def repo(request): - return request.config.getoption('--repo') or 'sagemaker-tensorflow-serving-eia' + return request.config.getoption("--repo") or "sagemaker-tensorflow-serving-eia" @pytest.fixture def tag(request, version): - return request.config.getoption('--tag') or f'{version}-cpu' + return request.config.getoption("--tag") or f"{version}-cpu" @pytest.fixture @@ -41,37 +41,37 @@ def image_uri(registry, region, repo, tag): return util.image_uri(registry, region, repo, tag) -@pytest.fixture(params=os.environ['TEST_EI_INSTANCE_TYPES'].split(',')) +@pytest.fixture(params=os.environ["TEST_EI_INSTANCE_TYPES"].split(",")) def instance_type(request, region): return request.param -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def accelerator_type(request): - return request.config.getoption('--accelerator-type') or 'ml.eia1.medium' + return request.config.getoption("--accelerator-type") or "ml.eia1.medium" -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def model_data(region): - return ('s3://sagemaker-sample-data-{}/tensorflow/model' - '/resnet/resnet_50_v2_fp32_NCHW.tar.gz').format(region) + return ("s3://sagemaker-sample-data-{}/tensorflow/model" + "/resnet/resnet_50_v2_fp32_NCHW.tar.gz").format(region) @pytest.fixture def input_data(): - return {'instances': [[[[random.random() for _ in range(3)] for _ in range(3)]]]} + return {"instances": [[[[random.random() for _ in range(3)] for _ in range(3)]]]} @pytest.fixture def skip_if_no_accelerator(accelerator_type): if accelerator_type is None: - pytest.skip('Skipping because accelerator type was not provided') + pytest.skip("Skipping because accelerator type was not provided") @pytest.fixture def skip_if_non_supported_ei_region(region): if region not in EI_SUPPORTED_REGIONS: - pytest.skip('EI is not supported in {}'.format(region)) + pytest.skip("EI is not supported in {}".format(region)) @pytest.mark.skip_if_non_supported_ei_region() diff --git a/test/integration/sagemaker/test_tfs.py b/test/integration/sagemaker/test_tfs.py index 436351d8..2f67c0e9 100644 --- a/test/integration/sagemaker/test_tfs.py +++ b/test/integration/sagemaker/test_tfs.py @@ -16,27 +16,27 @@ import util -NON_P3_REGIONS = ['ap-southeast-1', 'ap-southeast-2', 'ap-south-1', - 'ca-central-1', 'eu-central-1', 'eu-west-2', 'us-west-1'] +NON_P3_REGIONS = ["ap-southeast-1", "ap-southeast-2", "ap-south-1", + "ca-central-1", "eu-central-1", "eu-west-2", "us-west-1"] -@pytest.fixture(params=os.environ['TEST_VERSIONS'].split(',')) +@pytest.fixture(params=os.environ["TEST_VERSIONS"].split(",")) def version(request): return request.param -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def repo(request): - return request.config.getoption('--repo') or 'sagemaker-tensorflow-serving' + return request.config.getoption("--repo") or "sagemaker-tensorflow-serving" @pytest.fixture def tag(request, version, instance_type): - if request.config.getoption('--tag'): - return 
request.config.getoption('--tag') + if request.config.getoption("--tag"): + return request.config.getoption("--tag") - arch = 'gpu' if instance_type.startswith('ml.p') else 'cpu' - return f'{version}-{arch}' + arch = "gpu" if instance_type.startswith("ml.p") else "cpu" + return f"{version}-{arch}" @pytest.fixture @@ -44,21 +44,21 @@ def image_uri(registry, region, repo, tag): return util.image_uri(registry, region, repo, tag) -@pytest.fixture(params=os.environ['TEST_INSTANCE_TYPES'].split(',')) +@pytest.fixture(params=os.environ["TEST_INSTANCE_TYPES"].split(",")) def instance_type(request, region): return request.param -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def accelerator_type(): return None -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def tfs_model(region, boto_session): return util.find_or_put_model_data(region, boto_session, - 'test/data/tfs-model.tar.gz') + "test/data/tfs-model.tar.gz") @pytest.fixture(scope='session') diff --git a/test/integration/sagemaker/util.py b/test/integration/sagemaker/util.py index 9118fad7..f5247b17 100644 --- a/test/integration/sagemaker/util.py +++ b/test/integration/sagemaker/util.py @@ -20,15 +20,15 @@ import time logger = logging.getLogger(__name__) -BATCH_CSV = os.path.join('test', 'data', 'batch.csv') +BATCH_CSV = os.path.join("test", "data", "batch.csv") def image_uri(registry, region, repo, tag): - return f'{registry}.dkr.ecr.{region}.amazonaws.com/{repo}:{tag}' + return f"{registry}.dkr.ecr.{region}.amazonaws.com/{repo}:{tag}" def _execution_role(boto_session): - return boto_session.resource('iam').Role('SageMakerRole').arn + return boto_session.resource("iam").Role("SageMakerRole").arn @contextlib.contextmanager @@ -37,81 +37,81 @@ def sagemaker_model(boto_session, sagemaker_client, image_uri, model_name, model ModelName=model_name, ExecutionRoleArn=_execution_role(boto_session), PrimaryContainer={ - 'Image': image_uri, - 'ModelDataUrl': model_data + "Image": image_uri, + "ModelDataUrl": model_data }) try: yield model finally: - logger.info('deleting model %s', model_name) + logger.info("deleting model %s", model_name) sagemaker_client.delete_model(ModelName=model_name) def _production_variants(model_name, instance_type, accelerator_type): production_variants = [{ - 'VariantName': 'AllTraffic', - 'ModelName': model_name, - 'InitialInstanceCount': 1, - 'InstanceType': instance_type + "VariantName": "AllTraffic", + "ModelName": model_name, + "InitialInstanceCount": 1, + "InstanceType": instance_type }] if accelerator_type: - production_variants[0]['AcceleratorType'] = accelerator_type + production_variants[0]["AcceleratorType"] = accelerator_type return production_variants def _test_bucket(region, boto_session): sts = boto_session.client( - 'sts', + "sts", region_name=region, - endpoint_url='https://sts.{}.amazonaws.com'.format(region) + endpoint_url="https://sts.{}.amazonaws.com".format(region) ) - account = sts.get_caller_identity()['Account'] - return f'sagemaker-{region}-{account}' + account = sts.get_caller_identity()["Account"] + return f"sagemaker-{region}-{account}" def find_or_put_model_data(region, boto_session, local_path): model_file = os.path.basename(local_path) bucket = _test_bucket(region, boto_session) - key = f'test-tfs/{model_file}' + key = f"test-tfs/{model_file}" - s3 = boto_session.client('s3', region) + s3 = boto_session.client("s3", region) try: s3.head_bucket(Bucket=bucket) except botocore.exceptions.ClientError as e: - if e.response['Error']['Code'] != '404': + if 
e.response["Error"]["Code"] != "404": raise # bucket doesn't exist, create it - if region == 'us-east-1': + if region == "us-east-1": s3.create_bucket(Bucket=bucket) else: s3.create_bucket(Bucket=bucket, - CreateBucketConfiguration={'LocationConstraint': region}) + CreateBucketConfiguration={"LocationConstraint": region}) try: s3.head_object(Bucket=bucket, Key=key) except botocore.exceptions.ClientError as e: - if e.response['Error']['Code'] != '404': + if e.response["Error"]["Code"] != "404": raise # file doesn't exist - upload it s3.upload_file(local_path, bucket, key) - return f's3://{bucket}/{key}' + return f"s3://{bucket}/{key}" @contextlib.contextmanager def sagemaker_endpoint(sagemaker_client, model_name, instance_type, accelerator_type=None): - logger.info('creating endpoint %s', model_name) + logger.info("creating endpoint %s", model_name) # Add jitter so we can run tests in parallel without running into service side limits. delay = round(random.random()*5, 3) - logger.info('waiting for {} seconds'.format(delay)) + logger.info("waiting for {} seconds".format(delay)) time.sleep(delay) production_variants = _production_variants(model_name, instance_type, accelerator_type) @@ -121,74 +121,74 @@ def sagemaker_endpoint(sagemaker_client, model_name, instance_type, accelerator_ sagemaker_client.create_endpoint(EndpointName=model_name, EndpointConfigName=model_name) try: - sagemaker_client.get_waiter('endpoint_in_service').wait(EndpointName=model_name) + sagemaker_client.get_waiter("endpoint_in_service").wait(EndpointName=model_name) finally: - status = sagemaker_client.describe_endpoint(EndpointName=model_name)['EndpointStatus'] - if status != 'InService': - raise ValueError(f'failed to create endpoint {model_name}') + status = sagemaker_client.describe_endpoint(EndpointName=model_name)["EndpointStatus"] + if status != "InService": + raise ValueError(f"failed to create endpoint {model_name}") try: yield model_name # return the endpoint name finally: - logger.info('deleting endpoint and endpoint config %s', model_name) + logger.info("deleting endpoint and endpoint config %s", model_name) sagemaker_client.delete_endpoint(EndpointName=model_name) sagemaker_client.delete_endpoint_config(EndpointConfigName=model_name) def _create_transform_job_request(model_name, batch_output, batch_input, instance_type): return { - 'TransformJobName': model_name, - 'ModelName': model_name, - 'BatchStrategy': 'MultiRecord', - 'TransformOutput': { - 'S3OutputPath': batch_output + "TransformJobName": model_name, + "ModelName": model_name, + "BatchStrategy": "MultiRecord", + "TransformOutput": { + "S3OutputPath": batch_output }, - 'TransformInput': { - 'DataSource': { - 'S3DataSource': { - 'S3DataType': 'S3Prefix', - 'S3Uri': batch_input + "TransformInput": { + "DataSource": { + "S3DataSource": { + "S3DataType": "S3Prefix", + "S3Uri": batch_input } }, - 'ContentType': 'text/csv', - 'SplitType': 'Line', - 'CompressionType': 'None' + "ContentType": "text/csv", + "SplitType": "Line", + "CompressionType": "None" }, - 'TransformResources': { - 'InstanceType': instance_type, - 'InstanceCount': 1 + "TransformResources": { + "InstanceType": instance_type, + "InstanceCount": 1 } } def _read_batch_output(region, boto_session, bucket, model_name): - s3 = boto_session.client('s3', region) - output_file = f'/tmp/{model_name}.out' - s3.download_file(bucket, f'output/{model_name}/batch.csv.out', output_file) - return json.loads(open(output_file, 'r').read())['predictions'] + s3 = boto_session.client("s3", region) + output_file 
= f"/tmp/{model_name}.out" + s3.download_file(bucket, f"output/{model_name}/batch.csv.out", output_file) + return json.loads(open(output_file, "r").read())["predictions"] def _wait_for_transform_job(region, boto_session, sagemaker_client, model_name, poll, timeout): - status = sagemaker_client.describe_transform_job(TransformJobName=model_name)['TransformJobStatus'] + status = sagemaker_client.describe_transform_job(TransformJobName=model_name)["TransformJobStatus"] job_runtime = 0 - while status == 'InProgress': + while status == "InProgress": - logger.info(f'Waiting for batch transform job {model_name} to finish') + logger.info(f"Waiting for batch transform job {model_name} to finish") time.sleep(poll) job_runtime += poll if job_runtime > timeout: - raise ValueError(f'Batch transform job {model_name} exceeded maximum runtime {timeout} seconds') + raise ValueError(f"Batch transform job {model_name} exceeded maximum runtime {timeout} seconds") - status = sagemaker_client.describe_transform_job(TransformJobName=model_name)['TransformJobStatus'] - if status == 'Completed': + status = sagemaker_client.describe_transform_job(TransformJobName=model_name)["TransformJobStatus"] + if status == "Completed": return _read_batch_output(region=region, boto_session=boto_session, bucket=_test_bucket(region, boto_session), model_name=model_name) - if status == 'Failed': - raise ValueError(f'Failed to execute batch transform job {model_name}') - if status in ['Stopped', 'Stopping']: - raise ValueError(f'Batch transform job {model_name} was stopped') + if status == "Failed": + raise ValueError(f"Failed to execute batch transform job {model_name}") + if status in ["Stopped", "Stopping"]: + raise ValueError(f"Batch transform job {model_name} was stopped") def run_batch_transform_job(region, boto_session, model_data, image_uri, @@ -198,7 +198,7 @@ def run_batch_transform_job(region, boto_session, model_data, image_uri, with sagemaker_model(boto_session, sagemaker_client, image_uri, model_name, model_data): batch_input = find_or_put_model_data(region, boto_session, BATCH_CSV) bucket = _test_bucket(region, boto_session) - batch_output = f's3://{bucket}/output/{model_name}' + batch_output = f"s3://{bucket}/output/{model_name}" request = _create_transform_job_request( model_name=model_name, batch_input=batch_input, @@ -216,10 +216,10 @@ def run_batch_transform_job(region, boto_session, model_data, image_uri, def invoke_endpoint(sagemaker_runtime_client, endpoint_name, input_data): response = sagemaker_runtime_client.invoke_endpoint(EndpointName=endpoint_name, - ContentType='application/json', + ContentType="application/json", Body=json.dumps(input_data)) - result = json.loads(response['Body'].read().decode()) - assert result['predictions'] is not None + result = json.loads(response["Body"].read().decode()) + assert result["predictions"] is not None return result From c2dc3563a601ae2091b5cce28119a59bff334ccc Mon Sep 17 00:00:00 2001 From: Chuyang Deng Date: Tue, 30 Jun 2020 17:33:42 -0700 Subject: [PATCH 2/5] fix: change single-quotes to double-quotes --- docker/2.1/__init__.py | 0 docker/2.1/deep_learning_container.py | 109 ----- docker/2.1/dockerd-entrypoint.py | 22 - docker/2.1/sagemaker/__init__.py | 12 - docker/2.1/sagemaker/multi_model_utils.py | 52 --- docker/2.1/sagemaker/nginx.conf.template | 64 --- docker/2.1/sagemaker/python_service.py | 397 ------------------ docker/2.1/sagemaker/serve | 3 - docker/2.1/sagemaker/serve.py | 308 -------------- docker/2.1/sagemaker/tensorflow-serving.js | 231 ---------- 
docker/2.1/sagemaker/tfs_utils.py | 209 --------- docker/build_artifacts/sagemaker/serve.py | 2 +- .../sagemaker/tensorflow-serving.js | 84 ++-- .../local/multi_model_endpoint_test_utils.py | 11 +- test/integration/local/test_container.py | 4 +- .../local/test_pre_post_processing.py | 2 +- .../local/test_pre_post_processing_mme.py | 2 +- 17 files changed, 52 insertions(+), 1460 deletions(-) delete mode 100644 docker/2.1/__init__.py delete mode 100644 docker/2.1/deep_learning_container.py delete mode 100644 docker/2.1/dockerd-entrypoint.py delete mode 100644 docker/2.1/sagemaker/__init__.py delete mode 100644 docker/2.1/sagemaker/multi_model_utils.py delete mode 100644 docker/2.1/sagemaker/nginx.conf.template delete mode 100644 docker/2.1/sagemaker/python_service.py delete mode 100755 docker/2.1/sagemaker/serve delete mode 100644 docker/2.1/sagemaker/serve.py delete mode 100644 docker/2.1/sagemaker/tensorflow-serving.js delete mode 100644 docker/2.1/sagemaker/tfs_utils.py diff --git a/docker/2.1/__init__.py b/docker/2.1/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/docker/2.1/deep_learning_container.py b/docker/2.1/deep_learning_container.py deleted file mode 100644 index 1e82e61e..00000000 --- a/docker/2.1/deep_learning_container.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
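The util.py helpers above (sagemaker_model, sagemaker_endpoint, invoke_endpoint) are composed by the SageMaker integration tests; the actual test bodies sit outside this hunk, so the following is only a sketch of the typical wiring, reusing the fixture names from the conftest and test_tfs fixtures shown earlier:

    import util

    # Hypothetical endpoint test: the fixture wiring is assumed, while the helper
    # signatures match the util.py definitions shown above.
    def test_invoke_endpoint(boto_session, sagemaker_client, sagemaker_runtime_client,
                             image_uri, instance_type, model_name, tfs_model):
        input_data = {"instances": [1.0, 2.0, 5.0]}
        with util.sagemaker_model(boto_session, sagemaker_client, image_uri, model_name, tfs_model):
            with util.sagemaker_endpoint(sagemaker_client, model_name, instance_type) as endpoint:
                result = util.invoke_endpoint(sagemaker_runtime_client, endpoint, input_data)
                assert result["predictions"]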
-import re -import json -import logging -import requests - - -def _validate_instance_id(instance_id): - """ - Validate instance ID - """ - instance_id_regex = r"^(i-\S{17})" - compiled_regex = re.compile(instance_id_regex) - match = compiled_regex.match(instance_id) - - if not match: - return None - - return match.group(1) - - -def _retrieve_instance_id(): - """ - Retrieve instance ID from instance metadata service - """ - instance_id = None - url = "http://169.254.169.254/latest/meta-data/instance-id" - response = requests_helper(url, timeout=0.1) - - if response is not None: - instance_id = _validate_instance_id(response.text) - - return instance_id - - -def _retrieve_instance_region(): - """ - Retrieve instance region from instance metadata service - """ - region = None - valid_regions = ['ap-northeast-1', 'ap-northeast-2', 'ap-southeast-1', 'ap-southeast-2', - 'ap-south-1', 'ca-central-1', 'eu-central-1', 'eu-north-1', - 'eu-west-1', 'eu-west-2', 'eu-west-3', 'sa-east-1', - 'us-east-1', 'us-east-2', 'us-west-1', 'us-west-2'] - - url = "http://169.254.169.254/latest/dynamic/instance-identity/document" - response = requests_helper(url, timeout=0.1) - - if response is not None: - response_json = json.loads(response.text) - - if response_json['region'] in valid_regions: - region = response_json['region'] - - return region - - -def query_bucket(): - """ - GET request on an empty object from an Amazon S3 bucket - """ - response = None - instance_id = _retrieve_instance_id() - region = _retrieve_instance_region() - - if instance_id is not None and region is not None: - url = ("https://aws-deep-learning-containers-{0}.s3.{0}.amazonaws.com" - "/dlc-containers.txt?x-instance-id={1}".format(region, instance_id)) - response = requests_helper(url, timeout=0.2) - - logging.debug("Query bucket finished: {}".format(response)) - - return response - - -def requests_helper(url, timeout): - response = None - try: - response = requests.get(url, timeout=timeout) - except requests.exceptions.RequestException as e: - logging.error("Request exception: {}".format(e)) - - return response - - -def main(): - """ - Invoke bucket query - """ - # Logs are not necessary for normal run. Remove this line while debugging. - logging.getLogger().disabled = True - - logging.basicConfig(level=logging.ERROR) - query_bucket() - - -if __name__ == '__main__': - main() diff --git a/docker/2.1/dockerd-entrypoint.py b/docker/2.1/dockerd-entrypoint.py deleted file mode 100644 index fc4ce388..00000000 --- a/docker/2.1/dockerd-entrypoint.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
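A quick illustration of the instance-id check in the telemetry script being removed above: the pattern requires "i-" followed by 17 non-whitespace characters (only the first 17 are captured). The example IDs below are made up:

    import re

    pattern = re.compile(r"^(i-\S{17})")
    assert pattern.match("i-0123456789abcdef0") is not None  # 17 chars after "i-": accepted
    assert pattern.match("i-12345") is None                  # too short: rejected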
- -import os.path -import subprocess -import shlex -import sys - -if not os.path.exists("/opt/ml/input/config"): - subprocess.call(["python", "/usr/local/bin/deep_learning_container.py", "&>/dev/null", "&"]) - -subprocess.check_call(shlex.split(' '.join(sys.argv[1:]))) diff --git a/docker/2.1/sagemaker/__init__.py b/docker/2.1/sagemaker/__init__.py deleted file mode 100644 index 04fbf5d9..00000000 --- a/docker/2.1/sagemaker/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. diff --git a/docker/2.1/sagemaker/multi_model_utils.py b/docker/2.1/sagemaker/multi_model_utils.py deleted file mode 100644 index 5d2c47f4..00000000 --- a/docker/2.1/sagemaker/multi_model_utils.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
-import fcntl -import signal -import time -from contextlib import contextmanager - -MODEL_CONFIG_FILE = "/sagemaker/model-config.cfg" -DEFAULT_LOCK_FILE = "/sagemaker/lock-file.lock" - - -@contextmanager -def lock(path=DEFAULT_LOCK_FILE): - f = open(path, "w") - fd = f.fileno() - fcntl.lockf(fd, fcntl.LOCK_EX) - - try: - yield - finally: - time.sleep(1) - fcntl.lockf(fd, fcntl.LOCK_UN) - - -@contextmanager -def timeout(seconds=60): - def _raise_timeout_error(signum, frame): - raise Exception(408, "Timed out after {} seconds".format(seconds)) - - try: - signal.signal(signal.SIGALRM, _raise_timeout_error) - signal.alarm(seconds) - yield - finally: - signal.alarm(0) - - -class MultiModelException(Exception): - def __init__(self, code, msg): - Exception.__init__(self, code, msg) - self.code = code - self.msg = msg diff --git a/docker/2.1/sagemaker/nginx.conf.template b/docker/2.1/sagemaker/nginx.conf.template deleted file mode 100644 index 5ccfed3d..00000000 --- a/docker/2.1/sagemaker/nginx.conf.template +++ /dev/null @@ -1,64 +0,0 @@ -load_module modules/ngx_http_js_module.so; - -worker_processes auto; -daemon off; -pid /tmp/nginx.pid; -error_log /dev/stderr %NGINX_LOG_LEVEL%; - -worker_rlimit_nofile 4096; - -events { - worker_connections 2048; -} - -http { - include /etc/nginx/mime.types; - default_type application/json; - access_log /dev/stdout combined; - js_include tensorflow-serving.js; - - upstream tfs_upstream { - server localhost:%TFS_REST_PORT%; - } - - upstream gunicorn_upstream { - server unix:/tmp/gunicorn.sock fail_timeout=1; - } - - server { - listen %NGINX_HTTP_PORT% deferred; - client_max_body_size 0; - client_body_buffer_size 100m; - subrequest_output_buffer_size 100m; - - set $tfs_version %TFS_VERSION%; - set $default_tfs_model %TFS_DEFAULT_MODEL_NAME%; - - location /tfs { - rewrite ^/tfs/(.*) /$1 break; - proxy_redirect off; - proxy_pass_request_headers off; - proxy_set_header Content-Type 'application/json'; - proxy_set_header Accept 'application/json'; - proxy_pass http://tfs_upstream; - } - - location /ping { - %FORWARD_PING_REQUESTS%; - } - - location /invocations { - %FORWARD_INVOCATION_REQUESTS%; - } - - location /models { - proxy_pass http://gunicorn_upstream/models; - } - - location / { - return 404 '{"error": "Not Found"}'; - } - - keepalive_timeout 3; - } -} diff --git a/docker/2.1/sagemaker/python_service.py b/docker/2.1/sagemaker/python_service.py deleted file mode 100644 index 0014b6dd..00000000 --- a/docker/2.1/sagemaker/python_service.py +++ /dev/null @@ -1,397 +0,0 @@ -# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
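The lock() and timeout() context managers above are what python_service.py (deleted next) relies on when multiple gunicorn workers race to load models; a small usage sketch, where reserve_port and wait_until are invented names and the fcntl lock file path assumes the container filesystem:

    from multi_model_utils import MultiModelException, lock, timeout

    def reserve_port(free_ports):
        # lock() serializes access to the shared port pool across worker processes.
        with lock():
            if not free_ports:
                raise MultiModelException(507, "no available ports to load the model")
            return free_ports.pop()

    def wait_until(predicate, seconds=60):
        # timeout() raises after `seconds`, so a stuck TFS start cannot hang the loader forever.
        with timeout(seconds=seconds):
            while not predicate():
                pass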
-import bisect -import importlib.util -import json -import logging -import os -import subprocess -import time - -import falcon -import requests - -from multi_model_utils import lock, timeout, MultiModelException -import tfs_utils - -SAGEMAKER_MULTI_MODEL_ENABLED = os.environ.get("SAGEMAKER_MULTI_MODEL", "false").lower() == "true" -INFERENCE_SCRIPT_PATH = "/opt/ml/{}/code/inference.py".format("models" - if SAGEMAKER_MULTI_MODEL_ENABLED - else "model") -PYTHON_PROCESSING_ENABLED = os.path.exists(INFERENCE_SCRIPT_PATH) -SAGEMAKER_BATCHING_ENABLED = os.environ.get("SAGEMAKER_TFS_ENABLE_BATCHING", "false").lower() -MODEL_CONFIG_FILE_PATH = "/sagemaker/model-config.cfg" -TFS_GRPC_PORT = os.environ.get("TFS_GRPC_PORT") -TFS_REST_PORT = os.environ.get("TFS_REST_PORT") -SAGEMAKER_TFS_PORT_RANGE = os.environ.get("SAGEMAKER_SAFE_PORT_RANGE") - - -logging.basicConfig(level=logging.INFO) -log = logging.getLogger(__name__) - -CUSTOM_ATTRIBUTES_HEADER = "X-Amzn-SageMaker-Custom-Attributes" - - -def default_handler(data, context): - """A default inference request handler that directly send post request to TFS rest port with - un-processed data and return un-processed response - - :param data: input data - :param context: context instance that contains tfs_rest_uri - :return: inference response from TFS model server - """ - response = requests.post(context.rest_uri, data=data) - return response.content, context.accept_header - - -class PythonServiceResource: - - def __init__(self): - if SAGEMAKER_MULTI_MODEL_ENABLED: - self._model_tfs_rest_port = {} - self._model_tfs_grpc_port = {} - self._model_tfs_pid = {} - self._tfs_ports = self._parse_sagemaker_port_range(SAGEMAKER_TFS_PORT_RANGE) - else: - self._tfs_grpc_port = TFS_GRPC_PORT - self._tfs_rest_port = TFS_REST_PORT - - self._tfs_enable_batching = SAGEMAKER_BATCHING_ENABLED == "true" - self._tfs_default_model_name = os.environ.get("TFS_DEFAULT_MODEL_NAME", "None") - - if PYTHON_PROCESSING_ENABLED: - self._handler, self._input_handler, self._output_handler = self._import_handlers() - self._handlers = self._make_handler(self._handler, - self._input_handler, - self._output_handler) - else: - self._handlers = default_handler - - def on_post(self, req, res, model_name=None): - log.info(req.uri) - if model_name or "invocations" in req.uri: - self._handle_invocation_post(req, res, model_name) - else: - data = json.loads(req.stream.read().decode("utf-8")) - self._handle_load_model_post(res, data) - - def _parse_sagemaker_port_range(self, port_range): - lower, upper = port_range.split('-') - lower = int(lower) - upper = lower + int((int(upper) - lower) * 0.9) # only utilizing 90% of the ports - rest_port = lower - grpc_port = (lower + upper) // 2 - tfs_ports = { - "rest_port": [port for port in range(rest_port, grpc_port)], - "grpc_port": [port for port in range(grpc_port, upper)], - } - return tfs_ports - - def _ports_available(self): - with lock(): - rest_ports = self._tfs_ports["rest_port"] - grpc_ports = self._tfs_ports["grpc_port"] - return len(rest_ports) > 0 and len(grpc_ports) > 0 - - def _handle_load_model_post(self, res, data): # noqa: C901 - model_name = data["model_name"] - base_path = data["url"] - - # model is already loaded - if model_name in self._model_tfs_pid: - res.status = falcon.HTTP_409 - res.body = json.dumps({ - "error": "Model {} is already loaded.".format(model_name) - }) - - # check if there are available ports - if not self._ports_available(): - res.status = falcon.HTTP_507 - res.body = json.dumps({ - "error": "Memory exhausted: no 
available ports to load the model." - }) - with lock(): - self._model_tfs_rest_port[model_name] = self._tfs_ports["rest_port"].pop() - self._model_tfs_grpc_port[model_name] = self._tfs_ports["grpc_port"].pop() - - # validate model files are in the specified base_path - if self.validate_model_dir(base_path): - try: - tfs_config = tfs_utils.create_tfs_config_individual_model(model_name, base_path) - tfs_config_file = "/sagemaker/tfs-config/{}/model-config.cfg".format(model_name) - log.info("tensorflow serving model config: \n%s\n", tfs_config) - os.makedirs(os.path.dirname(tfs_config_file)) - with open(tfs_config_file, "w") as f: - f.write(tfs_config) - - batching_config_file = "/sagemaker/batching/{}/batching-config.cfg".format( - model_name) - if self._tfs_enable_batching: - tfs_utils.create_batching_config(batching_config_file) - - cmd = tfs_utils.tfs_command( - self._model_tfs_grpc_port[model_name], - self._model_tfs_rest_port[model_name], - tfs_config_file, - self._tfs_enable_batching, - batching_config_file, - ) - p = subprocess.Popen(cmd.split()) - self._wait_for_model(model_name) - - log.info("started tensorflow serving (pid: %d)", p.pid) - # update model name <-> tfs pid map - self._model_tfs_pid[model_name] = p - - res.status = falcon.HTTP_200 - res.body = json.dumps({ - "success": - "Successfully loaded model {}, " - "listening on rest port {} " - "and grpc port {}.".format(model_name, - self._model_tfs_rest_port, - self._model_tfs_grpc_port,) - }) - except MultiModelException as multi_model_exception: - self._cleanup_config_file(tfs_config_file) - self._cleanup_config_file(batching_config_file) - if multi_model_exception.code == 409: - res.status = falcon.HTTP_409 - res.body = multi_model_exception.msg - elif multi_model_exception.code == 408: - res.status = falcon.HTTP_408 - res.body = multi_model_exception.msg - else: - raise MultiModelException(falcon.HTTP_500, multi_model_exception.msg) - except FileExistsError as e: - res.status = falcon.HTTP_409 - res.body = json.dumps({ - "error": "Model {} is already loaded. 
{}".format(model_name, str(e)) - }) - except OSError as os_error: - self._cleanup_config_file(tfs_config_file) - self._cleanup_config_file(batching_config_file) - if os_error.errno == 12: - raise MultiModelException(falcon.HTTP_507, - "Memory exhausted: " - "not enough memory to start TFS instance") - else: - raise MultiModelException(falcon.HTTP_500, os_error.strerror) - else: - res.status = falcon.HTTP_404 - res.body = json.dumps({ - "error": - "Could not find valid base path {} for servable {}".format(base_path, - model_name) - }) - - def _cleanup_config_file(self, config_file): - if os.path.exists(config_file): - os.remove(config_file) - - def _wait_for_model(self, model_name): - url = "http://localhost:{}/v1/models/{}".format(self._model_tfs_rest_port[model_name], - model_name) - with timeout(): - while True: - time.sleep(0.5) - try: - response = requests.get(url) - if response.status_code == 200: - versions = json.loads(response.content)["model_version_status"] - if all(version["state"] == "AVAILABLE" for version in versions): - break - except ConnectionError: - log.exception("Failed to load models.") - - def _handle_invocation_post(self, req, res, model_name=None): - if SAGEMAKER_MULTI_MODEL_ENABLED: - if model_name: - if model_name not in self._model_tfs_rest_port: - res.status = falcon.HTTP_404 - res.body = json.dumps({ - "error": "Model {} is not loaded yet.".format(model_name) - }) - return - else: - log.info("model name: {}".format(model_name)) - rest_port = self._model_tfs_rest_port[model_name] - log.info("rest port: {}".format(str(self._model_tfs_rest_port[model_name]))) - grpc_port = self._model_tfs_grpc_port[model_name] - log.info("grpc port: {}".format(str(self._model_tfs_grpc_port[model_name]))) - data, context = tfs_utils.parse_request(req, rest_port, grpc_port, - self._tfs_default_model_name, - model_name) - else: - res.status = falcon.HTTP_400 - res.body = json.dumps({ - "error": "Invocation request does not contain model name." 
- }) - else: - data, context = tfs_utils.parse_request(req, self._tfs_rest_port, self._tfs_grpc_port, - self._tfs_default_model_name) - - try: - res.status = falcon.HTTP_200 - res.body, res.content_type = self._handlers(data, context) - except Exception as e: # pylint: disable=broad-except - log.exception("exception handling request: {}".format(e)) - res.status = falcon.HTTP_500 - res.body = json.dumps({ - "error": str(e) - }).encode("utf-8") # pylint: disable=E1101 - - def _import_handlers(self): - spec = importlib.util.spec_from_file_location("inference", INFERENCE_SCRIPT_PATH) - inference = importlib.util.module_from_spec(spec) - spec.loader.exec_module(inference) - - _custom_handler, _custom_input_handler, _custom_output_handler = None, None, None - if hasattr(inference, "handler"): - _custom_handler = inference.handler - elif hasattr(inference, "input_handler") and hasattr(inference, "output_handler"): - _custom_input_handler = inference.input_handler - _custom_output_handler = inference.output_handler - else: - raise NotImplementedError("Handlers are not implemented correctly in user script.") - - return _custom_handler, _custom_input_handler, _custom_output_handler - - def _make_handler(self, custom_handler, custom_input_handler, custom_output_handler): - if custom_handler: - return custom_handler - - def handler(data, context): - processed_input = custom_input_handler(data, context) - response = requests.post(context.rest_uri, data=processed_input) - return custom_output_handler(response, context) - - return handler - - def on_get(self, req, res, model_name=None): # pylint: disable=W0613 - if model_name is None: - models_info = {} - uri = "http://localhost:{}/v1/models/{}" - for model, port in self._model_tfs_rest_port.items(): - try: - info = json.loads(requests.get(uri.format(port, model)).content) - models_info[model] = info - except ValueError as e: - log.exception("exception handling request: {}".format(e)) - res.status = falcon.HTTP_500 - res.body = json.dumps({ - "error": str(e) - }).encode("utf-8") - res.status = falcon.HTTP_200 - res.body = json.dumps(models_info) - else: - if model_name not in self._model_tfs_rest_port: - res.status = falcon.HTTP_404 - res.body = json.dumps({ - "error": "Model {} is loaded yet.".format(model_name) - }).encode("utf-8") - else: - port = self._model_tfs_rest_port[model_name] - uri = "http://localhost:{}/v1/models/{}".format(port, model_name) - try: - info = requests.get(uri) - res.status = falcon.HTTP_200 - res.body = json.dumps({ - "model": info - }).encode("utf-8") - except ValueError as e: - log.exception("exception handling GET models request.") - res.status = falcon.HTTP_500 - res.body = json.dumps({ - "error": str(e) - }).encode("utf-8") - - def on_delete(self, req, res, model_name): # pylint: disable=W0613 - if model_name not in self._model_tfs_pid: - res.status = falcon.HTTP_404 - res.body = json.dumps({ - "error": "Model {} is not loaded yet".format(model_name) - }) - else: - try: - self._model_tfs_pid[model_name].kill() - os.remove("/sagemaker/tfs-config/{}/model-config.cfg".format(model_name)) - os.rmdir("/sagemaker/tfs-config/{}".format(model_name)) - release_rest_port = self._model_tfs_rest_port[model_name] - release_grpc_port = self._model_tfs_grpc_port[model_name] - with lock(): - bisect.insort(self._tfs_ports["rest_port"], release_rest_port) - bisect.insort(self._tfs_ports["grpc_port"], release_grpc_port) - del self._model_tfs_rest_port[model_name] - del self._model_tfs_grpc_port[model_name] - del 
self._model_tfs_pid[model_name] - res.status = falcon.HTTP_200 - res.body = json.dumps({ - "success": "Successfully unloaded model {}.".format(model_name) - }) - except OSError as error: - res.status = falcon.HTTP_500 - res.body = json.dumps({ - "error": str(error) - }).encode("utf-8") - - def validate_model_dir(self, model_path): - # model base path doesn't exits - if not os.path.exists(model_path): - return False - versions = [] - for _, dirs, _ in os.walk(model_path): - for dirname in dirs: - log.info("dirname: {}".format(dirname)) - if dirname.isdigit(): - versions.append(dirname) - return self.validate_model_versions(versions) - - def validate_model_versions(self, versions): - log.info(versions) - if not versions: - return False - for v in versions: - if v.isdigit(): - # TensorFlow model server will succeed with any versions found - # even if there are directories that's not a valid model version, - # the loading will succeed. - return True - return False - - -class PingResource: - def on_get(self, req, res): # pylint: disable=W0613 - res.status = falcon.HTTP_200 - - -class ServiceResources: - def __init__(self): - self._enable_python_processing = PYTHON_PROCESSING_ENABLED - self._enable_model_manager = SAGEMAKER_MULTI_MODEL_ENABLED - self._python_service_resource = PythonServiceResource() - self._ping_resource = PingResource() - - def add_routes(self, application): - application.add_route("/ping", self._ping_resource) - application.add_route("/invocations", self._python_service_resource) - - if self._enable_model_manager: - application.add_route("/models", self._python_service_resource) - application.add_route("/models/{model_name}", self._python_service_resource) - application.add_route("/models/{model_name}/invoke", self._python_service_resource) - - -app = falcon.API() -resources = ServiceResources() -resources.add_routes(app) diff --git a/docker/2.1/sagemaker/serve b/docker/2.1/sagemaker/serve deleted file mode 100755 index 9fac6a93..00000000 --- a/docker/2.1/sagemaker/serve +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -python3 /sagemaker/serve.py diff --git a/docker/2.1/sagemaker/serve.py b/docker/2.1/sagemaker/serve.py deleted file mode 100644 index 7a539fe6..00000000 --- a/docker/2.1/sagemaker/serve.py +++ /dev/null @@ -1,308 +0,0 @@ -# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
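For context, the validate_model_dir/validate_model_versions pair above only accepts a base path containing at least one all-digit subdirectory, which is how TensorFlow Serving marks model versions; a standalone restatement of that check (the path in the comment is the assumed test layout):

    import os

    def has_numeric_version(model_path):
        # Mirrors validate_model_dir above: loadable only if some subdirectory name is
        # all digits, e.g. /opt/ml/models/half_plus_three/123/saved_model.pb.
        if not os.path.exists(model_path):
            return False
        return any(d.isdigit() for _, dirs, _ in os.walk(model_path) for d in dirs)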
- -import logging -import os -import re -import signal -import subprocess -import tfs_utils - -from contextlib import contextmanager - -logging.basicConfig(level=logging.INFO) -log = logging.getLogger(__name__) - -JS_PING = "js_content ping" -JS_INVOCATIONS = "js_content invocations" -GUNICORN_PING = "proxy_pass http://gunicorn_upstream/ping" -GUNICORN_INVOCATIONS = "proxy_pass http://gunicorn_upstream/invocations" - -PYTHON_LIB_PATH = "/opt/ml/model/code/lib" -REQUIREMENTS_PATH = "/opt/ml/model/code/requirements.txt" -INFERENCE_PATH = "/opt/ml/model/code/inference.py" - - -class ServiceManager(object): - def __init__(self): - self._state = "initializing" - self._nginx = None - self._tfs = None - self._gunicorn = None - self._gunicorn_command = None - self._enable_python_service = os.path.exists(INFERENCE_PATH) - self._tfs_version = os.environ.get("SAGEMAKER_TFS_VERSION", "1.13") - self._nginx_http_port = os.environ.get("SAGEMAKER_BIND_TO_PORT", "8080") - self._nginx_loglevel = os.environ.get("SAGEMAKER_TFS_NGINX_LOGLEVEL", "error") - self._tfs_default_model_name = os.environ.get("SAGEMAKER_TFS_DEFAULT_MODEL_NAME", "None") - self._sagemaker_port_range = os.environ.get("SAGEMAKER_SAFE_PORT_RANGE", None) - self._tfs_config_path = "/sagemaker/model-config.cfg" - self._tfs_batching_config_path = "/sagemaker/batching-config.cfg" - - _enable_batching = os.environ.get("SAGEMAKER_TFS_ENABLE_BATCHING", "false").lower() - _enable_multi_model_endpoint = os.environ.get("SAGEMAKER_MULTI_MODEL", - "false").lower() - - if _enable_batching not in ["true", "false"]: - raise ValueError("SAGEMAKER_TFS_ENABLE_BATCHING must be 'true' or 'false'") - self._tfs_enable_batching = _enable_batching == "true" - - if _enable_multi_model_endpoint not in ["true", "false"]: - raise ValueError("SAGEMAKER_MULTI_MODEL must be 'true' or 'false'") - self._tfs_enable_multi_model_endpoint = _enable_multi_model_endpoint == "true" - - self._use_gunicorn = self._enable_python_service or self._tfs_enable_multi_model_endpoint - - if self._sagemaker_port_range is not None: - parts = self._sagemaker_port_range.split("-") - low = int(parts[0]) - hi = int(parts[1]) - if low + 2 > hi: - raise ValueError("not enough ports available in SAGEMAKER_SAFE_PORT_RANGE ({})" - .format(self._sagemaker_port_range)) - self._tfs_grpc_port = str(low) - self._tfs_rest_port = str(low + 1) - else: - # just use the standard default ports - self._tfs_grpc_port = "9000" - self._tfs_rest_port = "8501" - - # set environment variable for python service - os.environ["TFS_GRPC_PORT"] = self._tfs_grpc_port - os.environ["TFS_REST_PORT"] = self._tfs_rest_port - - def _create_tfs_config(self): - models = tfs_utils.find_models() - if not models: - raise ValueError("no SavedModel bundles found!") - - if self._tfs_default_model_name == "None": - default_model = os.path.basename(models[0]) - if default_model: - self._tfs_default_model_name = default_model - log.info("using default model name: {}".format(self._tfs_default_model_name)) - else: - log.info("no default model detected") - - # config (may) include duplicate 'config' keys, so we can't just dump a dict - config = "model_config_list: {\n" - for m in models: - config += " config: {\n" - config += " name: '{}',\n".format(os.path.basename(m)) - config += " base_path: '{}',\n".format(m) - config += " model_platform: 'tensorflow'\n" - config += " }\n" - config += "}\n" - - log.info("tensorflow serving model config: \n%s\n", config) - - with open("/sagemaker/model-config.cfg", "w") as f: - f.write(config) - - def 
_setup_gunicorn(self): - python_path_content = [] - python_path_option = "" - - if self._enable_python_service: - lib_path_exists = os.path.exists(PYTHON_LIB_PATH) - requirements_exists = os.path.exists(REQUIREMENTS_PATH) - python_path_content = ["/opt/ml/model/code"] - python_path_option = "--pythonpath " - - if lib_path_exists: - python_path_content.append(PYTHON_LIB_PATH) - - if requirements_exists: - if lib_path_exists: - log.warning("loading modules in '{}', ignoring requirements.txt" - .format(PYTHON_LIB_PATH)) - else: - log.info("installing packages from requirements.txt...") - pip_install_cmd = "pip3 install -r {}".format(REQUIREMENTS_PATH) - try: - subprocess.check_call(pip_install_cmd.split()) - except subprocess.CalledProcessError: - log.error("failed to install required packages, exiting.") - self._stop() - raise ChildProcessError("failed to install required packages.") - - gunicorn_command = ( - "gunicorn -b unix:/tmp/gunicorn.sock -k gevent --chdir /sagemaker " - "{}{} -e TFS_GRPC_PORT={} -e SAGEMAKER_MULTI_MODEL={} -e SAGEMAKER_SAFE_PORT_RANGE={} " - "python_service:app").format(python_path_option, ",".join(python_path_content), - self._tfs_grpc_port, self._tfs_enable_multi_model_endpoint, - self._sagemaker_port_range) - - log.info("gunicorn command: {}".format(gunicorn_command)) - self._gunicorn_command = gunicorn_command - - def _create_nginx_config(self): - template = self._read_nginx_template() - pattern = re.compile(r"%(\w+)%") - - template_values = { - "TFS_VERSION": self._tfs_version, - "TFS_REST_PORT": self._tfs_rest_port, - "TFS_DEFAULT_MODEL_NAME": self._tfs_default_model_name, - "NGINX_HTTP_PORT": self._nginx_http_port, - "NGINX_LOG_LEVEL": self._nginx_loglevel, - "FORWARD_PING_REQUESTS": GUNICORN_PING if self._use_gunicorn else JS_PING, - "FORWARD_INVOCATION_REQUESTS": GUNICORN_INVOCATIONS if self._use_gunicorn - else JS_INVOCATIONS, - } - - config = pattern.sub(lambda x: template_values[x.group(1)], template) - log.info("nginx config: \n%s\n", config) - - with open("/sagemaker/nginx.conf", "w") as f: - f.write(config) - - def _read_nginx_template(self): - with open("/sagemaker/nginx.conf.template", "r") as f: - template = f.read() - if not template: - raise ValueError("failed to read nginx.conf.template") - - return template - - def _start_tfs(self): - self._log_version("tensorflow_model_server --version', 'tensorflow version info:") - cmd = tfs_utils.tfs_command( - self._tfs_grpc_port, - self._tfs_rest_port, - self._tfs_config_path, - self._tfs_enable_batching, - self._tfs_batching_config_path, - ) - log.info("tensorflow serving command: {}".format(cmd)) - p = subprocess.Popen(cmd.split()) - log.info("started tensorflow serving (pid: %d)", p.pid) - self._tfs = p - - def _start_gunicorn(self): - self._log_version("gunicorn --version", "gunicorn version info:") - env = os.environ.copy() - env["TFS_DEFAULT_MODEL_NAME"] = self._tfs_default_model_name - p = subprocess.Popen(self._gunicorn_command.split(), env=env) - log.info("started gunicorn (pid: %d)", p.pid) - self._gunicorn = p - - def _start_nginx(self): - self._log_version("/usr/sbin/nginx -V", "nginx version info:") - p = subprocess.Popen("/usr/sbin/nginx -c /sagemaker/nginx.conf".split()) - log.info("started nginx (pid: %d)", p.pid) - self._nginx = p - - def _log_version(self, command, message): - try: - output = subprocess.check_output( - command.split(), - stderr=subprocess.STDOUT).decode("utf-8", "backslashreplace").strip() - log.info("{}\n{}".format(message, output)) - except 
subprocess.CalledProcessError: - log.warning("failed to run command: %s", command) - - def _stop(self, *args): # pylint: disable=W0613 - self._state = "stopping" - log.info("stopping services") - try: - os.kill(self._nginx.pid, signal.SIGQUIT) - except OSError: - pass - try: - if self._gunicorn: - os.kill(self._gunicorn.pid, signal.SIGTERM) - except OSError: - pass - try: - os.kill(self._tfs.pid, signal.SIGTERM) - except OSError: - pass - - self._state = "stopped" - log.info("stopped") - - def _wait_for_gunicorn(self): - while True: - if os.path.exists("/tmp/gunicorn.sock"): - log.info("gunicorn server is ready!") - return - - @contextmanager - def _timeout(self, seconds): - def _raise_timeout_error(signum, frame): - raise TimeoutError("time out after {} seconds".format(seconds)) - - try: - signal.signal(signal.SIGALRM, _raise_timeout_error) - signal.alarm(seconds) - yield - finally: - signal.alarm(0) - - def start(self): - log.info("starting services") - self._state = "starting" - signal.signal(signal.SIGTERM, self._stop) - - self._create_nginx_config() - - if self._tfs_enable_batching: - log.info("batching is enabled") - tfs_utils.create_batching_config(self._tfs_batching_config_path) - - if self._tfs_enable_multi_model_endpoint: - log.info("multi-model endpoint is enabled, TFS model servers will be started later") - else: - tfs_utils.create_tfs_config( - self._tfs_default_model_name, - self._tfs_config_path - ) - self._create_tfs_config() - self._start_tfs() - - if self._use_gunicorn: - self._setup_gunicorn() - self._start_gunicorn() - # make sure gunicorn is up - with self._timeout(seconds=30): - self._wait_for_gunicorn() - - self._start_nginx() - self._state = "started" - - while True: - pid, status = os.wait() - - if self._state != "started": - break - - if pid == self._nginx.pid: - log.warning("unexpected nginx exit (status: {}). restarting.".format(status)) - self._start_nginx() - - elif pid == self._tfs.pid: - log.warning( - "unexpected tensorflow serving exit (status: {}). restarting.".format(status)) - self._start_tfs() - - elif self._gunicorn and pid == self._gunicorn.pid: - log.warning("unexpected gunicorn exit (status: {}). restarting." - .format(status)) - self._start_gunicorn() - - self._stop() - - -if __name__ == "__main__": - ServiceManager().start() diff --git a/docker/2.1/sagemaker/tensorflow-serving.js b/docker/2.1/sagemaker/tensorflow-serving.js deleted file mode 100644 index fdce4472..00000000 --- a/docker/2.1/sagemaker/tensorflow-serving.js +++ /dev/null @@ -1,231 +0,0 @@ -var tfs_base_uri = "/tfs/v1/models/" -var custom_attributes_header = "X-Amzn-SageMaker-Custom-Attributes" - -function invocations(r) { - var ct = r.headersIn["Content-Type"] - - if ("application/json" == ct || "application/jsonlines" == ct || "application/jsons" == ct) { - json_request(r) - } else if ("text/csv" == ct) { - csv_request(r) - } else { - return_error(r, 415, "Unsupported Media Type: " + (ct || "Unknown")) - } -} - -function ping(r) { - var uri = make_tfs_uri(r, false) - - function callback (reply) { - if (reply.status == 200 && reply.responseBody.includes('"AVAILABLE"')) { - r.return(200) - } else { - r.error("failed ping" + reply.responseBody) - r.return(502) - } - } - - r.subrequest(uri, callback) -} - -function ping_without_model(r) { - // hack for TF 1.11 and MME - // for TF 1.11, send an arbitrary fixed request to the default model. 
- // if response is 400, the model is ok (but input was bad), so return 200 - // for MME, the default model name is None and does not exist - // also return 200 in unlikely case our request was really valid - - var uri = make_tfs_uri(r, true) - var options = { - method: "POST", - body: "{'instances': 'invalid'}" - } - - function callback (reply) { - if (reply.status == 200 || reply.status == 400 || - reply.responseBody.includes("Servable not found for request: Latest(None)")) { - r.return(200) - } else { - r.error("failed ping" + reply.responseBody) - r.return(502) - } - } - - r.subrequest(uri, options, callback) -} - -function return_error(r, code, message) { - if (message) { - r.return(code, "{'error': " + message + "'}'") - } else { - r.return(code) - } -} - -function tfs_json_request(r, json) { - var uri = make_tfs_uri(r, true) - var options = { - method: "POST", - body: json - } - - var accept = r.headersIn.Accept - function callback (reply) { - var body = reply.responseBody - if (reply.status == 400) { - // "fix" broken json escaping in \'instances\' message - body = body.replace("\\'instances\\'", "'instances'") - } - - if ("application/jsonlines" == accept || "application/jsons" == accept) { - body = body.replace(/\n/g, "") - r.headersOut["Content-Type"] = accept - } - r.return(reply.status, body) - } - - r.subrequest(uri, options, callback) - -} - -function make_tfs_uri(r, with_method) { - var attributes = parse_custom_attributes(r) - - var uri = tfs_base_uri + attributes["tfs-model-name"] - if ("tfs-model-version" in attributes) { - uri += "/versions/" + attributes["tfs-model-version"] - } - - if (with_method) { - uri += ":" + (attributes["tfs-method"] || "predict") - } - - return uri -} - -function parse_custom_attributes(r) { - var attributes = {} - var kv_pattern = /tfs-[a-z\-]+=[^,]+/g - var header = r.headersIn[custom_attributes_header] - if (header) { - var matches = header.match(kv_pattern) - if (matches) { - for (var i = 0; i < matches.length; i++) { - var kv = matches[i].split("=") - if (kv.length === 2) { - attributes[kv[0]] = kv[1] - } - } - } - } - - // for MME invocations, tfs-model-name is in the uri, or use default_tfs_model - if (!attributes["tfs-model-name"]) { - var uri_pattern = /\/models\/[^,]+\/invoke/g - var model_name = r.uri.match(uri_pattern) - if (model_name[0]) { - model_name = r.uri.replace("/models/", "").replace("/invoke", "") - attributes["tfs-model-name"] = model_name - } else { - attributes["tfs-model-name"] = r.variables.default_tfs_model - } - } - - return attributes -} - -function json_request(r) { - var data = r.requestBody - - if (is_json_lines(data)) { - json_lines_request(r, data) - } else if (is_tfs_json(data)) { - tfs_json_request(r, data) - } else { - generic_json_request(r, data) - } -} - -function is_tfs_json(data) { - return /"(instances|inputs|examples)"\s*:/.test(data) -} - -function is_json_lines(data) { - // objects separated only by (optional) whitespace means jsons/json-lines - return /[}\]]\s*[\[{]/.test(data) -} - -function generic_json_request(r, data) { - if (! /^\s*\[\s*\[/.test(data)) { - data = "[" + data + "]" - } - - var json = "{'instances':" + data + "}" - tfs_json_request(r, json) -} - -function json_lines_request(r, data) { - var lines = data.trim().split(/\r?\n/) - var builder = [] - builder.push("{'instances':") - if (lines.length != 1) { - builder.push("[") - } - - for (var i = 0; i < lines.length; i++) { - var line = lines[i].trim() - if (line) { - var instance = (i == 0) ? 
"" : "," - instance += line - builder.push(instance) - } - } - - builder.push(lines.length == 1 ? "}" : "]}") - tfs_json_request(r, builder.join('')) -} - -function csv_request(r) { - var data = r.requestBody - // look for initial quote or numeric-only data in 1st field - var needs_quotes = data.search(/^\s*("|[\d.Ee+\-]+.*)/) != 0 - var lines = data.trim().split(/\r?\n/) - var builder = [] - builder.push("{'nstances':[") - - for (var i = 0; i < lines.length; i++) { - var line = lines[i].trim() - if (line) { - var line_builder = [] - // Only wrap line in brackets if there are multiple columns. - // If there's only one column and it has a string with a comma, - // the input will be wrapped in an extra set of brackets. - var has_multiple_columns = line.search(",") != -1 - - if (has_multiple_columns) { - line_builder.push("[") - } - - if (needs_quotes) { - line_builder.push("'") - line_builder.push(line.replace("'", "\\'").replace(",", "','")) - line_builder.push("'") - } else { - line_builder.push(line) - } - - if (has_multiple_columns) { - line_builder.push("]") - } - - var json_line = line_builder.join("") - builder.push(json_line) - - if (i != lines.length - 1) - builder.push(",") - } - } - - builder.push("]}") - tfs_json_request(r, builder.join("")) -} diff --git a/docker/2.1/sagemaker/tfs_utils.py b/docker/2.1/sagemaker/tfs_utils.py deleted file mode 100644 index f3ca0cb7..00000000 --- a/docker/2.1/sagemaker/tfs_utils.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
- -import logging -import multiprocessing -import os -import re - -from collections import namedtuple - -logging.basicConfig(level=logging.INFO) -log = logging.getLogger(__name__) - -DEFAULT_CONTENT_TYPE = "application/json" -DEFAULT_ACCEPT_HEADER = "application/json" -CUSTOM_ATTRIBUTES_HEADER = "X-Amzn-SageMaker-Custom-Attributes" - -Context = namedtuple("Context", - "model_name, model_version, method, rest_uri, grpc_port, " - "custom_attributes, request_content_type, accept_header, content_length") - - -def parse_request(req, rest_port, grpc_port, default_model_name, model_name=None): - tfs_attributes = parse_tfs_custom_attributes(req) - tfs_uri = make_tfs_uri(rest_port, tfs_attributes, default_model_name, model_name) - - if not model_name: - model_name = tfs_attributes.get("tfs-model-name") - - context = Context(model_name, - tfs_attributes.get("tfs-model-version"), - tfs_attributes.get("tfs-method"), - tfs_uri, - grpc_port, - req.get_header(CUSTOM_ATTRIBUTES_HEADER), - req.get_header("Content-Type") or DEFAULT_CONTENT_TYPE, - req.get_header("Accept") or DEFAULT_ACCEPT_HEADER, - req.content_length) - - data = req.stream - return data, context - - -def make_tfs_uri(port, attributes, default_model_name, model_name=None): - log.info("sagemaker tfs attributes: \n{}".format(attributes)) - - tfs_model_name = model_name or attributes.get("tfs-model-name", default_model_name) - tfs_model_version = attributes.get("tfs-model-version") - tfs_method = attributes.get("tfs-method", "predict") - - uri = "http://localhost:{}/v1/models/{}".format(port, tfs_model_name) - if tfs_model_version: - uri += "/versions/" + tfs_model_version - uri += ":" + tfs_method - return uri - - -def parse_tfs_custom_attributes(req): - attributes = {} - header = req.get_header(CUSTOM_ATTRIBUTES_HEADER) - if header: - matches = re.findall(r"(tfs-[a-z\-]+=[^,]+)", header) - attributes = dict(attribute.split("=") for attribute in matches) - return attributes - - -def create_tfs_config_individual_model(model_name, base_path): - config = "model_config_list: {\n" - config += " config: {\n" - config += " name: '{}',\n".format(model_name) - config += " base_path: '{}',\n".format(base_path) - config += " model_platform: 'tensorflow'\n" - config += " }\n" - config += "}\n" - return config - - -def create_tfs_config( - tfs_default_model_name, - tfs_config_path, -): - models = find_models() - if not models: - raise ValueError("no SavedModel bundles found!") - - if tfs_default_model_name == "None": - default_model = os.path.basename(models[0]) - if default_model: - tfs_default_model_name = default_model - log.info("using default model name: {}".format(tfs_default_model_name)) - else: - log.info("no default model detected") - - # config (may) include duplicate 'config' keys, so we can't just dump a dict - config = "model_config_list: {\n" - for m in models: - config += " config: {\n" - config += " name: '{}',\n".format(os.path.basename(m)) - config += " base_path: '{}',\n".format(m) - config += " model_platform: 'tensorflow'\n" - config += " }\n" - config += "}\n" - - log.info("tensorflow serving model config: \n%s\n", config) - - with open(tfs_config_path, "w") as f: - f.write(config) - - -def tfs_command(tfs_grpc_port, - tfs_rest_port, - tfs_config_path, - tfs_enable_batching, - tfs_batching_config_file): - cmd = "tensorflow_model_server " \ - "--port={} " \ - "--rest_api_port={} " \ - "--model_config_file={} " \ - "--max_num_load_retries=0 {}" \ - .format(tfs_grpc_port, tfs_rest_port, tfs_config_path, - 
get_tfs_batching_args(tfs_enable_batching, tfs_batching_config_file)) - return cmd - - -def find_models(): - base_path = "/opt/ml/model" - models = [] - for f in _find_saved_model_files(base_path): - parts = f.split("/") - if len(parts) >= 6 and re.match(r"^\d+$", parts[-2]): - model_path = "/".join(parts[0:-2]) - if model_path not in models: - models.append(model_path) - return models - - -def _find_saved_model_files(path): - for e in os.scandir(path): - if e.is_dir(): - yield from _find_saved_model_files(os.path.join(path, e.name)) - else: - if e.name == "saved_model.pb": - yield os.path.join(path, e.name) - - -def get_tfs_batching_args(enable_batching, tfs_batching_config): - if enable_batching: - return "--enable_batching=true " \ - "--batching_parameters_file={}".format(tfs_batching_config) - else: - return "" - - -def create_batching_config(batching_config_file): - class _BatchingParameter: - def __init__(self, key, env_var, value, defaulted_message): - self.key = key - self.env_var = env_var - self.value = value - self.defaulted_message = defaulted_message - - cpu_count = multiprocessing.cpu_count() - batching_parameters = [ - _BatchingParameter("max_batch_size", "SAGEMAKER_TFS_MAX_BATCH_SIZE", 8, - "max_batch_size defaulted to {}. Set {} to override default. " - "Tuning this parameter may yield better performance."), - _BatchingParameter("batch_timeout_micros", "SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS", 1000, - "batch_timeout_micros defaulted to {}. Set {} to override " - "default. Tuning this parameter may yield better performance."), - _BatchingParameter("num_batch_threads", "SAGEMAKER_TFS_NUM_BATCH_THREADS", - cpu_count, "num_batch_threads defaulted to {}," - "the number of CPUs. Set {} to override default."), - _BatchingParameter("max_enqueued_batches", "SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES", - # Batch limits number of concurrent requests, which limits number - # of enqueued batches, so this can be set high for Batch - 100000000 if "SAGEMAKER_BATCH" in os.environ else cpu_count, - "max_enqueued_batches defaulted to {}. Set {} to override default. 
" - "Tuning this parameter may be necessary to tune out-of-memory " - "errors occur."), - ] - - warning_message = "" - for batching_parameter in batching_parameters: - if batching_parameter.env_var in os.environ: - batching_parameter.value = os.environ[batching_parameter.env_var] - else: - warning_message += batching_parameter.defaulted_message.format( - batching_parameter.value, batching_parameter.env_var) - warning_message += "\n" - if warning_message: - log.warning(warning_message) - - config = "" - for batching_parameter in batching_parameters: - config += "%s { value: %s }\n" % (batching_parameter.key, batching_parameter.value) - - log.info("batching config: \n%s\n", config) - with open(batching_config_file, "w") as f: - f.write(config) diff --git a/docker/build_artifacts/sagemaker/serve.py b/docker/build_artifacts/sagemaker/serve.py index 7a539fe6..9d814feb 100644 --- a/docker/build_artifacts/sagemaker/serve.py +++ b/docker/build_artifacts/sagemaker/serve.py @@ -176,7 +176,7 @@ def _read_nginx_template(self): return template def _start_tfs(self): - self._log_version("tensorflow_model_server --version', 'tensorflow version info:") + self._log_version("tensorflow_model_server --version", "tensorflow version info:") cmd = tfs_utils.tfs_command( self._tfs_grpc_port, self._tfs_rest_port, diff --git a/docker/build_artifacts/sagemaker/tensorflow-serving.js b/docker/build_artifacts/sagemaker/tensorflow-serving.js index fdce4472..1c040b0a 100644 --- a/docker/build_artifacts/sagemaker/tensorflow-serving.js +++ b/docker/build_artifacts/sagemaker/tensorflow-serving.js @@ -1,15 +1,15 @@ -var tfs_base_uri = "/tfs/v1/models/" -var custom_attributes_header = "X-Amzn-SageMaker-Custom-Attributes" +var tfs_base_uri = '/tfs/v1/models/' +var custom_attributes_header = 'X-Amzn-SageMaker-Custom-Attributes' function invocations(r) { - var ct = r.headersIn["Content-Type"] + var ct = r.headersIn['Content-Type'] - if ("application/json" == ct || "application/jsonlines" == ct || "application/jsons" == ct) { + if ('application/json' == ct || 'application/jsonlines' == ct || 'application/jsons' == ct) { json_request(r) - } else if ("text/csv" == ct) { + } else if ('text/csv' == ct) { csv_request(r) } else { - return_error(r, 415, "Unsupported Media Type: " + (ct || "Unknown")) + return_error(r, 415, 'Unsupported Media Type: ' + (ct || 'Unknown')) } } @@ -20,7 +20,7 @@ function ping(r) { if (reply.status == 200 && reply.responseBody.includes('"AVAILABLE"')) { r.return(200) } else { - r.error("failed ping" + reply.responseBody) + r.error('failed ping' + reply.responseBody) r.return(502) } } @@ -37,16 +37,16 @@ function ping_without_model(r) { var uri = make_tfs_uri(r, true) var options = { - method: "POST", - body: "{'instances': 'invalid'}" + method: 'POST', + body: '{"instances": "invalid"}' } function callback (reply) { if (reply.status == 200 || reply.status == 400 || - reply.responseBody.includes("Servable not found for request: Latest(None)")) { + reply.responseBody.includes('Servable not found for request: Latest(None)')) { r.return(200) } else { - r.error("failed ping" + reply.responseBody) + r.error('failed ping' + reply.responseBody) r.return(502) } } @@ -56,7 +56,7 @@ function ping_without_model(r) { function return_error(r, code, message) { if (message) { - r.return(code, "{'error': " + message + "'}'") + r.return(code, '{"error": "' + message + '"}') } else { r.return(code) } @@ -65,7 +65,7 @@ function return_error(r, code, message) { function tfs_json_request(r, json) { var uri = make_tfs_uri(r, true) 
var options = { - method: "POST", + method: 'POST', body: json } @@ -77,9 +77,9 @@ function tfs_json_request(r, json) { body = body.replace("\\'instances\\'", "'instances'") } - if ("application/jsonlines" == accept || "application/jsons" == accept) { - body = body.replace(/\n/g, "") - r.headersOut["Content-Type"] = accept + if ('application/jsonlines' == accept || 'application/jsons' == accept) { + body = body.replace(/\n/g, '') + r.headersOut['Content-Type'] = accept } r.return(reply.status, body) } @@ -91,13 +91,13 @@ function tfs_json_request(r, json) { function make_tfs_uri(r, with_method) { var attributes = parse_custom_attributes(r) - var uri = tfs_base_uri + attributes["tfs-model-name"] - if ("tfs-model-version" in attributes) { - uri += "/versions/" + attributes["tfs-model-version"] + var uri = tfs_base_uri + attributes['tfs-model-name'] + if ('tfs-model-version' in attributes) { + uri += '/versions/' + attributes['tfs-model-version'] } if (with_method) { - uri += ":" + (attributes["tfs-method"] || "predict") + uri += ':' + (attributes['tfs-method'] || 'predict') } return uri @@ -111,7 +111,7 @@ function parse_custom_attributes(r) { var matches = header.match(kv_pattern) if (matches) { for (var i = 0; i < matches.length; i++) { - var kv = matches[i].split("=") + var kv = matches[i].split('=') if (kv.length === 2) { attributes[kv[0]] = kv[1] } @@ -120,14 +120,14 @@ function parse_custom_attributes(r) { } // for MME invocations, tfs-model-name is in the uri, or use default_tfs_model - if (!attributes["tfs-model-name"]) { + if (!attributes['tfs-model-name']) { var uri_pattern = /\/models\/[^,]+\/invoke/g var model_name = r.uri.match(uri_pattern) if (model_name[0]) { - model_name = r.uri.replace("/models/", "").replace("/invoke", "") - attributes["tfs-model-name"] = model_name + model_name = r.uri.replace('/models/', '').replace('/invoke', '') + attributes['tfs-model-name'] = model_name } else { - attributes["tfs-model-name"] = r.variables.default_tfs_model + attributes['tfs-model-name'] = r.variables.default_tfs_model } } @@ -157,31 +157,31 @@ function is_json_lines(data) { function generic_json_request(r, data) { if (! /^\s*\[\s*\[/.test(data)) { - data = "[" + data + "]" + data = '[' + data + ']' } - var json = "{'instances':" + data + "}" + var json = '{"instances":' + data + '}' tfs_json_request(r, json) } function json_lines_request(r, data) { var lines = data.trim().split(/\r?\n/) var builder = [] - builder.push("{'instances':") + builder.push('{"instances":') if (lines.length != 1) { - builder.push("[") + builder.push('[') } for (var i = 0; i < lines.length; i++) { var line = lines[i].trim() if (line) { - var instance = (i == 0) ? "" : "," + var instance = (i == 0) ? '' : ',' instance += line builder.push(instance) } } - builder.push(lines.length == 1 ? "}" : "]}") + builder.push(lines.length == 1 ? '}' : ']}') tfs_json_request(r, builder.join('')) } @@ -191,7 +191,7 @@ function csv_request(r) { var needs_quotes = data.search(/^\s*("|[\d.Ee+\-]+.*)/) != 0 var lines = data.trim().split(/\r?\n/) var builder = [] - builder.push("{'nstances':[") + builder.push('{"instances":[') for (var i = 0; i < lines.length; i++) { var line = lines[i].trim() @@ -200,32 +200,32 @@ function csv_request(r) { // Only wrap line in brackets if there are multiple columns. // If there's only one column and it has a string with a comma, // the input will be wrapped in an extra set of brackets. 
- var has_multiple_columns = line.search(",") != -1 + var has_multiple_columns = line.search(',') != -1 if (has_multiple_columns) { - line_builder.push("[") + line_builder.push('[') } if (needs_quotes) { - line_builder.push("'") - line_builder.push(line.replace("'", "\\'").replace(",", "','")) - line_builder.push("'") + line_builder.push('"') + line_builder.push(line.replace('"', '\\"').replace(',', '","')) + line_builder.push('"') } else { line_builder.push(line) } if (has_multiple_columns) { - line_builder.push("]") + line_builder.push(']') } - var json_line = line_builder.join("") + var json_line = line_builder.join('') builder.push(json_line) if (i != lines.length - 1) - builder.push(",") + builder.push(',') } } - builder.push("]}") - tfs_json_request(r, builder.join("")) + builder.push(']}') + tfs_json_request(r, builder.join('')) } diff --git a/test/integration/local/multi_model_endpoint_test_utils.py b/test/integration/local/multi_model_endpoint_test_utils.py index 08802dd6..508b6615 100644 --- a/test/integration/local/multi_model_endpoint_test_utils.py +++ b/test/integration/local/multi_model_endpoint_test_utils.py @@ -11,7 +11,6 @@ # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. -import encodings import json import requests @@ -34,17 +33,17 @@ def make_invocation_request(data, model_name, content_type="application/json"): "X-Amzn-SageMaker-Custom-Attributes": "tfs-method=predict" } response = requests.post(INVOCATION_URL.format(model_name), data=data, headers=headers) - return response.status_code, response.content.decode(encodings.utf_8.getregentry().name) + return response.status_code, response.content.decode("utf-8") def make_list_model_request(): response = requests.get(MODELS_URL) - return response.status_code, response.content.decode(encodings.utf_8.getregentry().name) + return response.status_code, response.content.decode("utf-8") def make_get_model_request(model_name): response = requests.get(MODELS_URL + "/{}".format(model_name)) - return response.status_code, response.content.decode(encodings.utf_8.getregentry().name) + return response.status_code, response.content.decode("utf-8") def make_load_model_request(data, content_type="application/json"): @@ -52,9 +51,9 @@ def make_load_model_request(data, content_type="application/json"): "Content-Type": content_type } response = requests.post(MODELS_URL, data=data, headers=headers) - return response.status_code, response.content.decode(encodings.utf_8.getregentry().name) + return response.status_code, response.content.decode("utf-8") def make_unload_model_request(model_name): response = requests.delete(DELETE_MODEL_URL.format(model_name)) - return response.status_code, response.content.decode(encodings.utf_8.getregentry().name) + return response.status_code, response.content.decode("utf-8") diff --git a/test/integration/local/test_container.py b/test/integration/local/test_container.py index 00a82ecf..112df792 100644 --- a/test/integration/local/test_container.py +++ b/test/integration/local/test_container.py @@ -130,7 +130,7 @@ def test_predict_jsons(): def test_predict_jsons_2(): - x = "{'x': [1.0, 2.0, 5.0]}\n{'x': [1.0, 2.0, 5.0]}" + x = "{\"x\": [1.0, 2.0, 5.0]}\n{\"x\": [1.0, 2.0, 5.0]}" y = make_request(x) assert y == {"predictions": [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} @@ -255,4 +255,4 @@ def test_predict_with_jsonlines(): } response = requests.post(BASE_URL, data=json.dumps(x), headers=headers) assert 
response.headers["Content-Type"] == "application/jsonlines" - assert response.content.decode("utf-8") == "{ 'predictions': [3.5, 4.0, 5.5 ]}" + assert response.content.decode("utf-8") == "{ \"predictions\": [3.5, 4.0, 5.5 ]}" diff --git a/test/integration/local/test_pre_post_processing.py b/test/integration/local/test_pre_post_processing.py index e2ec89cc..3a490d4f 100644 --- a/test/integration/local/test_pre_post_processing.py +++ b/test/integration/local/test_pre_post_processing.py @@ -88,7 +88,7 @@ def make_headers(content_type, method): def test_predict_json(): headers = make_headers("application/json", "predict") - data = "{'instances': [1.0, 2.0, 5.0]}" + data = "{\"instances\": [1.0, 2.0, 5.0]}" response = requests.post(INVOCATIONS_URL, data=data, headers=headers).json() assert response == {"predictions": [3.5, 4.0, 5.5]} diff --git a/test/integration/local/test_pre_post_processing_mme.py b/test/integration/local/test_pre_post_processing_mme.py index f4c8730d..7e63b610 100644 --- a/test/integration/local/test_pre_post_processing_mme.py +++ b/test/integration/local/test_pre_post_processing_mme.py @@ -100,7 +100,7 @@ def test_ping_service(): def test_predict_json(model): headers = make_headers() - data = "{'instances': [1.0, 2.0, 5.0]}" + data = "{\"instances\": [1.0, 2.0, 5.0]}" response = requests.post(INVOCATION_URL.format(model), data=data, headers=headers).json() assert response == {"predictions": [3.5, 4.0, 5.5]} From 2807e34614eccac34edcbc66537c28ff8711944d Mon Sep 17 00:00:00 2001 From: Chuyang Deng Date: Wed, 22 Jul 2020 13:31:48 -0700 Subject: [PATCH 3/5] update quotes --- README.md | 36 +++++++++---------- .../sagemaker/python_service.py | 20 +++++------ docker/build_artifacts/sagemaker/serve.py | 2 +- test/integration/local/conftest.py | 6 ++-- test/integration/local/test_container.py | 14 ++++---- .../local/test_pre_post_processing.py | 12 +++---- .../local/test_pre_post_processing_mme.py | 14 ++++---- test/integration/local/test_tfs_batching.py | 22 ++++++------ test/integration/sagemaker/conftest.py | 14 ++++---- test/integration/sagemaker/test_tfs.py | 26 +++++++------- 10 files changed, 83 insertions(+), 83 deletions(-) diff --git a/README.md b/README.md index dbe22668..18a2fc3e 100644 --- a/README.md +++ b/README.md @@ -620,24 +620,24 @@ To deploy a Multi-Model endpoint with TFS container, please start the container ### Multi-Model Interfaces We provide four different interfaces for user to interact with a Multi-Model Mode container: -+---------------------+---------------------------------+---------------------------------------------+ -| Functionality | Request | Response/Actions | -+---------------------+---------------------------------+---------------------------------------------+ -| List A Single Model | GET /models/{model_name} | Information about the specified model | -+---------------------+---------------------------------+---------------------------------------------+ -| List All Models | GET /models | List of Information about all loaded models | -+---------------------+---------------------------------+---------------------------------------------+ -| | POST /models | Load model with "model_name" from | -| | data = { | specified url | -| Load A Model | "model_name": , | | -| | "url": | | -| | } | | -+---------------------+---------------------------------+---------------------------------------------+ -| Make Invocations | POST /models/{model_name}/invoke| Return inference result from | -| | data = | the specified model | 
-+---------------------+---------------------------------+---------------------------------------------+ -| Unload A Model | DELETE /models/{model_name} | Unload the specified model | -+---------------------+---------------------------------+---------------------------------------------+ + +---------------------+---------------------------------+---------------------------------------------+ + | Functionality | Request | Response/Actions | + +---------------------+---------------------------------+---------------------------------------------+ + | List A Single Model | GET /models/{model_name} | Information about the specified model | + +---------------------+---------------------------------+---------------------------------------------+ + | List All Models | GET /models | List of Information about all loaded models | + +---------------------+---------------------------------+---------------------------------------------+ + | | POST /models | Load model with "model_name" from | + | | data = { | specified url | + | Load A Model | "model_name": , | | + | | "url": | | + | | } | | + +---------------------+---------------------------------+---------------------------------------------+ + | Make Invocations | POST /models/{model_name}/invoke| Return inference result from | + | | data = | the specified model | + +---------------------+---------------------------------+---------------------------------------------+ + | Unload A Model | DELETE /models/{model_name} | Unload the specified model | + +---------------------+---------------------------------+---------------------------------------------+ ### Maximum Number of Models Also please note the environment variable ``SAGEMAKER_SAFE_PORT_RANGE`` will limit the number of models that can be loaded to the endpoint at the same time. 
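To make the table above concrete, here is a minimal local sketch of the four interfaces using Python requests. It assumes a container started the way the local integration tests below start it (SAGEMAKER_BIND_TO_PORT=8080, SAGEMAKER_MULTI_MODEL=True) with the half_plus_three test model mounted at the path those tests use; adjust names and paths for a real deployment.

import json
import requests

BASE = "http://localhost:8080"
headers = {"Content-Type": "application/json"}

# Load A Model
model_data = {
    "model_name": "half_plus_three",
    "url": "/opt/ml/models/half_plus_three/model/half_plus_three",
}
requests.post(BASE + "/models", data=json.dumps(model_data), headers=headers)

# List All Models / List A Single Model
print(requests.get(BASE + "/models").text)
print(requests.get(BASE + "/models/half_plus_three").text)

# Make Invocations
print(requests.post(BASE + "/models/half_plus_three/invoke",
                    data='{"instances": [1.0, 2.0, 5.0]}',
                    headers=headers).text)

# Unload A Model
requests.delete(BASE + "/models/half_plus_three")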
diff --git a/docker/build_artifacts/sagemaker/python_service.py b/docker/build_artifacts/sagemaker/python_service.py index 60a2d04b..44b9498c 100644 --- a/docker/build_artifacts/sagemaker/python_service.py +++ b/docker/build_artifacts/sagemaker/python_service.py @@ -27,14 +27,14 @@ from multi_model_utils import lock, timeout, MultiModelException import tfs_utils -SAGEMAKER_MULTI_MODEL_ENABLED = os.environ.get('SAGEMAKER_MULTI_MODEL', 'false').lower() == 'true' -INFERENCE_SCRIPT_PATH = '/opt/ml/model/code/inference.py' +SAGEMAKER_MULTI_MODEL_ENABLED = os.environ.get("SAGEMAKER_MULTI_MODEL", "false").lower() == "true" +INFERENCE_SCRIPT_PATH = "/opt/ml/model/code/inference.py" -SAGEMAKER_BATCHING_ENABLED = os.environ.get('SAGEMAKER_TFS_ENABLE_BATCHING', 'false').lower() -MODEL_CONFIG_FILE_PATH = '/sagemaker/model-config.cfg' -TFS_GRPC_PORT = os.environ.get('TFS_GRPC_PORT') -TFS_REST_PORT = os.environ.get('TFS_REST_PORT') -SAGEMAKER_TFS_PORT_RANGE = os.environ.get('SAGEMAKER_SAFE_PORT_RANGE') +SAGEMAKER_BATCHING_ENABLED = os.environ.get("SAGEMAKER_TFS_ENABLE_BATCHING", "false").lower() +MODEL_CONFIG_FILE_PATH = "/sagemaker/model-config.cfg" +TFS_GRPC_PORT = os.environ.get("TFS_GRPC_PORT") +TFS_REST_PORT = os.environ.get("TFS_REST_PORT") +SAGEMAKER_TFS_PORT_RANGE = os.environ.get("SAGEMAKER_SAFE_PORT_RANGE") logging.basicConfig(level=logging.INFO) @@ -78,8 +78,8 @@ def __init__(self): else: self._handlers = default_handler - self._tfs_enable_batching = SAGEMAKER_BATCHING_ENABLED == 'true' - self._tfs_default_model_name = os.environ.get('TFS_DEFAULT_MODEL_NAME', "None") + self._tfs_enable_batching = SAGEMAKER_BATCHING_ENABLED == "true" + self._tfs_default_model_name = os.environ.get("TFS_DEFAULT_MODEL_NAME", "None") def on_post(self, req, res, model_name=None): log.info(req.uri) @@ -296,7 +296,7 @@ def _import_handlers(self, model_name=None): inference_script = INFERENCE_SCRIPT_PATH if model_name: inference_script = "/opt/ml/models/{}/model/code/inference.py".format(model_name) - spec = importlib.util.spec_from_file_location('inference', inference_script) + spec = importlib.util.spec_from_file_location("inference", inference_script) inference = importlib.util.module_from_spec(spec) spec.loader.exec_module(inference) diff --git a/docker/build_artifacts/sagemaker/serve.py b/docker/build_artifacts/sagemaker/serve.py index 0896bc3c..231cbc59 100644 --- a/docker/build_artifacts/sagemaker/serve.py +++ b/docker/build_artifacts/sagemaker/serve.py @@ -269,7 +269,7 @@ def start(self): self._create_nginx_config() if self._tfs_enable_batching: - log.info('batching is enabled') + log.info("batching is enabled") tfs_utils.create_batching_config(self._tfs_batching_config_path) if self._use_gunicorn: diff --git a/test/integration/local/conftest.py b/test/integration/local/conftest.py index 0cef34f9..050ba552 100644 --- a/test/integration/local/conftest.py +++ b/test/integration/local/conftest.py @@ -58,6 +58,6 @@ def tag(request, framework_version, processor): @pytest.fixture(autouse=True) def skip_by_device_type(request, processor): is_gpu = processor == "gpu" - if (request.node.get_closest_marker('skip_gpu') and is_gpu) or \ - (request.node.get_closest_marker('skip_cpu') and not is_gpu): - pytest.skip('Skipping because running on \'{}\' instance'.format(processor)) + if (request.node.get_closest_marker("skip_gpu") and is_gpu) or \ + (request.node.get_closest_marker("skip_cpu") and not is_gpu): + pytest.skip("Skipping because running on \"{}\" instance".format(processor)) diff --git 
a/test/integration/local/test_container.py b/test/integration/local/test_container.py index 2cacbb36..21650dd3 100644 --- a/test/integration/local/test_container.py +++ b/test/integration/local/test_container.py @@ -43,13 +43,13 @@ def container(request, docker_base_name, tag, runtime_config): else: batching_config = "" command = ( - 'docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080' - ' --mount type=volume,source=model_volume,target=/opt/ml/model,readonly' - ' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info' - ' -e SAGEMAKER_BIND_TO_PORT=8080' - ' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999' - ' {}' - ' {}:{} serve' + "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" + " --mount type=volume,source=model_volume,target=/opt/ml/model,readonly" + " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" + " -e SAGEMAKER_BIND_TO_PORT=8080" + " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" + " {}" + " {}:{} serve" ).format(runtime_config, batching_config, docker_base_name, tag) proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) diff --git a/test/integration/local/test_pre_post_processing.py b/test/integration/local/test_pre_post_processing.py index b12e485e..1106b0e5 100644 --- a/test/integration/local/test_pre_post_processing.py +++ b/test/integration/local/test_pre_post_processing.py @@ -51,12 +51,12 @@ def volume(tmpdir_factory, request): def container(volume, docker_base_name, tag, runtime_config): try: command = ( - 'docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080' - ' --mount type=volume,source={},target=/opt/ml/model,readonly' - ' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info' - ' -e SAGEMAKER_BIND_TO_PORT=8080' - ' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999' - ' {}:{} serve' + "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" + " --mount type=volume,source={},target=/opt/ml/model,readonly" + " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" + " -e SAGEMAKER_BIND_TO_PORT=8080" + " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" + " {}:{} serve" ).format(runtime_config, volume, docker_base_name, tag) proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) diff --git a/test/integration/local/test_pre_post_processing_mme.py b/test/integration/local/test_pre_post_processing_mme.py index 0c373cbd..fdd5438d 100644 --- a/test/integration/local/test_pre_post_processing_mme.py +++ b/test/integration/local/test_pre_post_processing_mme.py @@ -55,13 +55,13 @@ def volume(tmpdir_factory, request): def container(volume, docker_base_name, tag, runtime_config): try: command = ( - 'docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080' - ' --mount type=volume,source={},target=/opt/ml/models/half_plus_three/model,readonly' - ' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info' - ' -e SAGEMAKER_BIND_TO_PORT=8080' - ' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999' - ' -e SAGEMAKER_MULTI_MODEL=True' - ' {}:{} serve' + "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" + " --mount type=volume,source={},target=/opt/ml/models/half_plus_three/model,readonly" + " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" + " -e SAGEMAKER_BIND_TO_PORT=8080" + " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" + " -e SAGEMAKER_MULTI_MODEL=True" + " {}:{} serve" ).format(runtime_config, volume, docker_base_name, tag) proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) diff --git a/test/integration/local/test_tfs_batching.py b/test/integration/local/test_tfs_batching.py index 2e05aa7c..54d893b7 100644 --- 
a/test/integration/local/test_tfs_batching.py +++ b/test/integration/local/test_tfs_batching.py @@ -34,17 +34,17 @@ def volume(): def test_run_tfs_with_batching_parameters(docker_base_name, tag, runtime_config): try: command = ( - 'docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080' - ' --mount type=volume,source=batching_model_volume,target=/opt/ml/model,readonly' - ' -e SAGEMAKER_TFS_ENABLE_BATCHING=true' - ' -e SAGEMAKER_TFS_MAX_BATCH_SIZE=16' - ' -e SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS=500' - ' -e SAGEMAKER_TFS_NUM_BATCH_THREADS=100' - ' -e SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES=1' - ' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info' - ' -e SAGEMAKER_BIND_TO_PORT=8080' - ' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999' - ' {}:{} serve' + "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" + " --mount type=volume,source=batching_model_volume,target=/opt/ml/model,readonly" + " -e SAGEMAKER_TFS_ENABLE_BATCHING=true" + " -e SAGEMAKER_TFS_MAX_BATCH_SIZE=16" + " -e SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS=500" + " -e SAGEMAKER_TFS_NUM_BATCH_THREADS=100" + " -e SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES=1" + " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" + " -e SAGEMAKER_BIND_TO_PORT=8080" + " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" + " {}:{} serve" ).format(runtime_config, docker_base_name, tag) proc = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT) diff --git a/test/integration/sagemaker/conftest.py b/test/integration/sagemaker/conftest.py index 27b49156..5009979a 100644 --- a/test/integration/sagemaker/conftest.py +++ b/test/integration/sagemaker/conftest.py @@ -118,14 +118,14 @@ def model_name(): @pytest.fixture(autouse=True) def skip_gpu_instance_restricted_regions(region, instance_type): - if (region in NO_P2_REGIONS and instance_type.startswith('ml.p2')) or \ - (region in NO_P3_REGIONS and instance_type.startswith('ml.p3')): - pytest.skip('Skipping GPU test in region {}'.format(region)) + if (region in NO_P2_REGIONS and instance_type.startswith("ml.p2")) or \ + (region in NO_P3_REGIONS and instance_type.startswith("ml.p3")): + pytest.skip("Skipping GPU test in region {}".format(region)) @pytest.fixture(autouse=True) def skip_by_device_type(request, instance_type): - is_gpu = instance_type[3] in ['g', 'p'] - if (request.node.get_closest_marker('skip_gpu') and is_gpu) or \ - (request.node.get_closest_marker('skip_cpu') and not is_gpu): - pytest.skip('Skipping because running on \'{}\' instance'.format(instance_type)) + is_gpu = instance_type[3] in ["g", "p"] + if (request.node.get_closest_marker("skip_gpu") and is_gpu) or \ + (request.node.get_closest_marker("skip_cpu") and not is_gpu): + pytest.skip("Skipping because running on \"{}\" instance".format(instance_type)) diff --git a/test/integration/sagemaker/test_tfs.py b/test/integration/sagemaker/test_tfs.py index 2f67c0e9..f73ce35c 100644 --- a/test/integration/sagemaker/test_tfs.py +++ b/test/integration/sagemaker/test_tfs.py @@ -65,20 +65,20 @@ def tfs_model(region, boto_session): def python_model_with_requirements(region, boto_session): return util.find_or_put_model_data(region, boto_session, - 'test/data/python-with-requirements.tar.gz') + "test/data/python-with-requirements.tar.gz") @pytest.fixture(scope='session') def python_model_with_lib(region, boto_session): return util.find_or_put_model_data(region, boto_session, - 'test/data/python-with-lib.tar.gz') + "test/data/python-with-lib.tar.gz") def test_tfs_model(boto_session, sagemaker_client, sagemaker_runtime_client, model_name, tfs_model, image_uri, 
instance_type, accelerator_type): - input_data = {'instances': [1.0, 2.0, 5.0]} + input_data = {"instances": [1.0, 2.0, 5.0]} util.create_and_invoke_endpoint(boto_session, sagemaker_client, sagemaker_runtime_client, model_name, tfs_model, image_uri, instance_type, accelerator_type, input_data) @@ -104,34 +104,34 @@ def test_python_model_with_requirements(boto_session, sagemaker_client, python_model_with_requirements, image_uri, instance_type, accelerator_type): - if 'p3' in instance_type: - pytest.skip('skip for p3 instance') + if "p3" in instance_type: + pytest.skip("skip for p3 instance") # the python service needs to transform this to get a valid prediction - input_data = {'x': [1.0, 2.0, 5.0]} + input_data = {"x": [1.0, 2.0, 5.0]} output_data = util.create_and_invoke_endpoint(boto_session, sagemaker_client, sagemaker_runtime_client, model_name, python_model_with_requirements, image_uri, instance_type, accelerator_type, input_data) # python service adds this to tfs response - assert output_data['python'] is True - assert output_data['pillow'] == '6.0.0' + assert output_data["python"] is True + assert output_data["pillow"] == "6.0.0" def test_python_model_with_lib(boto_session, sagemaker_client, sagemaker_runtime_client, model_name, python_model_with_lib, image_uri, instance_type, accelerator_type): - if 'p3' in instance_type: - pytest.skip('skip for p3 instance') + if "p3" in instance_type: + pytest.skip("skip for p3 instance") # the python service needs to transform this to get a valid prediction - input_data = {'x': [1.0, 2.0, 5.0]} + input_data = {"x": [1.0, 2.0, 5.0]} output_data = util.create_and_invoke_endpoint(boto_session, sagemaker_client, sagemaker_runtime_client, model_name, python_model_with_lib, image_uri, instance_type, accelerator_type, input_data) # python service adds this to tfs response - assert output_data['python'] is True - assert output_data['dummy_module'] == '0.1' + assert output_data["python"] is True + assert output_data["dummy_module"] == "0.1" From 0f5fa25391697292ed59600372967ef0f27c9a3c Mon Sep 17 00:00:00 2001 From: Chuyang Deng Date: Wed, 22 Jul 2020 13:35:44 -0700 Subject: [PATCH 4/5] remove hardcoded region --- scripts/shared.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/shared.sh b/scripts/shared.sh index 3a92382b..57be36dd 100755 --- a/scripts/shared.sh +++ b/scripts/shared.sh @@ -25,7 +25,7 @@ function get_short_version() { } function get_aws_account() { - aws --region us-west-2 sts --endpoint-url https://sts.us-west-2.amazonaws.com get-caller-identity --query 'Account' --output text + aws --region $AWS_DEFAULT_REGION sts --endpoint-url https://sts.$AWS_DEFAULT_REGION.amazonaws.com get-caller-identity --query 'Account' --output text } function get_ei_executable() { From 21af6ef3ef50c2f928b2f62907668fefc7435e85 Mon Sep 17 00:00:00 2001 From: Chuyang Deng Date: Wed, 22 Jul 2020 13:47:10 -0700 Subject: [PATCH 5/5] update test quotes --- test/integration/local/test_pre_post_processing_mme.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/test/integration/local/test_pre_post_processing_mme.py b/test/integration/local/test_pre_post_processing_mme.py index fdd5438d..6a7d1b45 100644 --- a/test/integration/local/test_pre_post_processing_mme.py +++ b/test/integration/local/test_pre_post_processing_mme.py @@ -85,13 +85,8 @@ def container(volume, docker_base_name, tag, runtime_config): @pytest.fixture def model(): model_data = { -<<<<<<< HEAD "model_name": MODEL_NAME, - "url": 
"/opt/ml/models/half_plus_three" -======= - 'model_name': MODEL_NAME, - 'url': '/opt/ml/models/half_plus_three/model/half_plus_three' ->>>>>>> 2d2cfadc557d9b8716eefae2e6982eaab91d82ad + "url": "/opt/ml/models/half_plus_three/model/half_plus_three" } make_load_model_request(json.dumps(model_data)) return MODEL_NAME