From 6ca91a237547530fcc46d6a36e4175d12863fa9a Mon Sep 17 00:00:00 2001
From: Chuyang Deng
Date: Tue, 30 Jun 2020 16:14:09 -0700
Subject: [PATCH 1/5] Make quotes consistent

---
 docker/2.1/__init__.py | 0
 docker/2.1/deep_learning_container.py | 109 +++++
 docker/2.1/dockerd-entrypoint.py | 22 +
 docker/2.1/sagemaker/__init__.py | 12 +
 docker/2.1/sagemaker/multi_model_utils.py | 52 +++
 docker/2.1/sagemaker/nginx.conf.template | 64 +++
 docker/2.1/sagemaker/python_service.py | 397 ++++++++++++++++++
 docker/2.1/sagemaker/serve | 3 +
 docker/2.1/sagemaker/serve.py | 308 ++++++++++++++
 docker/2.1/sagemaker/tensorflow-serving.js | 231 ++++++++++
 docker/2.1/sagemaker/tfs_utils.py | 209 +++++++++
 .../deep_learning_container.py | 2 +-
 docker/build_artifacts/dockerd-entrypoint.py | 2 +-
 .../sagemaker/multi_model_utils.py | 8 +-
 .../sagemaker/python_service.py | 142 +++---
 docker/build_artifacts/sagemaker/serve.py | 198 ++++-----
 .../sagemaker/tensorflow-serving.js | 84 ++--
 docker/build_artifacts/sagemaker/tfs_utils.py | 104 ++---
 scripts/shared.sh | 2 +-
 test/integration/local/conftest.py | 38 +-
 .../local/multi_model_endpoint_test_utils.py | 24 +-
 test/integration/local/test_container.py | 170 ++++----
 .../local/test_multi_model_endpoint.py | 112 ++---
 .../local/test_pre_post_processing.py | 78 ++--
 .../local/test_pre_post_processing_mme.py | 76 ++--
 test/integration/local/test_tfs_batching.py | 44 +-
 test/integration/sagemaker/conftest.py | 114 ++---
 test/integration/sagemaker/test_ei.py | 28 +-
 test/integration/sagemaker/test_tfs.py | 26 +-
 test/integration/sagemaker/util.py | 126 +++---
 30 files changed, 2096 insertions(+), 689 deletions(-)
 create mode 100644 docker/2.1/__init__.py
 create mode 100644 docker/2.1/deep_learning_container.py
 create mode 100644 docker/2.1/dockerd-entrypoint.py
 create mode 100644 docker/2.1/sagemaker/__init__.py
 create mode 100644 docker/2.1/sagemaker/multi_model_utils.py
 create mode 100644 docker/2.1/sagemaker/nginx.conf.template
 create mode 100644 docker/2.1/sagemaker/python_service.py
 create mode 100755 docker/2.1/sagemaker/serve
 create mode 100644 docker/2.1/sagemaker/serve.py
 create mode 100644 docker/2.1/sagemaker/tensorflow-serving.js
 create mode 100644 docker/2.1/sagemaker/tfs_utils.py

diff --git a/docker/2.1/__init__.py b/docker/2.1/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/docker/2.1/deep_learning_container.py b/docker/2.1/deep_learning_container.py
new file mode 100644
index 00000000..1e82e61e
--- /dev/null
+++ b/docker/2.1/deep_learning_container.py
@@ -0,0 +1,109 @@
+# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+import re +import json +import logging +import requests + + +def _validate_instance_id(instance_id): + """ + Validate instance ID + """ + instance_id_regex = r"^(i-\S{17})" + compiled_regex = re.compile(instance_id_regex) + match = compiled_regex.match(instance_id) + + if not match: + return None + + return match.group(1) + + +def _retrieve_instance_id(): + """ + Retrieve instance ID from instance metadata service + """ + instance_id = None + url = "http://169.254.169.254/latest/meta-data/instance-id" + response = requests_helper(url, timeout=0.1) + + if response is not None: + instance_id = _validate_instance_id(response.text) + + return instance_id + + +def _retrieve_instance_region(): + """ + Retrieve instance region from instance metadata service + """ + region = None + valid_regions = ['ap-northeast-1', 'ap-northeast-2', 'ap-southeast-1', 'ap-southeast-2', + 'ap-south-1', 'ca-central-1', 'eu-central-1', 'eu-north-1', + 'eu-west-1', 'eu-west-2', 'eu-west-3', 'sa-east-1', + 'us-east-1', 'us-east-2', 'us-west-1', 'us-west-2'] + + url = "http://169.254.169.254/latest/dynamic/instance-identity/document" + response = requests_helper(url, timeout=0.1) + + if response is not None: + response_json = json.loads(response.text) + + if response_json['region'] in valid_regions: + region = response_json['region'] + + return region + + +def query_bucket(): + """ + GET request on an empty object from an Amazon S3 bucket + """ + response = None + instance_id = _retrieve_instance_id() + region = _retrieve_instance_region() + + if instance_id is not None and region is not None: + url = ("https://aws-deep-learning-containers-{0}.s3.{0}.amazonaws.com" + "/dlc-containers.txt?x-instance-id={1}".format(region, instance_id)) + response = requests_helper(url, timeout=0.2) + + logging.debug("Query bucket finished: {}".format(response)) + + return response + + +def requests_helper(url, timeout): + response = None + try: + response = requests.get(url, timeout=timeout) + except requests.exceptions.RequestException as e: + logging.error("Request exception: {}".format(e)) + + return response + + +def main(): + """ + Invoke bucket query + """ + # Logs are not necessary for normal run. Remove this line while debugging. + logging.getLogger().disabled = True + + logging.basicConfig(level=logging.ERROR) + query_bucket() + + +if __name__ == '__main__': + main() diff --git a/docker/2.1/dockerd-entrypoint.py b/docker/2.1/dockerd-entrypoint.py new file mode 100644 index 00000000..fc4ce388 --- /dev/null +++ b/docker/2.1/dockerd-entrypoint.py @@ -0,0 +1,22 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+ +import os.path +import subprocess +import shlex +import sys + +if not os.path.exists("/opt/ml/input/config"): + subprocess.call(["python", "/usr/local/bin/deep_learning_container.py", "&>/dev/null", "&"]) + +subprocess.check_call(shlex.split(' '.join(sys.argv[1:]))) diff --git a/docker/2.1/sagemaker/__init__.py b/docker/2.1/sagemaker/__init__.py new file mode 100644 index 00000000..04fbf5d9 --- /dev/null +++ b/docker/2.1/sagemaker/__init__.py @@ -0,0 +1,12 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. diff --git a/docker/2.1/sagemaker/multi_model_utils.py b/docker/2.1/sagemaker/multi_model_utils.py new file mode 100644 index 00000000..5d2c47f4 --- /dev/null +++ b/docker/2.1/sagemaker/multi_model_utils.py @@ -0,0 +1,52 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+import fcntl +import signal +import time +from contextlib import contextmanager + +MODEL_CONFIG_FILE = "/sagemaker/model-config.cfg" +DEFAULT_LOCK_FILE = "/sagemaker/lock-file.lock" + + +@contextmanager +def lock(path=DEFAULT_LOCK_FILE): + f = open(path, "w") + fd = f.fileno() + fcntl.lockf(fd, fcntl.LOCK_EX) + + try: + yield + finally: + time.sleep(1) + fcntl.lockf(fd, fcntl.LOCK_UN) + + +@contextmanager +def timeout(seconds=60): + def _raise_timeout_error(signum, frame): + raise Exception(408, "Timed out after {} seconds".format(seconds)) + + try: + signal.signal(signal.SIGALRM, _raise_timeout_error) + signal.alarm(seconds) + yield + finally: + signal.alarm(0) + + +class MultiModelException(Exception): + def __init__(self, code, msg): + Exception.__init__(self, code, msg) + self.code = code + self.msg = msg diff --git a/docker/2.1/sagemaker/nginx.conf.template b/docker/2.1/sagemaker/nginx.conf.template new file mode 100644 index 00000000..5ccfed3d --- /dev/null +++ b/docker/2.1/sagemaker/nginx.conf.template @@ -0,0 +1,64 @@ +load_module modules/ngx_http_js_module.so; + +worker_processes auto; +daemon off; +pid /tmp/nginx.pid; +error_log /dev/stderr %NGINX_LOG_LEVEL%; + +worker_rlimit_nofile 4096; + +events { + worker_connections 2048; +} + +http { + include /etc/nginx/mime.types; + default_type application/json; + access_log /dev/stdout combined; + js_include tensorflow-serving.js; + + upstream tfs_upstream { + server localhost:%TFS_REST_PORT%; + } + + upstream gunicorn_upstream { + server unix:/tmp/gunicorn.sock fail_timeout=1; + } + + server { + listen %NGINX_HTTP_PORT% deferred; + client_max_body_size 0; + client_body_buffer_size 100m; + subrequest_output_buffer_size 100m; + + set $tfs_version %TFS_VERSION%; + set $default_tfs_model %TFS_DEFAULT_MODEL_NAME%; + + location /tfs { + rewrite ^/tfs/(.*) /$1 break; + proxy_redirect off; + proxy_pass_request_headers off; + proxy_set_header Content-Type 'application/json'; + proxy_set_header Accept 'application/json'; + proxy_pass http://tfs_upstream; + } + + location /ping { + %FORWARD_PING_REQUESTS%; + } + + location /invocations { + %FORWARD_INVOCATION_REQUESTS%; + } + + location /models { + proxy_pass http://gunicorn_upstream/models; + } + + location / { + return 404 '{"error": "Not Found"}'; + } + + keepalive_timeout 3; + } +} diff --git a/docker/2.1/sagemaker/python_service.py b/docker/2.1/sagemaker/python_service.py new file mode 100644 index 00000000..0014b6dd --- /dev/null +++ b/docker/2.1/sagemaker/python_service.py @@ -0,0 +1,397 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+import bisect +import importlib.util +import json +import logging +import os +import subprocess +import time + +import falcon +import requests + +from multi_model_utils import lock, timeout, MultiModelException +import tfs_utils + +SAGEMAKER_MULTI_MODEL_ENABLED = os.environ.get("SAGEMAKER_MULTI_MODEL", "false").lower() == "true" +INFERENCE_SCRIPT_PATH = "/opt/ml/{}/code/inference.py".format("models" + if SAGEMAKER_MULTI_MODEL_ENABLED + else "model") +PYTHON_PROCESSING_ENABLED = os.path.exists(INFERENCE_SCRIPT_PATH) +SAGEMAKER_BATCHING_ENABLED = os.environ.get("SAGEMAKER_TFS_ENABLE_BATCHING", "false").lower() +MODEL_CONFIG_FILE_PATH = "/sagemaker/model-config.cfg" +TFS_GRPC_PORT = os.environ.get("TFS_GRPC_PORT") +TFS_REST_PORT = os.environ.get("TFS_REST_PORT") +SAGEMAKER_TFS_PORT_RANGE = os.environ.get("SAGEMAKER_SAFE_PORT_RANGE") + + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + +CUSTOM_ATTRIBUTES_HEADER = "X-Amzn-SageMaker-Custom-Attributes" + + +def default_handler(data, context): + """A default inference request handler that directly send post request to TFS rest port with + un-processed data and return un-processed response + + :param data: input data + :param context: context instance that contains tfs_rest_uri + :return: inference response from TFS model server + """ + response = requests.post(context.rest_uri, data=data) + return response.content, context.accept_header + + +class PythonServiceResource: + + def __init__(self): + if SAGEMAKER_MULTI_MODEL_ENABLED: + self._model_tfs_rest_port = {} + self._model_tfs_grpc_port = {} + self._model_tfs_pid = {} + self._tfs_ports = self._parse_sagemaker_port_range(SAGEMAKER_TFS_PORT_RANGE) + else: + self._tfs_grpc_port = TFS_GRPC_PORT + self._tfs_rest_port = TFS_REST_PORT + + self._tfs_enable_batching = SAGEMAKER_BATCHING_ENABLED == "true" + self._tfs_default_model_name = os.environ.get("TFS_DEFAULT_MODEL_NAME", "None") + + if PYTHON_PROCESSING_ENABLED: + self._handler, self._input_handler, self._output_handler = self._import_handlers() + self._handlers = self._make_handler(self._handler, + self._input_handler, + self._output_handler) + else: + self._handlers = default_handler + + def on_post(self, req, res, model_name=None): + log.info(req.uri) + if model_name or "invocations" in req.uri: + self._handle_invocation_post(req, res, model_name) + else: + data = json.loads(req.stream.read().decode("utf-8")) + self._handle_load_model_post(res, data) + + def _parse_sagemaker_port_range(self, port_range): + lower, upper = port_range.split('-') + lower = int(lower) + upper = lower + int((int(upper) - lower) * 0.9) # only utilizing 90% of the ports + rest_port = lower + grpc_port = (lower + upper) // 2 + tfs_ports = { + "rest_port": [port for port in range(rest_port, grpc_port)], + "grpc_port": [port for port in range(grpc_port, upper)], + } + return tfs_ports + + def _ports_available(self): + with lock(): + rest_ports = self._tfs_ports["rest_port"] + grpc_ports = self._tfs_ports["grpc_port"] + return len(rest_ports) > 0 and len(grpc_ports) > 0 + + def _handle_load_model_post(self, res, data): # noqa: C901 + model_name = data["model_name"] + base_path = data["url"] + + # model is already loaded + if model_name in self._model_tfs_pid: + res.status = falcon.HTTP_409 + res.body = json.dumps({ + "error": "Model {} is already loaded.".format(model_name) + }) + + # check if there are available ports + if not self._ports_available(): + res.status = falcon.HTTP_507 + res.body = json.dumps({ + "error": "Memory exhausted: no 
available ports to load the model." + }) + with lock(): + self._model_tfs_rest_port[model_name] = self._tfs_ports["rest_port"].pop() + self._model_tfs_grpc_port[model_name] = self._tfs_ports["grpc_port"].pop() + + # validate model files are in the specified base_path + if self.validate_model_dir(base_path): + try: + tfs_config = tfs_utils.create_tfs_config_individual_model(model_name, base_path) + tfs_config_file = "/sagemaker/tfs-config/{}/model-config.cfg".format(model_name) + log.info("tensorflow serving model config: \n%s\n", tfs_config) + os.makedirs(os.path.dirname(tfs_config_file)) + with open(tfs_config_file, "w") as f: + f.write(tfs_config) + + batching_config_file = "/sagemaker/batching/{}/batching-config.cfg".format( + model_name) + if self._tfs_enable_batching: + tfs_utils.create_batching_config(batching_config_file) + + cmd = tfs_utils.tfs_command( + self._model_tfs_grpc_port[model_name], + self._model_tfs_rest_port[model_name], + tfs_config_file, + self._tfs_enable_batching, + batching_config_file, + ) + p = subprocess.Popen(cmd.split()) + self._wait_for_model(model_name) + + log.info("started tensorflow serving (pid: %d)", p.pid) + # update model name <-> tfs pid map + self._model_tfs_pid[model_name] = p + + res.status = falcon.HTTP_200 + res.body = json.dumps({ + "success": + "Successfully loaded model {}, " + "listening on rest port {} " + "and grpc port {}.".format(model_name, + self._model_tfs_rest_port, + self._model_tfs_grpc_port,) + }) + except MultiModelException as multi_model_exception: + self._cleanup_config_file(tfs_config_file) + self._cleanup_config_file(batching_config_file) + if multi_model_exception.code == 409: + res.status = falcon.HTTP_409 + res.body = multi_model_exception.msg + elif multi_model_exception.code == 408: + res.status = falcon.HTTP_408 + res.body = multi_model_exception.msg + else: + raise MultiModelException(falcon.HTTP_500, multi_model_exception.msg) + except FileExistsError as e: + res.status = falcon.HTTP_409 + res.body = json.dumps({ + "error": "Model {} is already loaded. 
{}".format(model_name, str(e)) + }) + except OSError as os_error: + self._cleanup_config_file(tfs_config_file) + self._cleanup_config_file(batching_config_file) + if os_error.errno == 12: + raise MultiModelException(falcon.HTTP_507, + "Memory exhausted: " + "not enough memory to start TFS instance") + else: + raise MultiModelException(falcon.HTTP_500, os_error.strerror) + else: + res.status = falcon.HTTP_404 + res.body = json.dumps({ + "error": + "Could not find valid base path {} for servable {}".format(base_path, + model_name) + }) + + def _cleanup_config_file(self, config_file): + if os.path.exists(config_file): + os.remove(config_file) + + def _wait_for_model(self, model_name): + url = "http://localhost:{}/v1/models/{}".format(self._model_tfs_rest_port[model_name], + model_name) + with timeout(): + while True: + time.sleep(0.5) + try: + response = requests.get(url) + if response.status_code == 200: + versions = json.loads(response.content)["model_version_status"] + if all(version["state"] == "AVAILABLE" for version in versions): + break + except ConnectionError: + log.exception("Failed to load models.") + + def _handle_invocation_post(self, req, res, model_name=None): + if SAGEMAKER_MULTI_MODEL_ENABLED: + if model_name: + if model_name not in self._model_tfs_rest_port: + res.status = falcon.HTTP_404 + res.body = json.dumps({ + "error": "Model {} is not loaded yet.".format(model_name) + }) + return + else: + log.info("model name: {}".format(model_name)) + rest_port = self._model_tfs_rest_port[model_name] + log.info("rest port: {}".format(str(self._model_tfs_rest_port[model_name]))) + grpc_port = self._model_tfs_grpc_port[model_name] + log.info("grpc port: {}".format(str(self._model_tfs_grpc_port[model_name]))) + data, context = tfs_utils.parse_request(req, rest_port, grpc_port, + self._tfs_default_model_name, + model_name) + else: + res.status = falcon.HTTP_400 + res.body = json.dumps({ + "error": "Invocation request does not contain model name." 
+ }) + else: + data, context = tfs_utils.parse_request(req, self._tfs_rest_port, self._tfs_grpc_port, + self._tfs_default_model_name) + + try: + res.status = falcon.HTTP_200 + res.body, res.content_type = self._handlers(data, context) + except Exception as e: # pylint: disable=broad-except + log.exception("exception handling request: {}".format(e)) + res.status = falcon.HTTP_500 + res.body = json.dumps({ + "error": str(e) + }).encode("utf-8") # pylint: disable=E1101 + + def _import_handlers(self): + spec = importlib.util.spec_from_file_location("inference", INFERENCE_SCRIPT_PATH) + inference = importlib.util.module_from_spec(spec) + spec.loader.exec_module(inference) + + _custom_handler, _custom_input_handler, _custom_output_handler = None, None, None + if hasattr(inference, "handler"): + _custom_handler = inference.handler + elif hasattr(inference, "input_handler") and hasattr(inference, "output_handler"): + _custom_input_handler = inference.input_handler + _custom_output_handler = inference.output_handler + else: + raise NotImplementedError("Handlers are not implemented correctly in user script.") + + return _custom_handler, _custom_input_handler, _custom_output_handler + + def _make_handler(self, custom_handler, custom_input_handler, custom_output_handler): + if custom_handler: + return custom_handler + + def handler(data, context): + processed_input = custom_input_handler(data, context) + response = requests.post(context.rest_uri, data=processed_input) + return custom_output_handler(response, context) + + return handler + + def on_get(self, req, res, model_name=None): # pylint: disable=W0613 + if model_name is None: + models_info = {} + uri = "http://localhost:{}/v1/models/{}" + for model, port in self._model_tfs_rest_port.items(): + try: + info = json.loads(requests.get(uri.format(port, model)).content) + models_info[model] = info + except ValueError as e: + log.exception("exception handling request: {}".format(e)) + res.status = falcon.HTTP_500 + res.body = json.dumps({ + "error": str(e) + }).encode("utf-8") + res.status = falcon.HTTP_200 + res.body = json.dumps(models_info) + else: + if model_name not in self._model_tfs_rest_port: + res.status = falcon.HTTP_404 + res.body = json.dumps({ + "error": "Model {} is loaded yet.".format(model_name) + }).encode("utf-8") + else: + port = self._model_tfs_rest_port[model_name] + uri = "http://localhost:{}/v1/models/{}".format(port, model_name) + try: + info = requests.get(uri) + res.status = falcon.HTTP_200 + res.body = json.dumps({ + "model": info + }).encode("utf-8") + except ValueError as e: + log.exception("exception handling GET models request.") + res.status = falcon.HTTP_500 + res.body = json.dumps({ + "error": str(e) + }).encode("utf-8") + + def on_delete(self, req, res, model_name): # pylint: disable=W0613 + if model_name not in self._model_tfs_pid: + res.status = falcon.HTTP_404 + res.body = json.dumps({ + "error": "Model {} is not loaded yet".format(model_name) + }) + else: + try: + self._model_tfs_pid[model_name].kill() + os.remove("/sagemaker/tfs-config/{}/model-config.cfg".format(model_name)) + os.rmdir("/sagemaker/tfs-config/{}".format(model_name)) + release_rest_port = self._model_tfs_rest_port[model_name] + release_grpc_port = self._model_tfs_grpc_port[model_name] + with lock(): + bisect.insort(self._tfs_ports["rest_port"], release_rest_port) + bisect.insort(self._tfs_ports["grpc_port"], release_grpc_port) + del self._model_tfs_rest_port[model_name] + del self._model_tfs_grpc_port[model_name] + del 
self._model_tfs_pid[model_name] + res.status = falcon.HTTP_200 + res.body = json.dumps({ + "success": "Successfully unloaded model {}.".format(model_name) + }) + except OSError as error: + res.status = falcon.HTTP_500 + res.body = json.dumps({ + "error": str(error) + }).encode("utf-8") + + def validate_model_dir(self, model_path): + # model base path doesn't exits + if not os.path.exists(model_path): + return False + versions = [] + for _, dirs, _ in os.walk(model_path): + for dirname in dirs: + log.info("dirname: {}".format(dirname)) + if dirname.isdigit(): + versions.append(dirname) + return self.validate_model_versions(versions) + + def validate_model_versions(self, versions): + log.info(versions) + if not versions: + return False + for v in versions: + if v.isdigit(): + # TensorFlow model server will succeed with any versions found + # even if there are directories that's not a valid model version, + # the loading will succeed. + return True + return False + + +class PingResource: + def on_get(self, req, res): # pylint: disable=W0613 + res.status = falcon.HTTP_200 + + +class ServiceResources: + def __init__(self): + self._enable_python_processing = PYTHON_PROCESSING_ENABLED + self._enable_model_manager = SAGEMAKER_MULTI_MODEL_ENABLED + self._python_service_resource = PythonServiceResource() + self._ping_resource = PingResource() + + def add_routes(self, application): + application.add_route("/ping", self._ping_resource) + application.add_route("/invocations", self._python_service_resource) + + if self._enable_model_manager: + application.add_route("/models", self._python_service_resource) + application.add_route("/models/{model_name}", self._python_service_resource) + application.add_route("/models/{model_name}/invoke", self._python_service_resource) + + +app = falcon.API() +resources = ServiceResources() +resources.add_routes(app) diff --git a/docker/2.1/sagemaker/serve b/docker/2.1/sagemaker/serve new file mode 100755 index 00000000..9fac6a93 --- /dev/null +++ b/docker/2.1/sagemaker/serve @@ -0,0 +1,3 @@ +#!/bin/bash + +python3 /sagemaker/serve.py diff --git a/docker/2.1/sagemaker/serve.py b/docker/2.1/sagemaker/serve.py new file mode 100644 index 00000000..7a539fe6 --- /dev/null +++ b/docker/2.1/sagemaker/serve.py @@ -0,0 +1,308 @@ +# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+ +import logging +import os +import re +import signal +import subprocess +import tfs_utils + +from contextlib import contextmanager + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + +JS_PING = "js_content ping" +JS_INVOCATIONS = "js_content invocations" +GUNICORN_PING = "proxy_pass http://gunicorn_upstream/ping" +GUNICORN_INVOCATIONS = "proxy_pass http://gunicorn_upstream/invocations" + +PYTHON_LIB_PATH = "/opt/ml/model/code/lib" +REQUIREMENTS_PATH = "/opt/ml/model/code/requirements.txt" +INFERENCE_PATH = "/opt/ml/model/code/inference.py" + + +class ServiceManager(object): + def __init__(self): + self._state = "initializing" + self._nginx = None + self._tfs = None + self._gunicorn = None + self._gunicorn_command = None + self._enable_python_service = os.path.exists(INFERENCE_PATH) + self._tfs_version = os.environ.get("SAGEMAKER_TFS_VERSION", "1.13") + self._nginx_http_port = os.environ.get("SAGEMAKER_BIND_TO_PORT", "8080") + self._nginx_loglevel = os.environ.get("SAGEMAKER_TFS_NGINX_LOGLEVEL", "error") + self._tfs_default_model_name = os.environ.get("SAGEMAKER_TFS_DEFAULT_MODEL_NAME", "None") + self._sagemaker_port_range = os.environ.get("SAGEMAKER_SAFE_PORT_RANGE", None) + self._tfs_config_path = "/sagemaker/model-config.cfg" + self._tfs_batching_config_path = "/sagemaker/batching-config.cfg" + + _enable_batching = os.environ.get("SAGEMAKER_TFS_ENABLE_BATCHING", "false").lower() + _enable_multi_model_endpoint = os.environ.get("SAGEMAKER_MULTI_MODEL", + "false").lower() + + if _enable_batching not in ["true", "false"]: + raise ValueError("SAGEMAKER_TFS_ENABLE_BATCHING must be 'true' or 'false'") + self._tfs_enable_batching = _enable_batching == "true" + + if _enable_multi_model_endpoint not in ["true", "false"]: + raise ValueError("SAGEMAKER_MULTI_MODEL must be 'true' or 'false'") + self._tfs_enable_multi_model_endpoint = _enable_multi_model_endpoint == "true" + + self._use_gunicorn = self._enable_python_service or self._tfs_enable_multi_model_endpoint + + if self._sagemaker_port_range is not None: + parts = self._sagemaker_port_range.split("-") + low = int(parts[0]) + hi = int(parts[1]) + if low + 2 > hi: + raise ValueError("not enough ports available in SAGEMAKER_SAFE_PORT_RANGE ({})" + .format(self._sagemaker_port_range)) + self._tfs_grpc_port = str(low) + self._tfs_rest_port = str(low + 1) + else: + # just use the standard default ports + self._tfs_grpc_port = "9000" + self._tfs_rest_port = "8501" + + # set environment variable for python service + os.environ["TFS_GRPC_PORT"] = self._tfs_grpc_port + os.environ["TFS_REST_PORT"] = self._tfs_rest_port + + def _create_tfs_config(self): + models = tfs_utils.find_models() + if not models: + raise ValueError("no SavedModel bundles found!") + + if self._tfs_default_model_name == "None": + default_model = os.path.basename(models[0]) + if default_model: + self._tfs_default_model_name = default_model + log.info("using default model name: {}".format(self._tfs_default_model_name)) + else: + log.info("no default model detected") + + # config (may) include duplicate 'config' keys, so we can't just dump a dict + config = "model_config_list: {\n" + for m in models: + config += " config: {\n" + config += " name: '{}',\n".format(os.path.basename(m)) + config += " base_path: '{}',\n".format(m) + config += " model_platform: 'tensorflow'\n" + config += " }\n" + config += "}\n" + + log.info("tensorflow serving model config: \n%s\n", config) + + with open("/sagemaker/model-config.cfg", "w") as f: + f.write(config) + + def 
_setup_gunicorn(self):
+        python_path_content = []
+        python_path_option = ""
+
+        if self._enable_python_service:
+            lib_path_exists = os.path.exists(PYTHON_LIB_PATH)
+            requirements_exists = os.path.exists(REQUIREMENTS_PATH)
+            python_path_content = ["/opt/ml/model/code"]
+            python_path_option = "--pythonpath "
+
+            if lib_path_exists:
+                python_path_content.append(PYTHON_LIB_PATH)
+
+            if requirements_exists:
+                if lib_path_exists:
+                    log.warning("loading modules in '{}', ignoring requirements.txt"
+                                .format(PYTHON_LIB_PATH))
+                else:
+                    log.info("installing packages from requirements.txt...")
+                    pip_install_cmd = "pip3 install -r {}".format(REQUIREMENTS_PATH)
+                    try:
+                        subprocess.check_call(pip_install_cmd.split())
+                    except subprocess.CalledProcessError:
+                        log.error("failed to install required packages, exiting.")
+                        self._stop()
+                        raise ChildProcessError("failed to install required packages.")
+
+        gunicorn_command = (
+            "gunicorn -b unix:/tmp/gunicorn.sock -k gevent --chdir /sagemaker "
+            "{}{} -e TFS_GRPC_PORT={} -e SAGEMAKER_MULTI_MODEL={} -e SAGEMAKER_SAFE_PORT_RANGE={} "
+            "python_service:app").format(python_path_option, ",".join(python_path_content),
+                                         self._tfs_grpc_port, self._tfs_enable_multi_model_endpoint,
+                                         self._sagemaker_port_range)
+
+        log.info("gunicorn command: {}".format(gunicorn_command))
+        self._gunicorn_command = gunicorn_command
+
+    def _create_nginx_config(self):
+        template = self._read_nginx_template()
+        pattern = re.compile(r"%(\w+)%")
+
+        template_values = {
+            "TFS_VERSION": self._tfs_version,
+            "TFS_REST_PORT": self._tfs_rest_port,
+            "TFS_DEFAULT_MODEL_NAME": self._tfs_default_model_name,
+            "NGINX_HTTP_PORT": self._nginx_http_port,
+            "NGINX_LOG_LEVEL": self._nginx_loglevel,
+            "FORWARD_PING_REQUESTS": GUNICORN_PING if self._use_gunicorn else JS_PING,
+            "FORWARD_INVOCATION_REQUESTS": GUNICORN_INVOCATIONS if self._use_gunicorn
+            else JS_INVOCATIONS,
+        }
+
+        config = pattern.sub(lambda x: template_values[x.group(1)], template)
+        log.info("nginx config: \n%s\n", config)
+
+        with open("/sagemaker/nginx.conf", "w") as f:
+            f.write(config)
+
+    def _read_nginx_template(self):
+        with open("/sagemaker/nginx.conf.template", "r") as f:
+            template = f.read()
+            if not template:
+                raise ValueError("failed to read nginx.conf.template")
+
+            return template
+
+    def _start_tfs(self):
+        self._log_version("tensorflow_model_server --version", "tensorflow version info:")
+        cmd = tfs_utils.tfs_command(
+            self._tfs_grpc_port,
+            self._tfs_rest_port,
+            self._tfs_config_path,
+            self._tfs_enable_batching,
+            self._tfs_batching_config_path,
+        )
+        log.info("tensorflow serving command: {}".format(cmd))
+        p = subprocess.Popen(cmd.split())
+        log.info("started tensorflow serving (pid: %d)", p.pid)
+        self._tfs = p
+
+    def _start_gunicorn(self):
+        self._log_version("gunicorn --version", "gunicorn version info:")
+        env = os.environ.copy()
+        env["TFS_DEFAULT_MODEL_NAME"] = self._tfs_default_model_name
+        p = subprocess.Popen(self._gunicorn_command.split(), env=env)
+        log.info("started gunicorn (pid: %d)", p.pid)
+        self._gunicorn = p
+
+    def _start_nginx(self):
+        self._log_version("/usr/sbin/nginx -V", "nginx version info:")
+        p = subprocess.Popen("/usr/sbin/nginx -c /sagemaker/nginx.conf".split())
+        log.info("started nginx (pid: %d)", p.pid)
+        self._nginx = p
+
+    def _log_version(self, command, message):
+        try:
+            output = subprocess.check_output(
+                command.split(),
+                stderr=subprocess.STDOUT).decode("utf-8", "backslashreplace").strip()
+            log.info("{}\n{}".format(message, output))
+        except
subprocess.CalledProcessError: + log.warning("failed to run command: %s", command) + + def _stop(self, *args): # pylint: disable=W0613 + self._state = "stopping" + log.info("stopping services") + try: + os.kill(self._nginx.pid, signal.SIGQUIT) + except OSError: + pass + try: + if self._gunicorn: + os.kill(self._gunicorn.pid, signal.SIGTERM) + except OSError: + pass + try: + os.kill(self._tfs.pid, signal.SIGTERM) + except OSError: + pass + + self._state = "stopped" + log.info("stopped") + + def _wait_for_gunicorn(self): + while True: + if os.path.exists("/tmp/gunicorn.sock"): + log.info("gunicorn server is ready!") + return + + @contextmanager + def _timeout(self, seconds): + def _raise_timeout_error(signum, frame): + raise TimeoutError("time out after {} seconds".format(seconds)) + + try: + signal.signal(signal.SIGALRM, _raise_timeout_error) + signal.alarm(seconds) + yield + finally: + signal.alarm(0) + + def start(self): + log.info("starting services") + self._state = "starting" + signal.signal(signal.SIGTERM, self._stop) + + self._create_nginx_config() + + if self._tfs_enable_batching: + log.info("batching is enabled") + tfs_utils.create_batching_config(self._tfs_batching_config_path) + + if self._tfs_enable_multi_model_endpoint: + log.info("multi-model endpoint is enabled, TFS model servers will be started later") + else: + tfs_utils.create_tfs_config( + self._tfs_default_model_name, + self._tfs_config_path + ) + self._create_tfs_config() + self._start_tfs() + + if self._use_gunicorn: + self._setup_gunicorn() + self._start_gunicorn() + # make sure gunicorn is up + with self._timeout(seconds=30): + self._wait_for_gunicorn() + + self._start_nginx() + self._state = "started" + + while True: + pid, status = os.wait() + + if self._state != "started": + break + + if pid == self._nginx.pid: + log.warning("unexpected nginx exit (status: {}). restarting.".format(status)) + self._start_nginx() + + elif pid == self._tfs.pid: + log.warning( + "unexpected tensorflow serving exit (status: {}). restarting.".format(status)) + self._start_tfs() + + elif self._gunicorn and pid == self._gunicorn.pid: + log.warning("unexpected gunicorn exit (status: {}). restarting." + .format(status)) + self._start_gunicorn() + + self._stop() + + +if __name__ == "__main__": + ServiceManager().start() diff --git a/docker/2.1/sagemaker/tensorflow-serving.js b/docker/2.1/sagemaker/tensorflow-serving.js new file mode 100644 index 00000000..fdce4472 --- /dev/null +++ b/docker/2.1/sagemaker/tensorflow-serving.js @@ -0,0 +1,231 @@ +var tfs_base_uri = "/tfs/v1/models/" +var custom_attributes_header = "X-Amzn-SageMaker-Custom-Attributes" + +function invocations(r) { + var ct = r.headersIn["Content-Type"] + + if ("application/json" == ct || "application/jsonlines" == ct || "application/jsons" == ct) { + json_request(r) + } else if ("text/csv" == ct) { + csv_request(r) + } else { + return_error(r, 415, "Unsupported Media Type: " + (ct || "Unknown")) + } +} + +function ping(r) { + var uri = make_tfs_uri(r, false) + + function callback (reply) { + if (reply.status == 200 && reply.responseBody.includes('"AVAILABLE"')) { + r.return(200) + } else { + r.error("failed ping" + reply.responseBody) + r.return(502) + } + } + + r.subrequest(uri, callback) +} + +function ping_without_model(r) { + // hack for TF 1.11 and MME + // for TF 1.11, send an arbitrary fixed request to the default model. 
+ // if response is 400, the model is ok (but input was bad), so return 200 + // for MME, the default model name is None and does not exist + // also return 200 in unlikely case our request was really valid + + var uri = make_tfs_uri(r, true) + var options = { + method: "POST", + body: "{'instances': 'invalid'}" + } + + function callback (reply) { + if (reply.status == 200 || reply.status == 400 || + reply.responseBody.includes("Servable not found for request: Latest(None)")) { + r.return(200) + } else { + r.error("failed ping" + reply.responseBody) + r.return(502) + } + } + + r.subrequest(uri, options, callback) +} + +function return_error(r, code, message) { + if (message) { + r.return(code, "{'error': " + message + "'}'") + } else { + r.return(code) + } +} + +function tfs_json_request(r, json) { + var uri = make_tfs_uri(r, true) + var options = { + method: "POST", + body: json + } + + var accept = r.headersIn.Accept + function callback (reply) { + var body = reply.responseBody + if (reply.status == 400) { + // "fix" broken json escaping in \'instances\' message + body = body.replace("\\'instances\\'", "'instances'") + } + + if ("application/jsonlines" == accept || "application/jsons" == accept) { + body = body.replace(/\n/g, "") + r.headersOut["Content-Type"] = accept + } + r.return(reply.status, body) + } + + r.subrequest(uri, options, callback) + +} + +function make_tfs_uri(r, with_method) { + var attributes = parse_custom_attributes(r) + + var uri = tfs_base_uri + attributes["tfs-model-name"] + if ("tfs-model-version" in attributes) { + uri += "/versions/" + attributes["tfs-model-version"] + } + + if (with_method) { + uri += ":" + (attributes["tfs-method"] || "predict") + } + + return uri +} + +function parse_custom_attributes(r) { + var attributes = {} + var kv_pattern = /tfs-[a-z\-]+=[^,]+/g + var header = r.headersIn[custom_attributes_header] + if (header) { + var matches = header.match(kv_pattern) + if (matches) { + for (var i = 0; i < matches.length; i++) { + var kv = matches[i].split("=") + if (kv.length === 2) { + attributes[kv[0]] = kv[1] + } + } + } + } + + // for MME invocations, tfs-model-name is in the uri, or use default_tfs_model + if (!attributes["tfs-model-name"]) { + var uri_pattern = /\/models\/[^,]+\/invoke/g + var model_name = r.uri.match(uri_pattern) + if (model_name[0]) { + model_name = r.uri.replace("/models/", "").replace("/invoke", "") + attributes["tfs-model-name"] = model_name + } else { + attributes["tfs-model-name"] = r.variables.default_tfs_model + } + } + + return attributes +} + +function json_request(r) { + var data = r.requestBody + + if (is_json_lines(data)) { + json_lines_request(r, data) + } else if (is_tfs_json(data)) { + tfs_json_request(r, data) + } else { + generic_json_request(r, data) + } +} + +function is_tfs_json(data) { + return /"(instances|inputs|examples)"\s*:/.test(data) +} + +function is_json_lines(data) { + // objects separated only by (optional) whitespace means jsons/json-lines + return /[}\]]\s*[\[{]/.test(data) +} + +function generic_json_request(r, data) { + if (! /^\s*\[\s*\[/.test(data)) { + data = "[" + data + "]" + } + + var json = "{'instances':" + data + "}" + tfs_json_request(r, json) +} + +function json_lines_request(r, data) { + var lines = data.trim().split(/\r?\n/) + var builder = [] + builder.push("{'instances':") + if (lines.length != 1) { + builder.push("[") + } + + for (var i = 0; i < lines.length; i++) { + var line = lines[i].trim() + if (line) { + var instance = (i == 0) ? 
"" : "," + instance += line + builder.push(instance) + } + } + + builder.push(lines.length == 1 ? "}" : "]}") + tfs_json_request(r, builder.join('')) +} + +function csv_request(r) { + var data = r.requestBody + // look for initial quote or numeric-only data in 1st field + var needs_quotes = data.search(/^\s*("|[\d.Ee+\-]+.*)/) != 0 + var lines = data.trim().split(/\r?\n/) + var builder = [] + builder.push("{'nstances':[") + + for (var i = 0; i < lines.length; i++) { + var line = lines[i].trim() + if (line) { + var line_builder = [] + // Only wrap line in brackets if there are multiple columns. + // If there's only one column and it has a string with a comma, + // the input will be wrapped in an extra set of brackets. + var has_multiple_columns = line.search(",") != -1 + + if (has_multiple_columns) { + line_builder.push("[") + } + + if (needs_quotes) { + line_builder.push("'") + line_builder.push(line.replace("'", "\\'").replace(",", "','")) + line_builder.push("'") + } else { + line_builder.push(line) + } + + if (has_multiple_columns) { + line_builder.push("]") + } + + var json_line = line_builder.join("") + builder.push(json_line) + + if (i != lines.length - 1) + builder.push(",") + } + } + + builder.push("]}") + tfs_json_request(r, builder.join("")) +} diff --git a/docker/2.1/sagemaker/tfs_utils.py b/docker/2.1/sagemaker/tfs_utils.py new file mode 100644 index 00000000..f3ca0cb7 --- /dev/null +++ b/docker/2.1/sagemaker/tfs_utils.py @@ -0,0 +1,209 @@ +# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+ +import logging +import multiprocessing +import os +import re + +from collections import namedtuple + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + +DEFAULT_CONTENT_TYPE = "application/json" +DEFAULT_ACCEPT_HEADER = "application/json" +CUSTOM_ATTRIBUTES_HEADER = "X-Amzn-SageMaker-Custom-Attributes" + +Context = namedtuple("Context", + "model_name, model_version, method, rest_uri, grpc_port, " + "custom_attributes, request_content_type, accept_header, content_length") + + +def parse_request(req, rest_port, grpc_port, default_model_name, model_name=None): + tfs_attributes = parse_tfs_custom_attributes(req) + tfs_uri = make_tfs_uri(rest_port, tfs_attributes, default_model_name, model_name) + + if not model_name: + model_name = tfs_attributes.get("tfs-model-name") + + context = Context(model_name, + tfs_attributes.get("tfs-model-version"), + tfs_attributes.get("tfs-method"), + tfs_uri, + grpc_port, + req.get_header(CUSTOM_ATTRIBUTES_HEADER), + req.get_header("Content-Type") or DEFAULT_CONTENT_TYPE, + req.get_header("Accept") or DEFAULT_ACCEPT_HEADER, + req.content_length) + + data = req.stream + return data, context + + +def make_tfs_uri(port, attributes, default_model_name, model_name=None): + log.info("sagemaker tfs attributes: \n{}".format(attributes)) + + tfs_model_name = model_name or attributes.get("tfs-model-name", default_model_name) + tfs_model_version = attributes.get("tfs-model-version") + tfs_method = attributes.get("tfs-method", "predict") + + uri = "http://localhost:{}/v1/models/{}".format(port, tfs_model_name) + if tfs_model_version: + uri += "/versions/" + tfs_model_version + uri += ":" + tfs_method + return uri + + +def parse_tfs_custom_attributes(req): + attributes = {} + header = req.get_header(CUSTOM_ATTRIBUTES_HEADER) + if header: + matches = re.findall(r"(tfs-[a-z\-]+=[^,]+)", header) + attributes = dict(attribute.split("=") for attribute in matches) + return attributes + + +def create_tfs_config_individual_model(model_name, base_path): + config = "model_config_list: {\n" + config += " config: {\n" + config += " name: '{}',\n".format(model_name) + config += " base_path: '{}',\n".format(base_path) + config += " model_platform: 'tensorflow'\n" + config += " }\n" + config += "}\n" + return config + + +def create_tfs_config( + tfs_default_model_name, + tfs_config_path, +): + models = find_models() + if not models: + raise ValueError("no SavedModel bundles found!") + + if tfs_default_model_name == "None": + default_model = os.path.basename(models[0]) + if default_model: + tfs_default_model_name = default_model + log.info("using default model name: {}".format(tfs_default_model_name)) + else: + log.info("no default model detected") + + # config (may) include duplicate 'config' keys, so we can't just dump a dict + config = "model_config_list: {\n" + for m in models: + config += " config: {\n" + config += " name: '{}',\n".format(os.path.basename(m)) + config += " base_path: '{}',\n".format(m) + config += " model_platform: 'tensorflow'\n" + config += " }\n" + config += "}\n" + + log.info("tensorflow serving model config: \n%s\n", config) + + with open(tfs_config_path, "w") as f: + f.write(config) + + +def tfs_command(tfs_grpc_port, + tfs_rest_port, + tfs_config_path, + tfs_enable_batching, + tfs_batching_config_file): + cmd = "tensorflow_model_server " \ + "--port={} " \ + "--rest_api_port={} " \ + "--model_config_file={} " \ + "--max_num_load_retries=0 {}" \ + .format(tfs_grpc_port, tfs_rest_port, tfs_config_path, + 
get_tfs_batching_args(tfs_enable_batching, tfs_batching_config_file)) + return cmd + + +def find_models(): + base_path = "/opt/ml/model" + models = [] + for f in _find_saved_model_files(base_path): + parts = f.split("/") + if len(parts) >= 6 and re.match(r"^\d+$", parts[-2]): + model_path = "/".join(parts[0:-2]) + if model_path not in models: + models.append(model_path) + return models + + +def _find_saved_model_files(path): + for e in os.scandir(path): + if e.is_dir(): + yield from _find_saved_model_files(os.path.join(path, e.name)) + else: + if e.name == "saved_model.pb": + yield os.path.join(path, e.name) + + +def get_tfs_batching_args(enable_batching, tfs_batching_config): + if enable_batching: + return "--enable_batching=true " \ + "--batching_parameters_file={}".format(tfs_batching_config) + else: + return "" + + +def create_batching_config(batching_config_file): + class _BatchingParameter: + def __init__(self, key, env_var, value, defaulted_message): + self.key = key + self.env_var = env_var + self.value = value + self.defaulted_message = defaulted_message + + cpu_count = multiprocessing.cpu_count() + batching_parameters = [ + _BatchingParameter("max_batch_size", "SAGEMAKER_TFS_MAX_BATCH_SIZE", 8, + "max_batch_size defaulted to {}. Set {} to override default. " + "Tuning this parameter may yield better performance."), + _BatchingParameter("batch_timeout_micros", "SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS", 1000, + "batch_timeout_micros defaulted to {}. Set {} to override " + "default. Tuning this parameter may yield better performance."), + _BatchingParameter("num_batch_threads", "SAGEMAKER_TFS_NUM_BATCH_THREADS", + cpu_count, "num_batch_threads defaulted to {}," + "the number of CPUs. Set {} to override default."), + _BatchingParameter("max_enqueued_batches", "SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES", + # Batch limits number of concurrent requests, which limits number + # of enqueued batches, so this can be set high for Batch + 100000000 if "SAGEMAKER_BATCH" in os.environ else cpu_count, + "max_enqueued_batches defaulted to {}. Set {} to override default. 
" + "Tuning this parameter may be necessary to tune out-of-memory " + "errors occur."), + ] + + warning_message = "" + for batching_parameter in batching_parameters: + if batching_parameter.env_var in os.environ: + batching_parameter.value = os.environ[batching_parameter.env_var] + else: + warning_message += batching_parameter.defaulted_message.format( + batching_parameter.value, batching_parameter.env_var) + warning_message += "\n" + if warning_message: + log.warning(warning_message) + + config = "" + for batching_parameter in batching_parameters: + config += "%s { value: %s }\n" % (batching_parameter.key, batching_parameter.value) + + log.info("batching config: \n%s\n", config) + with open(batching_config_file, "w") as f: + f.write(config) diff --git a/docker/build_artifacts/deep_learning_container.py b/docker/build_artifacts/deep_learning_container.py index b60ea9f2..1e82e61e 100644 --- a/docker/build_artifacts/deep_learning_container.py +++ b/docker/build_artifacts/deep_learning_container.py @@ -20,7 +20,7 @@ def _validate_instance_id(instance_id): """ Validate instance ID """ - instance_id_regex = r'^(i-\S{17})' + instance_id_regex = r"^(i-\S{17})" compiled_regex = re.compile(instance_id_regex) match = compiled_regex.match(instance_id) diff --git a/docker/build_artifacts/dockerd-entrypoint.py b/docker/build_artifacts/dockerd-entrypoint.py index 68e1e966..fc4ce388 100644 --- a/docker/build_artifacts/dockerd-entrypoint.py +++ b/docker/build_artifacts/dockerd-entrypoint.py @@ -17,6 +17,6 @@ import sys if not os.path.exists("/opt/ml/input/config"): - subprocess.call(['python', '/usr/local/bin/deep_learning_container.py', '&>/dev/null', '&']) + subprocess.call(["python", "/usr/local/bin/deep_learning_container.py", "&>/dev/null", "&"]) subprocess.check_call(shlex.split(' '.join(sys.argv[1:]))) diff --git a/docker/build_artifacts/sagemaker/multi_model_utils.py b/docker/build_artifacts/sagemaker/multi_model_utils.py index 6267a067..5d2c47f4 100644 --- a/docker/build_artifacts/sagemaker/multi_model_utils.py +++ b/docker/build_artifacts/sagemaker/multi_model_utils.py @@ -15,13 +15,13 @@ import time from contextlib import contextmanager -MODEL_CONFIG_FILE = '/sagemaker/model-config.cfg' -DEFAULT_LOCK_FILE = '/sagemaker/lock-file.lock' +MODEL_CONFIG_FILE = "/sagemaker/model-config.cfg" +DEFAULT_LOCK_FILE = "/sagemaker/lock-file.lock" @contextmanager def lock(path=DEFAULT_LOCK_FILE): - f = open(path, 'w') + f = open(path, "w") fd = f.fileno() fcntl.lockf(fd, fcntl.LOCK_EX) @@ -35,7 +35,7 @@ def lock(path=DEFAULT_LOCK_FILE): @contextmanager def timeout(seconds=60): def _raise_timeout_error(signum, frame): - raise Exception(408, 'Timed out after {} seconds'.format(seconds)) + raise Exception(408, "Timed out after {} seconds".format(seconds)) try: signal.signal(signal.SIGALRM, _raise_timeout_error) diff --git a/docker/build_artifacts/sagemaker/python_service.py b/docker/build_artifacts/sagemaker/python_service.py index 2bb81bc0..0014b6dd 100644 --- a/docker/build_artifacts/sagemaker/python_service.py +++ b/docker/build_artifacts/sagemaker/python_service.py @@ -24,22 +24,22 @@ from multi_model_utils import lock, timeout, MultiModelException import tfs_utils -SAGEMAKER_MULTI_MODEL_ENABLED = os.environ.get('SAGEMAKER_MULTI_MODEL', 'false').lower() == 'true' -INFERENCE_SCRIPT_PATH = '/opt/ml/{}/code/inference.py'.format('models' +SAGEMAKER_MULTI_MODEL_ENABLED = os.environ.get("SAGEMAKER_MULTI_MODEL", "false").lower() == "true" +INFERENCE_SCRIPT_PATH = "/opt/ml/{}/code/inference.py".format("models" if 
SAGEMAKER_MULTI_MODEL_ENABLED - else 'model') + else "model") PYTHON_PROCESSING_ENABLED = os.path.exists(INFERENCE_SCRIPT_PATH) -SAGEMAKER_BATCHING_ENABLED = os.environ.get('SAGEMAKER_TFS_ENABLE_BATCHING', 'false').lower() -MODEL_CONFIG_FILE_PATH = '/sagemaker/model-config.cfg' -TFS_GRPC_PORT = os.environ.get('TFS_GRPC_PORT') -TFS_REST_PORT = os.environ.get('TFS_REST_PORT') -SAGEMAKER_TFS_PORT_RANGE = os.environ.get('SAGEMAKER_SAFE_PORT_RANGE') +SAGEMAKER_BATCHING_ENABLED = os.environ.get("SAGEMAKER_TFS_ENABLE_BATCHING", "false").lower() +MODEL_CONFIG_FILE_PATH = "/sagemaker/model-config.cfg" +TFS_GRPC_PORT = os.environ.get("TFS_GRPC_PORT") +TFS_REST_PORT = os.environ.get("TFS_REST_PORT") +SAGEMAKER_TFS_PORT_RANGE = os.environ.get("SAGEMAKER_SAFE_PORT_RANGE") logging.basicConfig(level=logging.INFO) log = logging.getLogger(__name__) -CUSTOM_ATTRIBUTES_HEADER = 'X-Amzn-SageMaker-Custom-Attributes' +CUSTOM_ATTRIBUTES_HEADER = "X-Amzn-SageMaker-Custom-Attributes" def default_handler(data, context): @@ -66,8 +66,8 @@ def __init__(self): self._tfs_grpc_port = TFS_GRPC_PORT self._tfs_rest_port = TFS_REST_PORT - self._tfs_enable_batching = SAGEMAKER_BATCHING_ENABLED == 'true' - self._tfs_default_model_name = os.environ.get('TFS_DEFAULT_MODEL_NAME', "None") + self._tfs_enable_batching = SAGEMAKER_BATCHING_ENABLED == "true" + self._tfs_default_model_name = os.environ.get("TFS_DEFAULT_MODEL_NAME", "None") if PYTHON_PROCESSING_ENABLED: self._handler, self._input_handler, self._output_handler = self._import_handlers() @@ -82,7 +82,7 @@ def on_post(self, req, res, model_name=None): if model_name or "invocations" in req.uri: self._handle_invocation_post(req, res, model_name) else: - data = json.loads(req.stream.read().decode('utf-8')) + data = json.loads(req.stream.read().decode("utf-8")) self._handle_load_model_post(res, data) def _parse_sagemaker_port_range(self, port_range): @@ -92,49 +92,49 @@ def _parse_sagemaker_port_range(self, port_range): rest_port = lower grpc_port = (lower + upper) // 2 tfs_ports = { - 'rest_port': [port for port in range(rest_port, grpc_port)], - 'grpc_port': [port for port in range(grpc_port, upper)], + "rest_port": [port for port in range(rest_port, grpc_port)], + "grpc_port": [port for port in range(grpc_port, upper)], } return tfs_ports def _ports_available(self): with lock(): - rest_ports = self._tfs_ports['rest_port'] - grpc_ports = self._tfs_ports['grpc_port'] + rest_ports = self._tfs_ports["rest_port"] + grpc_ports = self._tfs_ports["grpc_port"] return len(rest_ports) > 0 and len(grpc_ports) > 0 def _handle_load_model_post(self, res, data): # noqa: C901 - model_name = data['model_name'] - base_path = data['url'] + model_name = data["model_name"] + base_path = data["url"] # model is already loaded if model_name in self._model_tfs_pid: res.status = falcon.HTTP_409 res.body = json.dumps({ - 'error': 'Model {} is already loaded.'.format(model_name) + "error": "Model {} is already loaded.".format(model_name) }) # check if there are available ports if not self._ports_available(): res.status = falcon.HTTP_507 res.body = json.dumps({ - 'error': 'Memory exhausted: no available ports to load the model.' + "error": "Memory exhausted: no available ports to load the model." 
}) with lock(): - self._model_tfs_rest_port[model_name] = self._tfs_ports['rest_port'].pop() - self._model_tfs_grpc_port[model_name] = self._tfs_ports['grpc_port'].pop() + self._model_tfs_rest_port[model_name] = self._tfs_ports["rest_port"].pop() + self._model_tfs_grpc_port[model_name] = self._tfs_ports["grpc_port"].pop() # validate model files are in the specified base_path if self.validate_model_dir(base_path): try: tfs_config = tfs_utils.create_tfs_config_individual_model(model_name, base_path) - tfs_config_file = '/sagemaker/tfs-config/{}/model-config.cfg'.format(model_name) - log.info('tensorflow serving model config: \n%s\n', tfs_config) + tfs_config_file = "/sagemaker/tfs-config/{}/model-config.cfg".format(model_name) + log.info("tensorflow serving model config: \n%s\n", tfs_config) os.makedirs(os.path.dirname(tfs_config_file)) - with open(tfs_config_file, 'w') as f: + with open(tfs_config_file, "w") as f: f.write(tfs_config) - batching_config_file = '/sagemaker/batching/{}/batching-config.cfg'.format( + batching_config_file = "/sagemaker/batching/{}/batching-config.cfg".format( model_name) if self._tfs_enable_batching: tfs_utils.create_batching_config(batching_config_file) @@ -149,16 +149,16 @@ def _handle_load_model_post(self, res, data): # noqa: C901 p = subprocess.Popen(cmd.split()) self._wait_for_model(model_name) - log.info('started tensorflow serving (pid: %d)', p.pid) + log.info("started tensorflow serving (pid: %d)", p.pid) # update model name <-> tfs pid map self._model_tfs_pid[model_name] = p res.status = falcon.HTTP_200 res.body = json.dumps({ - 'success': - 'Successfully loaded model {}, ' - 'listening on rest port {} ' - 'and grpc port {}.'.format(model_name, + "success": + "Successfully loaded model {}, " + "listening on rest port {} " + "and grpc port {}.".format(model_name, self._model_tfs_rest_port, self._model_tfs_grpc_port,) }) @@ -176,22 +176,22 @@ def _handle_load_model_post(self, res, data): # noqa: C901 except FileExistsError as e: res.status = falcon.HTTP_409 res.body = json.dumps({ - 'error': 'Model {} is already loaded. {}'.format(model_name, str(e)) + "error": "Model {} is already loaded. 
{}".format(model_name, str(e)) }) except OSError as os_error: self._cleanup_config_file(tfs_config_file) self._cleanup_config_file(batching_config_file) if os_error.errno == 12: raise MultiModelException(falcon.HTTP_507, - 'Memory exhausted: ' - 'not enough memory to start TFS instance') + "Memory exhausted: " + "not enough memory to start TFS instance") else: raise MultiModelException(falcon.HTTP_500, os_error.strerror) else: res.status = falcon.HTTP_404 res.body = json.dumps({ - 'error': - 'Could not find valid base path {} for servable {}'.format(base_path, + "error": + "Could not find valid base path {} for servable {}".format(base_path, model_name) }) @@ -208,7 +208,7 @@ def _wait_for_model(self, model_name): try: response = requests.get(url) if response.status_code == 200: - versions = json.loads(response.content)['model_version_status'] + versions = json.loads(response.content)["model_version_status"] if all(version["state"] == "AVAILABLE" for version in versions): break except ConnectionError: @@ -220,7 +220,7 @@ def _handle_invocation_post(self, req, res, model_name=None): if model_name not in self._model_tfs_rest_port: res.status = falcon.HTTP_404 res.body = json.dumps({ - 'error': "Model {} is not loaded yet.".format(model_name) + "error": "Model {} is not loaded yet.".format(model_name) }) return else: @@ -235,7 +235,7 @@ def _handle_invocation_post(self, req, res, model_name=None): else: res.status = falcon.HTTP_400 res.body = json.dumps({ - 'error': 'Invocation request does not contain model name.' + "error": "Invocation request does not contain model name." }) else: data, context = tfs_utils.parse_request(req, self._tfs_rest_port, self._tfs_grpc_port, @@ -245,25 +245,25 @@ def _handle_invocation_post(self, req, res, model_name=None): res.status = falcon.HTTP_200 res.body, res.content_type = self._handlers(data, context) except Exception as e: # pylint: disable=broad-except - log.exception('exception handling request: {}'.format(e)) + log.exception("exception handling request: {}".format(e)) res.status = falcon.HTTP_500 res.body = json.dumps({ - 'error': str(e) - }).encode('utf-8') # pylint: disable=E1101 + "error": str(e) + }).encode("utf-8") # pylint: disable=E1101 def _import_handlers(self): - spec = importlib.util.spec_from_file_location('inference', INFERENCE_SCRIPT_PATH) + spec = importlib.util.spec_from_file_location("inference", INFERENCE_SCRIPT_PATH) inference = importlib.util.module_from_spec(spec) spec.loader.exec_module(inference) _custom_handler, _custom_input_handler, _custom_output_handler = None, None, None - if hasattr(inference, 'handler'): + if hasattr(inference, "handler"): _custom_handler = inference.handler - elif hasattr(inference, 'input_handler') and hasattr(inference, 'output_handler'): + elif hasattr(inference, "input_handler") and hasattr(inference, "output_handler"): _custom_input_handler = inference.input_handler _custom_output_handler = inference.output_handler else: - raise NotImplementedError('Handlers are not implemented correctly in user script.') + raise NotImplementedError("Handlers are not implemented correctly in user script.") return _custom_handler, _custom_input_handler, _custom_output_handler @@ -281,69 +281,69 @@ def handler(data, context): def on_get(self, req, res, model_name=None): # pylint: disable=W0613 if model_name is None: models_info = {} - uri = 'http://localhost:{}/v1/models/{}' + uri = "http://localhost:{}/v1/models/{}" for model, port in self._model_tfs_rest_port.items(): try: info = 
json.loads(requests.get(uri.format(port, model)).content) models_info[model] = info except ValueError as e: - log.exception('exception handling request: {}'.format(e)) + log.exception("exception handling request: {}".format(e)) res.status = falcon.HTTP_500 res.body = json.dumps({ - 'error': str(e) - }).encode('utf-8') + "error": str(e) + }).encode("utf-8") res.status = falcon.HTTP_200 res.body = json.dumps(models_info) else: if model_name not in self._model_tfs_rest_port: res.status = falcon.HTTP_404 res.body = json.dumps({ - 'error': 'Model {} is loaded yet.'.format(model_name) - }).encode('utf-8') + "error": "Model {} is loaded yet.".format(model_name) + }).encode("utf-8") else: port = self._model_tfs_rest_port[model_name] - uri = 'http://localhost:{}/v1/models/{}'.format(port, model_name) + uri = "http://localhost:{}/v1/models/{}".format(port, model_name) try: info = requests.get(uri) res.status = falcon.HTTP_200 res.body = json.dumps({ - 'model': info - }).encode('utf-8') + "model": info + }).encode("utf-8") except ValueError as e: - log.exception('exception handling GET models request.') + log.exception("exception handling GET models request.") res.status = falcon.HTTP_500 res.body = json.dumps({ - 'error': str(e) - }).encode('utf-8') + "error": str(e) + }).encode("utf-8") def on_delete(self, req, res, model_name): # pylint: disable=W0613 if model_name not in self._model_tfs_pid: res.status = falcon.HTTP_404 res.body = json.dumps({ - 'error': 'Model {} is not loaded yet'.format(model_name) + "error": "Model {} is not loaded yet".format(model_name) }) else: try: self._model_tfs_pid[model_name].kill() - os.remove('/sagemaker/tfs-config/{}/model-config.cfg'.format(model_name)) - os.rmdir('/sagemaker/tfs-config/{}'.format(model_name)) + os.remove("/sagemaker/tfs-config/{}/model-config.cfg".format(model_name)) + os.rmdir("/sagemaker/tfs-config/{}".format(model_name)) release_rest_port = self._model_tfs_rest_port[model_name] release_grpc_port = self._model_tfs_grpc_port[model_name] with lock(): - bisect.insort(self._tfs_ports['rest_port'], release_rest_port) - bisect.insort(self._tfs_ports['grpc_port'], release_grpc_port) + bisect.insort(self._tfs_ports["rest_port"], release_rest_port) + bisect.insort(self._tfs_ports["grpc_port"], release_grpc_port) del self._model_tfs_rest_port[model_name] del self._model_tfs_grpc_port[model_name] del self._model_tfs_pid[model_name] res.status = falcon.HTTP_200 res.body = json.dumps({ - 'success': 'Successfully unloaded model {}.'.format(model_name) + "success": "Successfully unloaded model {}.".format(model_name) }) except OSError as error: res.status = falcon.HTTP_500 res.body = json.dumps({ - 'error': str(error) - }).encode('utf-8') + "error": str(error) + }).encode("utf-8") def validate_model_dir(self, model_path): # model base path doesn't exits @@ -383,13 +383,13 @@ def __init__(self): self._ping_resource = PingResource() def add_routes(self, application): - application.add_route('/ping', self._ping_resource) - application.add_route('/invocations', self._python_service_resource) + application.add_route("/ping", self._ping_resource) + application.add_route("/invocations", self._python_service_resource) if self._enable_model_manager: - application.add_route('/models', self._python_service_resource) - application.add_route('/models/{model_name}', self._python_service_resource) - application.add_route('/models/{model_name}/invoke', self._python_service_resource) + application.add_route("/models", self._python_service_resource) + 
application.add_route("/models/{model_name}", self._python_service_resource) + application.add_route("/models/{model_name}/invoke", self._python_service_resource) app = falcon.API() diff --git a/docker/build_artifacts/sagemaker/serve.py b/docker/build_artifacts/sagemaker/serve.py index ee084d51..7a539fe6 100644 --- a/docker/build_artifacts/sagemaker/serve.py +++ b/docker/build_artifacts/sagemaker/serve.py @@ -23,160 +23,160 @@ logging.basicConfig(level=logging.INFO) log = logging.getLogger(__name__) -JS_PING = 'js_content ping' -JS_INVOCATIONS = 'js_content invocations' -GUNICORN_PING = 'proxy_pass http://gunicorn_upstream/ping' -GUNICORN_INVOCATIONS = 'proxy_pass http://gunicorn_upstream/invocations' +JS_PING = "js_content ping" +JS_INVOCATIONS = "js_content invocations" +GUNICORN_PING = "proxy_pass http://gunicorn_upstream/ping" +GUNICORN_INVOCATIONS = "proxy_pass http://gunicorn_upstream/invocations" -PYTHON_LIB_PATH = '/opt/ml/model/code/lib' -REQUIREMENTS_PATH = '/opt/ml/model/code/requirements.txt' -INFERENCE_PATH = '/opt/ml/model/code/inference.py' +PYTHON_LIB_PATH = "/opt/ml/model/code/lib" +REQUIREMENTS_PATH = "/opt/ml/model/code/requirements.txt" +INFERENCE_PATH = "/opt/ml/model/code/inference.py" class ServiceManager(object): def __init__(self): - self._state = 'initializing' + self._state = "initializing" self._nginx = None self._tfs = None self._gunicorn = None self._gunicorn_command = None self._enable_python_service = os.path.exists(INFERENCE_PATH) - self._tfs_version = os.environ.get('SAGEMAKER_TFS_VERSION', '1.13') - self._nginx_http_port = os.environ.get('SAGEMAKER_BIND_TO_PORT', '8080') - self._nginx_loglevel = os.environ.get('SAGEMAKER_TFS_NGINX_LOGLEVEL', 'error') - self._tfs_default_model_name = os.environ.get('SAGEMAKER_TFS_DEFAULT_MODEL_NAME', 'None') - self._sagemaker_port_range = os.environ.get('SAGEMAKER_SAFE_PORT_RANGE', None) - self._tfs_config_path = '/sagemaker/model-config.cfg' - self._tfs_batching_config_path = '/sagemaker/batching-config.cfg' - - _enable_batching = os.environ.get('SAGEMAKER_TFS_ENABLE_BATCHING', 'false').lower() - _enable_multi_model_endpoint = os.environ.get('SAGEMAKER_MULTI_MODEL', - 'false').lower() - - if _enable_batching not in ['true', 'false']: - raise ValueError('SAGEMAKER_TFS_ENABLE_BATCHING must be "true" or "false"') - self._tfs_enable_batching = _enable_batching == 'true' - - if _enable_multi_model_endpoint not in ['true', 'false']: - raise ValueError('SAGEMAKER_MULTI_MODEL must be "true" or "false"') - self._tfs_enable_multi_model_endpoint = _enable_multi_model_endpoint == 'true' + self._tfs_version = os.environ.get("SAGEMAKER_TFS_VERSION", "1.13") + self._nginx_http_port = os.environ.get("SAGEMAKER_BIND_TO_PORT", "8080") + self._nginx_loglevel = os.environ.get("SAGEMAKER_TFS_NGINX_LOGLEVEL", "error") + self._tfs_default_model_name = os.environ.get("SAGEMAKER_TFS_DEFAULT_MODEL_NAME", "None") + self._sagemaker_port_range = os.environ.get("SAGEMAKER_SAFE_PORT_RANGE", None) + self._tfs_config_path = "/sagemaker/model-config.cfg" + self._tfs_batching_config_path = "/sagemaker/batching-config.cfg" + + _enable_batching = os.environ.get("SAGEMAKER_TFS_ENABLE_BATCHING", "false").lower() + _enable_multi_model_endpoint = os.environ.get("SAGEMAKER_MULTI_MODEL", + "false").lower() + + if _enable_batching not in ["true", "false"]: + raise ValueError("SAGEMAKER_TFS_ENABLE_BATCHING must be 'true' or 'false'") + self._tfs_enable_batching = _enable_batching == "true" + + if _enable_multi_model_endpoint not in ["true", "false"]: + raise 
ValueError("SAGEMAKER_MULTI_MODEL must be 'true' or 'false'") + self._tfs_enable_multi_model_endpoint = _enable_multi_model_endpoint == "true" self._use_gunicorn = self._enable_python_service or self._tfs_enable_multi_model_endpoint if self._sagemaker_port_range is not None: - parts = self._sagemaker_port_range.split('-') + parts = self._sagemaker_port_range.split("-") low = int(parts[0]) hi = int(parts[1]) if low + 2 > hi: - raise ValueError('not enough ports available in SAGEMAKER_SAFE_PORT_RANGE ({})' + raise ValueError("not enough ports available in SAGEMAKER_SAFE_PORT_RANGE ({})" .format(self._sagemaker_port_range)) self._tfs_grpc_port = str(low) self._tfs_rest_port = str(low + 1) else: # just use the standard default ports - self._tfs_grpc_port = '9000' - self._tfs_rest_port = '8501' + self._tfs_grpc_port = "9000" + self._tfs_rest_port = "8501" # set environment variable for python service - os.environ['TFS_GRPC_PORT'] = self._tfs_grpc_port - os.environ['TFS_REST_PORT'] = self._tfs_rest_port + os.environ["TFS_GRPC_PORT"] = self._tfs_grpc_port + os.environ["TFS_REST_PORT"] = self._tfs_rest_port def _create_tfs_config(self): models = tfs_utils.find_models() if not models: - raise ValueError('no SavedModel bundles found!') + raise ValueError("no SavedModel bundles found!") - if self._tfs_default_model_name == 'None': + if self._tfs_default_model_name == "None": default_model = os.path.basename(models[0]) if default_model: self._tfs_default_model_name = default_model - log.info('using default model name: {}'.format(self._tfs_default_model_name)) + log.info("using default model name: {}".format(self._tfs_default_model_name)) else: - log.info('no default model detected') + log.info("no default model detected") # config (may) include duplicate 'config' keys, so we can't just dump a dict - config = 'model_config_list: {\n' + config = "model_config_list: {\n" for m in models: - config += ' config: {\n' - config += ' name: "{}",\n'.format(os.path.basename(m)) - config += ' base_path: "{}",\n'.format(m) - config += ' model_platform: "tensorflow"\n' - config += ' }\n' - config += '}\n' + config += " config: {\n" + config += " name: '{}',\n".format(os.path.basename(m)) + config += " base_path: '{}',\n".format(m) + config += " model_platform: 'tensorflow'\n" + config += " }\n" + config += "}\n" - log.info('tensorflow serving model config: \n%s\n', config) + log.info("tensorflow serving model config: \n%s\n", config) - with open('/sagemaker/model-config.cfg', 'w') as f: + with open("/sagemaker/model-config.cfg", "w") as f: f.write(config) def _setup_gunicorn(self): python_path_content = [] - python_path_option = '' + python_path_option = "" if self._enable_python_service: lib_path_exists = os.path.exists(PYTHON_LIB_PATH) requirements_exists = os.path.exists(REQUIREMENTS_PATH) - python_path_content = ['/opt/ml/model/code'] - python_path_option = '--pythonpath ' + python_path_content = ["/opt/ml/model/code"] + python_path_option = "--pythonpath " if lib_path_exists: python_path_content.append(PYTHON_LIB_PATH) if requirements_exists: if lib_path_exists: - log.warning('loading modules in "{}", ignoring requirements.txt' + log.warning("loading modules in '{}', ignoring requirements.txt" .format(PYTHON_LIB_PATH)) else: - log.info('installing packages from requirements.txt...') - pip_install_cmd = 'pip3 install -r {}'.format(REQUIREMENTS_PATH) + log.info("installing packages from requirements.txt...") + pip_install_cmd = "pip3 install -r {}".format(REQUIREMENTS_PATH) try: 
subprocess.check_call(pip_install_cmd.split()) except subprocess.CalledProcessError: - log.error('failed to install required packages, exiting.') + log.error("failed to install required packages, exiting.") self._stop() - raise ChildProcessError('failed to install required packages.') + raise ChildProcessError("failed to install required packages.") gunicorn_command = ( - 'gunicorn -b unix:/tmp/gunicorn.sock -k gevent --chdir /sagemaker ' - '{}{} -e TFS_GRPC_PORT={} -e SAGEMAKER_MULTI_MODEL={} -e SAGEMAKER_SAFE_PORT_RANGE={} ' - 'python_service:app').format(python_path_option, ','.join(python_path_content), + "gunicorn -b unix:/tmp/gunicorn.sock -k gevent --chdir /sagemaker " + "{}{} -e TFS_GRPC_PORT={} -e SAGEMAKER_MULTI_MODEL={} -e SAGEMAKER_SAFE_PORT_RANGE={} " + "python_service:app").format(python_path_option, ",".join(python_path_content), self._tfs_grpc_port, self._tfs_enable_multi_model_endpoint, self._sagemaker_port_range) - log.info('gunicorn command: {}'.format(gunicorn_command)) + log.info("gunicorn command: {}".format(gunicorn_command)) self._gunicorn_command = gunicorn_command def _create_nginx_config(self): template = self._read_nginx_template() - pattern = re.compile(r'%(\w+)%') + pattern = re.compile(r"%(\w+)%") template_values = { - 'TFS_VERSION': self._tfs_version, - 'TFS_REST_PORT': self._tfs_rest_port, - 'TFS_DEFAULT_MODEL_NAME': self._tfs_default_model_name, - 'NGINX_HTTP_PORT': self._nginx_http_port, - 'NGINX_LOG_LEVEL': self._nginx_loglevel, - 'FORWARD_PING_REQUESTS': GUNICORN_PING if self._use_gunicorn else JS_PING, - 'FORWARD_INVOCATION_REQUESTS': GUNICORN_INVOCATIONS if self._use_gunicorn + "TFS_VERSION": self._tfs_version, + "TFS_REST_PORT": self._tfs_rest_port, + "TFS_DEFAULT_MODEL_NAME": self._tfs_default_model_name, + "NGINX_HTTP_PORT": self._nginx_http_port, + "NGINX_LOG_LEVEL": self._nginx_loglevel, + "FORWARD_PING_REQUESTS": GUNICORN_PING if self._use_gunicorn else JS_PING, + "FORWARD_INVOCATION_REQUESTS": GUNICORN_INVOCATIONS if self._use_gunicorn else JS_INVOCATIONS, } config = pattern.sub(lambda x: template_values[x.group(1)], template) - log.info('nginx config: \n%s\n', config) + log.info("nginx config: \n%s\n", config) - with open('/sagemaker/nginx.conf', 'w') as f: + with open("/sagemaker/nginx.conf", "w") as f: f.write(config) def _read_nginx_template(self): - with open('/sagemaker/nginx.conf.template', 'r') as f: + with open("/sagemaker/nginx.conf.template", "r") as f: template = f.read() if not template: - raise ValueError('failed to read nginx.conf.template') + raise ValueError("failed to read nginx.conf.template") return template def _start_tfs(self): - self._log_version('tensorflow_model_server --version', 'tensorflow version info:') + self._log_version("tensorflow_model_server --version", "tensorflow version info:") cmd = tfs_utils.tfs_command(
self._tfs_default_model_name p = subprocess.Popen(self._gunicorn_command.split(), env=env) - log.info('started gunicorn (pid: %d)', p.pid) + log.info("started gunicorn (pid: %d)", p.pid) self._gunicorn = p def _start_nginx(self): - self._log_version('/usr/sbin/nginx -V', 'nginx version info:') - p = subprocess.Popen('/usr/sbin/nginx -c /sagemaker/nginx.conf'.split()) - log.info('started nginx (pid: %d)', p.pid) + self._log_version("/usr/sbin/nginx -V", "nginx version info:") + p = subprocess.Popen("/usr/sbin/nginx -c /sagemaker/nginx.conf".split()) + log.info("started nginx (pid: %d)", p.pid) self._nginx = p def _log_version(self, command, message): try: output = subprocess.check_output( command.split(), - stderr=subprocess.STDOUT).decode('utf-8', 'backslashreplace').strip() - log.info('{}\n{}'.format(message, output)) + stderr=subprocess.STDOUT).decode("utf-8", "backslashreplace").strip() + log.info("{}\n{}".format(message, output)) except subprocess.CalledProcessError: - log.warning('failed to run command: %s', command) + log.warning("failed to run command: %s", command) def _stop(self, *args): # pylint: disable=W0613 - self._state = 'stopping' - log.info('stopping services') + self._state = "stopping" + log.info("stopping services") try: os.kill(self._nginx.pid, signal.SIGQUIT) except OSError: @@ -229,19 +229,19 @@ def _stop(self, *args): # pylint: disable=W0613 except OSError: pass - self._state = 'stopped' - log.info('stopped') + self._state = "stopped" + log.info("stopped") def _wait_for_gunicorn(self): while True: - if os.path.exists('/tmp/gunicorn.sock'): - log.info('gunicorn server is ready!') + if os.path.exists("/tmp/gunicorn.sock"): + log.info("gunicorn server is ready!") return @contextmanager def _timeout(self, seconds): def _raise_timeout_error(signum, frame): - raise TimeoutError('time out after {} seconds'.format(seconds)) + raise TimeoutError("time out after {} seconds".format(seconds)) try: signal.signal(signal.SIGALRM, _raise_timeout_error) @@ -251,18 +251,18 @@ def _raise_timeout_error(signum, frame): signal.alarm(0) def start(self): - log.info('starting services') - self._state = 'starting' + log.info("starting services") + self._state = "starting" signal.signal(signal.SIGTERM, self._stop) self._create_nginx_config() if self._tfs_enable_batching: - log.info('batching is enabled') + log.info("batching is enabled") tfs_utils.create_batching_config(self._tfs_batching_config_path) if self._tfs_enable_multi_model_endpoint: - log.info('multi-model endpoint is enabled, TFS model servers will be started later') + log.info("multi-model endpoint is enabled, TFS model servers will be started later") else: tfs_utils.create_tfs_config( self._tfs_default_model_name, @@ -279,30 +279,30 @@ def start(self): self._wait_for_gunicorn() self._start_nginx() - self._state = 'started' + self._state = "started" while True: pid, status = os.wait() - if self._state != 'started': + if self._state != "started": break if pid == self._nginx.pid: - log.warning('unexpected nginx exit (status: {}). restarting.'.format(status)) + log.warning("unexpected nginx exit (status: {}). restarting.".format(status)) self._start_nginx() elif pid == self._tfs.pid: log.warning( - 'unexpected tensorflow serving exit (status: {}). restarting.'.format(status)) + "unexpected tensorflow serving exit (status: {}). restarting.".format(status)) self._start_tfs() elif self._gunicorn and pid == self._gunicorn.pid: - log.warning('unexpected gunicorn exit (status: {}). restarting.' 
+ log.warning("unexpected gunicorn exit (status: {}). restarting." .format(status)) self._start_gunicorn() self._stop() -if __name__ == '__main__': +if __name__ == "__main__": ServiceManager().start() diff --git a/docker/build_artifacts/sagemaker/tensorflow-serving.js b/docker/build_artifacts/sagemaker/tensorflow-serving.js index 1c040b0a..fdce4472 100644 --- a/docker/build_artifacts/sagemaker/tensorflow-serving.js +++ b/docker/build_artifacts/sagemaker/tensorflow-serving.js @@ -1,15 +1,15 @@ -var tfs_base_uri = '/tfs/v1/models/' -var custom_attributes_header = 'X-Amzn-SageMaker-Custom-Attributes' +var tfs_base_uri = "/tfs/v1/models/" +var custom_attributes_header = "X-Amzn-SageMaker-Custom-Attributes" function invocations(r) { - var ct = r.headersIn['Content-Type'] + var ct = r.headersIn["Content-Type"] - if ('application/json' == ct || 'application/jsonlines' == ct || 'application/jsons' == ct) { + if ("application/json" == ct || "application/jsonlines" == ct || "application/jsons" == ct) { json_request(r) - } else if ('text/csv' == ct) { + } else if ("text/csv" == ct) { csv_request(r) } else { - return_error(r, 415, 'Unsupported Media Type: ' + (ct || 'Unknown')) + return_error(r, 415, "Unsupported Media Type: " + (ct || "Unknown")) } } @@ -20,7 +20,7 @@ function ping(r) { if (reply.status == 200 && reply.responseBody.includes('"AVAILABLE"')) { r.return(200) } else { - r.error('failed ping' + reply.responseBody) + r.error("failed ping" + reply.responseBody) r.return(502) } } @@ -37,16 +37,16 @@ function ping_without_model(r) { var uri = make_tfs_uri(r, true) var options = { - method: 'POST', - body: '{"instances": "invalid"}' + method: "POST", + body: "{'instances': 'invalid'}" } function callback (reply) { if (reply.status == 200 || reply.status == 400 || - reply.responseBody.includes('Servable not found for request: Latest(None)')) { + reply.responseBody.includes("Servable not found for request: Latest(None)")) { r.return(200) } else { - r.error('failed ping' + reply.responseBody) + r.error("failed ping" + reply.responseBody) r.return(502) } } @@ -56,7 +56,7 @@ function ping_without_model(r) { function return_error(r, code, message) { if (message) { - r.return(code, '{"error": "' + message + '"}') + r.return(code, "{'error': " + message + "'}'") } else { r.return(code) } @@ -65,7 +65,7 @@ function return_error(r, code, message) { function tfs_json_request(r, json) { var uri = make_tfs_uri(r, true) var options = { - method: 'POST', + method: "POST", body: json } @@ -77,9 +77,9 @@ function tfs_json_request(r, json) { body = body.replace("\\'instances\\'", "'instances'") } - if ('application/jsonlines' == accept || 'application/jsons' == accept) { - body = body.replace(/\n/g, '') - r.headersOut['Content-Type'] = accept + if ("application/jsonlines" == accept || "application/jsons" == accept) { + body = body.replace(/\n/g, "") + r.headersOut["Content-Type"] = accept } r.return(reply.status, body) } @@ -91,13 +91,13 @@ function tfs_json_request(r, json) { function make_tfs_uri(r, with_method) { var attributes = parse_custom_attributes(r) - var uri = tfs_base_uri + attributes['tfs-model-name'] - if ('tfs-model-version' in attributes) { - uri += '/versions/' + attributes['tfs-model-version'] + var uri = tfs_base_uri + attributes["tfs-model-name"] + if ("tfs-model-version" in attributes) { + uri += "/versions/" + attributes["tfs-model-version"] } if (with_method) { - uri += ':' + (attributes['tfs-method'] || 'predict') + uri += ":" + (attributes["tfs-method"] || "predict") } return uri @@ 
-111,7 +111,7 @@ function parse_custom_attributes(r) { var matches = header.match(kv_pattern) if (matches) { for (var i = 0; i < matches.length; i++) { - var kv = matches[i].split('=') + var kv = matches[i].split("=") if (kv.length === 2) { attributes[kv[0]] = kv[1] } @@ -120,14 +120,14 @@ } // for MME invocations, tfs-model-name is in the uri, or use default_tfs_model - if (!attributes['tfs-model-name']) { + if (!attributes["tfs-model-name"]) { var uri_pattern = /\/models\/[^,]+\/invoke/g var model_name = r.uri.match(uri_pattern) if (model_name[0]) { - model_name = r.uri.replace('/models/', '').replace('/invoke', '') - attributes['tfs-model-name'] = model_name + model_name = r.uri.replace("/models/", "").replace("/invoke", "") + attributes["tfs-model-name"] = model_name } else { - attributes['tfs-model-name'] = r.variables.default_tfs_model + attributes["tfs-model-name"] = r.variables.default_tfs_model } } @@ -157,31 +157,31 @@ function is_json_lines(data) { function generic_json_request(r, data) { if (! /^\s*\[\s*\[/.test(data)) { - data = '[' + data + ']' + data = "[" + data + "]" } - var json = '{"instances":' + data + '}' + var json = "{\"instances\":" + data + "}" tfs_json_request(r, json) } function json_lines_request(r, data) { var lines = data.trim().split(/\r?\n/) var builder = [] - builder.push('{"instances":') + builder.push("{\"instances\":") if (lines.length != 1) { - builder.push('[') + builder.push("[") } for (var i = 0; i < lines.length; i++) { var line = lines[i].trim() if (line) { - var instance = (i == 0) ? '' : ',' + var instance = (i == 0) ? "" : "," instance += line builder.push(instance) } } - builder.push(lines.length == 1 ? '}' : ']}') + builder.push(lines.length == 1 ? "}" : "]}") tfs_json_request(r, builder.join('')) } @@ -191,7 +191,7 @@ function csv_request(r) { var needs_quotes = data.search(/^\s*("|[\d.Ee+\-]+.*)/) != 0 var lines = data.trim().split(/\r?\n/) var builder = [] - builder.push('{"instances":[') + builder.push("{\"instances\":[") for (var i = 0; i < lines.length; i++) { var line = lines[i].trim() @@ -200,32 +200,32 @@ // Only wrap line in brackets if there are multiple columns. // If there's only one column and it has a string with a comma, // the input will be wrapped in an extra set of brackets.
- var has_multiple_columns = line.search(',') != -1 + var has_multiple_columns = line.search(",") != -1 if (has_multiple_columns) { - line_builder.push('[') + line_builder.push("[") } if (needs_quotes) { - line_builder.push('"') - line_builder.push(line.replace('"', '\\"').replace(',', '","')) - line_builder.push('"') + line_builder.push("\"") + line_builder.push(line.replace("\"", "\\\"").replace(",", "\",\"")) + line_builder.push("\"") } else { line_builder.push(line) } if (has_multiple_columns) { - line_builder.push(']') + line_builder.push("]") } - var json_line = line_builder.join('') + var json_line = line_builder.join("") builder.push(json_line) if (i != lines.length - 1) - builder.push(',') + builder.push(",") } } - builder.push(']}') - tfs_json_request(r, builder.join('')) + builder.push("]}") + tfs_json_request(r, builder.join("")) } diff --git a/docker/build_artifacts/sagemaker/tfs_utils.py b/docker/build_artifacts/sagemaker/tfs_utils.py index ac12856f..f3ca0cb7 100644 --- a/docker/build_artifacts/sagemaker/tfs_utils.py +++ b/docker/build_artifacts/sagemaker/tfs_utils.py @@ -21,13 +21,13 @@ logging.basicConfig(level=logging.INFO) log = logging.getLogger(__name__) -DEFAULT_CONTENT_TYPE = 'application/json' -DEFAULT_ACCEPT_HEADER = 'application/json' -CUSTOM_ATTRIBUTES_HEADER = 'X-Amzn-SageMaker-Custom-Attributes' +DEFAULT_CONTENT_TYPE = "application/json" +DEFAULT_ACCEPT_HEADER = "application/json" +CUSTOM_ATTRIBUTES_HEADER = "X-Amzn-SageMaker-Custom-Attributes" -Context = namedtuple('Context', - 'model_name, model_version, method, rest_uri, grpc_port, ' - 'custom_attributes, request_content_type, accept_header, content_length') +Context = namedtuple("Context", + "model_name, model_version, method, rest_uri, grpc_port, " + "custom_attributes, request_content_type, accept_header, content_length") def parse_request(req, rest_port, grpc_port, default_model_name, model_name=None): @@ -35,16 +35,16 @@ def parse_request(req, rest_port, grpc_port, default_model_name, model_name=None tfs_uri = make_tfs_uri(rest_port, tfs_attributes, default_model_name, model_name) if not model_name: - model_name = tfs_attributes.get('tfs-model-name') + model_name = tfs_attributes.get("tfs-model-name") context = Context(model_name, - tfs_attributes.get('tfs-model-version'), - tfs_attributes.get('tfs-method'), + tfs_attributes.get("tfs-model-version"), + tfs_attributes.get("tfs-method"), tfs_uri, grpc_port, req.get_header(CUSTOM_ATTRIBUTES_HEADER), - req.get_header('Content-Type') or DEFAULT_CONTENT_TYPE, - req.get_header('Accept') or DEFAULT_ACCEPT_HEADER, + req.get_header("Content-Type") or DEFAULT_CONTENT_TYPE, + req.get_header("Accept") or DEFAULT_ACCEPT_HEADER, req.content_length) data = req.stream @@ -55,13 +55,13 @@ def make_tfs_uri(port, attributes, default_model_name, model_name=None): log.info("sagemaker tfs attributes: \n{}".format(attributes)) tfs_model_name = model_name or attributes.get("tfs-model-name", default_model_name) - tfs_model_version = attributes.get('tfs-model-version') - tfs_method = attributes.get('tfs-method', 'predict') + tfs_model_version = attributes.get("tfs-model-version") + tfs_method = attributes.get("tfs-method", "predict") - uri = 'http://localhost:{}/v1/models/{}'.format(port, tfs_model_name) + uri = "http://localhost:{}/v1/models/{}".format(port, tfs_model_name) if tfs_model_version: - uri += '/versions/' + tfs_model_version - uri += ':' + tfs_method + uri += "/versions/" + tfs_model_version + uri += ":" + tfs_method return uri @@ -75,13 +75,13 @@ def
parse_tfs_custom_attributes(req): def create_tfs_config_individual_model(model_name, base_path): - config = 'model_config_list: {\n' - config += ' config: {\n' - config += ' name: "{}",\n'.format(model_name) - config += ' base_path: "{}",\n'.format(base_path) - config += ' model_platform: "tensorflow"\n' - config += ' }\n' - config += '}\n' + config = "model_config_list: {\n" + config += " config: {\n" + config += " name: '{}',\n".format(model_name) + config += " base_path: '{}',\n".format(base_path) + config += " model_platform: 'tensorflow'\n" + config += " }\n" + config += "}\n" return config @@ -91,29 +91,29 @@ def create_tfs_config( ): models = find_models() if not models: - raise ValueError('no SavedModel bundles found!') + raise ValueError("no SavedModel bundles found!") - if tfs_default_model_name == 'None': + if tfs_default_model_name == "None": default_model = os.path.basename(models[0]) if default_model: tfs_default_model_name = default_model - log.info('using default model name: {}'.format(tfs_default_model_name)) + log.info("using default model name: {}".format(tfs_default_model_name)) else: - log.info('no default model detected') + log.info("no default model detected") # config (may) include duplicate 'config' keys, so we can't just dump a dict - config = 'model_config_list: {\n' + config = "model_config_list: {\n" for m in models: - config += ' config: {\n' - config += ' name: "{}",\n'.format(os.path.basename(m)) - config += ' base_path: "{}",\n'.format(m) - config += ' model_platform: "tensorflow"\n' - config += ' }\n' - config += '}\n' + config += " config: {\n" + config += " name: '{}',\n".format(os.path.basename(m)) + config += " base_path: '{}',\n".format(m) + config += " model_platform: 'tensorflow'\n" + config += " }\n" + config += "}\n" - log.info('tensorflow serving model config: \n%s\n', config) + log.info("tensorflow serving model config: \n%s\n", config) - with open(tfs_config_path, 'w') as f: + with open(tfs_config_path, "w") as f: f.write(config) @@ -133,12 +133,12 @@ def tfs_command(tfs_grpc_port, def find_models(): - base_path = '/opt/ml/model' + base_path = "/opt/ml/model" models = [] for f in _find_saved_model_files(base_path): - parts = f.split('/') - if len(parts) >= 6 and re.match(r'^\d+$', parts[-2]): - model_path = '/'.join(parts[0:-2]) + parts = f.split("/") + if len(parts) >= 6 and re.match(r"^\d+$", parts[-2]): + model_path = "/".join(parts[0:-2]) if model_path not in models: models.append(model_path) return models @@ -149,7 +149,7 @@ def _find_saved_model_files(path): if e.is_dir(): yield from _find_saved_model_files(os.path.join(path, e.name)) else: - if e.name == 'saved_model.pb': + if e.name == "saved_model.pb": yield os.path.join(path, e.name) @@ -171,39 +171,39 @@ def __init__(self, key, env_var, value, defaulted_message): cpu_count = multiprocessing.cpu_count() batching_parameters = [ - _BatchingParameter('max_batch_size', 'SAGEMAKER_TFS_MAX_BATCH_SIZE', 8, + _BatchingParameter("max_batch_size", "SAGEMAKER_TFS_MAX_BATCH_SIZE", 8, "max_batch_size defaulted to {}. Set {} to override default. " "Tuning this parameter may yield better performance."), - _BatchingParameter('batch_timeout_micros', 'SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS', 1000, + _BatchingParameter("batch_timeout_micros", "SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS", 1000, "batch_timeout_micros defaulted to {}. Set {} to override " "default. 
Tuning this parameter may yield better performance."), - _BatchingParameter('num_batch_threads', 'SAGEMAKER_TFS_NUM_BATCH_THREADS', + _BatchingParameter("num_batch_threads", "SAGEMAKER_TFS_NUM_BATCH_THREADS", cpu_count, "num_batch_threads defaulted to {}," "the number of CPUs. Set {} to override default."), - _BatchingParameter('max_enqueued_batches', 'SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES', + _BatchingParameter("max_enqueued_batches", "SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES", # Batch limits number of concurrent requests, which limits number # of enqueued batches, so this can be set high for Batch - 100000000 if 'SAGEMAKER_BATCH' in os.environ else cpu_count, + 100000000 if "SAGEMAKER_BATCH" in os.environ else cpu_count, "max_enqueued_batches defaulted to {}. Set {} to override default. " "Tuning this parameter may be necessary to tune out-of-memory " "errors occur."), ] - warning_message = '' + warning_message = "" for batching_parameter in batching_parameters: if batching_parameter.env_var in os.environ: batching_parameter.value = os.environ[batching_parameter.env_var] else: warning_message += batching_parameter.defaulted_message.format( batching_parameter.value, batching_parameter.env_var) - warning_message += '\n' + warning_message += "\n" if warning_message: log.warning(warning_message) - config = '' + config = "" for batching_parameter in batching_parameters: - config += '%s { value: %s }\n' % (batching_parameter.key, batching_parameter.value) + config += "%s { value: %s }\n" % (batching_parameter.key, batching_parameter.value) - log.info('batching config: \n%s\n', config) - with open(batching_config_file, 'w') as f: + log.info("batching config: \n%s\n", config) + with open(batching_config_file, "w") as f: f.write(config) diff --git a/scripts/shared.sh b/scripts/shared.sh index 57be36dd..3a92382b 100755 --- a/scripts/shared.sh +++ b/scripts/shared.sh @@ -25,7 +25,7 @@ function get_short_version() { } function get_aws_account() { - aws --region $AWS_DEFAULT_REGION sts --endpoint-url https://sts.$AWS_DEFAULT_REGION.amazonaws.com get-caller-identity --query 'Account' --output text + aws --region us-west-2 sts --endpoint-url https://sts.us-west-2.amazonaws.com get-caller-identity --query 'Account' --output text } function get_ei_executable() { diff --git a/test/integration/local/conftest.py b/test/integration/local/conftest.py index 903885e2..86d97e2e 100644 --- a/test/integration/local/conftest.py +++ b/test/integration/local/conftest.py @@ -13,43 +13,43 @@ import pytest -FRAMEWORK_LATEST_VERSION = '1.13' -TFS_DOCKER_BASE_NAME = 'sagemaker-tensorflow-serving' +FRAMEWORK_LATEST_VERSION = "1.13" +TFS_DOCKER_BASE_NAME = "sagemaker-tensorflow-serving" def pytest_addoption(parser): - parser.addoption('--docker-base-name', default=TFS_DOCKER_BASE_NAME) - parser.addoption('--framework-version', default=FRAMEWORK_LATEST_VERSION, required=True) - parser.addoption('--processor', default='cpu', choices=['cpu', 'gpu']) - parser.addoption('--tag') + parser.addoption("--docker-base-name", default=TFS_DOCKER_BASE_NAME) + parser.addoption("--framework-version", default=FRAMEWORK_LATEST_VERSION, required=True) + parser.addoption("--processor", default="cpu", choices=["cpu", "gpu"]) + parser.addoption("--tag") -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def docker_base_name(request): - return request.config.getoption('--docker-base-name') + return request.config.getoption("--docker-base-name") -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def framework_version(request): 
- return request.config.getoption('--framework-version') + return request.config.getoption("--framework-version") -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def processor(request): - return request.config.getoption('--processor') + return request.config.getoption("--processor") -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def runtime_config(request, processor): - if processor == 'gpu': - return '--runtime=nvidia ' + if processor == "gpu": + return "--runtime=nvidia " else: - return '' + return "" -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def tag(request, framework_version, processor): - image_tag = request.config.getoption('--tag') + image_tag = request.config.getoption("--tag") if not image_tag: - image_tag = '{}-{}'.format(framework_version, processor) + image_tag = "{}-{}".format(framework_version, processor) return image_tag diff --git a/test/integration/local/multi_model_endpoint_test_utils.py b/test/integration/local/multi_model_endpoint_test_utils.py index bce63250..08802dd6 100644 --- a/test/integration/local/multi_model_endpoint_test_utils.py +++ b/test/integration/local/multi_model_endpoint_test_utils.py @@ -15,23 +15,23 @@ import json import requests -INVOCATION_URL = 'http://localhost:8080/models/{}/invoke' -MODELS_URL = 'http://localhost:8080/models' -DELETE_MODEL_URL = 'http://localhost:8080/models/{}' +INVOCATION_URL = "http://localhost:8080/models/{}/invoke" +MODELS_URL = "http://localhost:8080/models" +DELETE_MODEL_URL = "http://localhost:8080/models/{}" -def make_headers(content_type='application/json', method='predict'): +def make_headers(content_type="application/json", method="predict"): headers = { - 'Content-Type': content_type, - 'X-Amzn-SageMaker-Custom-Attributes': 'tfs-method=%s' % method + "Content-Type": content_type, + "X-Amzn-SageMaker-Custom-Attributes": "tfs-method=%s" % method } return headers -def make_invocation_request(data, model_name, content_type='application/json'): +def make_invocation_request(data, model_name, content_type="application/json"): headers = { - 'Content-Type': content_type, - 'X-Amzn-SageMaker-Custom-Attributes': 'tfs-method=predict' + "Content-Type": content_type, + "X-Amzn-SageMaker-Custom-Attributes": "tfs-method=predict" } response = requests.post(INVOCATION_URL.format(model_name), data=data, headers=headers) return response.status_code, response.content.decode(encodings.utf_8.getregentry().name) @@ -43,13 +43,13 @@ def make_list_model_request(): def make_get_model_request(model_name): - response = requests.get(MODELS_URL + '/{}'.format(model_name)) + response = requests.get(MODELS_URL + "/{}".format(model_name)) return response.status_code, response.content.decode(encodings.utf_8.getregentry().name) -def make_load_model_request(data, content_type='application/json'): +def make_load_model_request(data, content_type="application/json"): headers = { - 'Content-Type': content_type + "Content-Type": content_type } response = requests.post(MODELS_URL, data=data, headers=headers) return response.status_code, response.content.decode(encodings.utf_8.getregentry().name) diff --git a/test/integration/local/test_container.py b/test/integration/local/test_container.py index 32602939..00a82ecf 100644 --- a/test/integration/local/test_container.py +++ b/test/integration/local/test_container.py @@ -20,37 +20,37 @@ import pytest import requests -BASE_URL = 'http://localhost:8080/invocations' +BASE_URL = "http://localhost:8080/invocations" -@pytest.fixture(scope='session', 
autouse=True) +@pytest.fixture(scope="session", autouse=True) def volume(): try: - model_dir = os.path.abspath('test/resources/models') + model_dir = os.path.abspath("test/resources/models") subprocess.check_call( - 'docker volume create --name model_volume --opt type=none ' - '--opt device={} --opt o=bind'.format(model_dir).split()) + "docker volume create --name model_volume --opt type=none " + "--opt device={} --opt o=bind".format(model_dir).split()) yield model_dir finally: - subprocess.check_call('docker volume rm model_volume'.split()) + subprocess.check_call("docker volume rm model_volume".split()) -@pytest.fixture(scope='module', autouse=True, params=[True, False]) +@pytest.fixture(scope="module", autouse=True, params=[True, False]) def container(request, docker_base_name, tag, runtime_config): try: if request.param: - batching_config = ' -e SAGEMAKER_TFS_ENABLE_BATCHING=true' + batching_config = " -e SAGEMAKER_TFS_ENABLE_BATCHING=true" else: - batching_config = '' + batching_config = "" command = ( - 'docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080' - ' --mount type=volume,source=model_volume,target=/opt/ml/model,readonly' - ' -e SAGEMAKER_TFS_DEFAULT_MODEL_NAME=half_plus_three' - ' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info' - ' -e SAGEMAKER_BIND_TO_PORT=8080' - ' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999' - ' {}' - ' {}:{} serve' + "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" + " --mount type=volume,source=model_volume,target=/opt/ml/model,readonly" + " -e SAGEMAKER_TFS_DEFAULT_MODEL_NAME=half_plus_three" + " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" + " -e SAGEMAKER_BIND_TO_PORT=8080" + " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" + " {}" + " {}:{} serve" ).format(runtime_config, batching_config, docker_base_name, tag) proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) @@ -60,7 +60,7 @@ def container(request, docker_base_name, tag, runtime_config): while attempts < 40: time.sleep(3) try: - res_code = requests.get('http://localhost:8080/ping').status_code + res_code = requests.get("http://localhost:8080/ping").status_code if res_code == 200: break except: @@ -69,126 +69,126 @@ def container(request, docker_base_name, tag, runtime_config): yield proc.pid finally: - subprocess.check_call('docker rm -f sagemaker-tensorflow-serving-test'.split()) + subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) -def make_request(data, content_type='application/json', method='predict'): +def make_request(data, content_type="application/json", method="predict"): headers = { - 'Content-Type': content_type, - 'X-Amzn-SageMaker-Custom-Attributes': - 'tfs-model-name=half_plus_three,tfs-method=%s' % method + "Content-Type": content_type, + "X-Amzn-SageMaker-Custom-Attributes": + "tfs-model-name=half_plus_three,tfs-method=%s" % method } response = requests.post(BASE_URL, data=data, headers=headers) - return json.loads(response.content.decode('utf-8')) + return json.loads(response.content.decode("utf-8")) def test_predict(): x = { - 'instances': [1.0, 2.0, 5.0] + "instances": [1.0, 2.0, 5.0] } y = make_request(json.dumps(x)) - assert y == {'predictions': [3.5, 4.0, 5.5]} + assert y == {"predictions": [3.5, 4.0, 5.5]} def test_predict_twice(): x = { - 'instances': [1.0, 2.0, 5.0] + "instances": [1.0, 2.0, 5.0] } y = make_request(json.dumps(x)) z = make_request(json.dumps(x)) - assert y == {'predictions': [3.5, 4.0, 5.5]} - assert z == {'predictions': [3.5, 4.0, 5.5]} + assert y == {"predictions": [3.5, 4.0, 5.5]} + 
assert z == {"predictions": [3.5, 4.0, 5.5]} def test_predict_two_instances(): x = { - 'instances': [[1.0, 2.0, 5.0], [1.0, 2.0, 5.0]] + "instances": [[1.0, 2.0, 5.0], [1.0, 2.0, 5.0]] } y = make_request(json.dumps(x)) - assert y == {'predictions': [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} + assert y == {"predictions": [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} def test_predict_jsons_json_content_type(): - x = '[1.0, 2.0, 5.0]\n[1.0, 2.0, 5.0]' + x = "[1.0, 2.0, 5.0]\n[1.0, 2.0, 5.0]" y = make_request(x) - assert y == {'predictions': [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} + assert y == {"predictions": [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} def test_predict_jsonlines(): - x = '[1.0, 2.0, 5.0]\n[1.0, 2.0, 5.0]' - y = make_request(x, 'application/jsonlines') - assert y == {'predictions': [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} + x = "[1.0, 2.0, 5.0]\n[1.0, 2.0, 5.0]" + y = make_request(x, "application/jsonlines") + assert y == {"predictions": [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} def test_predict_jsons(): - x = '[1.0, 2.0, 5.0]\n[1.0, 2.0, 5.0]' - y = make_request(x, 'application/jsons') - assert y == {'predictions': [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} + x = "[1.0, 2.0, 5.0]\n[1.0, 2.0, 5.0]" + y = make_request(x, "application/jsons") + assert y == {"predictions": [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} def test_predict_jsons_2(): - x = '{"x": [1.0, 2.0, 5.0]}\n{"x": [1.0, 2.0, 5.0]}' + x = "{'x': [1.0, 2.0, 5.0]}\n{'x': [1.0, 2.0, 5.0]}" y = make_request(x) - assert y == {'predictions': [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} + assert y == {"predictions": [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} def test_predict_generic_json(): x = [1.0, 2.0, 5.0] y = make_request(json.dumps(x)) - assert y == {'predictions': [[3.5, 4.0, 5.5]]} + assert y == {"predictions": [[3.5, 4.0, 5.5]]} def test_predict_generic_json_two_instances(): x = [[1.0, 2.0, 5.0], [1.0, 2.0, 5.0]] y = make_request(json.dumps(x)) - assert y == {'predictions': [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} + assert y == {"predictions": [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} def test_predict_csv(): - x = '1.0' - y = make_request(x, 'text/csv') - assert y == {'predictions': [3.5]} + x = "1.0" + y = make_request(x, "text/csv") + assert y == {"predictions": [3.5]} def test_predict_csv_with_zero(): - x = '0.0' - y = make_request(x, 'text/csv') - assert y == {'predictions': [3.0]} + x = "0.0" + y = make_request(x, "text/csv") + assert y == {"predictions": [3.0]} def test_predict_csv_one_instance_three_values_with_zero(): - x = '0.0,2.0,5.0' - y = make_request(x, 'text/csv') - assert y == {'predictions': [[3.0, 4.0, 5.5]]} + x = "0.0,2.0,5.0" + y = make_request(x, "text/csv") + assert y == {"predictions": [[3.0, 4.0, 5.5]]} def test_predict_csv_one_instance_three_values(): - x = '1.0,2.0,5.0' - y = make_request(x, 'text/csv') - assert y == {'predictions': [[3.5, 4.0, 5.5]]} + x = "1.0,2.0,5.0" + y = make_request(x, "text/csv") + assert y == {"predictions": [[3.5, 4.0, 5.5]]} def test_predict_csv_two_instances_three_values(): - x = '1.0,2.0,5.0\n1.0,2.0,5.0' - y = make_request(x, 'text/csv') - assert y == {'predictions': [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} + x = "1.0,2.0,5.0\n1.0,2.0,5.0" + y = make_request(x, "text/csv") + assert y == {"predictions": [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} def test_predict_csv_three_instances(): - x = '1.0\n2.0\n5.0' - y = make_request(x, 'text/csv') - assert y == {'predictions': [3.5, 4.0, 5.5]} + x = "1.0\n2.0\n5.0" + y = make_request(x, "text/csv") + assert y == {"predictions": [3.5, 4.0, 5.5]} def test_predict_csv_wide_categorical_input(): - x = 
('0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0\n' # noqa - '0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,6.0,0.0\n') # noqa + x = ("0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0\n" # noqa + "0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,6.0,0.0\n") # noqa - y = make_request(x, 'text/csv') - predictions = y['predictions'] + y = make_request(x, "text/csv") + predictions = y["predictions"] assert 2 == len(predictions) assert 30 == len(predictions[0]) @@ -198,12 +198,12 @@ def test_predict_csv_wide_categorical_input(): def test_regress(): x = { - 'signature_name': 'tensorflow/serving/regress', - 'examples': [{'x': 1.0}, {'x': 2.0}] + "signature_name": "tensorflow/serving/regress", + "examples": [{"x": 1.0}, {"x": 2.0}] } - y = make_request(json.dumps(x), method='regress') - assert y == {'results': [3.5, 4.0]} + y = make_request(json.dumps(x), method="regress") + assert y == {"results": [3.5, 4.0]} def test_regress_one_instance(): @@ -211,48 +211,48 @@ def test_regress_one_instance(): # but it is actually 'results' # this test will catch if they change api to match docs (unlikely) x = { - 'signature_name': 'tensorflow/serving/regress', - 'examples': [{'x': 1.0}] + "signature_name": "tensorflow/serving/regress", + "examples": [{"x": 1.0}] } - y = make_request(json.dumps(x), method='regress') - assert y == {'results': [3.5]} + y = make_request(json.dumps(x), method="regress") + assert y == {"results": [3.5]} def test_predict_bad_input(): - y = make_request('whatever') - assert 'error' in y + y = make_request("whatever") + assert "error" in y def test_predict_bad_input_instances(): - x = json.dumps({'junk': 'data'}) + x = json.dumps({"junk": "data"}) y = make_request(x) - assert y['error'].startswith('Failed to process element: 0 key: junk of \'instances\' list.') + assert y["error"].startswith("Failed to process element: 0 key: junk of \'instances\' list.") def test_predict_no_custom_attributes_header(): x = { - 'instances': [1.0, 2.0, 5.0] + "instances": [1.0, 2.0, 5.0] } headers = { 'Content-Type': 'application/json' } response = requests.post(BASE_URL, data=json.dumps(x), headers=headers) - y = json.loads(response.content.decode('utf-8')) + y = json.loads(response.content.decode("utf-8")) - assert y == {'predictions': [3.5, 4.0, 5.5]} + assert y == {"predictions": [3.5, 4.0, 5.5]} def test_predict_with_jsonlines(): x = { - 'instances': [1.0, 2.0, 5.0] + "instances": [1.0, 2.0, 5.0] } headers = { - 'Content-Type': 'application/json', - 'Accept': 'application/jsonlines' + "Content-Type": "application/json", + "Accept": "application/jsonlines" } response = requests.post(BASE_URL, data=json.dumps(x), headers=headers) - assert response.headers['Content-Type'] == 'application/jsonlines' - assert response.content.decode('utf-8') == '{ "predictions": [3.5, 4.0, 5.5 ]}' + assert response.headers["Content-Type"] == "application/jsonlines" + assert response.content.decode("utf-8") == "{ \"predictions\": [3.5, 4.0, 5.5 ]}" diff --git a/test/integration/local/test_multi_model_endpoint.py b/test/integration/local/test_multi_model_endpoint.py index 811c531a..c1984bb6 100644 --- a/test/integration/local/test_multi_model_endpoint.py +++ b/test/integration/local/test_multi_model_endpoint.py @@ -23,32 +23,32 @@ from
multi_model_endpoint_test_utils import make_invocation_request, make_list_model_request, \ make_get_model_request, make_load_model_request, make_unload_model_request -PING_URL = 'http://localhost:8080/ping' +PING_URL = "http://localhost:8080/ping" -@pytest.fixture(scope='session', autouse=True) +@pytest.fixture(scope="session", autouse=True) def volume(): try: - model_dir = os.path.abspath('test/resources/mme') + model_dir = os.path.abspath("test/resources/mme") subprocess.check_call( - 'docker volume create --name dynamic_endpoint_model_volume --opt type=none ' - '--opt device={} --opt o=bind'.format(model_dir).split()) + "docker volume create --name dynamic_endpoint_model_volume --opt type=none " + "--opt device={} --opt o=bind".format(model_dir).split()) yield model_dir finally: - subprocess.check_call('docker volume rm dynamic_endpoint_model_volume'.split()) + subprocess.check_call("docker volume rm dynamic_endpoint_model_volume".split()) -@pytest.fixture(scope='module', autouse=True) +@pytest.fixture(scope="module", autouse=True) def container(request, docker_base_name, tag, runtime_config): try: command = ( - 'docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080' - ' --mount type=volume,source=dynamic_endpoint_model_volume,target=/opt/ml/models,readonly' - ' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info' - ' -e SAGEMAKER_BIND_TO_PORT=8080' - ' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999' - ' -e SAGEMAKER_MULTI_MODEL=true' - ' {}:{} serve' + "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" + " --mount type=volume,source=dynamic_endpoint_model_volume,target=/opt/ml/models,readonly" + " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" + " -e SAGEMAKER_BIND_TO_PORT=8080" + " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" + " -e SAGEMAKER_MULTI_MODEL=true" + " {}:{} serve" ).format(runtime_config, docker_base_name, tag) proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) @@ -57,7 +57,7 @@ def container(request, docker_base_name, tag, runtime_config): while attempts < 40: time.sleep(3) try: - res_code = requests.get('http://localhost:8080/ping').status_code + res_code = requests.get("http://localhost:8080/ping").status_code if res_code == 200: break except: @@ -66,7 +66,7 @@ def container(request, docker_base_name, tag, runtime_config): yield proc.pid finally: - subprocess.check_call('docker rm -f sagemaker-tensorflow-serving-test'.split()) + subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) def test_ping(): @@ -76,9 +76,9 @@ def test_ping(): def test_container_start_invocation_fail(): x = { - 'instances': [1.0, 2.0, 5.0] + "instances": [1.0, 2.0, 5.0] } - code, y = make_invocation_request(json.dumps(x), 'half_plus_three') + code, y = make_invocation_request(json.dumps(x), "half_plus_three") y = json.loads(y) assert code == 404 assert "Model half_plus_three is not loaded yet." 
in str(y) @@ -93,28 +93,28 @@ def test_list_models_empty(): def test_delete_unloaded_model(): # unloads the given model/version, no-op if not loaded - model_name = 'non-existing-model' + model_name = "non-existing-model" code, res = make_unload_model_request(model_name) assert code == 404 - assert 'Model {} is not loaded yet'.format(model_name) in res + assert "Model {} is not loaded yet".format(model_name) in res def test_delete_model(): - model_name = 'half_plus_three' + model_name = "half_plus_three" model_data = { - 'model_name': model_name, - 'url': '/opt/ml/models/half_plus_three' + "model_name": model_name, + "url": "/opt/ml/models/half_plus_three" } code, res = make_load_model_request(json.dumps(model_data)) assert code == 200 - assert 'Successfully loaded model {}'.format(model_name) in res + assert "Successfully loaded model {}".format(model_name) in res x = { - 'instances': [1.0, 2.0, 5.0] + "instances": [1.0, 2.0, 5.0] } _, y = make_invocation_request(json.dumps(x), model_name) y = json.loads(y) - assert y == {'predictions': [3.5, 4.0, 5.5]} + assert y == {"predictions": [3.5, 4.0, 5.5]} code_unload, res2 = make_unload_model_request(model_name) assert code_unload == 200 @@ -122,43 +122,43 @@ def test_delete_model(): code_invoke, y2 = make_invocation_request(json.dumps(x), model_name) y2 = json.loads(y2) assert code_invoke == 404 - assert 'Model {} is not loaded yet.'.format(model_name) in str(y2) + assert "Model {} is not loaded yet.".format(model_name) in str(y2) def test_load_two_models(): - model_name_1 = 'half_plus_two' + model_name_1 = "half_plus_two" model_data_1 = { - 'model_name': model_name_1, - 'url': '/opt/ml/models/half_plus_two' + "model_name": model_name_1, + "url": "/opt/ml/models/half_plus_two" } code1, res1 = make_load_model_request(json.dumps(model_data_1)) assert code1 == 200 - assert 'Successfully loaded model {}'.format(model_name_1) in res1 + assert "Successfully loaded model {}".format(model_name_1) in res1 # load second model - model_name_2 = 'half_plus_three' + model_name_2 = "half_plus_three" model_data_2 = { - 'model_name': model_name_2, - 'url': '/opt/ml/models/half_plus_three' + "model_name": model_name_2, + "url": "/opt/ml/models/half_plus_three" } code2, res2 = make_load_model_request(json.dumps(model_data_2)) assert code2 == 200 - assert 'Successfully loaded model {}'.format(model_name_2) in res2 + assert "Successfully loaded model {}".format(model_name_2) in res2 # make invocation request to the first model x = { - 'instances': [1.0, 2.0, 5.0] + "instances": [1.0, 2.0, 5.0] } code_invoke1, y1 = make_invocation_request(json.dumps(x), model_name_1) y1 = json.loads(y1) assert code_invoke1 == 200 - assert y1 == {'predictions': [2.5, 3.0, 4.5]} + assert y1 == {"predictions": [2.5, 3.0, 4.5]} # make invocation request to the second model - code_invoke2, y2 = make_invocation_request(json.dumps(x), 'half_plus_three') + code_invoke2, y2 = make_invocation_request(json.dumps(x), "half_plus_three") y2 = json.loads(y2) assert code_invoke2 == 200 - assert y2 == {'predictions': [3.5, 4.0, 5.5]} + assert y2 == {"predictions": [3.5, 4.0, 5.5]} code_list, res3 = make_list_model_request() res3 = json.loads(res3) @@ -166,48 +166,48 @@ def test_load_two_models(): def test_load_one_model_two_times(): - model_name = 'cifar' + model_name = "cifar" model_data = { - 'model_name': model_name, - 'url': '/opt/ml/models/cifar' + "model_name": model_name, + "url": "/opt/ml/models/cifar" } code_load, res = make_load_model_request(json.dumps(model_data)) assert code_load == 200 
- assert 'Successfully loaded model {}'.format(model_name) in res + assert "Successfully loaded model {}".format(model_name) in res code_load2, res2 = make_load_model_request(json.dumps(model_data)) assert code_load2 == 409 - assert'Model {} is already loaded'.format(model_name) in res2 + assert "Model {} is already loaded".format(model_name) in res2 def test_load_non_existing_model(): - model_name = 'non-existing' - base_path = '/opt/ml/models/non-existing' + model_name = "non-existing" + base_path = "/opt/ml/models/non-existing" model_data = { - 'model_name': model_name, - 'url': base_path + "model_name": model_name, + "url": base_path } code, res = make_load_model_request(json.dumps(model_data)) assert code == 404 - assert 'Could not find valid base path {} for servable {}'.format(base_path, model_name) in str(res) + assert "Could not find valid base path {} for servable {}".format(base_path, model_name) in str(res) def test_bad_model_reqeust(): bad_model_data = { - 'model_name': 'model_name', - 'uri': '/opt/ml/models/non-existing' + "model_name": "model_name", + "uri": "/opt/ml/models/non-existing" } code, _ = make_load_model_request(json.dumps(bad_model_data)) assert code == 500 def test_invalid_model_version(): - model_name = 'invalid_version' - base_path = '/opt/ml/models/invalid_version' + model_name = "invalid_version" + base_path = "/opt/ml/models/invalid_version" invalid_model_version_data = { - 'model_name': model_name, - 'url': base_path + "model_name": model_name, + "url": base_path } code, res = make_load_model_request(json.dumps(invalid_model_version_data)) assert code == 404 - assert 'Could not find valid base path {} for servable {}'.format(base_path, model_name) in str(res) + assert "Could not find valid base path {} for servable {}".format(base_path, model_name) in str(res) diff --git a/test/integration/local/test_pre_post_processing.py b/test/integration/local/test_pre_post_processing.py index 3d72b612..e2ec89cc 100644 --- a/test/integration/local/test_pre_post_processing.py +++ b/test/integration/local/test_pre_post_processing.py @@ -22,42 +22,42 @@ import requests -PING_URL = 'http://localhost:8080/ping' -INVOCATIONS_URL = 'http://localhost:8080/invocations' +PING_URL = "http://localhost:8080/ping" +INVOCATIONS_URL = "http://localhost:8080/invocations" -@pytest.fixture(scope='module', autouse=True, params=['1', '2', '3', '4', '5']) +@pytest.fixture(scope="module", autouse=True, params=["1", "2", "3", "4", "5"]) def volume(tmpdir_factory, request): try: print(str(tmpdir_factory)) - model_dir = os.path.join(tmpdir_factory.mktemp('test'), 'model') - code_dir = os.path.join(model_dir, 'code') - test_example = 'test/resources/examples/test{}'.format(request.param) + model_dir = os.path.join(tmpdir_factory.mktemp("test"), "model") + code_dir = os.path.join(model_dir, "code") + test_example = "test/resources/examples/test{}".format(request.param) - model_src_dir = 'test/resources/models' + model_src_dir = "test/resources/models" shutil.copytree(model_src_dir, model_dir) shutil.copytree(test_example, code_dir) - volume_name = f'model_volume_{request.param}' + volume_name = f"model_volume_{request.param}" subprocess.check_call( - 'docker volume create --name {} --opt type=none ' - '--opt device={} --opt o=bind'.format(volume_name, model_dir).split()) + "docker volume create --name {} --opt type=none " + "--opt device={} --opt o=bind".format(volume_name, model_dir).split()) yield volume_name finally: - subprocess.check_call(f'docker volume rm {volume_name}'.split()) + 
subprocess.check_call(f"docker volume rm {volume_name}".split()) -@pytest.fixture(scope='module', autouse=True) +@pytest.fixture(scope="module", autouse=True) def container(volume, docker_base_name, tag, runtime_config): try: command = ( - 'docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080' - ' --mount type=volume,source={},target=/opt/ml/model,readonly' - ' -e SAGEMAKER_TFS_DEFAULT_MODEL_NAME=half_plus_three' - ' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info' - ' -e SAGEMAKER_BIND_TO_PORT=8080' - ' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999' - ' {}:{} serve' + "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" + " --mount type=volume,source={},target=/opt/ml/model,readonly" + " -e SAGEMAKER_TFS_DEFAULT_MODEL_NAME=half_plus_three" + " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" + " -e SAGEMAKER_BIND_TO_PORT=8080" + " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" + " {}:{} serve" ).format(runtime_config, volume, docker_base_name, tag) proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) @@ -66,7 +66,7 @@ def container(volume, docker_base_name, tag, runtime_config): while attempts < 40: time.sleep(3) try: - res_code = requests.get('http://localhost:8080/ping').status_code + res_code = requests.get("http://localhost:8080/ping").status_code if res_code == 200: break except: @@ -75,56 +75,56 @@ def container(volume, docker_base_name, tag, runtime_config): yield proc.pid finally: - subprocess.check_call('docker rm -f sagemaker-tensorflow-serving-test'.split()) + subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) def make_headers(content_type, method): headers = { - 'Content-Type': content_type, - 'X-Amzn-SageMaker-Custom-Attributes': 'tfs-model-name=half_plus_three,tfs-method=%s' % method + "Content-Type": content_type, + "X-Amzn-SageMaker-Custom-Attributes": "tfs-model-name=half_plus_three,tfs-method=%s" % method } return headers def test_predict_json(): - headers = make_headers('application/json', 'predict') - data = '{"instances": [1.0, 2.0, 5.0]}' + headers = make_headers("application/json", "predict") + data = "{'instances': [1.0, 2.0, 5.0]}" response = requests.post(INVOCATIONS_URL, data=data, headers=headers).json() - assert response == {'predictions': [3.5, 4.0, 5.5]} + assert response == {"predictions": [3.5, 4.0, 5.5]} def test_zero_content(): - headers = make_headers('application/json', 'predict') - data = '' + headers = make_headers("application/json", "predict") + data = "" response = requests.post(INVOCATIONS_URL, data=data, headers=headers) assert 500 == response.status_code - assert 'document is empty' in response.text + assert "document is empty" in response.text def test_large_input(): - headers = make_headers('text/csv', 'predict') - data_file = 'test/resources/inputs/test-large.csv' + headers = make_headers("text/csv", "predict") + data_file = "test/resources/inputs/test-large.csv" - with open(data_file, 'r') as file: + with open(data_file, "r") as file: large_data = file.read() response = requests.post(INVOCATIONS_URL, data=large_data, headers=headers).json() - predictions = response['predictions'] + predictions = response["predictions"] assert len(predictions) == 753936 def test_csv_input(): - headers = make_headers('text/csv', 'predict') - data = '1.0,2.0,5.0' + headers = make_headers("text/csv", "predict") + data = "1.0,2.0,5.0" response = requests.post(INVOCATIONS_URL, data=data, headers=headers).json() - assert response == {'predictions': [3.5, 4.0, 5.5]} + assert response == {"predictions": [3.5, 
4.0, 5.5]} def test_unsupported_content_type(): - headers = make_headers('unsupported-type', 'predict') - data = 'aW1hZ2UgYnl0ZXM=' + headers = make_headers("unsupported-type", "predict") + data = "aW1hZ2UgYnl0ZXM=" response = requests.post(INVOCATIONS_URL, data=data, headers=headers) assert 500 == response.status_code - assert 'unsupported content type' in response.text + assert "unsupported content type" in response.text def test_ping_service(): diff --git a/test/integration/local/test_pre_post_processing_mme.py b/test/integration/local/test_pre_post_processing_mme.py index 2fce6063..f4c8730d 100644 --- a/test/integration/local/test_pre_post_processing_mme.py +++ b/test/integration/local/test_pre_post_processing_mme.py @@ -26,43 +26,43 @@ make_get_model_request, make_load_model_request, make_unload_model_request, make_headers -PING_URL = 'http://localhost:8080/ping' -INVOCATION_URL = 'http://localhost:8080/models/{}/invoke' -MODEL_NAME = 'half_plus_three' +PING_URL = "http://localhost:8080/ping" +INVOCATION_URL = "http://localhost:8080/models/{}/invoke" +MODEL_NAME = "half_plus_three" -@pytest.fixture(scope='module', autouse=True) +@pytest.fixture(scope="module", autouse=True) def volume(tmpdir_factory, request): try: print(str(tmpdir_factory)) - model_dir = os.path.join(tmpdir_factory.mktemp('test'), 'model') - code_dir = os.path.join(model_dir, 'code') - test_example = 'test/resources/examples/test1' + model_dir = os.path.join(tmpdir_factory.mktemp("test"), "model") + code_dir = os.path.join(model_dir, "code") + test_example = "test/resources/examples/test1" - model_src_dir = 'test/resources/models' + model_src_dir = "test/resources/models" shutil.copytree(model_src_dir, model_dir) shutil.copytree(test_example, code_dir) - volume_name = f'model_volume_1' + volume_name = f"model_volume_1" subprocess.check_call( - 'docker volume create --name {} --opt type=none ' - '--opt device={} --opt o=bind'.format(volume_name, model_dir).split()) + "docker volume create --name {} --opt type=none " + "--opt device={} --opt o=bind".format(volume_name, model_dir).split()) yield volume_name finally: - subprocess.check_call(f'docker volume rm {volume_name}'.split()) + subprocess.check_call(f"docker volume rm {volume_name}".split()) -@pytest.fixture(scope='module', autouse=True) +@pytest.fixture(scope="module", autouse=True) def container(volume, docker_base_name, tag, runtime_config): try: command = ( - 'docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080' - ' --mount type=volume,source={},target=/opt/ml/models,readonly' - ' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info' - ' -e SAGEMAKER_BIND_TO_PORT=8080' - ' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999' - ' -e SAGEMAKER_MULTI_MODEL=True' - ' {}:{} serve' + "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" + " --mount type=volume,source={},target=/opt/ml/models,readonly" + " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" + " -e SAGEMAKER_BIND_TO_PORT=8080" + " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" + " -e SAGEMAKER_MULTI_MODEL=True" + " {}:{} serve" ).format(runtime_config, volume, docker_base_name, tag) proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) @@ -71,7 +71,7 @@ def container(volume, docker_base_name, tag, runtime_config): while attempts < 40: time.sleep(3) try: - res_code = requests.get('http://localhost:8080/ping').status_code + res_code = requests.get("http://localhost:8080/ping").status_code if res_code == 200: break except: @@ -80,14 +80,14 @@ def container(volume, docker_base_name, tag, 
runtime_config): yield proc.pid finally: - subprocess.check_call('docker rm -f sagemaker-tensorflow-serving-test'.split()) + subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) @pytest.fixture def model(): model_data = { - 'model_name': MODEL_NAME, - 'url': '/opt/ml/models/half_plus_three' + "model_name": MODEL_NAME, + "url": "/opt/ml/models/half_plus_three" } make_load_model_request(json.dumps(model_data)) return MODEL_NAME @@ -100,40 +100,40 @@ def test_ping_service(): def test_predict_json(model): headers = make_headers() - data = '{"instances": [1.0, 2.0, 5.0]}' + data = "{'instances': [1.0, 2.0, 5.0]}" response = requests.post(INVOCATION_URL.format(model), data=data, headers=headers).json() - assert response == {'predictions': [3.5, 4.0, 5.5]} + assert response == {"predictions": [3.5, 4.0, 5.5]} def test_zero_content(): headers = make_headers() - x = '' + x = "" response = requests.post(INVOCATION_URL.format(MODEL_NAME), data=x, headers=headers) assert 500 == response.status_code - assert 'document is empty' in response.text + assert "document is empty" in response.text def test_large_input(): - data_file = 'test/resources/inputs/test-large.csv' + data_file = "test/resources/inputs/test-large.csv" - with open(data_file, 'r') as file: + with open(data_file, "r") as file: x = file.read() - headers = make_headers(content_type='text/csv') + headers = make_headers(content_type="text/csv") response = requests.post(INVOCATION_URL.format(MODEL_NAME), data=x, headers=headers).json() - predictions = response['predictions'] + predictions = response["predictions"] assert len(predictions) == 753936 def test_csv_input(): - headers = make_headers(content_type='text/csv') - data = '1.0,2.0,5.0' + headers = make_headers(content_type="text/csv") + data = "1.0,2.0,5.0" response = requests.post(INVOCATION_URL.format(MODEL_NAME), data=data, headers=headers).json() - assert response == {'predictions': [3.5, 4.0, 5.5]} + assert response == {"predictions": [3.5, 4.0, 5.5]} def test_unsupported_content_type(): - headers = make_headers('unsupported-type', 'predict') - data = 'aW1hZ2UgYnl0ZXM=' + headers = make_headers("unsupported-type", "predict") + data = "aW1hZ2UgYnl0ZXM=" response = requests.post(INVOCATION_URL.format(MODEL_NAME), data=data, headers=headers) assert 500 == response.status_code - assert 'unsupported content type' in response.text + assert "unsupported content type" in response.text diff --git a/test/integration/local/test_tfs_batching.py b/test/integration/local/test_tfs_batching.py index 3460e08f..6584848d 100644 --- a/test/integration/local/test_tfs_batching.py +++ b/test/integration/local/test_tfs_batching.py @@ -19,42 +19,42 @@ import pytest -@pytest.fixture(scope='session', autouse=True) +@pytest.fixture(scope="session", autouse=True) def volume(): try: - model_dir = os.path.abspath('test/resources/models') + model_dir = os.path.abspath("test/resources/models") subprocess.check_call( - 'docker volume create --name batching_model_volume --opt type=none ' - '--opt device={} --opt o=bind'.format(model_dir).split()) + "docker volume create --name batching_model_volume --opt type=none " + "--opt device={} --opt o=bind".format(model_dir).split()) yield model_dir finally: - subprocess.check_call('docker volume rm batching_model_volume'.split()) + subprocess.check_call("docker volume rm batching_model_volume".split()) def test_run_tfs_with_batching_parameters(docker_base_name, tag, runtime_config): try: command = ( - 'docker run {}--name 
sagemaker-tensorflow-serving-test -p 8080:8080' - ' --mount type=volume,source=batching_model_volume,target=/opt/ml/model,readonly' - ' -e SAGEMAKER_TFS_ENABLE_BATCHING=true' - ' -e SAGEMAKER_TFS_MAX_BATCH_SIZE=16' - ' -e SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS=500' - ' -e SAGEMAKER_TFS_NUM_BATCH_THREADS=100' - ' -e SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES=1' - ' -e SAGEMAKER_TFS_DEFAULT_MODEL_NAME=half_plus_three' - ' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info' - ' -e SAGEMAKER_BIND_TO_PORT=8080' - ' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999' - ' {}:{} serve' + "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" + " --mount type=volume,source=batching_model_volume,target=/opt/ml/model,readonly" + " -e SAGEMAKER_TFS_ENABLE_BATCHING=true" + " -e SAGEMAKER_TFS_MAX_BATCH_SIZE=16" + " -e SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS=500" + " -e SAGEMAKER_TFS_NUM_BATCH_THREADS=100" + " -e SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES=1" + " -e SAGEMAKER_TFS_DEFAULT_MODEL_NAME=half_plus_three" + " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" + " -e SAGEMAKER_BIND_TO_PORT=8080" + " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" + " {}:{} serve" ).format(runtime_config, docker_base_name, tag) proc = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT) lines_seen = { - 'max_batch_size { value: 16 }': 0, - 'batch_timeout_micros { value: 500 }': 0, - 'num_batch_threads { value: 100 }': 0, - 'max_enqueued_batches { value: 1 }': 0 + "max_batch_size { value: 16 }": 0, + "batch_timeout_micros { value: 500 }": 0, + "num_batch_threads { value: 100 }": 0, + "max_enqueued_batches { value: 1 }": 0 } for stdout_line in iter(proc.stdout.readline, ""): @@ -68,4 +68,4 @@ def test_run_tfs_with_batching_parameters(docker_base_name, tag, runtime_config) break finally: - subprocess.check_call('docker rm -f sagemaker-tensorflow-serving-test'.split()) + subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split()) diff --git a/test/integration/sagemaker/conftest.py b/test/integration/sagemaker/conftest.py index 24b264b9..d4864600 100644 --- a/test/integration/sagemaker/conftest.py +++ b/test/integration/sagemaker/conftest.py @@ -19,105 +19,105 @@ # these regions have some p2 and p3 instances, but not enough for automated testing NO_P2_REGIONS = [ - 'ca-central-1', - 'eu-central-1', - 'eu-west-2', - 'us-west-1', - 'eu-west-3', - 'eu-north-1', - 'sa-east-1', - 'ap-east-1', - 'me-south-1' + "ca-central-1", + "eu-central-1", + "eu-west-2", + "us-west-1", + "eu-west-3", + "eu-north-1", + "sa-east-1", + "ap-east-1", + "me-south-1" ] NO_P3_REGIONS = [ - 'ap-southeast-1', - 'ap-southeast-2', - 'ap-south-1', - 'ca-central-1', - 'eu-central-1', - 'eu-west-2', - 'us-west-1', - 'eu-west-3', - 'eu-north-1', - 'sa-east-1', - 'ap-east-1', - 'me-south-1' + "ap-southeast-1", + "ap-southeast-2", + "ap-south-1", + "a-central-1", + "eu-central-1", + "eu-west-2", + "us-west-1", + "eu-west-3", + "eu-north-1", + "sa-east-1", + "ap-east-1", + "me-south-1" ] def pytest_addoption(parser): - parser.addoption('--region', default='us-west-2') - parser.addoption('--registry') - parser.addoption('--repo') - parser.addoption('--versions') - parser.addoption('--instance-types') - parser.addoption('--accelerator-type') - parser.addoption('--tag') + parser.addoption("--region", default="us-west-2") + parser.addoption("--registry") + parser.addoption("--repo") + parser.addoption("--versions") + parser.addoption("--instance-types") + parser.addoption("--accelerator-type") + parser.addoption("--tag") def pytest_configure(config): - 
os.environ['TEST_REGION'] = config.getoption('--region') - os.environ['TEST_VERSIONS'] = config.getoption('--versions') or '1.11.1,1.12.0,1.13.0' - os.environ['TEST_INSTANCE_TYPES'] = (config.getoption('--instance-types') or - 'ml.m5.xlarge,ml.p2.xlarge') + os.environ["TEST_REGION"] = config.getoption("--region") + os.environ["TEST_VERSIONS"] = config.getoption("--versions") or "1.11.1,1.12.0,1.13.0" + os.environ["TEST_INSTANCE_TYPES"] = (config.getoption("--instance-types") or + "ml.m5.xlarge,ml.p2.xlarge") - os.environ['TEST_EI_VERSIONS'] = config.getoption('--versions') or '1.11,1.12' - os.environ['TEST_EI_INSTANCE_TYPES'] = (config.getoption('--instance-types') or - 'ml.m5.xlarge') + os.environ["TEST_EI_VERSIONS"] = config.getoption("--versions") or "1.11,1.12" + os.environ["TEST_EI_INSTANCE_TYPES"] = (config.getoption("--instance-types") or + "ml.m5.xlarge") - if config.getoption('--tag'): - os.environ['TEST_VERSIONS'] = config.getoption('--tag') - os.environ['TEST_EI_VERSIONS'] = config.getoption('--tag') + if config.getoption("--tag"): + os.environ["TEST_VERSIONS"] = config.getoption("--tag") + os.environ["TEST_EI_VERSIONS"] = config.getoption("--tag") -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def region(request): - return request.config.getoption('--region') + return request.config.getoption("--region") -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def registry(request, region): - if request.config.getoption('--registry'): - return request.config.getoption('--registry') + if request.config.getoption("--registry"): + return request.config.getoption("--registry") sts = boto3.client( - 'sts', + "sts", region_name=region, - endpoint_url='https://sts.{}.amazonaws.com'.format(region) + endpoint_url="https://sts.{}.amazonaws.com".format(region) ) - return sts.get_caller_identity()['Account'] + return sts.get_caller_identity()["Account"] -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def boto_session(region): return boto3.Session(region_name=region) -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def sagemaker_client(boto_session): - return boto_session.client('sagemaker') + return boto_session.client("sagemaker") -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def sagemaker_runtime_client(boto_session): - return boto_session.client('runtime.sagemaker') + return boto_session.client("runtime.sagemaker") def unique_name_from_base(base, max_length=63): - unique = '%04x' % random.randrange(16 ** 4) # 4-digit hex + unique = "%04x" % random.randrange(16 ** 4) # 4-digit hex ts = str(int(time.time())) available_length = max_length - 2 - len(ts) - len(unique) trimmed = base[:available_length] - return '{}-{}-{}'.format(trimmed, ts, unique) + return "{}-{}-{}".format(trimmed, ts, unique) @pytest.fixture def model_name(): - return unique_name_from_base('test-tfs') + return unique_name_from_base("test-tfs") @pytest.fixture(autouse=True) def skip_gpu_instance_restricted_regions(region, instance_type): - if (region in NO_P2_REGIONS and instance_type.startswith('ml.p2')) or \ - (region in NO_P3_REGIONS and instance_type.startswith('ml.p3')): - pytest.skip('Skipping GPU test in region {}'.format(region)) + if (region in NO_P2_REGIONS and instance_type.startswith("ml.p2")) or \ + (region in NO_P3_REGIONS and instance_type.startswith("ml.p3")): + pytest.skip("Skipping GPU test in region {}".format(region)) diff --git a/test/integration/sagemaker/test_ei.py b/test/integration/sagemaker/test_ei.py 
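For reference, the name helper above budgets for a Unix timestamp plus a 4-hex-digit suffix so the result always fits max_length; repeated here as a standalone snippet with an illustrative output (the timestamp and suffix vary per run):

    import random
    import time

    def unique_name_from_base(base, max_length=63):
        # Same logic as the conftest helper above: trim the base so that the epoch
        # timestamp and the 4-hex-digit random suffix always fit within max_length.
        unique = "%04x" % random.randrange(16 ** 4)
        ts = str(int(time.time()))
        available_length = max_length - 2 - len(ts) - len(unique)
        return "{}-{}-{}".format(base[:available_length], ts, unique)

    print(unique_name_from_base("test-tfs"))  # e.g. "test-tfs-1593561600-3fa2" (illustrative only)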
index e91bad11..c7244e80 100644 --- a/test/integration/sagemaker/test_ei.py +++ b/test/integration/sagemaker/test_ei.py @@ -17,23 +17,23 @@ import util -EI_SUPPORTED_REGIONS = ['us-east-1', 'us-east-2', 'us-west-2', - 'eu-west-1', 'ap-northeast-1', 'ap-northeast-2'] +EI_SUPPORTED_REGIONS = ["us-east-1", "us-east-2", "us-west-2", + "eu-west-1", "ap-northeast-1", "ap-northeast-2"] -@pytest.fixture(params=os.environ['TEST_EI_VERSIONS'].split(',')) +@pytest.fixture(params=os.environ["TEST_EI_VERSIONS"].split(",")) def version(request): return request.param @pytest.fixture def repo(request): - return request.config.getoption('--repo') or 'sagemaker-tensorflow-serving-eia' + return request.config.getoption("--repo") or "sagemaker-tensorflow-serving-eia" @pytest.fixture def tag(request, version): - return request.config.getoption('--tag') or f'{version}-cpu' + return request.config.getoption("--tag") or f"{version}-cpu" @pytest.fixture @@ -41,37 +41,37 @@ def image_uri(registry, region, repo, tag): return util.image_uri(registry, region, repo, tag) -@pytest.fixture(params=os.environ['TEST_EI_INSTANCE_TYPES'].split(',')) +@pytest.fixture(params=os.environ["TEST_EI_INSTANCE_TYPES"].split(",")) def instance_type(request, region): return request.param -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def accelerator_type(request): - return request.config.getoption('--accelerator-type') or 'ml.eia1.medium' + return request.config.getoption("--accelerator-type") or "ml.eia1.medium" -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def model_data(region): - return ('s3://sagemaker-sample-data-{}/tensorflow/model' - '/resnet/resnet_50_v2_fp32_NCHW.tar.gz').format(region) + return ("s3://sagemaker-sample-data-{}/tensorflow/model" + "/resnet/resnet_50_v2_fp32_NCHW.tar.gz").format(region) @pytest.fixture def input_data(): - return {'instances': [[[[random.random() for _ in range(3)] for _ in range(3)]]]} + return {"instances": [[[[random.random() for _ in range(3)] for _ in range(3)]]]} @pytest.fixture def skip_if_no_accelerator(accelerator_type): if accelerator_type is None: - pytest.skip('Skipping because accelerator type was not provided') + pytest.skip("Skipping because accelerator type was not provided") @pytest.fixture def skip_if_non_supported_ei_region(region): if region not in EI_SUPPORTED_REGIONS: - pytest.skip('EI is not supported in {}'.format(region)) + pytest.skip("EI is not supported in {}".format(region)) @pytest.mark.skip_if_non_supported_ei_region() diff --git a/test/integration/sagemaker/test_tfs.py b/test/integration/sagemaker/test_tfs.py index 436351d8..2f67c0e9 100644 --- a/test/integration/sagemaker/test_tfs.py +++ b/test/integration/sagemaker/test_tfs.py @@ -16,27 +16,27 @@ import util -NON_P3_REGIONS = ['ap-southeast-1', 'ap-southeast-2', 'ap-south-1', - 'ca-central-1', 'eu-central-1', 'eu-west-2', 'us-west-1'] +NON_P3_REGIONS = ["ap-southeast-1", "ap-southeast-2", "ap-south-1", + "ca-central-1", "eu-central-1", "eu-west-2", "us-west-1"] -@pytest.fixture(params=os.environ['TEST_VERSIONS'].split(',')) +@pytest.fixture(params=os.environ["TEST_VERSIONS"].split(",")) def version(request): return request.param -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def repo(request): - return request.config.getoption('--repo') or 'sagemaker-tensorflow-serving' + return request.config.getoption("--repo") or "sagemaker-tensorflow-serving" @pytest.fixture def tag(request, version, instance_type): - if request.config.getoption('--tag'): - return 
request.config.getoption('--tag') + if request.config.getoption("--tag"): + return request.config.getoption("--tag") - arch = 'gpu' if instance_type.startswith('ml.p') else 'cpu' - return f'{version}-{arch}' + arch = "gpu" if instance_type.startswith("ml.p") else "cpu" + return f"{version}-{arch}" @pytest.fixture @@ -44,21 +44,21 @@ def image_uri(registry, region, repo, tag): return util.image_uri(registry, region, repo, tag) -@pytest.fixture(params=os.environ['TEST_INSTANCE_TYPES'].split(',')) +@pytest.fixture(params=os.environ["TEST_INSTANCE_TYPES"].split(",")) def instance_type(request, region): return request.param -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def accelerator_type(): return None -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def tfs_model(region, boto_session): return util.find_or_put_model_data(region, boto_session, - 'test/data/tfs-model.tar.gz') + "test/data/tfs-model.tar.gz") @pytest.fixture(scope='session') diff --git a/test/integration/sagemaker/util.py b/test/integration/sagemaker/util.py index 9118fad7..f5247b17 100644 --- a/test/integration/sagemaker/util.py +++ b/test/integration/sagemaker/util.py @@ -20,15 +20,15 @@ import time logger = logging.getLogger(__name__) -BATCH_CSV = os.path.join('test', 'data', 'batch.csv') +BATCH_CSV = os.path.join("test", "data", "batch.csv") def image_uri(registry, region, repo, tag): - return f'{registry}.dkr.ecr.{region}.amazonaws.com/{repo}:{tag}' + return f"{registry}.dkr.ecr.{region}.amazonaws.com/{repo}:{tag}" def _execution_role(boto_session): - return boto_session.resource('iam').Role('SageMakerRole').arn + return boto_session.resource("iam").Role("SageMakerRole").arn @contextlib.contextmanager @@ -37,81 +37,81 @@ def sagemaker_model(boto_session, sagemaker_client, image_uri, model_name, model ModelName=model_name, ExecutionRoleArn=_execution_role(boto_session), PrimaryContainer={ - 'Image': image_uri, - 'ModelDataUrl': model_data + "Image": image_uri, + "ModelDataUrl": model_data }) try: yield model finally: - logger.info('deleting model %s', model_name) + logger.info("deleting model %s", model_name) sagemaker_client.delete_model(ModelName=model_name) def _production_variants(model_name, instance_type, accelerator_type): production_variants = [{ - 'VariantName': 'AllTraffic', - 'ModelName': model_name, - 'InitialInstanceCount': 1, - 'InstanceType': instance_type + "VariantName": "AllTraffic", + "ModelName": model_name, + "InitialInstanceCount": 1, + "InstanceType": instance_type }] if accelerator_type: - production_variants[0]['AcceleratorType'] = accelerator_type + production_variants[0]["AcceleratorType"] = accelerator_type return production_variants def _test_bucket(region, boto_session): sts = boto_session.client( - 'sts', + "sts", region_name=region, - endpoint_url='https://sts.{}.amazonaws.com'.format(region) + endpoint_url="https://sts.{}.amazonaws.com".format(region) ) - account = sts.get_caller_identity()['Account'] - return f'sagemaker-{region}-{account}' + account = sts.get_caller_identity()["Account"] + return f"sagemaker-{region}-{account}" def find_or_put_model_data(region, boto_session, local_path): model_file = os.path.basename(local_path) bucket = _test_bucket(region, boto_session) - key = f'test-tfs/{model_file}' + key = f"test-tfs/{model_file}" - s3 = boto_session.client('s3', region) + s3 = boto_session.client("s3", region) try: s3.head_bucket(Bucket=bucket) except botocore.exceptions.ClientError as e: - if e.response['Error']['Code'] != '404': + if 
e.response["Error"]["Code"] != "404": raise # bucket doesn't exist, create it - if region == 'us-east-1': + if region == "us-east-1": s3.create_bucket(Bucket=bucket) else: s3.create_bucket(Bucket=bucket, - CreateBucketConfiguration={'LocationConstraint': region}) + CreateBucketConfiguration={"LocationConstraint": region}) try: s3.head_object(Bucket=bucket, Key=key) except botocore.exceptions.ClientError as e: - if e.response['Error']['Code'] != '404': + if e.response["Error"]["Code"] != "404": raise # file doesn't exist - upload it s3.upload_file(local_path, bucket, key) - return f's3://{bucket}/{key}' + return f"s3://{bucket}/{key}" @contextlib.contextmanager def sagemaker_endpoint(sagemaker_client, model_name, instance_type, accelerator_type=None): - logger.info('creating endpoint %s', model_name) + logger.info("creating endpoint %s", model_name) # Add jitter so we can run tests in parallel without running into service side limits. delay = round(random.random()*5, 3) - logger.info('waiting for {} seconds'.format(delay)) + logger.info("waiting for {} seconds".format(delay)) time.sleep(delay) production_variants = _production_variants(model_name, instance_type, accelerator_type) @@ -121,74 +121,74 @@ def sagemaker_endpoint(sagemaker_client, model_name, instance_type, accelerator_ sagemaker_client.create_endpoint(EndpointName=model_name, EndpointConfigName=model_name) try: - sagemaker_client.get_waiter('endpoint_in_service').wait(EndpointName=model_name) + sagemaker_client.get_waiter("endpoint_in_service").wait(EndpointName=model_name) finally: - status = sagemaker_client.describe_endpoint(EndpointName=model_name)['EndpointStatus'] - if status != 'InService': - raise ValueError(f'failed to create endpoint {model_name}') + status = sagemaker_client.describe_endpoint(EndpointName=model_name)["EndpointStatus"] + if status != "InService": + raise ValueError(f"failed to create endpoint {model_name}") try: yield model_name # return the endpoint name finally: - logger.info('deleting endpoint and endpoint config %s', model_name) + logger.info("deleting endpoint and endpoint config %s", model_name) sagemaker_client.delete_endpoint(EndpointName=model_name) sagemaker_client.delete_endpoint_config(EndpointConfigName=model_name) def _create_transform_job_request(model_name, batch_output, batch_input, instance_type): return { - 'TransformJobName': model_name, - 'ModelName': model_name, - 'BatchStrategy': 'MultiRecord', - 'TransformOutput': { - 'S3OutputPath': batch_output + "TransformJobName": model_name, + "ModelName": model_name, + "BatchStrategy": "MultiRecord", + "TransformOutput": { + "S3OutputPath": batch_output }, - 'TransformInput': { - 'DataSource': { - 'S3DataSource': { - 'S3DataType': 'S3Prefix', - 'S3Uri': batch_input + "TransformInput": { + "DataSource": { + "S3DataSource": { + "S3DataType": "S3Prefix", + "S3Uri": batch_input } }, - 'ContentType': 'text/csv', - 'SplitType': 'Line', - 'CompressionType': 'None' + "ContentType": "text/csv", + "SplitType": "Line", + "CompressionType": "None" }, - 'TransformResources': { - 'InstanceType': instance_type, - 'InstanceCount': 1 + "TransformResources": { + "InstanceType": instance_type, + "InstanceCount": 1 } } def _read_batch_output(region, boto_session, bucket, model_name): - s3 = boto_session.client('s3', region) - output_file = f'/tmp/{model_name}.out' - s3.download_file(bucket, f'output/{model_name}/batch.csv.out', output_file) - return json.loads(open(output_file, 'r').read())['predictions'] + s3 = boto_session.client("s3", region) + output_file 
= f"/tmp/{model_name}.out" + s3.download_file(bucket, f"output/{model_name}/batch.csv.out", output_file) + return json.loads(open(output_file, "r").read())["predictions"] def _wait_for_transform_job(region, boto_session, sagemaker_client, model_name, poll, timeout): - status = sagemaker_client.describe_transform_job(TransformJobName=model_name)['TransformJobStatus'] + status = sagemaker_client.describe_transform_job(TransformJobName=model_name)["TransformJobStatus"] job_runtime = 0 - while status == 'InProgress': + while status == "InProgress": - logger.info(f'Waiting for batch transform job {model_name} to finish') + logger.info(f"Waiting for batch transform job {model_name} to finish") time.sleep(poll) job_runtime += poll if job_runtime > timeout: - raise ValueError(f'Batch transform job {model_name} exceeded maximum runtime {timeout} seconds') + raise ValueError(f"Batch transform job {model_name} exceeded maximum runtime {timeout} seconds") - status = sagemaker_client.describe_transform_job(TransformJobName=model_name)['TransformJobStatus'] - if status == 'Completed': + status = sagemaker_client.describe_transform_job(TransformJobName=model_name)["TransformJobStatus"] + if status == "Completed": return _read_batch_output(region=region, boto_session=boto_session, bucket=_test_bucket(region, boto_session), model_name=model_name) - if status == 'Failed': - raise ValueError(f'Failed to execute batch transform job {model_name}') - if status in ['Stopped', 'Stopping']: - raise ValueError(f'Batch transform job {model_name} was stopped') + if status == "Failed": + raise ValueError(f"Failed to execute batch transform job {model_name}") + if status in ["Stopped", "Stopping"]: + raise ValueError(f"Batch transform job {model_name} was stopped") def run_batch_transform_job(region, boto_session, model_data, image_uri, @@ -198,7 +198,7 @@ def run_batch_transform_job(region, boto_session, model_data, image_uri, with sagemaker_model(boto_session, sagemaker_client, image_uri, model_name, model_data): batch_input = find_or_put_model_data(region, boto_session, BATCH_CSV) bucket = _test_bucket(region, boto_session) - batch_output = f's3://{bucket}/output/{model_name}' + batch_output = f"s3://{bucket}/output/{model_name}" request = _create_transform_job_request( model_name=model_name, batch_input=batch_input, @@ -216,10 +216,10 @@ def run_batch_transform_job(region, boto_session, model_data, image_uri, def invoke_endpoint(sagemaker_runtime_client, endpoint_name, input_data): response = sagemaker_runtime_client.invoke_endpoint(EndpointName=endpoint_name, - ContentType='application/json', + ContentType="application/json", Body=json.dumps(input_data)) - result = json.loads(response['Body'].read().decode()) - assert result['predictions'] is not None + result = json.loads(response["Body"].read().decode()) + assert result["predictions"] is not None return result From c2dc3563a601ae2091b5cce28119a59bff334ccc Mon Sep 17 00:00:00 2001 From: Chuyang Deng Date: Tue, 30 Jun 2020 17:33:42 -0700 Subject: [PATCH 2/5] fix: change single-quotes to double-quotes --- docker/2.1/__init__.py | 0 docker/2.1/deep_learning_container.py | 109 ----- docker/2.1/dockerd-entrypoint.py | 22 - docker/2.1/sagemaker/__init__.py | 12 - docker/2.1/sagemaker/multi_model_utils.py | 52 --- docker/2.1/sagemaker/nginx.conf.template | 64 --- docker/2.1/sagemaker/python_service.py | 397 ------------------ docker/2.1/sagemaker/serve | 3 - docker/2.1/sagemaker/serve.py | 308 -------------- docker/2.1/sagemaker/tensorflow-serving.js | 231 ---------- 
docker/2.1/sagemaker/tfs_utils.py | 209 --------- docker/build_artifacts/sagemaker/serve.py | 2 +- .../sagemaker/tensorflow-serving.js | 84 ++-- .../local/multi_model_endpoint_test_utils.py | 11 +- test/integration/local/test_container.py | 4 +- .../local/test_pre_post_processing.py | 2 +- .../local/test_pre_post_processing_mme.py | 2 +- 17 files changed, 52 insertions(+), 1460 deletions(-) delete mode 100644 docker/2.1/__init__.py delete mode 100644 docker/2.1/deep_learning_container.py delete mode 100644 docker/2.1/dockerd-entrypoint.py delete mode 100644 docker/2.1/sagemaker/__init__.py delete mode 100644 docker/2.1/sagemaker/multi_model_utils.py delete mode 100644 docker/2.1/sagemaker/nginx.conf.template delete mode 100644 docker/2.1/sagemaker/python_service.py delete mode 100755 docker/2.1/sagemaker/serve delete mode 100644 docker/2.1/sagemaker/serve.py delete mode 100644 docker/2.1/sagemaker/tensorflow-serving.js delete mode 100644 docker/2.1/sagemaker/tfs_utils.py diff --git a/docker/2.1/__init__.py b/docker/2.1/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/docker/2.1/deep_learning_container.py b/docker/2.1/deep_learning_container.py deleted file mode 100644 index 1e82e61e..00000000 --- a/docker/2.1/deep_learning_container.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
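The util.py helpers above (sagemaker_model, sagemaker_endpoint, invoke_endpoint) are composed by the SageMaker integration tests; the actual test bodies sit outside this hunk, so the following is only a sketch of the typical wiring, reusing the fixture names from the conftest and test_tfs fixtures shown earlier:

    import util

    # Hypothetical endpoint test: the fixture wiring is assumed, while the helper
    # signatures match the util.py definitions shown above.
    def test_invoke_endpoint(boto_session, sagemaker_client, sagemaker_runtime_client,
                             image_uri, instance_type, model_name, tfs_model):
        input_data = {"instances": [1.0, 2.0, 5.0]}
        with util.sagemaker_model(boto_session, sagemaker_client, image_uri, model_name, tfs_model):
            with util.sagemaker_endpoint(sagemaker_client, model_name, instance_type) as endpoint:
                result = util.invoke_endpoint(sagemaker_runtime_client, endpoint, input_data)
                assert result["predictions"]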
-import re -import json -import logging -import requests - - -def _validate_instance_id(instance_id): - """ - Validate instance ID - """ - instance_id_regex = r"^(i-\S{17})" - compiled_regex = re.compile(instance_id_regex) - match = compiled_regex.match(instance_id) - - if not match: - return None - - return match.group(1) - - -def _retrieve_instance_id(): - """ - Retrieve instance ID from instance metadata service - """ - instance_id = None - url = "http://169.254.169.254/latest/meta-data/instance-id" - response = requests_helper(url, timeout=0.1) - - if response is not None: - instance_id = _validate_instance_id(response.text) - - return instance_id - - -def _retrieve_instance_region(): - """ - Retrieve instance region from instance metadata service - """ - region = None - valid_regions = ['ap-northeast-1', 'ap-northeast-2', 'ap-southeast-1', 'ap-southeast-2', - 'ap-south-1', 'ca-central-1', 'eu-central-1', 'eu-north-1', - 'eu-west-1', 'eu-west-2', 'eu-west-3', 'sa-east-1', - 'us-east-1', 'us-east-2', 'us-west-1', 'us-west-2'] - - url = "http://169.254.169.254/latest/dynamic/instance-identity/document" - response = requests_helper(url, timeout=0.1) - - if response is not None: - response_json = json.loads(response.text) - - if response_json['region'] in valid_regions: - region = response_json['region'] - - return region - - -def query_bucket(): - """ - GET request on an empty object from an Amazon S3 bucket - """ - response = None - instance_id = _retrieve_instance_id() - region = _retrieve_instance_region() - - if instance_id is not None and region is not None: - url = ("https://aws-deep-learning-containers-{0}.s3.{0}.amazonaws.com" - "/dlc-containers.txt?x-instance-id={1}".format(region, instance_id)) - response = requests_helper(url, timeout=0.2) - - logging.debug("Query bucket finished: {}".format(response)) - - return response - - -def requests_helper(url, timeout): - response = None - try: - response = requests.get(url, timeout=timeout) - except requests.exceptions.RequestException as e: - logging.error("Request exception: {}".format(e)) - - return response - - -def main(): - """ - Invoke bucket query - """ - # Logs are not necessary for normal run. Remove this line while debugging. - logging.getLogger().disabled = True - - logging.basicConfig(level=logging.ERROR) - query_bucket() - - -if __name__ == '__main__': - main() diff --git a/docker/2.1/dockerd-entrypoint.py b/docker/2.1/dockerd-entrypoint.py deleted file mode 100644 index fc4ce388..00000000 --- a/docker/2.1/dockerd-entrypoint.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
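A quick illustration of the instance-id check in the telemetry script being removed above: the pattern requires "i-" followed by 17 non-whitespace characters (only the first 17 are captured). The example IDs below are made up:

    import re

    pattern = re.compile(r"^(i-\S{17})")
    assert pattern.match("i-0123456789abcdef0") is not None  # 17 chars after "i-": accepted
    assert pattern.match("i-12345") is None                  # too short: rejected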
- -import os.path -import subprocess -import shlex -import sys - -if not os.path.exists("/opt/ml/input/config"): - subprocess.call(["python", "/usr/local/bin/deep_learning_container.py", "&>/dev/null", "&"]) - -subprocess.check_call(shlex.split(' '.join(sys.argv[1:]))) diff --git a/docker/2.1/sagemaker/__init__.py b/docker/2.1/sagemaker/__init__.py deleted file mode 100644 index 04fbf5d9..00000000 --- a/docker/2.1/sagemaker/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. diff --git a/docker/2.1/sagemaker/multi_model_utils.py b/docker/2.1/sagemaker/multi_model_utils.py deleted file mode 100644 index 5d2c47f4..00000000 --- a/docker/2.1/sagemaker/multi_model_utils.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
-import fcntl -import signal -import time -from contextlib import contextmanager - -MODEL_CONFIG_FILE = "/sagemaker/model-config.cfg" -DEFAULT_LOCK_FILE = "/sagemaker/lock-file.lock" - - -@contextmanager -def lock(path=DEFAULT_LOCK_FILE): - f = open(path, "w") - fd = f.fileno() - fcntl.lockf(fd, fcntl.LOCK_EX) - - try: - yield - finally: - time.sleep(1) - fcntl.lockf(fd, fcntl.LOCK_UN) - - -@contextmanager -def timeout(seconds=60): - def _raise_timeout_error(signum, frame): - raise Exception(408, "Timed out after {} seconds".format(seconds)) - - try: - signal.signal(signal.SIGALRM, _raise_timeout_error) - signal.alarm(seconds) - yield - finally: - signal.alarm(0) - - -class MultiModelException(Exception): - def __init__(self, code, msg): - Exception.__init__(self, code, msg) - self.code = code - self.msg = msg diff --git a/docker/2.1/sagemaker/nginx.conf.template b/docker/2.1/sagemaker/nginx.conf.template deleted file mode 100644 index 5ccfed3d..00000000 --- a/docker/2.1/sagemaker/nginx.conf.template +++ /dev/null @@ -1,64 +0,0 @@ -load_module modules/ngx_http_js_module.so; - -worker_processes auto; -daemon off; -pid /tmp/nginx.pid; -error_log /dev/stderr %NGINX_LOG_LEVEL%; - -worker_rlimit_nofile 4096; - -events { - worker_connections 2048; -} - -http { - include /etc/nginx/mime.types; - default_type application/json; - access_log /dev/stdout combined; - js_include tensorflow-serving.js; - - upstream tfs_upstream { - server localhost:%TFS_REST_PORT%; - } - - upstream gunicorn_upstream { - server unix:/tmp/gunicorn.sock fail_timeout=1; - } - - server { - listen %NGINX_HTTP_PORT% deferred; - client_max_body_size 0; - client_body_buffer_size 100m; - subrequest_output_buffer_size 100m; - - set $tfs_version %TFS_VERSION%; - set $default_tfs_model %TFS_DEFAULT_MODEL_NAME%; - - location /tfs { - rewrite ^/tfs/(.*) /$1 break; - proxy_redirect off; - proxy_pass_request_headers off; - proxy_set_header Content-Type 'application/json'; - proxy_set_header Accept 'application/json'; - proxy_pass http://tfs_upstream; - } - - location /ping { - %FORWARD_PING_REQUESTS%; - } - - location /invocations { - %FORWARD_INVOCATION_REQUESTS%; - } - - location /models { - proxy_pass http://gunicorn_upstream/models; - } - - location / { - return 404 '{"error": "Not Found"}'; - } - - keepalive_timeout 3; - } -} diff --git a/docker/2.1/sagemaker/python_service.py b/docker/2.1/sagemaker/python_service.py deleted file mode 100644 index 0014b6dd..00000000 --- a/docker/2.1/sagemaker/python_service.py +++ /dev/null @@ -1,397 +0,0 @@ -# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
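The lock() and timeout() context managers above are what python_service.py (deleted next) relies on when multiple gunicorn workers race to load models; a small usage sketch, where reserve_port and wait_until are invented names and the fcntl lock file path assumes the container filesystem:

    from multi_model_utils import MultiModelException, lock, timeout

    def reserve_port(free_ports):
        # lock() serializes access to the shared port pool across worker processes.
        with lock():
            if not free_ports:
                raise MultiModelException(507, "no available ports to load the model")
            return free_ports.pop()

    def wait_until(predicate, seconds=60):
        # timeout() raises after `seconds`, so a stuck TFS start cannot hang the loader forever.
        with timeout(seconds=seconds):
            while not predicate():
                pass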
-import bisect -import importlib.util -import json -import logging -import os -import subprocess -import time - -import falcon -import requests - -from multi_model_utils import lock, timeout, MultiModelException -import tfs_utils - -SAGEMAKER_MULTI_MODEL_ENABLED = os.environ.get("SAGEMAKER_MULTI_MODEL", "false").lower() == "true" -INFERENCE_SCRIPT_PATH = "/opt/ml/{}/code/inference.py".format("models" - if SAGEMAKER_MULTI_MODEL_ENABLED - else "model") -PYTHON_PROCESSING_ENABLED = os.path.exists(INFERENCE_SCRIPT_PATH) -SAGEMAKER_BATCHING_ENABLED = os.environ.get("SAGEMAKER_TFS_ENABLE_BATCHING", "false").lower() -MODEL_CONFIG_FILE_PATH = "/sagemaker/model-config.cfg" -TFS_GRPC_PORT = os.environ.get("TFS_GRPC_PORT") -TFS_REST_PORT = os.environ.get("TFS_REST_PORT") -SAGEMAKER_TFS_PORT_RANGE = os.environ.get("SAGEMAKER_SAFE_PORT_RANGE") - - -logging.basicConfig(level=logging.INFO) -log = logging.getLogger(__name__) - -CUSTOM_ATTRIBUTES_HEADER = "X-Amzn-SageMaker-Custom-Attributes" - - -def default_handler(data, context): - """A default inference request handler that directly send post request to TFS rest port with - un-processed data and return un-processed response - - :param data: input data - :param context: context instance that contains tfs_rest_uri - :return: inference response from TFS model server - """ - response = requests.post(context.rest_uri, data=data) - return response.content, context.accept_header - - -class PythonServiceResource: - - def __init__(self): - if SAGEMAKER_MULTI_MODEL_ENABLED: - self._model_tfs_rest_port = {} - self._model_tfs_grpc_port = {} - self._model_tfs_pid = {} - self._tfs_ports = self._parse_sagemaker_port_range(SAGEMAKER_TFS_PORT_RANGE) - else: - self._tfs_grpc_port = TFS_GRPC_PORT - self._tfs_rest_port = TFS_REST_PORT - - self._tfs_enable_batching = SAGEMAKER_BATCHING_ENABLED == "true" - self._tfs_default_model_name = os.environ.get("TFS_DEFAULT_MODEL_NAME", "None") - - if PYTHON_PROCESSING_ENABLED: - self._handler, self._input_handler, self._output_handler = self._import_handlers() - self._handlers = self._make_handler(self._handler, - self._input_handler, - self._output_handler) - else: - self._handlers = default_handler - - def on_post(self, req, res, model_name=None): - log.info(req.uri) - if model_name or "invocations" in req.uri: - self._handle_invocation_post(req, res, model_name) - else: - data = json.loads(req.stream.read().decode("utf-8")) - self._handle_load_model_post(res, data) - - def _parse_sagemaker_port_range(self, port_range): - lower, upper = port_range.split('-') - lower = int(lower) - upper = lower + int((int(upper) - lower) * 0.9) # only utilizing 90% of the ports - rest_port = lower - grpc_port = (lower + upper) // 2 - tfs_ports = { - "rest_port": [port for port in range(rest_port, grpc_port)], - "grpc_port": [port for port in range(grpc_port, upper)], - } - return tfs_ports - - def _ports_available(self): - with lock(): - rest_ports = self._tfs_ports["rest_port"] - grpc_ports = self._tfs_ports["grpc_port"] - return len(rest_ports) > 0 and len(grpc_ports) > 0 - - def _handle_load_model_post(self, res, data): # noqa: C901 - model_name = data["model_name"] - base_path = data["url"] - - # model is already loaded - if model_name in self._model_tfs_pid: - res.status = falcon.HTTP_409 - res.body = json.dumps({ - "error": "Model {} is already loaded.".format(model_name) - }) - - # check if there are available ports - if not self._ports_available(): - res.status = falcon.HTTP_507 - res.body = json.dumps({ - "error": "Memory exhausted: no 
available ports to load the model." - }) - with lock(): - self._model_tfs_rest_port[model_name] = self._tfs_ports["rest_port"].pop() - self._model_tfs_grpc_port[model_name] = self._tfs_ports["grpc_port"].pop() - - # validate model files are in the specified base_path - if self.validate_model_dir(base_path): - try: - tfs_config = tfs_utils.create_tfs_config_individual_model(model_name, base_path) - tfs_config_file = "/sagemaker/tfs-config/{}/model-config.cfg".format(model_name) - log.info("tensorflow serving model config: \n%s\n", tfs_config) - os.makedirs(os.path.dirname(tfs_config_file)) - with open(tfs_config_file, "w") as f: - f.write(tfs_config) - - batching_config_file = "/sagemaker/batching/{}/batching-config.cfg".format( - model_name) - if self._tfs_enable_batching: - tfs_utils.create_batching_config(batching_config_file) - - cmd = tfs_utils.tfs_command( - self._model_tfs_grpc_port[model_name], - self._model_tfs_rest_port[model_name], - tfs_config_file, - self._tfs_enable_batching, - batching_config_file, - ) - p = subprocess.Popen(cmd.split()) - self._wait_for_model(model_name) - - log.info("started tensorflow serving (pid: %d)", p.pid) - # update model name <-> tfs pid map - self._model_tfs_pid[model_name] = p - - res.status = falcon.HTTP_200 - res.body = json.dumps({ - "success": - "Successfully loaded model {}, " - "listening on rest port {} " - "and grpc port {}.".format(model_name, - self._model_tfs_rest_port, - self._model_tfs_grpc_port,) - }) - except MultiModelException as multi_model_exception: - self._cleanup_config_file(tfs_config_file) - self._cleanup_config_file(batching_config_file) - if multi_model_exception.code == 409: - res.status = falcon.HTTP_409 - res.body = multi_model_exception.msg - elif multi_model_exception.code == 408: - res.status = falcon.HTTP_408 - res.body = multi_model_exception.msg - else: - raise MultiModelException(falcon.HTTP_500, multi_model_exception.msg) - except FileExistsError as e: - res.status = falcon.HTTP_409 - res.body = json.dumps({ - "error": "Model {} is already loaded. 
{}".format(model_name, str(e)) - }) - except OSError as os_error: - self._cleanup_config_file(tfs_config_file) - self._cleanup_config_file(batching_config_file) - if os_error.errno == 12: - raise MultiModelException(falcon.HTTP_507, - "Memory exhausted: " - "not enough memory to start TFS instance") - else: - raise MultiModelException(falcon.HTTP_500, os_error.strerror) - else: - res.status = falcon.HTTP_404 - res.body = json.dumps({ - "error": - "Could not find valid base path {} for servable {}".format(base_path, - model_name) - }) - - def _cleanup_config_file(self, config_file): - if os.path.exists(config_file): - os.remove(config_file) - - def _wait_for_model(self, model_name): - url = "http://localhost:{}/v1/models/{}".format(self._model_tfs_rest_port[model_name], - model_name) - with timeout(): - while True: - time.sleep(0.5) - try: - response = requests.get(url) - if response.status_code == 200: - versions = json.loads(response.content)["model_version_status"] - if all(version["state"] == "AVAILABLE" for version in versions): - break - except ConnectionError: - log.exception("Failed to load models.") - - def _handle_invocation_post(self, req, res, model_name=None): - if SAGEMAKER_MULTI_MODEL_ENABLED: - if model_name: - if model_name not in self._model_tfs_rest_port: - res.status = falcon.HTTP_404 - res.body = json.dumps({ - "error": "Model {} is not loaded yet.".format(model_name) - }) - return - else: - log.info("model name: {}".format(model_name)) - rest_port = self._model_tfs_rest_port[model_name] - log.info("rest port: {}".format(str(self._model_tfs_rest_port[model_name]))) - grpc_port = self._model_tfs_grpc_port[model_name] - log.info("grpc port: {}".format(str(self._model_tfs_grpc_port[model_name]))) - data, context = tfs_utils.parse_request(req, rest_port, grpc_port, - self._tfs_default_model_name, - model_name) - else: - res.status = falcon.HTTP_400 - res.body = json.dumps({ - "error": "Invocation request does not contain model name." 
- }) - else: - data, context = tfs_utils.parse_request(req, self._tfs_rest_port, self._tfs_grpc_port, - self._tfs_default_model_name) - - try: - res.status = falcon.HTTP_200 - res.body, res.content_type = self._handlers(data, context) - except Exception as e: # pylint: disable=broad-except - log.exception("exception handling request: {}".format(e)) - res.status = falcon.HTTP_500 - res.body = json.dumps({ - "error": str(e) - }).encode("utf-8") # pylint: disable=E1101 - - def _import_handlers(self): - spec = importlib.util.spec_from_file_location("inference", INFERENCE_SCRIPT_PATH) - inference = importlib.util.module_from_spec(spec) - spec.loader.exec_module(inference) - - _custom_handler, _custom_input_handler, _custom_output_handler = None, None, None - if hasattr(inference, "handler"): - _custom_handler = inference.handler - elif hasattr(inference, "input_handler") and hasattr(inference, "output_handler"): - _custom_input_handler = inference.input_handler - _custom_output_handler = inference.output_handler - else: - raise NotImplementedError("Handlers are not implemented correctly in user script.") - - return _custom_handler, _custom_input_handler, _custom_output_handler - - def _make_handler(self, custom_handler, custom_input_handler, custom_output_handler): - if custom_handler: - return custom_handler - - def handler(data, context): - processed_input = custom_input_handler(data, context) - response = requests.post(context.rest_uri, data=processed_input) - return custom_output_handler(response, context) - - return handler - - def on_get(self, req, res, model_name=None): # pylint: disable=W0613 - if model_name is None: - models_info = {} - uri = "http://localhost:{}/v1/models/{}" - for model, port in self._model_tfs_rest_port.items(): - try: - info = json.loads(requests.get(uri.format(port, model)).content) - models_info[model] = info - except ValueError as e: - log.exception("exception handling request: {}".format(e)) - res.status = falcon.HTTP_500 - res.body = json.dumps({ - "error": str(e) - }).encode("utf-8") - res.status = falcon.HTTP_200 - res.body = json.dumps(models_info) - else: - if model_name not in self._model_tfs_rest_port: - res.status = falcon.HTTP_404 - res.body = json.dumps({ - "error": "Model {} is loaded yet.".format(model_name) - }).encode("utf-8") - else: - port = self._model_tfs_rest_port[model_name] - uri = "http://localhost:{}/v1/models/{}".format(port, model_name) - try: - info = requests.get(uri) - res.status = falcon.HTTP_200 - res.body = json.dumps({ - "model": info - }).encode("utf-8") - except ValueError as e: - log.exception("exception handling GET models request.") - res.status = falcon.HTTP_500 - res.body = json.dumps({ - "error": str(e) - }).encode("utf-8") - - def on_delete(self, req, res, model_name): # pylint: disable=W0613 - if model_name not in self._model_tfs_pid: - res.status = falcon.HTTP_404 - res.body = json.dumps({ - "error": "Model {} is not loaded yet".format(model_name) - }) - else: - try: - self._model_tfs_pid[model_name].kill() - os.remove("/sagemaker/tfs-config/{}/model-config.cfg".format(model_name)) - os.rmdir("/sagemaker/tfs-config/{}".format(model_name)) - release_rest_port = self._model_tfs_rest_port[model_name] - release_grpc_port = self._model_tfs_grpc_port[model_name] - with lock(): - bisect.insort(self._tfs_ports["rest_port"], release_rest_port) - bisect.insort(self._tfs_ports["grpc_port"], release_grpc_port) - del self._model_tfs_rest_port[model_name] - del self._model_tfs_grpc_port[model_name] - del 
self._model_tfs_pid[model_name] - res.status = falcon.HTTP_200 - res.body = json.dumps({ - "success": "Successfully unloaded model {}.".format(model_name) - }) - except OSError as error: - res.status = falcon.HTTP_500 - res.body = json.dumps({ - "error": str(error) - }).encode("utf-8") - - def validate_model_dir(self, model_path): - # model base path doesn't exits - if not os.path.exists(model_path): - return False - versions = [] - for _, dirs, _ in os.walk(model_path): - for dirname in dirs: - log.info("dirname: {}".format(dirname)) - if dirname.isdigit(): - versions.append(dirname) - return self.validate_model_versions(versions) - - def validate_model_versions(self, versions): - log.info(versions) - if not versions: - return False - for v in versions: - if v.isdigit(): - # TensorFlow model server will succeed with any versions found - # even if there are directories that's not a valid model version, - # the loading will succeed. - return True - return False - - -class PingResource: - def on_get(self, req, res): # pylint: disable=W0613 - res.status = falcon.HTTP_200 - - -class ServiceResources: - def __init__(self): - self._enable_python_processing = PYTHON_PROCESSING_ENABLED - self._enable_model_manager = SAGEMAKER_MULTI_MODEL_ENABLED - self._python_service_resource = PythonServiceResource() - self._ping_resource = PingResource() - - def add_routes(self, application): - application.add_route("/ping", self._ping_resource) - application.add_route("/invocations", self._python_service_resource) - - if self._enable_model_manager: - application.add_route("/models", self._python_service_resource) - application.add_route("/models/{model_name}", self._python_service_resource) - application.add_route("/models/{model_name}/invoke", self._python_service_resource) - - -app = falcon.API() -resources = ServiceResources() -resources.add_routes(app) diff --git a/docker/2.1/sagemaker/serve b/docker/2.1/sagemaker/serve deleted file mode 100755 index 9fac6a93..00000000 --- a/docker/2.1/sagemaker/serve +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -python3 /sagemaker/serve.py diff --git a/docker/2.1/sagemaker/serve.py b/docker/2.1/sagemaker/serve.py deleted file mode 100644 index 7a539fe6..00000000 --- a/docker/2.1/sagemaker/serve.py +++ /dev/null @@ -1,308 +0,0 @@ -# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
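For context, the validate_model_dir/validate_model_versions pair above only accepts a base path containing at least one all-digit subdirectory, which is how TensorFlow Serving marks model versions; a standalone restatement of that check (the path in the comment is the assumed test layout):

    import os

    def has_numeric_version(model_path):
        # Mirrors validate_model_dir above: loadable only if some subdirectory name is
        # all digits, e.g. /opt/ml/models/half_plus_three/123/saved_model.pb.
        if not os.path.exists(model_path):
            return False
        return any(d.isdigit() for _, dirs, _ in os.walk(model_path) for d in dirs)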
- -import logging -import os -import re -import signal -import subprocess -import tfs_utils - -from contextlib import contextmanager - -logging.basicConfig(level=logging.INFO) -log = logging.getLogger(__name__) - -JS_PING = "js_content ping" -JS_INVOCATIONS = "js_content invocations" -GUNICORN_PING = "proxy_pass http://gunicorn_upstream/ping" -GUNICORN_INVOCATIONS = "proxy_pass http://gunicorn_upstream/invocations" - -PYTHON_LIB_PATH = "/opt/ml/model/code/lib" -REQUIREMENTS_PATH = "/opt/ml/model/code/requirements.txt" -INFERENCE_PATH = "/opt/ml/model/code/inference.py" - - -class ServiceManager(object): - def __init__(self): - self._state = "initializing" - self._nginx = None - self._tfs = None - self._gunicorn = None - self._gunicorn_command = None - self._enable_python_service = os.path.exists(INFERENCE_PATH) - self._tfs_version = os.environ.get("SAGEMAKER_TFS_VERSION", "1.13") - self._nginx_http_port = os.environ.get("SAGEMAKER_BIND_TO_PORT", "8080") - self._nginx_loglevel = os.environ.get("SAGEMAKER_TFS_NGINX_LOGLEVEL", "error") - self._tfs_default_model_name = os.environ.get("SAGEMAKER_TFS_DEFAULT_MODEL_NAME", "None") - self._sagemaker_port_range = os.environ.get("SAGEMAKER_SAFE_PORT_RANGE", None) - self._tfs_config_path = "/sagemaker/model-config.cfg" - self._tfs_batching_config_path = "/sagemaker/batching-config.cfg" - - _enable_batching = os.environ.get("SAGEMAKER_TFS_ENABLE_BATCHING", "false").lower() - _enable_multi_model_endpoint = os.environ.get("SAGEMAKER_MULTI_MODEL", - "false").lower() - - if _enable_batching not in ["true", "false"]: - raise ValueError("SAGEMAKER_TFS_ENABLE_BATCHING must be 'true' or 'false'") - self._tfs_enable_batching = _enable_batching == "true" - - if _enable_multi_model_endpoint not in ["true", "false"]: - raise ValueError("SAGEMAKER_MULTI_MODEL must be 'true' or 'false'") - self._tfs_enable_multi_model_endpoint = _enable_multi_model_endpoint == "true" - - self._use_gunicorn = self._enable_python_service or self._tfs_enable_multi_model_endpoint - - if self._sagemaker_port_range is not None: - parts = self._sagemaker_port_range.split("-") - low = int(parts[0]) - hi = int(parts[1]) - if low + 2 > hi: - raise ValueError("not enough ports available in SAGEMAKER_SAFE_PORT_RANGE ({})" - .format(self._sagemaker_port_range)) - self._tfs_grpc_port = str(low) - self._tfs_rest_port = str(low + 1) - else: - # just use the standard default ports - self._tfs_grpc_port = "9000" - self._tfs_rest_port = "8501" - - # set environment variable for python service - os.environ["TFS_GRPC_PORT"] = self._tfs_grpc_port - os.environ["TFS_REST_PORT"] = self._tfs_rest_port - - def _create_tfs_config(self): - models = tfs_utils.find_models() - if not models: - raise ValueError("no SavedModel bundles found!") - - if self._tfs_default_model_name == "None": - default_model = os.path.basename(models[0]) - if default_model: - self._tfs_default_model_name = default_model - log.info("using default model name: {}".format(self._tfs_default_model_name)) - else: - log.info("no default model detected") - - # config (may) include duplicate 'config' keys, so we can't just dump a dict - config = "model_config_list: {\n" - for m in models: - config += " config: {\n" - config += " name: '{}',\n".format(os.path.basename(m)) - config += " base_path: '{}',\n".format(m) - config += " model_platform: 'tensorflow'\n" - config += " }\n" - config += "}\n" - - log.info("tensorflow serving model config: \n%s\n", config) - - with open("/sagemaker/model-config.cfg", "w") as f: - f.write(config) - - def 
_setup_gunicorn(self): - python_path_content = [] - python_path_option = "" - - if self._enable_python_service: - lib_path_exists = os.path.exists(PYTHON_LIB_PATH) - requirements_exists = os.path.exists(REQUIREMENTS_PATH) - python_path_content = ["/opt/ml/model/code"] - python_path_option = "--pythonpath " - - if lib_path_exists: - python_path_content.append(PYTHON_LIB_PATH) - - if requirements_exists: - if lib_path_exists: - log.warning("loading modules in '{}', ignoring requirements.txt" - .format(PYTHON_LIB_PATH)) - else: - log.info("installing packages from requirements.txt...") - pip_install_cmd = "pip3 install -r {}".format(REQUIREMENTS_PATH) - try: - subprocess.check_call(pip_install_cmd.split()) - except subprocess.CalledProcessError: - log.error("failed to install required packages, exiting.") - self._stop() - raise ChildProcessError("failed to install required packages.") - - gunicorn_command = ( - "gunicorn -b unix:/tmp/gunicorn.sock -k gevent --chdir /sagemaker " - "{}{} -e TFS_GRPC_PORT={} -e SAGEMAKER_MULTI_MODEL={} -e SAGEMAKER_SAFE_PORT_RANGE={} " - "python_service:app").format(python_path_option, ",".join(python_path_content), - self._tfs_grpc_port, self._tfs_enable_multi_model_endpoint, - self._sagemaker_port_range) - - log.info("gunicorn command: {}".format(gunicorn_command)) - self._gunicorn_command = gunicorn_command - - def _create_nginx_config(self): - template = self._read_nginx_template() - pattern = re.compile(r"%(\w+)%") - - template_values = { - "TFS_VERSION": self._tfs_version, - "TFS_REST_PORT": self._tfs_rest_port, - "TFS_DEFAULT_MODEL_NAME": self._tfs_default_model_name, - "NGINX_HTTP_PORT": self._nginx_http_port, - "NGINX_LOG_LEVEL": self._nginx_loglevel, - "FORWARD_PING_REQUESTS": GUNICORN_PING if self._use_gunicorn else JS_PING, - "FORWARD_INVOCATION_REQUESTS": GUNICORN_INVOCATIONS if self._use_gunicorn - else JS_INVOCATIONS, - } - - config = pattern.sub(lambda x: template_values[x.group(1)], template) - log.info("nginx config: \n%s\n", config) - - with open("/sagemaker/nginx.conf", "w") as f: - f.write(config) - - def _read_nginx_template(self): - with open("/sagemaker/nginx.conf.template", "r") as f: - template = f.read() - if not template: - raise ValueError("failed to read nginx.conf.template") - - return template - - def _start_tfs(self): - self._log_version("tensorflow_model_server --version', 'tensorflow version info:") - cmd = tfs_utils.tfs_command( - self._tfs_grpc_port, - self._tfs_rest_port, - self._tfs_config_path, - self._tfs_enable_batching, - self._tfs_batching_config_path, - ) - log.info("tensorflow serving command: {}".format(cmd)) - p = subprocess.Popen(cmd.split()) - log.info("started tensorflow serving (pid: %d)", p.pid) - self._tfs = p - - def _start_gunicorn(self): - self._log_version("gunicorn --version", "gunicorn version info:") - env = os.environ.copy() - env["TFS_DEFAULT_MODEL_NAME"] = self._tfs_default_model_name - p = subprocess.Popen(self._gunicorn_command.split(), env=env) - log.info("started gunicorn (pid: %d)", p.pid) - self._gunicorn = p - - def _start_nginx(self): - self._log_version("/usr/sbin/nginx -V", "nginx version info:") - p = subprocess.Popen("/usr/sbin/nginx -c /sagemaker/nginx.conf".split()) - log.info("started nginx (pid: %d)", p.pid) - self._nginx = p - - def _log_version(self, command, message): - try: - output = subprocess.check_output( - command.split(), - stderr=subprocess.STDOUT).decode("utf-8", "backslashreplace").strip() - log.info("{}\n{}".format(message, output)) - except 
subprocess.CalledProcessError: - log.warning("failed to run command: %s", command) - - def _stop(self, *args): # pylint: disable=W0613 - self._state = "stopping" - log.info("stopping services") - try: - os.kill(self._nginx.pid, signal.SIGQUIT) - except OSError: - pass - try: - if self._gunicorn: - os.kill(self._gunicorn.pid, signal.SIGTERM) - except OSError: - pass - try: - os.kill(self._tfs.pid, signal.SIGTERM) - except OSError: - pass - - self._state = "stopped" - log.info("stopped") - - def _wait_for_gunicorn(self): - while True: - if os.path.exists("/tmp/gunicorn.sock"): - log.info("gunicorn server is ready!") - return - - @contextmanager - def _timeout(self, seconds): - def _raise_timeout_error(signum, frame): - raise TimeoutError("time out after {} seconds".format(seconds)) - - try: - signal.signal(signal.SIGALRM, _raise_timeout_error) - signal.alarm(seconds) - yield - finally: - signal.alarm(0) - - def start(self): - log.info("starting services") - self._state = "starting" - signal.signal(signal.SIGTERM, self._stop) - - self._create_nginx_config() - - if self._tfs_enable_batching: - log.info("batching is enabled") - tfs_utils.create_batching_config(self._tfs_batching_config_path) - - if self._tfs_enable_multi_model_endpoint: - log.info("multi-model endpoint is enabled, TFS model servers will be started later") - else: - tfs_utils.create_tfs_config( - self._tfs_default_model_name, - self._tfs_config_path - ) - self._create_tfs_config() - self._start_tfs() - - if self._use_gunicorn: - self._setup_gunicorn() - self._start_gunicorn() - # make sure gunicorn is up - with self._timeout(seconds=30): - self._wait_for_gunicorn() - - self._start_nginx() - self._state = "started" - - while True: - pid, status = os.wait() - - if self._state != "started": - break - - if pid == self._nginx.pid: - log.warning("unexpected nginx exit (status: {}). restarting.".format(status)) - self._start_nginx() - - elif pid == self._tfs.pid: - log.warning( - "unexpected tensorflow serving exit (status: {}). restarting.".format(status)) - self._start_tfs() - - elif self._gunicorn and pid == self._gunicorn.pid: - log.warning("unexpected gunicorn exit (status: {}). restarting." - .format(status)) - self._start_gunicorn() - - self._stop() - - -if __name__ == "__main__": - ServiceManager().start() diff --git a/docker/2.1/sagemaker/tensorflow-serving.js b/docker/2.1/sagemaker/tensorflow-serving.js deleted file mode 100644 index fdce4472..00000000 --- a/docker/2.1/sagemaker/tensorflow-serving.js +++ /dev/null @@ -1,231 +0,0 @@ -var tfs_base_uri = "/tfs/v1/models/" -var custom_attributes_header = "X-Amzn-SageMaker-Custom-Attributes" - -function invocations(r) { - var ct = r.headersIn["Content-Type"] - - if ("application/json" == ct || "application/jsonlines" == ct || "application/jsons" == ct) { - json_request(r) - } else if ("text/csv" == ct) { - csv_request(r) - } else { - return_error(r, 415, "Unsupported Media Type: " + (ct || "Unknown")) - } -} - -function ping(r) { - var uri = make_tfs_uri(r, false) - - function callback (reply) { - if (reply.status == 200 && reply.responseBody.includes('"AVAILABLE"')) { - r.return(200) - } else { - r.error("failed ping" + reply.responseBody) - r.return(502) - } - } - - r.subrequest(uri, callback) -} - -function ping_without_model(r) { - // hack for TF 1.11 and MME - // for TF 1.11, send an arbitrary fixed request to the default model. 
- // if response is 400, the model is ok (but input was bad), so return 200 - // for MME, the default model name is None and does not exist - // also return 200 in unlikely case our request was really valid - - var uri = make_tfs_uri(r, true) - var options = { - method: "POST", - body: "{'instances': 'invalid'}" - } - - function callback (reply) { - if (reply.status == 200 || reply.status == 400 || - reply.responseBody.includes("Servable not found for request: Latest(None)")) { - r.return(200) - } else { - r.error("failed ping" + reply.responseBody) - r.return(502) - } - } - - r.subrequest(uri, options, callback) -} - -function return_error(r, code, message) { - if (message) { - r.return(code, "{'error': " + message + "'}'") - } else { - r.return(code) - } -} - -function tfs_json_request(r, json) { - var uri = make_tfs_uri(r, true) - var options = { - method: "POST", - body: json - } - - var accept = r.headersIn.Accept - function callback (reply) { - var body = reply.responseBody - if (reply.status == 400) { - // "fix" broken json escaping in \'instances\' message - body = body.replace("\\'instances\\'", "'instances'") - } - - if ("application/jsonlines" == accept || "application/jsons" == accept) { - body = body.replace(/\n/g, "") - r.headersOut["Content-Type"] = accept - } - r.return(reply.status, body) - } - - r.subrequest(uri, options, callback) - -} - -function make_tfs_uri(r, with_method) { - var attributes = parse_custom_attributes(r) - - var uri = tfs_base_uri + attributes["tfs-model-name"] - if ("tfs-model-version" in attributes) { - uri += "/versions/" + attributes["tfs-model-version"] - } - - if (with_method) { - uri += ":" + (attributes["tfs-method"] || "predict") - } - - return uri -} - -function parse_custom_attributes(r) { - var attributes = {} - var kv_pattern = /tfs-[a-z\-]+=[^,]+/g - var header = r.headersIn[custom_attributes_header] - if (header) { - var matches = header.match(kv_pattern) - if (matches) { - for (var i = 0; i < matches.length; i++) { - var kv = matches[i].split("=") - if (kv.length === 2) { - attributes[kv[0]] = kv[1] - } - } - } - } - - // for MME invocations, tfs-model-name is in the uri, or use default_tfs_model - if (!attributes["tfs-model-name"]) { - var uri_pattern = /\/models\/[^,]+\/invoke/g - var model_name = r.uri.match(uri_pattern) - if (model_name[0]) { - model_name = r.uri.replace("/models/", "").replace("/invoke", "") - attributes["tfs-model-name"] = model_name - } else { - attributes["tfs-model-name"] = r.variables.default_tfs_model - } - } - - return attributes -} - -function json_request(r) { - var data = r.requestBody - - if (is_json_lines(data)) { - json_lines_request(r, data) - } else if (is_tfs_json(data)) { - tfs_json_request(r, data) - } else { - generic_json_request(r, data) - } -} - -function is_tfs_json(data) { - return /"(instances|inputs|examples)"\s*:/.test(data) -} - -function is_json_lines(data) { - // objects separated only by (optional) whitespace means jsons/json-lines - return /[}\]]\s*[\[{]/.test(data) -} - -function generic_json_request(r, data) { - if (! /^\s*\[\s*\[/.test(data)) { - data = "[" + data + "]" - } - - var json = "{'instances':" + data + "}" - tfs_json_request(r, json) -} - -function json_lines_request(r, data) { - var lines = data.trim().split(/\r?\n/) - var builder = [] - builder.push("{'instances':") - if (lines.length != 1) { - builder.push("[") - } - - for (var i = 0; i < lines.length; i++) { - var line = lines[i].trim() - if (line) { - var instance = (i == 0) ? 
"" : "," - instance += line - builder.push(instance) - } - } - - builder.push(lines.length == 1 ? "}" : "]}") - tfs_json_request(r, builder.join('')) -} - -function csv_request(r) { - var data = r.requestBody - // look for initial quote or numeric-only data in 1st field - var needs_quotes = data.search(/^\s*("|[\d.Ee+\-]+.*)/) != 0 - var lines = data.trim().split(/\r?\n/) - var builder = [] - builder.push("{'nstances':[") - - for (var i = 0; i < lines.length; i++) { - var line = lines[i].trim() - if (line) { - var line_builder = [] - // Only wrap line in brackets if there are multiple columns. - // If there's only one column and it has a string with a comma, - // the input will be wrapped in an extra set of brackets. - var has_multiple_columns = line.search(",") != -1 - - if (has_multiple_columns) { - line_builder.push("[") - } - - if (needs_quotes) { - line_builder.push("'") - line_builder.push(line.replace("'", "\\'").replace(",", "','")) - line_builder.push("'") - } else { - line_builder.push(line) - } - - if (has_multiple_columns) { - line_builder.push("]") - } - - var json_line = line_builder.join("") - builder.push(json_line) - - if (i != lines.length - 1) - builder.push(",") - } - } - - builder.push("]}") - tfs_json_request(r, builder.join("")) -} diff --git a/docker/2.1/sagemaker/tfs_utils.py b/docker/2.1/sagemaker/tfs_utils.py deleted file mode 100644 index f3ca0cb7..00000000 --- a/docker/2.1/sagemaker/tfs_utils.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
- -import logging -import multiprocessing -import os -import re - -from collections import namedtuple - -logging.basicConfig(level=logging.INFO) -log = logging.getLogger(__name__) - -DEFAULT_CONTENT_TYPE = "application/json" -DEFAULT_ACCEPT_HEADER = "application/json" -CUSTOM_ATTRIBUTES_HEADER = "X-Amzn-SageMaker-Custom-Attributes" - -Context = namedtuple("Context", - "model_name, model_version, method, rest_uri, grpc_port, " - "custom_attributes, request_content_type, accept_header, content_length") - - -def parse_request(req, rest_port, grpc_port, default_model_name, model_name=None): - tfs_attributes = parse_tfs_custom_attributes(req) - tfs_uri = make_tfs_uri(rest_port, tfs_attributes, default_model_name, model_name) - - if not model_name: - model_name = tfs_attributes.get("tfs-model-name") - - context = Context(model_name, - tfs_attributes.get("tfs-model-version"), - tfs_attributes.get("tfs-method"), - tfs_uri, - grpc_port, - req.get_header(CUSTOM_ATTRIBUTES_HEADER), - req.get_header("Content-Type") or DEFAULT_CONTENT_TYPE, - req.get_header("Accept") or DEFAULT_ACCEPT_HEADER, - req.content_length) - - data = req.stream - return data, context - - -def make_tfs_uri(port, attributes, default_model_name, model_name=None): - log.info("sagemaker tfs attributes: \n{}".format(attributes)) - - tfs_model_name = model_name or attributes.get("tfs-model-name", default_model_name) - tfs_model_version = attributes.get("tfs-model-version") - tfs_method = attributes.get("tfs-method", "predict") - - uri = "http://localhost:{}/v1/models/{}".format(port, tfs_model_name) - if tfs_model_version: - uri += "/versions/" + tfs_model_version - uri += ":" + tfs_method - return uri - - -def parse_tfs_custom_attributes(req): - attributes = {} - header = req.get_header(CUSTOM_ATTRIBUTES_HEADER) - if header: - matches = re.findall(r"(tfs-[a-z\-]+=[^,]+)", header) - attributes = dict(attribute.split("=") for attribute in matches) - return attributes - - -def create_tfs_config_individual_model(model_name, base_path): - config = "model_config_list: {\n" - config += " config: {\n" - config += " name: '{}',\n".format(model_name) - config += " base_path: '{}',\n".format(base_path) - config += " model_platform: 'tensorflow'\n" - config += " }\n" - config += "}\n" - return config - - -def create_tfs_config( - tfs_default_model_name, - tfs_config_path, -): - models = find_models() - if not models: - raise ValueError("no SavedModel bundles found!") - - if tfs_default_model_name == "None": - default_model = os.path.basename(models[0]) - if default_model: - tfs_default_model_name = default_model - log.info("using default model name: {}".format(tfs_default_model_name)) - else: - log.info("no default model detected") - - # config (may) include duplicate 'config' keys, so we can't just dump a dict - config = "model_config_list: {\n" - for m in models: - config += " config: {\n" - config += " name: '{}',\n".format(os.path.basename(m)) - config += " base_path: '{}',\n".format(m) - config += " model_platform: 'tensorflow'\n" - config += " }\n" - config += "}\n" - - log.info("tensorflow serving model config: \n%s\n", config) - - with open(tfs_config_path, "w") as f: - f.write(config) - - -def tfs_command(tfs_grpc_port, - tfs_rest_port, - tfs_config_path, - tfs_enable_batching, - tfs_batching_config_file): - cmd = "tensorflow_model_server " \ - "--port={} " \ - "--rest_api_port={} " \ - "--model_config_file={} " \ - "--max_num_load_retries=0 {}" \ - .format(tfs_grpc_port, tfs_rest_port, tfs_config_path, - 
get_tfs_batching_args(tfs_enable_batching, tfs_batching_config_file)) - return cmd - - -def find_models(): - base_path = "/opt/ml/model" - models = [] - for f in _find_saved_model_files(base_path): - parts = f.split("/") - if len(parts) >= 6 and re.match(r"^\d+$", parts[-2]): - model_path = "/".join(parts[0:-2]) - if model_path not in models: - models.append(model_path) - return models - - -def _find_saved_model_files(path): - for e in os.scandir(path): - if e.is_dir(): - yield from _find_saved_model_files(os.path.join(path, e.name)) - else: - if e.name == "saved_model.pb": - yield os.path.join(path, e.name) - - -def get_tfs_batching_args(enable_batching, tfs_batching_config): - if enable_batching: - return "--enable_batching=true " \ - "--batching_parameters_file={}".format(tfs_batching_config) - else: - return "" - - -def create_batching_config(batching_config_file): - class _BatchingParameter: - def __init__(self, key, env_var, value, defaulted_message): - self.key = key - self.env_var = env_var - self.value = value - self.defaulted_message = defaulted_message - - cpu_count = multiprocessing.cpu_count() - batching_parameters = [ - _BatchingParameter("max_batch_size", "SAGEMAKER_TFS_MAX_BATCH_SIZE", 8, - "max_batch_size defaulted to {}. Set {} to override default. " - "Tuning this parameter may yield better performance."), - _BatchingParameter("batch_timeout_micros", "SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS", 1000, - "batch_timeout_micros defaulted to {}. Set {} to override " - "default. Tuning this parameter may yield better performance."), - _BatchingParameter("num_batch_threads", "SAGEMAKER_TFS_NUM_BATCH_THREADS", - cpu_count, "num_batch_threads defaulted to {}," - "the number of CPUs. Set {} to override default."), - _BatchingParameter("max_enqueued_batches", "SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES", - # Batch limits number of concurrent requests, which limits number - # of enqueued batches, so this can be set high for Batch - 100000000 if "SAGEMAKER_BATCH" in os.environ else cpu_count, - "max_enqueued_batches defaulted to {}. Set {} to override default. 
" - "Tuning this parameter may be necessary to tune out-of-memory " - "errors occur."), - ] - - warning_message = "" - for batching_parameter in batching_parameters: - if batching_parameter.env_var in os.environ: - batching_parameter.value = os.environ[batching_parameter.env_var] - else: - warning_message += batching_parameter.defaulted_message.format( - batching_parameter.value, batching_parameter.env_var) - warning_message += "\n" - if warning_message: - log.warning(warning_message) - - config = "" - for batching_parameter in batching_parameters: - config += "%s { value: %s }\n" % (batching_parameter.key, batching_parameter.value) - - log.info("batching config: \n%s\n", config) - with open(batching_config_file, "w") as f: - f.write(config) diff --git a/docker/build_artifacts/sagemaker/serve.py b/docker/build_artifacts/sagemaker/serve.py index 7a539fe6..9d814feb 100644 --- a/docker/build_artifacts/sagemaker/serve.py +++ b/docker/build_artifacts/sagemaker/serve.py @@ -176,7 +176,7 @@ def _read_nginx_template(self): return template def _start_tfs(self): - self._log_version("tensorflow_model_server --version', 'tensorflow version info:") + self._log_version("tensorflow_model_server --version", "tensorflow version info:") cmd = tfs_utils.tfs_command( self._tfs_grpc_port, self._tfs_rest_port, diff --git a/docker/build_artifacts/sagemaker/tensorflow-serving.js b/docker/build_artifacts/sagemaker/tensorflow-serving.js index fdce4472..1c040b0a 100644 --- a/docker/build_artifacts/sagemaker/tensorflow-serving.js +++ b/docker/build_artifacts/sagemaker/tensorflow-serving.js @@ -1,15 +1,15 @@ -var tfs_base_uri = "/tfs/v1/models/" -var custom_attributes_header = "X-Amzn-SageMaker-Custom-Attributes" +var tfs_base_uri = '/tfs/v1/models/' +var custom_attributes_header = 'X-Amzn-SageMaker-Custom-Attributes' function invocations(r) { - var ct = r.headersIn["Content-Type"] + var ct = r.headersIn['Content-Type'] - if ("application/json" == ct || "application/jsonlines" == ct || "application/jsons" == ct) { + if ('application/json' == ct || 'application/jsonlines' == ct || 'application/jsons' == ct) { json_request(r) - } else if ("text/csv" == ct) { + } else if ('text/csv' == ct) { csv_request(r) } else { - return_error(r, 415, "Unsupported Media Type: " + (ct || "Unknown")) + return_error(r, 415, 'Unsupported Media Type: ' + (ct || 'Unknown')) } } @@ -20,7 +20,7 @@ function ping(r) { if (reply.status == 200 && reply.responseBody.includes('"AVAILABLE"')) { r.return(200) } else { - r.error("failed ping" + reply.responseBody) + r.error('failed ping' + reply.responseBody) r.return(502) } } @@ -37,16 +37,16 @@ function ping_without_model(r) { var uri = make_tfs_uri(r, true) var options = { - method: "POST", - body: "{'instances': 'invalid'}" + method: 'POST', + body: '{"instances": "invalid"}' } function callback (reply) { if (reply.status == 200 || reply.status == 400 || - reply.responseBody.includes("Servable not found for request: Latest(None)")) { + reply.responseBody.includes('Servable not found for request: Latest(None)')) { r.return(200) } else { - r.error("failed ping" + reply.responseBody) + r.error('failed ping' + reply.responseBody) r.return(502) } } @@ -56,7 +56,7 @@ function ping_without_model(r) { function return_error(r, code, message) { if (message) { - r.return(code, "{'error': " + message + "'}'") + r.return(code, '{"error": "' + message + '"}') } else { r.return(code) } @@ -65,7 +65,7 @@ function return_error(r, code, message) { function tfs_json_request(r, json) { var uri = make_tfs_uri(r, true) 
var options = { - method: "POST", + method: 'POST', body: json } @@ -77,9 +77,9 @@ function tfs_json_request(r, json) { body = body.replace("\\'instances\\'", "'instances'") } - if ("application/jsonlines" == accept || "application/jsons" == accept) { - body = body.replace(/\n/g, "") - r.headersOut["Content-Type"] = accept + if ('application/jsonlines' == accept || 'application/jsons' == accept) { + body = body.replace(/\n/g, '') + r.headersOut['Content-Type'] = accept } r.return(reply.status, body) } @@ -91,13 +91,13 @@ function tfs_json_request(r, json) { function make_tfs_uri(r, with_method) { var attributes = parse_custom_attributes(r) - var uri = tfs_base_uri + attributes["tfs-model-name"] - if ("tfs-model-version" in attributes) { - uri += "/versions/" + attributes["tfs-model-version"] + var uri = tfs_base_uri + attributes['tfs-model-name'] + if ('tfs-model-version' in attributes) { + uri += '/versions/' + attributes['tfs-model-version'] } if (with_method) { - uri += ":" + (attributes["tfs-method"] || "predict") + uri += ':' + (attributes['tfs-method'] || 'predict') } return uri @@ -111,7 +111,7 @@ function parse_custom_attributes(r) { var matches = header.match(kv_pattern) if (matches) { for (var i = 0; i < matches.length; i++) { - var kv = matches[i].split("=") + var kv = matches[i].split('=') if (kv.length === 2) { attributes[kv[0]] = kv[1] } @@ -120,14 +120,14 @@ function parse_custom_attributes(r) { } // for MME invocations, tfs-model-name is in the uri, or use default_tfs_model - if (!attributes["tfs-model-name"]) { + if (!attributes['tfs-model-name']) { var uri_pattern = /\/models\/[^,]+\/invoke/g var model_name = r.uri.match(uri_pattern) if (model_name[0]) { - model_name = r.uri.replace("/models/", "").replace("/invoke", "") - attributes["tfs-model-name"] = model_name + model_name = r.uri.replace('/models/', '').replace('/invoke', '') + attributes['tfs-model-name'] = model_name } else { - attributes["tfs-model-name"] = r.variables.default_tfs_model + attributes['tfs-model-name'] = r.variables.default_tfs_model } } @@ -157,31 +157,31 @@ function is_json_lines(data) { function generic_json_request(r, data) { if (! /^\s*\[\s*\[/.test(data)) { - data = "[" + data + "]" + data = '[' + data + ']' } - var json = "{'instances':" + data + "}" + var json = '{"instances":' + data + '}' tfs_json_request(r, json) } function json_lines_request(r, data) { var lines = data.trim().split(/\r?\n/) var builder = [] - builder.push("{'instances':") + builder.push('{"instances":') if (lines.length != 1) { - builder.push("[") + builder.push('[') } for (var i = 0; i < lines.length; i++) { var line = lines[i].trim() if (line) { - var instance = (i == 0) ? "" : "," + var instance = (i == 0) ? '' : ',' instance += line builder.push(instance) } } - builder.push(lines.length == 1 ? "}" : "]}") + builder.push(lines.length == 1 ? '}' : ']}') tfs_json_request(r, builder.join('')) } @@ -191,7 +191,7 @@ function csv_request(r) { var needs_quotes = data.search(/^\s*("|[\d.Ee+\-]+.*)/) != 0 var lines = data.trim().split(/\r?\n/) var builder = [] - builder.push("{'nstances':[") + builder.push('{"instances":[') for (var i = 0; i < lines.length; i++) { var line = lines[i].trim() @@ -200,32 +200,32 @@ function csv_request(r) { // Only wrap line in brackets if there are multiple columns. // If there's only one column and it has a string with a comma, // the input will be wrapped in an extra set of brackets. 
- var has_multiple_columns = line.search(",") != -1 + var has_multiple_columns = line.search(',') != -1 if (has_multiple_columns) { - line_builder.push("[") + line_builder.push('[') } if (needs_quotes) { - line_builder.push("'") - line_builder.push(line.replace("'", "\\'").replace(",", "','")) - line_builder.push("'") + line_builder.push('"') + line_builder.push(line.replace('"', '\\"').replace(',', '","')) + line_builder.push('"') } else { line_builder.push(line) } if (has_multiple_columns) { - line_builder.push("]") + line_builder.push(']') } - var json_line = line_builder.join("") + var json_line = line_builder.join('') builder.push(json_line) if (i != lines.length - 1) - builder.push(",") + builder.push(',') } } - builder.push("]}") - tfs_json_request(r, builder.join("")) + builder.push(']}') + tfs_json_request(r, builder.join('')) } diff --git a/test/integration/local/multi_model_endpoint_test_utils.py b/test/integration/local/multi_model_endpoint_test_utils.py index 08802dd6..508b6615 100644 --- a/test/integration/local/multi_model_endpoint_test_utils.py +++ b/test/integration/local/multi_model_endpoint_test_utils.py @@ -11,7 +11,6 @@ # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. -import encodings import json import requests @@ -34,17 +33,17 @@ def make_invocation_request(data, model_name, content_type="application/json"): "X-Amzn-SageMaker-Custom-Attributes": "tfs-method=predict" } response = requests.post(INVOCATION_URL.format(model_name), data=data, headers=headers) - return response.status_code, response.content.decode(encodings.utf_8.getregentry().name) + return response.status_code, response.content.decode("utf-8") def make_list_model_request(): response = requests.get(MODELS_URL) - return response.status_code, response.content.decode(encodings.utf_8.getregentry().name) + return response.status_code, response.content.decode("utf-8") def make_get_model_request(model_name): response = requests.get(MODELS_URL + "/{}".format(model_name)) - return response.status_code, response.content.decode(encodings.utf_8.getregentry().name) + return response.status_code, response.content.decode("utf-8") def make_load_model_request(data, content_type="application/json"): @@ -52,9 +51,9 @@ def make_load_model_request(data, content_type="application/json"): "Content-Type": content_type } response = requests.post(MODELS_URL, data=data, headers=headers) - return response.status_code, response.content.decode(encodings.utf_8.getregentry().name) + return response.status_code, response.content.decode("utf-8") def make_unload_model_request(model_name): response = requests.delete(DELETE_MODEL_URL.format(model_name)) - return response.status_code, response.content.decode(encodings.utf_8.getregentry().name) + return response.status_code, response.content.decode("utf-8") diff --git a/test/integration/local/test_container.py b/test/integration/local/test_container.py index 00a82ecf..112df792 100644 --- a/test/integration/local/test_container.py +++ b/test/integration/local/test_container.py @@ -130,7 +130,7 @@ def test_predict_jsons(): def test_predict_jsons_2(): - x = "{'x': [1.0, 2.0, 5.0]}\n{'x': [1.0, 2.0, 5.0]}" + x = "{\"x\": [1.0, 2.0, 5.0]}\n{\"x\": [1.0, 2.0, 5.0]}" y = make_request(x) assert y == {"predictions": [[3.5, 4.0, 5.5], [3.5, 4.0, 5.5]]} @@ -255,4 +255,4 @@ def test_predict_with_jsonlines(): } response = requests.post(BASE_URL, data=json.dumps(x), headers=headers) assert 
response.headers["Content-Type"] == "application/jsonlines" - assert response.content.decode("utf-8") == "{ 'predictions': [3.5, 4.0, 5.5 ]}" + assert response.content.decode("utf-8") == "{ \"predictions\": [3.5, 4.0, 5.5 ]}" diff --git a/test/integration/local/test_pre_post_processing.py b/test/integration/local/test_pre_post_processing.py index e2ec89cc..3a490d4f 100644 --- a/test/integration/local/test_pre_post_processing.py +++ b/test/integration/local/test_pre_post_processing.py @@ -88,7 +88,7 @@ def make_headers(content_type, method): def test_predict_json(): headers = make_headers("application/json", "predict") - data = "{'instances': [1.0, 2.0, 5.0]}" + data = "{\"instances\": [1.0, 2.0, 5.0]}" response = requests.post(INVOCATIONS_URL, data=data, headers=headers).json() assert response == {"predictions": [3.5, 4.0, 5.5]} diff --git a/test/integration/local/test_pre_post_processing_mme.py b/test/integration/local/test_pre_post_processing_mme.py index f4c8730d..7e63b610 100644 --- a/test/integration/local/test_pre_post_processing_mme.py +++ b/test/integration/local/test_pre_post_processing_mme.py @@ -100,7 +100,7 @@ def test_ping_service(): def test_predict_json(model): headers = make_headers() - data = "{'instances': [1.0, 2.0, 5.0]}" + data = "{\"instances\": [1.0, 2.0, 5.0]}" response = requests.post(INVOCATION_URL.format(model), data=data, headers=headers).json() assert response == {"predictions": [3.5, 4.0, 5.5]} From 2807e34614eccac34edcbc66537c28ff8711944d Mon Sep 17 00:00:00 2001 From: Chuyang Deng Date: Wed, 22 Jul 2020 13:31:48 -0700 Subject: [PATCH 3/5] update quotes --- README.md | 36 +++++++++---------- .../sagemaker/python_service.py | 20 +++++------ docker/build_artifacts/sagemaker/serve.py | 2 +- test/integration/local/conftest.py | 6 ++-- test/integration/local/test_container.py | 14 ++++---- .../local/test_pre_post_processing.py | 12 +++---- .../local/test_pre_post_processing_mme.py | 14 ++++---- test/integration/local/test_tfs_batching.py | 22 ++++++------ test/integration/sagemaker/conftest.py | 14 ++++---- test/integration/sagemaker/test_tfs.py | 26 +++++++------- 10 files changed, 83 insertions(+), 83 deletions(-) diff --git a/README.md b/README.md index dbe22668..18a2fc3e 100644 --- a/README.md +++ b/README.md @@ -620,24 +620,24 @@ To deploy a Multi-Model endpoint with TFS container, please start the container ### Multi-Model Interfaces We provide four different interfaces for user to interact with a Multi-Model Mode container: -+---------------------+---------------------------------+---------------------------------------------+ -| Functionality | Request | Response/Actions | -+---------------------+---------------------------------+---------------------------------------------+ -| List A Single Model | GET /models/{model_name} | Information about the specified model | -+---------------------+---------------------------------+---------------------------------------------+ -| List All Models | GET /models | List of Information about all loaded models | -+---------------------+---------------------------------+---------------------------------------------+ -| | POST /models | Load model with "model_name" from | -| | data = { | specified url | -| Load A Model | "model_name": , | | -| | "url": | | -| | } | | -+---------------------+---------------------------------+---------------------------------------------+ -| Make Invocations | POST /models/{model_name}/invoke| Return inference result from | -| | data = | the specified model | 
-+---------------------+---------------------------------+---------------------------------------------+ -| Unload A Model | DELETE /models/{model_name} | Unload the specified model | -+---------------------+---------------------------------+---------------------------------------------+ + +---------------------+---------------------------------+---------------------------------------------+ + | Functionality | Request | Response/Actions | + +---------------------+---------------------------------+---------------------------------------------+ + | List A Single Model | GET /models/{model_name} | Information about the specified model | + +---------------------+---------------------------------+---------------------------------------------+ + | List All Models | GET /models | List of Information about all loaded models | + +---------------------+---------------------------------+---------------------------------------------+ + | | POST /models | Load model with "model_name" from | + | | data = { | specified url | + | Load A Model | "model_name": , | | + | | "url": | | + | | } | | + +---------------------+---------------------------------+---------------------------------------------+ + | Make Invocations | POST /models/{model_name}/invoke| Return inference result from | + | | data = | the specified model | + +---------------------+---------------------------------+---------------------------------------------+ + | Unload A Model | DELETE /models/{model_name} | Unload the specified model | + +---------------------+---------------------------------+---------------------------------------------+ ### Maximum Number of Models Also please note the environment variable ``SAGEMAKER_SAFE_PORT_RANGE`` will limit the number of models that can be loaded to the endpoint at the same time. 
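To make the table above concrete, here is a minimal local sketch of the four interfaces using Python requests. It assumes a container started the way the local integration tests below start it (SAGEMAKER_BIND_TO_PORT=8080, SAGEMAKER_MULTI_MODEL=True) with the half_plus_three test model mounted at the path those tests use; adjust names and paths for a real deployment.

import json
import requests

BASE = "http://localhost:8080"
headers = {"Content-Type": "application/json"}

# Load A Model
model_data = {
    "model_name": "half_plus_three",
    "url": "/opt/ml/models/half_plus_three/model/half_plus_three",
}
requests.post(BASE + "/models", data=json.dumps(model_data), headers=headers)

# List All Models / List A Single Model
print(requests.get(BASE + "/models").text)
print(requests.get(BASE + "/models/half_plus_three").text)

# Make Invocations
print(requests.post(BASE + "/models/half_plus_three/invoke",
                    data='{"instances": [1.0, 2.0, 5.0]}',
                    headers=headers).text)

# Unload A Model
requests.delete(BASE + "/models/half_plus_three")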
diff --git a/docker/build_artifacts/sagemaker/python_service.py b/docker/build_artifacts/sagemaker/python_service.py index 60a2d04b..44b9498c 100644 --- a/docker/build_artifacts/sagemaker/python_service.py +++ b/docker/build_artifacts/sagemaker/python_service.py @@ -27,14 +27,14 @@ from multi_model_utils import lock, timeout, MultiModelException import tfs_utils -SAGEMAKER_MULTI_MODEL_ENABLED = os.environ.get('SAGEMAKER_MULTI_MODEL', 'false').lower() == 'true' -INFERENCE_SCRIPT_PATH = '/opt/ml/model/code/inference.py' +SAGEMAKER_MULTI_MODEL_ENABLED = os.environ.get("SAGEMAKER_MULTI_MODEL", "false").lower() == "true" +INFERENCE_SCRIPT_PATH = "/opt/ml/model/code/inference.py" -SAGEMAKER_BATCHING_ENABLED = os.environ.get('SAGEMAKER_TFS_ENABLE_BATCHING', 'false').lower() -MODEL_CONFIG_FILE_PATH = '/sagemaker/model-config.cfg' -TFS_GRPC_PORT = os.environ.get('TFS_GRPC_PORT') -TFS_REST_PORT = os.environ.get('TFS_REST_PORT') -SAGEMAKER_TFS_PORT_RANGE = os.environ.get('SAGEMAKER_SAFE_PORT_RANGE') +SAGEMAKER_BATCHING_ENABLED = os.environ.get("SAGEMAKER_TFS_ENABLE_BATCHING", "false").lower() +MODEL_CONFIG_FILE_PATH = "/sagemaker/model-config.cfg" +TFS_GRPC_PORT = os.environ.get("TFS_GRPC_PORT") +TFS_REST_PORT = os.environ.get("TFS_REST_PORT") +SAGEMAKER_TFS_PORT_RANGE = os.environ.get("SAGEMAKER_SAFE_PORT_RANGE") logging.basicConfig(level=logging.INFO) @@ -78,8 +78,8 @@ def __init__(self): else: self._handlers = default_handler - self._tfs_enable_batching = SAGEMAKER_BATCHING_ENABLED == 'true' - self._tfs_default_model_name = os.environ.get('TFS_DEFAULT_MODEL_NAME', "None") + self._tfs_enable_batching = SAGEMAKER_BATCHING_ENABLED == "true" + self._tfs_default_model_name = os.environ.get("TFS_DEFAULT_MODEL_NAME", "None") def on_post(self, req, res, model_name=None): log.info(req.uri) @@ -296,7 +296,7 @@ def _import_handlers(self, model_name=None): inference_script = INFERENCE_SCRIPT_PATH if model_name: inference_script = "/opt/ml/models/{}/model/code/inference.py".format(model_name) - spec = importlib.util.spec_from_file_location('inference', inference_script) + spec = importlib.util.spec_from_file_location("inference", inference_script) inference = importlib.util.module_from_spec(spec) spec.loader.exec_module(inference) diff --git a/docker/build_artifacts/sagemaker/serve.py b/docker/build_artifacts/sagemaker/serve.py index 0896bc3c..231cbc59 100644 --- a/docker/build_artifacts/sagemaker/serve.py +++ b/docker/build_artifacts/sagemaker/serve.py @@ -269,7 +269,7 @@ def start(self): self._create_nginx_config() if self._tfs_enable_batching: - log.info('batching is enabled') + log.info("batching is enabled") tfs_utils.create_batching_config(self._tfs_batching_config_path) if self._use_gunicorn: diff --git a/test/integration/local/conftest.py b/test/integration/local/conftest.py index 0cef34f9..050ba552 100644 --- a/test/integration/local/conftest.py +++ b/test/integration/local/conftest.py @@ -58,6 +58,6 @@ def tag(request, framework_version, processor): @pytest.fixture(autouse=True) def skip_by_device_type(request, processor): is_gpu = processor == "gpu" - if (request.node.get_closest_marker('skip_gpu') and is_gpu) or \ - (request.node.get_closest_marker('skip_cpu') and not is_gpu): - pytest.skip('Skipping because running on \'{}\' instance'.format(processor)) + if (request.node.get_closest_marker("skip_gpu") and is_gpu) or \ + (request.node.get_closest_marker("skip_cpu") and not is_gpu): + pytest.skip("Skipping because running on \"{}\" instance".format(processor)) diff --git 
a/test/integration/local/test_container.py b/test/integration/local/test_container.py index 2cacbb36..21650dd3 100644 --- a/test/integration/local/test_container.py +++ b/test/integration/local/test_container.py @@ -43,13 +43,13 @@ def container(request, docker_base_name, tag, runtime_config): else: batching_config = "" command = ( - 'docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080' - ' --mount type=volume,source=model_volume,target=/opt/ml/model,readonly' - ' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info' - ' -e SAGEMAKER_BIND_TO_PORT=8080' - ' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999' - ' {}' - ' {}:{} serve' + "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" + " --mount type=volume,source=model_volume,target=/opt/ml/model,readonly" + " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" + " -e SAGEMAKER_BIND_TO_PORT=8080" + " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" + " {}" + " {}:{} serve" ).format(runtime_config, batching_config, docker_base_name, tag) proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) diff --git a/test/integration/local/test_pre_post_processing.py b/test/integration/local/test_pre_post_processing.py index b12e485e..1106b0e5 100644 --- a/test/integration/local/test_pre_post_processing.py +++ b/test/integration/local/test_pre_post_processing.py @@ -51,12 +51,12 @@ def volume(tmpdir_factory, request): def container(volume, docker_base_name, tag, runtime_config): try: command = ( - 'docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080' - ' --mount type=volume,source={},target=/opt/ml/model,readonly' - ' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info' - ' -e SAGEMAKER_BIND_TO_PORT=8080' - ' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999' - ' {}:{} serve' + "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" + " --mount type=volume,source={},target=/opt/ml/model,readonly" + " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" + " -e SAGEMAKER_BIND_TO_PORT=8080" + " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" + " {}:{} serve" ).format(runtime_config, volume, docker_base_name, tag) proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) diff --git a/test/integration/local/test_pre_post_processing_mme.py b/test/integration/local/test_pre_post_processing_mme.py index 0c373cbd..fdd5438d 100644 --- a/test/integration/local/test_pre_post_processing_mme.py +++ b/test/integration/local/test_pre_post_processing_mme.py @@ -55,13 +55,13 @@ def volume(tmpdir_factory, request): def container(volume, docker_base_name, tag, runtime_config): try: command = ( - 'docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080' - ' --mount type=volume,source={},target=/opt/ml/models/half_plus_three/model,readonly' - ' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info' - ' -e SAGEMAKER_BIND_TO_PORT=8080' - ' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999' - ' -e SAGEMAKER_MULTI_MODEL=True' - ' {}:{} serve' + "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" + " --mount type=volume,source={},target=/opt/ml/models/half_plus_three/model,readonly" + " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" + " -e SAGEMAKER_BIND_TO_PORT=8080" + " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" + " -e SAGEMAKER_MULTI_MODEL=True" + " {}:{} serve" ).format(runtime_config, volume, docker_base_name, tag) proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT) diff --git a/test/integration/local/test_tfs_batching.py b/test/integration/local/test_tfs_batching.py index 2e05aa7c..54d893b7 100644 --- 
a/test/integration/local/test_tfs_batching.py +++ b/test/integration/local/test_tfs_batching.py @@ -34,17 +34,17 @@ def volume(): def test_run_tfs_with_batching_parameters(docker_base_name, tag, runtime_config): try: command = ( - 'docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080' - ' --mount type=volume,source=batching_model_volume,target=/opt/ml/model,readonly' - ' -e SAGEMAKER_TFS_ENABLE_BATCHING=true' - ' -e SAGEMAKER_TFS_MAX_BATCH_SIZE=16' - ' -e SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS=500' - ' -e SAGEMAKER_TFS_NUM_BATCH_THREADS=100' - ' -e SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES=1' - ' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info' - ' -e SAGEMAKER_BIND_TO_PORT=8080' - ' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999' - ' {}:{} serve' + "docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080" + " --mount type=volume,source=batching_model_volume,target=/opt/ml/model,readonly" + " -e SAGEMAKER_TFS_ENABLE_BATCHING=true" + " -e SAGEMAKER_TFS_MAX_BATCH_SIZE=16" + " -e SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS=500" + " -e SAGEMAKER_TFS_NUM_BATCH_THREADS=100" + " -e SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES=1" + " -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info" + " -e SAGEMAKER_BIND_TO_PORT=8080" + " -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999" + " {}:{} serve" ).format(runtime_config, docker_base_name, tag) proc = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT) diff --git a/test/integration/sagemaker/conftest.py b/test/integration/sagemaker/conftest.py index 27b49156..5009979a 100644 --- a/test/integration/sagemaker/conftest.py +++ b/test/integration/sagemaker/conftest.py @@ -118,14 +118,14 @@ def model_name(): @pytest.fixture(autouse=True) def skip_gpu_instance_restricted_regions(region, instance_type): - if (region in NO_P2_REGIONS and instance_type.startswith('ml.p2')) or \ - (region in NO_P3_REGIONS and instance_type.startswith('ml.p3')): - pytest.skip('Skipping GPU test in region {}'.format(region)) + if (region in NO_P2_REGIONS and instance_type.startswith("ml.p2")) or \ + (region in NO_P3_REGIONS and instance_type.startswith("ml.p3")): + pytest.skip("Skipping GPU test in region {}".format(region)) @pytest.fixture(autouse=True) def skip_by_device_type(request, instance_type): - is_gpu = instance_type[3] in ['g', 'p'] - if (request.node.get_closest_marker('skip_gpu') and is_gpu) or \ - (request.node.get_closest_marker('skip_cpu') and not is_gpu): - pytest.skip('Skipping because running on \'{}\' instance'.format(instance_type)) + is_gpu = instance_type[3] in ["g", "p"] + if (request.node.get_closest_marker("skip_gpu") and is_gpu) or \ + (request.node.get_closest_marker("skip_cpu") and not is_gpu): + pytest.skip("Skipping because running on \"{}\" instance".format(instance_type)) diff --git a/test/integration/sagemaker/test_tfs.py b/test/integration/sagemaker/test_tfs.py index 2f67c0e9..f73ce35c 100644 --- a/test/integration/sagemaker/test_tfs.py +++ b/test/integration/sagemaker/test_tfs.py @@ -65,20 +65,20 @@ def tfs_model(region, boto_session): def python_model_with_requirements(region, boto_session): return util.find_or_put_model_data(region, boto_session, - 'test/data/python-with-requirements.tar.gz') + "test/data/python-with-requirements.tar.gz") @pytest.fixture(scope='session') def python_model_with_lib(region, boto_session): return util.find_or_put_model_data(region, boto_session, - 'test/data/python-with-lib.tar.gz') + "test/data/python-with-lib.tar.gz") def test_tfs_model(boto_session, sagemaker_client, sagemaker_runtime_client, model_name, tfs_model, image_uri, 
instance_type, accelerator_type): - input_data = {'instances': [1.0, 2.0, 5.0]} + input_data = {"instances": [1.0, 2.0, 5.0]} util.create_and_invoke_endpoint(boto_session, sagemaker_client, sagemaker_runtime_client, model_name, tfs_model, image_uri, instance_type, accelerator_type, input_data) @@ -104,34 +104,34 @@ def test_python_model_with_requirements(boto_session, sagemaker_client, python_model_with_requirements, image_uri, instance_type, accelerator_type): - if 'p3' in instance_type: - pytest.skip('skip for p3 instance') + if "p3" in instance_type: + pytest.skip("skip for p3 instance") # the python service needs to transform this to get a valid prediction - input_data = {'x': [1.0, 2.0, 5.0]} + input_data = {"x": [1.0, 2.0, 5.0]} output_data = util.create_and_invoke_endpoint(boto_session, sagemaker_client, sagemaker_runtime_client, model_name, python_model_with_requirements, image_uri, instance_type, accelerator_type, input_data) # python service adds this to tfs response - assert output_data['python'] is True - assert output_data['pillow'] == '6.0.0' + assert output_data["python"] is True + assert output_data["pillow"] == "6.0.0" def test_python_model_with_lib(boto_session, sagemaker_client, sagemaker_runtime_client, model_name, python_model_with_lib, image_uri, instance_type, accelerator_type): - if 'p3' in instance_type: - pytest.skip('skip for p3 instance') + if "p3" in instance_type: + pytest.skip("skip for p3 instance") # the python service needs to transform this to get a valid prediction - input_data = {'x': [1.0, 2.0, 5.0]} + input_data = {"x": [1.0, 2.0, 5.0]} output_data = util.create_and_invoke_endpoint(boto_session, sagemaker_client, sagemaker_runtime_client, model_name, python_model_with_lib, image_uri, instance_type, accelerator_type, input_data) # python service adds this to tfs response - assert output_data['python'] is True - assert output_data['dummy_module'] == '0.1' + assert output_data["python"] is True + assert output_data["dummy_module"] == "0.1" From 0f5fa25391697292ed59600372967ef0f27c9a3c Mon Sep 17 00:00:00 2001 From: Chuyang Deng Date: Wed, 22 Jul 2020 13:35:44 -0700 Subject: [PATCH 4/5] remove hardcoded region --- scripts/shared.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/shared.sh b/scripts/shared.sh index 3a92382b..57be36dd 100755 --- a/scripts/shared.sh +++ b/scripts/shared.sh @@ -25,7 +25,7 @@ function get_short_version() { } function get_aws_account() { - aws --region us-west-2 sts --endpoint-url https://sts.us-west-2.amazonaws.com get-caller-identity --query 'Account' --output text + aws --region $AWS_DEFAULT_REGION sts --endpoint-url https://sts.$AWS_DEFAULT_REGION.amazonaws.com get-caller-identity --query 'Account' --output text } function get_ei_executable() { From 21af6ef3ef50c2f928b2f62907668fefc7435e85 Mon Sep 17 00:00:00 2001 From: Chuyang Deng Date: Wed, 22 Jul 2020 13:47:10 -0700 Subject: [PATCH 5/5] update test quotes --- test/integration/local/test_pre_post_processing_mme.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/test/integration/local/test_pre_post_processing_mme.py b/test/integration/local/test_pre_post_processing_mme.py index fdd5438d..6a7d1b45 100644 --- a/test/integration/local/test_pre_post_processing_mme.py +++ b/test/integration/local/test_pre_post_processing_mme.py @@ -85,13 +85,8 @@ def container(volume, docker_base_name, tag, runtime_config): @pytest.fixture def model(): model_data = { -<<<<<<< HEAD "model_name": MODEL_NAME, - "url": 
"/opt/ml/models/half_plus_three" -======= - 'model_name': MODEL_NAME, - 'url': '/opt/ml/models/half_plus_three/model/half_plus_three' ->>>>>>> 2d2cfadc557d9b8716eefae2e6982eaab91d82ad + "url": "/opt/ml/models/half_plus_three/model/half_plus_three" } make_load_model_request(json.dumps(model_data)) return MODEL_NAME