This repository was archived by the owner on May 23, 2024. It is now read-only.

change: update MME Pre/Post-Processing model and script paths #153

Merged
13 commits, merged on Jul 16, 2020
Changes from 7 commits
17 changes: 8 additions & 9 deletions README.md
@@ -645,23 +645,22 @@ Only 90% of the ports will be utilized and each loaded model will be allocated with 2 ports.
For example, if the ``SAGEMAKER_SAFE_PORT_RANGE`` is between 9000 and 9999, the maximum number of models that can be loaded to the endpoint at the same time would be 449 ((9999 - 9000) * 0.9 / 2).
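As a rough sketch of that arithmetic (the 90% utilization factor and the two ports per model come from the paragraph above; the helper name below is made up for illustration):

```python
# Minimal sketch of the capacity estimate described above: roughly 90% of the
# ports in SAGEMAKER_SAFE_PORT_RANGE are usable, and each loaded model is
# assumed to consume two ports (typically one gRPC and one REST).
def estimated_max_models(port_range):
    low, high = (int(p) for p in port_range.split("-"))
    return int((high - low) * 0.9 / 2)

print(estimated_max_models("9000-9999"))  # -> 449
```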

### Using Multi-Model Endpoint with Pre/Post-Processing
Multi-Model Endpoint can be used together with Pre/Post-Processing. However, please note that in Multi-Model mode, the path of ``inference.py`` is ``/opt/ml/models/code`` instead of ``/opt/ml/model/code``.
Also, all loaded models will share the same ``inference.py`` to handle invocation requests. An example of the directory structure of Multi-Model Endpoint and Pre/Post-Processing would look like this:
Multi-Model Endpoint can be used together with Pre/Post-Processing. Each model will need its own ``inference.py``; otherwise, default handlers will be used. An example of the directory structure for a Multi-Model Endpoint with Pre/Post-Processing would look like this:

model1
/opt/ml/models/model1/model
|--[model_version_number]
|--variables
|--saved_model.pb
model2
/opt/ml/models/model2/model
|--[model_version_number]
|--assets
|--variables
|--saved_model.pb
code
|--lib
|--external_module
|--inference.py
|--requirements.txt
code
|--lib
|--external_module
|--inference.py
|--requirements.txt
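Given the layout above, loading one of the models through the container's multi-model management API could look roughly like the sketch below (based on the integration-test helpers later in this PR; the localhost URL and exact request shape are assumptions):

```python
import json

import requests

# Hypothetical load request for "model1" from the directory layout above.
# The management endpoint is assumed to be http://localhost:8080/models,
# mirroring what make_load_model_request wraps in the integration tests.
model_data = {
    "model_name": "model1",
    "url": "/opt/ml/models/model1/model",
}
response = requests.post("http://localhost:8080/models", data=json.dumps(model_data))
print(response.status_code, response.text)
```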

## Contributing

72 changes: 55 additions & 17 deletions docker/build_artifacts/sagemaker/python_service.py
@@ -17,6 +17,7 @@
import os
import subprocess
import time
import sys

import falcon
import requests
@@ -27,10 +28,8 @@
import tfs_utils

SAGEMAKER_MULTI_MODEL_ENABLED = os.environ.get('SAGEMAKER_MULTI_MODEL', 'false').lower() == 'true'
INFERENCE_SCRIPT_PATH = '/opt/ml/{}/code/inference.py'.format('models'
if SAGEMAKER_MULTI_MODEL_ENABLED
else 'model')
PYTHON_PROCESSING_ENABLED = os.path.exists(INFERENCE_SCRIPT_PATH)
INFERENCE_SCRIPT_PATH = '/opt/ml/model/code/inference.py'

SAGEMAKER_BATCHING_ENABLED = os.environ.get('SAGEMAKER_TFS_ENABLE_BATCHING', 'false').lower()
MODEL_CONFIG_FILE_PATH = '/sagemaker/model-config.cfg'
TFS_GRPC_PORT = os.environ.get('TFS_GRPC_PORT')
@@ -64,21 +63,24 @@ def __init__(self):
self._model_tfs_grpc_port = {}
self._model_tfs_pid = {}
self._tfs_ports = self._parse_sagemaker_port_range(SAGEMAKER_TFS_PORT_RANGE)
# If Multi-Model mode is enabled, dependencies/handlers will be imported
# during the _handle_load_model_post()
self.model_handlers = {}
else:
self._tfs_grpc_port = TFS_GRPC_PORT
self._tfs_rest_port = TFS_REST_PORT

if os.path.exists(INFERENCE_SCRIPT_PATH):
self._handler, self._input_handler, self._output_handler = self._import_handlers()
self._handlers = self._make_handler(self._handler,
self._input_handler,
self._output_handler)
else:
self._handlers = default_handler

self._tfs_enable_batching = SAGEMAKER_BATCHING_ENABLED == 'true'
self._tfs_default_model_name = os.environ.get('TFS_DEFAULT_MODEL_NAME', "None")

if PYTHON_PROCESSING_ENABLED:
self._handler, self._input_handler, self._output_handler = self._import_handlers()
self._handlers = self._make_handler(self._handler,
self._input_handler,
self._output_handler)
else:
self._handlers = default_handler

def on_post(self, req, res, model_name=None):
log.info(req.uri)
if model_name or "invocations" in req.uri:
@@ -129,6 +131,9 @@ def _handle_load_model_post(self, res, data): # noqa: C901
# validate model files are in the specified base_path
if self.validate_model_dir(base_path):
try:
# install custom dependencies, import handlers
self._import_custom_modules(model_name)

tfs_config = tfs_utils.create_tfs_config_individual_model(model_name, base_path)
tfs_config_file = '/sagemaker/tfs-config/{}/model-config.cfg'.format(model_name)
log.info('tensorflow serving model config: \n%s\n', tfs_config)
@@ -197,6 +202,33 @@ def _handle_load_model_post(self, res, data): # noqa: C901
model_name)
})

def _import_custom_modules(self, model_name):
inference_script_path = "/opt/ml/models/{}/model/code/inference.py".format(model_name)
requirements_file_path = "/opt/ml/models/{}/model/code/requirements.txt".format(model_name)
python_lib_path = "/opt/ml/models/{}/model/code/lib".format(model_name)

if os.path.exists(requirements_file_path):
log.info("pip install dependencies from requirements.txt")
pip_install_cmd = "pip3 install -r {}".format(requirements_file_path)
try:
subprocess.check_call(pip_install_cmd.split())
Contributor comment: it doesn't matter here, but for future reference: https://docs.python.org/3.7/library/shlex.html#shlex.split
(A short illustration of shlex.split appears after the _import_custom_modules function below.)
except subprocess.CalledProcessError:
log.error('failed to install required packages, exiting.')
raise ChildProcessError('failed to install required packages.')

if os.path.exists(python_lib_path):
log.info("add Python code library path")
sys.path.append(python_lib_path)

if os.path.exists(inference_script_path):
handler, input_handler, output_handler = self._import_handlers(model_name)
model_handlers = self._make_handler(handler,
input_handler,
output_handler)
self.model_handlers[model_name] = model_handlers
else:
self.model_handlers[model_name] = default_handler
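Illustrating the reviewer's shlex note above: a plain str.split breaks on every space, while shlex.split honors shell-style quoting. A standalone sketch, not part of the diff (the quoted path is invented):

```python
import shlex

# A requirements path containing a space is mangled by a plain split but
# survives shlex.split, which honors shell-style quoting.
cmd = 'pip3 install -r "/opt/ml/models/my model/model/code/requirements.txt"'

print(cmd.split())
# ['pip3', 'install', '-r', '"/opt/ml/models/my', 'model/model/code/requirements.txt"']

print(shlex.split(cmd))
# ['pip3', 'install', '-r', '/opt/ml/models/my model/model/code/requirements.txt']
```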

def _cleanup_config_file(self, config_file):
if os.path.exists(config_file):
os.remove(config_file)
@@ -249,16 +281,24 @@ def _handle_invocation_post(self, req, res, model_name=None):

try:
res.status = falcon.HTTP_200
res.body, res.content_type = self._handlers(data, context)
if SAGEMAKER_MULTI_MODEL_ENABLED:
with lock():
handlers = self.model_handlers[model_name]
res.body, res.content_type = handlers(data, context)
else:
res.body, res.content_type = self._handlers(data, context)
except Exception as e: # pylint: disable=broad-except
log.exception('exception handling request: {}'.format(e))
res.status = falcon.HTTP_500
res.body = json.dumps({
'error': str(e)
}).encode('utf-8') # pylint: disable=E1101

def _import_handlers(self):
spec = importlib.util.spec_from_file_location('inference', INFERENCE_SCRIPT_PATH)
def _import_handlers(self, model_name=None):
inference_script = INFERENCE_SCRIPT_PATH
if model_name:
inference_script = "/opt/ml/models/{}/model/code/inference.py".format(model_name)
Contributor comment: it might make more sense to pass this string to the function so that we don't have it duplicated across the code?
(A sketch of that refactor follows a few lines below.)

spec = importlib.util.spec_from_file_location('inference', inference_script)
Contributor comment: nit: use " instead of '
Author reply: Yeah, I have a separate PR to address the quotes issue: #149

inference = importlib.util.module_from_spec(spec)
spec.loader.exec_module(inference)
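A minimal sketch of the consolidation the reviewer suggests, assuming a small helper (the name inference_script_path is made up) so the per-model path template lives in one place:

```python
# Hypothetical helper consolidating the per-model script path, so that
# _import_custom_modules and _import_handlers no longer format
# "/opt/ml/models/{}/model/code/inference.py" independently.
DEFAULT_INFERENCE_SCRIPT = "/opt/ml/model/code/inference.py"


def inference_script_path(model_name=None):
    if model_name:
        return "/opt/ml/models/{}/model/code/inference.py".format(model_name)
    return DEFAULT_INFERENCE_SCRIPT


print(inference_script_path("model1"))
# /opt/ml/models/model1/model/code/inference.py
```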

@@ -358,7 +398,6 @@ def validate_model_dir(self, model_path):
versions = []
for _, dirs, _ in os.walk(model_path):
for dirname in dirs:
log.info("dirname: {}".format(dirname))
if dirname.isdigit():
versions.append(dirname)
return self.validate_model_versions(versions)
@@ -383,7 +422,6 @@ def on_get(self, req, res): # pylint: disable=W0613

class ServiceResources:
def __init__(self):
self._enable_python_processing = PYTHON_PROCESSING_ENABLED
self._enable_model_manager = SAGEMAKER_MULTI_MODEL_ENABLED
self._python_service_resource = PythonServiceResource()
self._ping_resource = PingResource()
4 changes: 2 additions & 2 deletions test/integration/local/test_multi_model_endpoint.py
@@ -20,8 +20,8 @@
import pytest
import requests

from multi_model_endpoint_test_utils import make_invocation_request, make_list_model_request, \
make_get_model_request, make_load_model_request, make_unload_model_request
from multi_model_endpoint_test_utils import make_invocation_request, make_list_model_request,\
make_load_model_request, make_unload_model_request

PING_URL = 'http://localhost:8080/ping'

7 changes: 3 additions & 4 deletions test/integration/local/test_pre_post_processing_mme.py
@@ -22,8 +22,7 @@

import requests

from multi_model_endpoint_test_utils import make_invocation_request, make_list_model_request, \
make_get_model_request, make_load_model_request, make_unload_model_request, make_headers
from multi_model_endpoint_test_utils import make_load_model_request, make_headers


PING_URL = 'http://localhost:8080/ping'
@@ -57,7 +56,7 @@ def container(volume, docker_base_name, tag, runtime_config):
try:
command = (
'docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080'
' --mount type=volume,source={},target=/opt/ml/models,readonly'
' --mount type=volume,source={},target=/opt/ml/models/half_plus_three/model,readonly'
' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info'
' -e SAGEMAKER_BIND_TO_PORT=8080'
' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999'
@@ -87,7 +86,7 @@ def container(volume, docker_base_name, tag, runtime_config):
def model():
model_data = {
'model_name': MODEL_NAME,
'url': '/opt/ml/models/half_plus_three'
'url': '/opt/ml/models/half_plus_three/model/half_plus_three'
}
make_load_model_request(json.dumps(model_data))
return MODEL_NAME
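Once the fixture above has loaded half_plus_three, invocations go to the model-specific invoke path rather than a single /invocations endpoint. A sketch of the request the make_invocation_request helper presumably wraps (the URL pattern and payload shape are assumptions):

```python
import json

import requests

# Hypothetical direct invocation of the loaded model; multi-model requests
# are routed by model name.
invoke_url = "http://localhost:8080/models/half_plus_three/invoke"
payload = {"instances": [1.0, 2.0, 5.0]}

response = requests.post(
    invoke_url,
    data=json.dumps(payload),
    headers={"Content-Type": "application/json"},
)
print(response.json())  # half_plus_three computes x / 2 + 3, so expect [3.5, 4.0, 5.5]
```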