This repository was archived by the owner on May 23, 2024. It is now read-only.

change: update MME Pre/Post-Processing model and script paths #153

Merged
13 commits merged on Jul 16, 2020
16 changes: 7 additions & 9 deletions README.md
@@ -645,23 +645,21 @@ Only 90% of the ports will be utilized and each loaded model will be allocated with 2 ports.
For example, if the ``SAGEMAKER_SAFE_PORT_RANGE`` is between 9000 and 9999, the maximum number of models that can be loaded to the endpoint at the same time would be 449 ((9999 - 9000) * 0.9 / 2).
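
A rough sketch of that arithmetic, assuming only the formula quoted above (the helper name is made up and is not part of the container):

```python
# Capacity estimate from the README formula: 90% of the safe port range is
# usable, and each loaded model consumes 2 ports.
def max_models_for_port_range(port_range):
    lower, upper = (int(p) for p in port_range.split('-'))
    return int((upper - lower) * 0.9 / 2)


print(max_models_for_port_range('9000-9999'))  # 449
```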

### Using Multi-Model Endpoint with Pre/Post-Processing
Multi-Model Endpoint can be used together with Pre/Post-Processing. However, please note that in Multi-Model mode, the path of ``inference.py`` is ``/opt/ml/models/code`` instead of ``/opt/ml/model/code``.
Also, all loaded models will share the same ``inference.py`` to handle invocation requests. An example of the directory structure of Multi-Model Endpoint and Pre/Post-Processing would look like this:
Multi-Model Endpoint can be used together with Pre/Post-Processing. Each model will need its own ``inference.py``; otherwise, default handlers will be used. An example of the directory structure of a Multi-Model Endpoint with Pre/Post-Processing would look like this:

model1
/opt/ml/models/model1/model
|--[model_version_number]
|--variables
|--saved_model.pb
model2
/opt/ml/models/model2/model
|--[model_version_number]
|--assets
|--variables
|--saved_model.pb
code
|--lib
|--external_module
|--inference.py
|--requirements.txt
code
|--lib
|--external_module
|--inference.py
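
For reference, a minimal ``inference.py`` that could sit in one of the per-model ``code`` directories above might look like the sketch below. The handler signatures follow the pre/post-processing interface described earlier in this README; the pass-through logic is only a placeholder.

```python
def input_handler(data, context):
    """Pre-process a request before it is sent to TensorFlow Serving."""
    if context.request_content_type == 'application/json':
        # placeholder: forward the JSON payload unchanged
        return data.read().decode('utf-8')
    raise ValueError('unsupported content type: {}'.format(
        context.request_content_type))


def output_handler(response, context):
    """Post-process the TensorFlow Serving response before it is returned."""
    if response.status_code != 200:
        raise ValueError(response.content.decode('utf-8'))
    # placeholder: return the raw prediction and its content type
    return response.content, 'application/json'
```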

## Contributing

Expand Down
70 changes: 53 additions & 17 deletions docker/build_artifacts/sagemaker/python_service.py
@@ -17,6 +17,7 @@
import os
import subprocess
import time
import sys

import falcon
import requests
Expand All @@ -27,10 +28,8 @@
import tfs_utils

SAGEMAKER_MULTI_MODEL_ENABLED = os.environ.get('SAGEMAKER_MULTI_MODEL', 'false').lower() == 'true'
INFERENCE_SCRIPT_PATH = '/opt/ml/{}/code/inference.py'.format('models'
if SAGEMAKER_MULTI_MODEL_ENABLED
else 'model')
PYTHON_PROCESSING_ENABLED = os.path.exists(INFERENCE_SCRIPT_PATH)
INFERENCE_SCRIPT_PATH = '/opt/ml/model/code/inference.py'

SAGEMAKER_BATCHING_ENABLED = os.environ.get('SAGEMAKER_TFS_ENABLE_BATCHING', 'false').lower()
MODEL_CONFIG_FILE_PATH = '/sagemaker/model-config.cfg'
TFS_GRPC_PORT = os.environ.get('TFS_GRPC_PORT')
@@ -64,21 +63,24 @@ def __init__(self):
self._model_tfs_grpc_port = {}
self._model_tfs_pid = {}
self._tfs_ports = self._parse_sagemaker_port_range(SAGEMAKER_TFS_PORT_RANGE)
# If Multi-Model mode is enabled, dependencies/handlers will be imported
# during the _handle_load_model_post()
self.model_handlers = {}
else:
self._tfs_grpc_port = TFS_GRPC_PORT
self._tfs_rest_port = TFS_REST_PORT

if os.path.exists(INFERENCE_SCRIPT_PATH):
self._handler, self._input_handler, self._output_handler = self._import_handlers()
self._handlers = self._make_handler(self._handler,
self._input_handler,
self._output_handler)
else:
self._handlers = default_handler

self._tfs_enable_batching = SAGEMAKER_BATCHING_ENABLED == 'true'
self._tfs_default_model_name = os.environ.get('TFS_DEFAULT_MODEL_NAME', "None")

if PYTHON_PROCESSING_ENABLED:
self._handler, self._input_handler, self._output_handler = self._import_handlers()
self._handlers = self._make_handler(self._handler,
self._input_handler,
self._output_handler)
else:
self._handlers = default_handler

def on_post(self, req, res, model_name=None):
log.info(req.uri)
if model_name or "invocations" in req.uri:
@@ -129,6 +131,9 @@ def _handle_load_model_post(self, res, data):  # noqa: C901
# validate model files are in the specified base_path
if self.validate_model_dir(base_path):
try:
# install custom dependencies, import handlers
self._import_custom_modules(model_name)

tfs_config = tfs_utils.create_tfs_config_individual_model(model_name, base_path)
tfs_config_file = '/sagemaker/tfs-config/{}/model-config.cfg'.format(model_name)
log.info('tensorflow serving model config: \n%s\n', tfs_config)
@@ -197,6 +202,31 @@ def _handle_load_model_post(self, res, data):  # noqa: C901
model_name)
})

def _import_custom_modules(self, model_name):
inference_script_path = "/opt/ml/models/{}/model/code/inference.py".format(model_name)
requirements_file_path = "/opt/ml/models/{}/model/code/requirements.txt".format(model_name)
python_lib_path = "/opt/ml/models/{}/model/code/lib".format(model_name)

if os.path.exists(requirements_file_path):
log.info("pip install dependencies from requirements.txt")
pip_install_cmd = "pip3 install -r {}".format(requirements_file_path)
try:
subprocess.check_call(pip_install_cmd.split())
Contributor: it doesn't matter here, but for future reference: https://docs.python.org/3.7/library/shlex.html#shlex.split
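
To illustrate the reviewer's point with a made-up path: a plain ``str.split()`` breaks arguments that contain spaces or quotes, while ``shlex.split()`` tokenizes the way a shell would.

```python
import shlex

cmd = 'pip3 install -r "/opt/ml/models/demo model/model/code/requirements.txt"'

print(cmd.split())
# ['pip3', 'install', '-r', '"/opt/ml/models/demo', 'model/model/code/requirements.txt"']

print(shlex.split(cmd))
# ['pip3', 'install', '-r', '/opt/ml/models/demo model/model/code/requirements.txt']
```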

except subprocess.CalledProcessError:
log.error('failed to install required packages, exiting.')
raise ChildProcessError('failed to install required packages.')

if os.path.exists(python_lib_path):
log.info("add Python code library path")
sys.path.append(python_lib_path)

if os.path.exists(inference_script_path):
handler, input_handler, output_handler = self._import_handlers(model_name)
model_handlers = self._make_handler(handler, input_handler, output_handler)
self.model_handlers[model_name] = model_handlers
else:
self.model_handlers[model_name] = default_handler

def _cleanup_config_file(self, config_file):
if os.path.exists(config_file):
os.remove(config_file)
@@ -249,16 +279,24 @@ def _handle_invocation_post(self, req, res, model_name=None):

try:
res.status = falcon.HTTP_200
res.body, res.content_type = self._handlers(data, context)
if SAGEMAKER_MULTI_MODEL_ENABLED:
with lock():
handlers = self.model_handlers[model_name]
res.body, res.content_type = handlers(data, context)
else:
res.body, res.content_type = self._handlers(data, context)
except Exception as e: # pylint: disable=broad-except
log.exception('exception handling request: {}'.format(e))
res.status = falcon.HTTP_500
res.body = json.dumps({
'error': str(e)
}).encode('utf-8') # pylint: disable=E1101

def _import_handlers(self):
spec = importlib.util.spec_from_file_location('inference', INFERENCE_SCRIPT_PATH)
def _import_handlers(self, model_name=None):
inference_script = INFERENCE_SCRIPT_PATH
if model_name:
inference_script = "/opt/ml/models/{}/model/code/inference.py".format(model_name)
Contributor: it might make more sense to pass this string to the function so that we don't have it duplicated across the code?
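
One way to act on this suggestion, sketched as a hypothetical refactor rather than what the PR does: keep the format string in one place and build per-model paths through a small helper.

```python
import os

# Hypothetical helper; _import_custom_modules() and _import_handlers() could both
# build their per-model paths through it instead of repeating the format string.
MODEL_CODE_DIR = '/opt/ml/models/{}/model/code'


def model_code_path(model_name, filename):
    return os.path.join(MODEL_CODE_DIR.format(model_name), filename)


# model_code_path('half_plus_three', 'inference.py')
# -> '/opt/ml/models/half_plus_three/model/code/inference.py'
```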

spec = importlib.util.spec_from_file_location('inference', inference_script)
Contributor: nit: use " instead of '

Contributor (author): Yeah, I have a separate PR to address the quotes issue: #149

inference = importlib.util.module_from_spec(spec)
spec.loader.exec_module(inference)

@@ -358,7 +396,6 @@ def validate_model_dir(self, model_path):
versions = []
for _, dirs, _ in os.walk(model_path):
for dirname in dirs:
log.info("dirname: {}".format(dirname))
if dirname.isdigit():
versions.append(dirname)
return self.validate_model_versions(versions)
@@ -383,7 +420,6 @@ def on_get(self, req, res):  # pylint: disable=W0613

class ServiceResources:
def __init__(self):
self._enable_python_processing = PYTHON_PROCESSING_ENABLED
self._enable_model_manager = SAGEMAKER_MULTI_MODEL_ENABLED
self._python_service_resource = PythonServiceResource()
self._ping_resource = PingResource()
13 changes: 7 additions & 6 deletions docker/build_artifacts/sagemaker/serve.py
@@ -83,6 +83,7 @@ def __init__(self):

def _create_tfs_config(self):
models = tfs_utils.find_models()

if not models:
raise ValueError('no SavedModel bundles found!')

@@ -255,12 +256,6 @@ def start(self):
self._state = 'starting'
signal.signal(signal.SIGTERM, self._stop)

self._create_nginx_config()

if self._tfs_enable_batching:
log.info('batching is enabled')
tfs_utils.create_batching_config(self._tfs_batching_config_path)

if self._tfs_enable_multi_model_endpoint:
log.info('multi-model endpoint is enabled, TFS model servers will be started later')
else:
@@ -271,6 +266,12 @@
self._create_tfs_config()
self._start_tfs()

self._create_nginx_config()

if self._tfs_enable_batching:
log.info('batching is enabled')
tfs_utils.create_batching_config(self._tfs_batching_config_path)

if self._use_gunicorn:
self._setup_gunicorn()
self._start_gunicorn()
2 changes: 0 additions & 2 deletions docker/build_artifacts/sagemaker/tfs_utils.py
@@ -111,8 +111,6 @@ def create_tfs_config(
config += ' }\n'
config += '}\n'

log.info('tensorflow serving model config: \n%s\n', config)

with open(tfs_config_path, 'w') as f:
f.write(config)

8 changes: 8 additions & 0 deletions test/integration/local/conftest.py
@@ -53,3 +53,11 @@ def tag(request, framework_version, processor):
if not image_tag:
image_tag = '{}-{}'.format(framework_version, processor)
return image_tag


@pytest.fixture(autouse=True)
def skip_by_device_type(request, processor):
is_gpu = processor == "gpu"
if (request.node.get_closest_marker('skip_gpu') and is_gpu) or \
(request.node.get_closest_marker('skip_cpu') and not is_gpu):
pytest.skip('Skipping because running on \'{}\' instance'.format(processor))
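
For context, a hedged example of how a test can opt out of CPU runs through this fixture; the test body is a placeholder, and the custom markers may also need to be registered (for example in pytest.ini) to avoid unknown-marker warnings.

```python
import pytest


@pytest.mark.skip_cpu
def test_gpu_only_behavior():
    # skipped on CPU instances by the skip_by_device_type fixture above
    assert True
```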
1 change: 0 additions & 1 deletion test/integration/local/test_container.py
@@ -45,7 +45,6 @@ def container(request, docker_base_name, tag, runtime_config):
command = (
'docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080'
' --mount type=volume,source=model_volume,target=/opt/ml/model,readonly'
' -e SAGEMAKER_TFS_DEFAULT_MODEL_NAME=half_plus_three'
' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info'
' -e SAGEMAKER_BIND_TO_PORT=8080'
' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999'
18 changes: 16 additions & 2 deletions test/integration/local/test_multi_model_endpoint.py
@@ -20,8 +20,12 @@
import pytest
import requests

from multi_model_endpoint_test_utils import make_invocation_request, make_list_model_request, \
make_get_model_request, make_load_model_request, make_unload_model_request
from multi_model_endpoint_test_utils import (
make_invocation_request,
make_list_model_request,
make_load_model_request,
make_unload_model_request,
)

PING_URL = 'http://localhost:8080/ping'

@@ -69,11 +73,13 @@ def container(request, docker_base_name, tag, runtime_config):
subprocess.check_call('docker rm -f sagemaker-tensorflow-serving-test'.split())


@pytest.mark.skip_gpu
def test_ping():
res = requests.get(PING_URL)
assert res.status_code == 200


@pytest.mark.skip_gpu
def test_container_start_invocation_fail():
x = {
'instances': [1.0, 2.0, 5.0]
@@ -84,13 +90,15 @@ def test_container_start_invocation_fail():
assert "Model half_plus_three is not loaded yet." in str(y)


@pytest.mark.skip_gpu
def test_list_models_empty():
code, res = make_list_model_request()
res = json.loads(res)
assert code == 200
assert len(res) == 0


@pytest.mark.skip_gpu
def test_delete_unloaded_model():
# unloads the given model/version, no-op if not loaded
model_name = 'non-existing-model'
@@ -99,6 +107,7 @@ def test_delete_unloaded_model():
assert 'Model {} is not loaded yet'.format(model_name) in res


@pytest.mark.skip_gpu
def test_delete_model():
model_name = 'half_plus_three'
model_data = {
@@ -125,6 +134,7 @@ def test_delete_model():
assert 'Model {} is not loaded yet.'.format(model_name) in str(y2)


@pytest.mark.skip_gpu
def test_load_two_models():
model_name_1 = 'half_plus_two'
model_data_1 = {
@@ -165,6 +175,7 @@ def test_load_two_models():
assert len(res3) == 2


@pytest.mark.skip_gpu
def test_load_one_model_two_times():
model_name = 'cifar'
model_data = {
@@ -180,6 +191,7 @@ def test_load_one_model_two_times():
assert'Model {} is already loaded'.format(model_name) in res2


@pytest.mark.skip_gpu
def test_load_non_existing_model():
model_name = 'non-existing'
base_path = '/opt/ml/models/non-existing'
@@ -192,6 +204,7 @@ def test_load_non_existing_model():
assert 'Could not find valid base path {} for servable {}'.format(base_path, model_name) in str(res)


@pytest.mark.skip_gpu
def test_bad_model_reqeust():
bad_model_data = {
'model_name': 'model_name',
@@ -201,6 +214,7 @@ def test_bad_model_reqeust():
assert code == 500


@pytest.mark.skip_gpu
def test_invalid_model_version():
model_name = 'invalid_version'
base_path = '/opt/ml/models/invalid_version'
1 change: 0 additions & 1 deletion test/integration/local/test_pre_post_processing.py
@@ -53,7 +53,6 @@ def container(volume, docker_base_name, tag, runtime_config):
command = (
'docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080'
' --mount type=volume,source={},target=/opt/ml/model,readonly'
' -e SAGEMAKER_TFS_DEFAULT_MODEL_NAME=half_plus_three'
' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info'
' -e SAGEMAKER_BIND_TO_PORT=8080'
' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999'