This repository was archived by the owner on May 23, 2024. It is now read-only.

Commit 3bab56e

chuyang-deng (Chuyang Deng) and metrizable authored
change: update MME Pre/Post-Processing model and script paths (#153)
* fix: increasing max_retry for model availability check
* adjust retries max number
* update tfs pre-post-processing file path and test
* update readme
* Update test/integration/local/test_multi_model_endpoint.py
  Co-authored-by: Eric Johnson <[email protected]>
* Update docker/build_artifacts/sagemaker/python_service.py
  Co-authored-by: Eric Johnson <[email protected]>
* fix: run sagemaker tests for PR build
* revert buildspec
* revert build and publish script

Co-authored-by: Chuyang Deng <[email protected]>
Co-authored-by: Eric Johnson <[email protected]>
1 parent 96392ec commit 3bab56e

12 files changed: +112 −43 lines

README.md

Lines changed: 7 additions & 9 deletions
@@ -645,23 +645,21 @@ Only 90% of the ports will be utilized and each loaded model will be allocated w
 For example, if the ``SAGEMAKER_SAFE_PORT_RANGE`` is between 9000 to 9999, the maximum number of models that can be loaded to the endpoint at the same time would be 499 ((9999 - 9000) * 0.9 / 2).
 
 ### Using Multi-Model Endpoint with Pre/Post-Processing
-Multi-Model Endpoint can be used together with Pre/Post-Processing. However, please note that in Multi-Model mode, the path of ``inference.py`` is ``/opt/ml/models/code`` instead of ``/opt/ml/model/code``.
-Also, all loaded models will share the same ``inference.py`` to handle invocation requests. An example of the directory structure of Multi-Model Endpoint and Pre/Post-Processing would look like this:
+Multi-Model Endpoint can be used together with Pre/Post-Processing. Each model will need its own ``inference.py`` otherwise default handlers will be used. An example of the directory structure of Multi-Model Endpoint and Pre/Post-Processing would look like this:
 
-    model1
+    /opt/ml/models/model1/model
         |--[model_version_number]
            |--variables
            |--saved_model.pb
-    model2
+    /opt/ml/models/model2/model
         |--[model_version_number]
            |--assets
            |--variables
            |--saved_model.pb
-    code
-        |--lib
-            |--external_module
-        |--inference.py
-        |--requirements.txt
+        code
+            |--lib
+                |--external_module
+            |--inference.py
 
 ## Contributing
 
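For readers following the README change above, here is a minimal sketch of what a per-model ``inference.py`` might contain. The ``input_handler``/``output_handler`` pair follows this container's documented pre/post-processing interface, but the handler bodies below are illustrative only and are not part of this commit. With the new layout, such a file would live under ``/opt/ml/models/<model_name>/model/code/inference.py`` and is imported when the model is loaded.

    # Hypothetical per-model handler file, e.g. /opt/ml/models/model1/model/code/inference.py.
    # The bodies below are illustrative placeholders, not code from this commit.
    import json


    def input_handler(data, context):
        """Pre-process the request before it is forwarded to TensorFlow Serving."""
        if context.request_content_type == 'application/json':
            payload = data.read().decode('utf-8')
            return payload if len(payload) else json.dumps({'instances': []})
        raise ValueError('unsupported content type: {}'.format(context.request_content_type))


    def output_handler(response, context):
        """Post-process the TensorFlow Serving response before returning it to the client."""
        if response.status_code != 200:
            raise ValueError(response.content.decode('utf-8'))
        return response.content, context.accept_header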

docker/build_artifacts/sagemaker/python_service.py

Lines changed: 53 additions & 17 deletions
@@ -17,6 +17,7 @@
 import os
 import subprocess
 import time
+import sys
 
 import falcon
 import requests
@@ -27,10 +28,8 @@
 import tfs_utils
 
 SAGEMAKER_MULTI_MODEL_ENABLED = os.environ.get('SAGEMAKER_MULTI_MODEL', 'false').lower() == 'true'
-INFERENCE_SCRIPT_PATH = '/opt/ml/{}/code/inference.py'.format('models'
-                                                               if SAGEMAKER_MULTI_MODEL_ENABLED
-                                                               else 'model')
-PYTHON_PROCESSING_ENABLED = os.path.exists(INFERENCE_SCRIPT_PATH)
+INFERENCE_SCRIPT_PATH = '/opt/ml/model/code/inference.py'
+
 SAGEMAKER_BATCHING_ENABLED = os.environ.get('SAGEMAKER_TFS_ENABLE_BATCHING', 'false').lower()
 MODEL_CONFIG_FILE_PATH = '/sagemaker/model-config.cfg'
 TFS_GRPC_PORT = os.environ.get('TFS_GRPC_PORT')
@@ -64,21 +63,24 @@ def __init__(self):
             self._model_tfs_grpc_port = {}
             self._model_tfs_pid = {}
             self._tfs_ports = self._parse_sagemaker_port_range(SAGEMAKER_TFS_PORT_RANGE)
+            # If Multi-Model mode is enabled, dependencies/handlers will be imported
+            # during the _handle_load_model_post()
+            self.model_handlers = {}
         else:
             self._tfs_grpc_port = TFS_GRPC_PORT
             self._tfs_rest_port = TFS_REST_PORT
 
+        if os.path.exists(INFERENCE_SCRIPT_PATH):
+            self._handler, self._input_handler, self._output_handler = self._import_handlers()
+            self._handlers = self._make_handler(self._handler,
+                                                self._input_handler,
+                                                self._output_handler)
+        else:
+            self._handlers = default_handler
+
         self._tfs_enable_batching = SAGEMAKER_BATCHING_ENABLED == 'true'
         self._tfs_default_model_name = os.environ.get('TFS_DEFAULT_MODEL_NAME', "None")
 
-        if PYTHON_PROCESSING_ENABLED:
-            self._handler, self._input_handler, self._output_handler = self._import_handlers()
-            self._handlers = self._make_handler(self._handler,
-                                                self._input_handler,
-                                                self._output_handler)
-        else:
-            self._handlers = default_handler
-
     def on_post(self, req, res, model_name=None):
         log.info(req.uri)
         if model_name or "invocations" in req.uri:
@@ -129,6 +131,9 @@ def _handle_load_model_post(self, res, data): # noqa: C901
         # validate model files are in the specified base_path
         if self.validate_model_dir(base_path):
             try:
+                # install custom dependencies, import handlers
+                self._import_custom_modules(model_name)
+
                 tfs_config = tfs_utils.create_tfs_config_individual_model(model_name, base_path)
                 tfs_config_file = '/sagemaker/tfs-config/{}/model-config.cfg'.format(model_name)
                 log.info('tensorflow serving model config: \n%s\n', tfs_config)
@@ -197,6 +202,31 @@ def _handle_load_model_post(self, res, data): # noqa: C901
                     model_name)
             })
 
+    def _import_custom_modules(self, model_name):
+        inference_script_path = "/opt/ml/models/{}/model/code/inference.py".format(model_name)
+        requirements_file_path = "/opt/ml/models/{}/model/code/requirements.txt".format(model_name)
+        python_lib_path = "/opt/ml/models/{}/model/code/lib".format(model_name)
+
+        if os.path.exists(requirements_file_path):
+            log.info("pip install dependencies from requirements.txt")
+            pip_install_cmd = "pip3 install -r {}".format(requirements_file_path)
+            try:
+                subprocess.check_call(pip_install_cmd.split())
+            except subprocess.CalledProcessError:
+                log.error('failed to install required packages, exiting.')
+                raise ChildProcessError('failed to install required packages.')
+
+        if os.path.exists(python_lib_path):
+            log.info("add Python code library path")
+            sys.path.append(python_lib_path)
+
+        if os.path.exists(inference_script_path):
+            handler, input_handler, output_handler = self._import_handlers(model_name)
+            model_handlers = self._make_handler(handler, input_handler, output_handler)
+            self.model_handlers[model_name] = model_handlers
+        else:
+            self.model_handlers[model_name] = default_handler
+
     def _cleanup_config_file(self, config_file):
         if os.path.exists(config_file):
             os.remove(config_file)
@@ -249,16 +279,24 @@ def _handle_invocation_post(self, req, res, model_name=None):
 
         try:
             res.status = falcon.HTTP_200
-            res.body, res.content_type = self._handlers(data, context)
+            if SAGEMAKER_MULTI_MODEL_ENABLED:
+                with lock():
+                    handlers = self.model_handlers[model_name]
+                    res.body, res.content_type = handlers(data, context)
+            else:
+                res.body, res.content_type = self._handlers(data, context)
         except Exception as e:  # pylint: disable=broad-except
            log.exception('exception handling request: {}'.format(e))
            res.status = falcon.HTTP_500
            res.body = json.dumps({
                'error': str(e)
            }).encode('utf-8')  # pylint: disable=E1101
 
-    def _import_handlers(self):
-        spec = importlib.util.spec_from_file_location('inference', INFERENCE_SCRIPT_PATH)
+    def _import_handlers(self, model_name=None):
+        inference_script = INFERENCE_SCRIPT_PATH
+        if model_name:
+            inference_script = "/opt/ml/models/{}/model/code/inference.py".format(model_name)
+        spec = importlib.util.spec_from_file_location('inference', inference_script)
         inference = importlib.util.module_from_spec(spec)
         spec.loader.exec_module(inference)
 
@@ -358,7 +396,6 @@ def validate_model_dir(self, model_path):
         versions = []
         for _, dirs, _ in os.walk(model_path):
             for dirname in dirs:
-                log.info("dirname: {}".format(dirname))
                 if dirname.isdigit():
                     versions.append(dirname)
         return self.validate_model_versions(versions)
@@ -383,7 +420,6 @@ def on_get(self, req, res): # pylint: disable=W0613
 
 class ServiceResources:
     def __init__(self):
-        self._enable_python_processing = PYTHON_PROCESSING_ENABLED
         self._enable_model_manager = SAGEMAKER_MULTI_MODEL_ENABLED
         self._python_service_resource = PythonServiceResource()
         self._ping_resource = PingResource()
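As context for the ``_import_handlers`` and ``_import_custom_modules`` changes above, the sketch below shows the ``importlib`` pattern they build on to load a model-specific ``inference.py`` at model-load time. The function names and the fallback logic here are illustrative, not copied from the repository:

    # Standalone sketch of the importlib mechanism used for per-model handler imports:
    # load inference.py from disk and fall back to defaults for anything not defined.
    import importlib.util


    def load_inference_module(model_name):
        path = '/opt/ml/models/{}/model/code/inference.py'.format(model_name)
        spec = importlib.util.spec_from_file_location('inference', path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)  # runs inference.py once, at model-load time
        return module


    def resolve_handlers(module, default_input_handler, default_output_handler):
        # prefer a single handler(data, context); otherwise fall back to the
        # input_handler/output_handler pair, using defaults for anything missing
        handler = getattr(module, 'handler', None)
        input_handler = getattr(module, 'input_handler', default_input_handler)
        output_handler = getattr(module, 'output_handler', default_output_handler)
        return handler, input_handler, output_handler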

docker/build_artifacts/sagemaker/serve.py

Lines changed: 7 additions & 6 deletions
@@ -83,6 +83,7 @@ def __init__(self):
 
     def _create_tfs_config(self):
         models = tfs_utils.find_models()
+
         if not models:
             raise ValueError('no SavedModel bundles found!')
 
@@ -255,12 +256,6 @@ def start(self):
         self._state = 'starting'
         signal.signal(signal.SIGTERM, self._stop)
 
-        self._create_nginx_config()
-
-        if self._tfs_enable_batching:
-            log.info('batching is enabled')
-            tfs_utils.create_batching_config(self._tfs_batching_config_path)
-
         if self._tfs_enable_multi_model_endpoint:
             log.info('multi-model endpoint is enabled, TFS model servers will be started later')
         else:
@@ -271,6 +266,12 @@ def start(self):
             self._create_tfs_config()
             self._start_tfs()
 
+        self._create_nginx_config()
+
+        if self._tfs_enable_batching:
+            log.info('batching is enabled')
+            tfs_utils.create_batching_config(self._tfs_batching_config_path)
+
         if self._use_gunicorn:
             self._setup_gunicorn()
             self._start_gunicorn()

docker/build_artifacts/sagemaker/tfs_utils.py

Lines changed: 0 additions & 2 deletions
@@ -111,8 +111,6 @@ def create_tfs_config(
     config += ' }\n'
     config += '}\n'
 
-    log.info('tensorflow serving model config: \n%s\n', config)
-
     with open(tfs_config_path, 'w') as f:
         f.write(config)
 

test/integration/local/conftest.py

Lines changed: 8 additions & 0 deletions
@@ -53,3 +53,11 @@ def tag(request, framework_version, processor):
     if not image_tag:
         image_tag = '{}-{}'.format(framework_version, processor)
     return image_tag
+
+
+@pytest.fixture(autouse=True)
+def skip_by_device_type(request, processor):
+    is_gpu = processor == "gpu"
+    if (request.node.get_closest_marker('skip_gpu') and is_gpu) or \
+            (request.node.get_closest_marker('skip_cpu') and not is_gpu):
+        pytest.skip('Skipping because running on \'{}\' instance'.format(processor))
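The ``skip_gpu``/``skip_cpu`` marks consumed by this fixture are custom pytest marks. One way to register them so pytest does not warn about unknown marks is sketched below; this assumes registration in ``conftest.py``, which this commit does not show (``pytest.ini`` or ``setup.cfg`` would work equally well):

    # Sketch: register the custom marks to silence "unknown mark" warnings.
    def pytest_configure(config):
        config.addinivalue_line('markers', 'skip_gpu: skip this test on GPU images')
        config.addinivalue_line('markers', 'skip_cpu: skip this test on CPU images')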

test/integration/local/test_container.py

Lines changed: 0 additions & 1 deletion
@@ -45,7 +45,6 @@ def container(request, docker_base_name, tag, runtime_config):
     command = (
         'docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080'
         ' --mount type=volume,source=model_volume,target=/opt/ml/model,readonly'
-        ' -e SAGEMAKER_TFS_DEFAULT_MODEL_NAME=half_plus_three'
         ' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info'
         ' -e SAGEMAKER_BIND_TO_PORT=8080'
         ' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999'

test/integration/local/test_multi_model_endpoint.py

Lines changed: 16 additions & 2 deletions
@@ -20,8 +20,12 @@
 import pytest
 import requests
 
-from multi_model_endpoint_test_utils import make_invocation_request, make_list_model_request, \
-    make_get_model_request, make_load_model_request, make_unload_model_request
+from multi_model_endpoint_test_utils import (
+    make_invocation_request,
+    make_list_model_request,
+    make_load_model_request,
+    make_unload_model_request,
+)
 
 PING_URL = 'http://localhost:8080/ping'
 
@@ -69,11 +73,13 @@ def container(request, docker_base_name, tag, runtime_config):
     subprocess.check_call('docker rm -f sagemaker-tensorflow-serving-test'.split())
 
 
+@pytest.mark.skip_gpu
 def test_ping():
     res = requests.get(PING_URL)
     assert res.status_code == 200
 
 
+@pytest.mark.skip_gpu
 def test_container_start_invocation_fail():
     x = {
         'instances': [1.0, 2.0, 5.0]
@@ -84,13 +90,15 @@ def test_container_start_invocation_fail():
     assert "Model half_plus_three is not loaded yet." in str(y)
 
 
+@pytest.mark.skip_gpu
 def test_list_models_empty():
     code, res = make_list_model_request()
     res = json.loads(res)
     assert code == 200
     assert len(res) == 0
 
 
+@pytest.mark.skip_gpu
 def test_delete_unloaded_model():
     # unloads the given model/version, no-op if not loaded
     model_name = 'non-existing-model'
@@ -99,6 +107,7 @@ def test_delete_unloaded_model():
     assert 'Model {} is not loaded yet'.format(model_name) in res
 
 
+@pytest.mark.skip_gpu
 def test_delete_model():
     model_name = 'half_plus_three'
     model_data = {
@@ -125,6 +134,7 @@ def test_delete_model():
     assert 'Model {} is not loaded yet.'.format(model_name) in str(y2)
 
 
+@pytest.mark.skip_gpu
 def test_load_two_models():
     model_name_1 = 'half_plus_two'
     model_data_1 = {
@@ -165,6 +175,7 @@ def test_load_two_models():
     assert len(res3) == 2
 
 
+@pytest.mark.skip_gpu
 def test_load_one_model_two_times():
     model_name = 'cifar'
     model_data = {
@@ -180,6 +191,7 @@ def test_load_one_model_two_times():
     assert'Model {} is already loaded'.format(model_name) in res2
 
 
+@pytest.mark.skip_gpu
 def test_load_non_existing_model():
     model_name = 'non-existing'
     base_path = '/opt/ml/models/non-existing'
@@ -192,6 +204,7 @@ def test_load_non_existing_model():
     assert 'Could not find valid base path {} for servable {}'.format(base_path, model_name) in str(res)
 
 
+@pytest.mark.skip_gpu
 def test_bad_model_reqeust():
     bad_model_data = {
         'model_name': 'model_name',
@@ -201,6 +214,7 @@ def test_bad_model_reqeust():
     assert code == 500
 
 
+@pytest.mark.skip_gpu
 def test_invalid_model_version():
     model_name = 'invalid_version'
     base_path = '/opt/ml/models/invalid_version'
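The helpers imported at the top of this test file are not part of the commit. The sketch below approximates what ``make_load_model_request`` and ``make_invocation_request`` likely wrap, assuming the standard SageMaker multi-model endpoint contract served on ``localhost:8080``; treat the URLs and payload keys as assumptions rather than the repository's actual helper code:

    # Rough sketch of the multi-model management and invocation calls the helpers wrap.
    import json

    import requests

    MODELS_URL = 'http://localhost:8080/models'


    def load_model(model_name, base_path):
        # ask the container to load a model from its base_path under /opt/ml/models
        payload = {'model_name': model_name, 'url': base_path}
        res = requests.post(MODELS_URL, data=json.dumps(payload))
        return res.status_code, res.content.decode('utf-8')


    def invoke_model(model_name, instances):
        # invoke a previously loaded model by name
        url = 'http://localhost:8080/models/{}/invoke'.format(model_name)
        res = requests.post(url,
                            data=json.dumps({'instances': instances}),
                            headers={'Content-Type': 'application/json'})
        return res.status_code, res.content.decode('utf-8')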

test/integration/local/test_pre_post_processing.py

Lines changed: 0 additions & 1 deletion
@@ -53,7 +53,6 @@ def container(volume, docker_base_name, tag, runtime_config):
     command = (
         'docker run {}--name sagemaker-tensorflow-serving-test -p 8080:8080'
         ' --mount type=volume,source={},target=/opt/ml/model,readonly'
-        ' -e SAGEMAKER_TFS_DEFAULT_MODEL_NAME=half_plus_three'
         ' -e SAGEMAKER_TFS_NGINX_LOGLEVEL=info'
         ' -e SAGEMAKER_BIND_TO_PORT=8080'
         ' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999'

0 commit comments
