-
Notifications
You must be signed in to change notification settings - Fork 1.2k
feat: Support for ModelBuilder In_Process Mode (1/2) #4784
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 20 commits
2cc906b
b25295a
fb28458
3576ea9
d3b8e9b
68cede1
02e54ef
f39cca6
cc0ca14
18fc3f2
495c7b4
1121f47
b6062a7
1ec209c
ca6c818
cd3dbaa
f52f36c
1843210
d0fe3ac
1b93244
b40f36c
68000e1
826c5c4
1fd6291
de6f861
5cc24ba
64efa90
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
"""Module that defines the InProcessMode class""" | ||
|
||
from __future__ import absolute_import | ||
from pathlib import Path | ||
import logging | ||
from typing import Dict, Type | ||
import time | ||
|
||
from sagemaker.base_predictor import PredictorBase | ||
from sagemaker.serve.spec.inference_spec import InferenceSpec | ||
from sagemaker.serve.builder.schema_builder import SchemaBuilder | ||
from sagemaker.serve.utils.types import ModelServer | ||
from sagemaker.serve.utils.exceptions import LocalDeepPingException | ||
from sagemaker.serve.model_server.multi_model_server.server import InProcessMultiModelServer | ||
from sagemaker.session import Session | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
_PING_HEALTH_CHECK_FAIL_MSG = (
    "Ping health check did not pass. "
    + "Please increase container_timeout_seconds or review your inference code."
)


class InProcessMode(
    InProcessMultiModelServer,
):
    """A class that holds methods to deploy model to a container in process environment"""

    # Seconds to wait between consecutive ping health checks.
    _PING_INTERVAL_SECONDS = 10
    # Upper bound on ping attempts so create_server cannot loop forever
    # (original code had an unbounded `while True`, which made the
    # post-loop failure branch unreachable).
    _MAX_PING_ATTEMPTS = 30

    def __init__(
        self,
        model_server: ModelServer,
        inference_spec: Type[InferenceSpec],
        schema_builder: Type[SchemaBuilder],
        session: Session,
        model_path: str = None,
        env_vars: Dict = None,
    ):
        """Initialize in-process mode state.

        Args:
            model_server: Which model server to run (only ``ModelServer.MMS``
                is supported in process mode today).
            inference_spec: Spec object whose ``load`` callable materializes the model.
            schema_builder: Schema builder describing sample input/output.
            session: SageMaker session (unused for serving itself; kept for parity
                with the other modes).
            model_path: Optional local directory containing model artifacts.
            env_vars: Optional environment variables for the server.
        """
        # pylint: disable=bad-super-call
        super().__init__()

        self.inference_spec = inference_spec
        self.model_path = model_path
        self.env_vars = env_vars
        self.session = session
        self.schema_builder = schema_builder
        self.model_server = model_server
        self.client = None
        self.container = None
        self.secret_key = None
        self._invoke_serving = None
        self._ping_container = None

    def load(self, model_path: str = None):
        """Load the model via the inference spec after validating the path.

        Args:
            model_path: Directory holding model artifacts; falls back to
                ``self.model_path`` when omitted.

        Returns:
            Whatever ``self.inference_spec.load`` returns for the resolved path.

        Raises:
            Exception: if the path does not exist or is not a directory.
        """
        path = Path(model_path if model_path else self.model_path)
        if not path.exists():
            raise Exception("model_path does not exist")
        if not path.is_dir():
            raise Exception("model_path is not a valid directory")

        return self.inference_spec.load(str(path))

    def prepare(self):
        """Prepares the server"""

    def create_server(
        self,
        predictor: PredictorBase,
    ):
        """Create the server and poll its ping health check until it passes.

        Args:
            predictor: Predictor used by the deep-ping callable to issue a
                sample prediction.

        Raises:
            ValueError: if the configured model server is not supported
                in process mode.
            LocalDeepPingException: if the health check does not pass within
                the retry budget.
        """
        # self.destroy_server()

        logger.info("Waiting for model server %s to start up...", self.model_server)

        if self.model_server == ModelServer.MMS:
            self._ping_container = self._multi_model_server_deep_ping
        else:
            # Fail fast with a clear message instead of dereferencing a
            # None ping callable below (TypeError in the original code).
            raise ValueError(
                "Model server %s is not supported in process mode" % self.model_server
            )

        healthy = False
        for _ in range(self._MAX_PING_ATTEMPTS):
            time.sleep(self._PING_INTERVAL_SECONDS)

            healthy, response = self._ping_container(predictor)
            if healthy:
                logger.debug("Ping health check has passed. Returned %s", str(response))
                break

        if not healthy:
            raise LocalDeepPingException(_PING_HEALTH_CHECK_FAIL_MSG)
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,23 @@ | |
logger = logging.getLogger(__name__) | ||
|
||
|
||
class InProcessMultiModelServer:
    """In Process Mode Multi Model server instance"""

    def _start_serving(self):
        """Initializes the start of the server.

        Raises:
            NotImplementedError: always — in-process serving is not implemented yet.
        """
        # Raise instead of returning an exception object: the original
        # `return Exception(...)` handed callers an Exception as a value,
        # which is silently truthy and never surfaces as an error.
        raise NotImplementedError("Not implemented")

    def _invoke_multi_model_server_serving(self, request: object, content_type: str, accept: str):
        """Invokes the MMS server by sending POST request.

        Raises:
            NotImplementedError: always — in-process invocation is not implemented yet.
        """
        raise NotImplementedError("Not implemented")

    def _multi_model_server_deep_ping(self, predictor: PredictorBase):
        """Sends a deep ping to ensure prediction.

        Currently a stub that always reports healthy with no response payload,
        so that in-process mode's health-check loop can proceed.
        """
        response = None
        return (True, response)
|
||
|
||
class LocalMultiModelServer: | ||
"""Local Multi Model server instance""" | ||
|
||
|
@@ -31,7 +48,7 @@ def _start_serving( | |
secret_key: str, | ||
env_vars: dict, | ||
): | ||
"""Placeholder docstring""" | ||
"""Initializes the start of the server""" | ||
env = { | ||
"SAGEMAKER_SUBMIT_DIRECTORY": "/opt/ml/model/code", | ||
"SAGEMAKER_PROGRAM": "inference.py", | ||
|
@@ -59,7 +76,7 @@ def _start_serving( | |
) | ||
|
||
def _invoke_multi_model_server_serving(self, request: object, content_type: str, accept: str): | ||
"""Placeholder docstring""" | ||
"""Invokes MMS server by hitting the docker host""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice!! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks ! |
||
try: | ||
response = requests.post( | ||
f"http://{get_docker_host()}:8080/invocations", | ||
|
@@ -73,7 +90,7 @@ def _invoke_multi_model_server_serving(self, request: object, content_type: str, | |
raise Exception("Unable to send request to the local container server") from e | ||
|
||
def _multi_model_server_deep_ping(self, predictor: PredictorBase): | ||
"""Placeholder docstring""" | ||
"""Deep ping in order to ensure prediction""" | ||
response = None | ||
try: | ||
response = predictor.predict(self.schema_builder.sample_input) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
"""Placeholder Docstring""" | ||
"""Exceptions used across different model builder invocations""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice thanks for the update There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Of course, thanks for the suggestion. |
||
|
||
from __future__ import absolute_import | ||
|
||
|
@@ -24,6 +24,16 @@ def __init__(self, message): | |
super().__init__(message=message) | ||
|
||
|
||
class InProcessDeepPingException(ModelBuilderException):
    """Raised when in-process model serving fails the deep ping health check."""

    fmt = "Error Message: {message}"
    model_builder_error_code = 1

    def __init__(self, message):
        """Forward the failure *message* to the base model-builder exception."""
        super().__init__(message=message)
|
||
|
||
class LocalModelOutOfMemoryException(ModelBuilderException): | ||
"""Raise when local model serving fails to load the model""" | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would leave these methods as stubs .... return an Exception("Not implemented")
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have stubbed it, thank you.