
Commit 8860ba2

Feature: Support multiple inference.py files and universal inference.py file along with universal requirements.txt file
1 parent 1a265db commit 8860ba2
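For context, the per-model scripts this commit starts loading follow the SageMaker TensorFlow Serving pre/post-processing contract (a combined handler(data, context), or an input_handler/output_handler pair). A minimal sketch of such a script is shown below; the half_plus_two location and the handler bodies are illustrative assumptions, not part of this commit.

# Hypothetical /opt/ml/models/half_plus_two/model/code/inference.py
import json


def input_handler(data, context):
    # Pre-process the request body into a TF Serving REST payload.
    if context.request_content_type == "application/json":
        return data.read().decode("utf-8")
    if context.request_content_type == "text/csv":
        # e.g. "1.0,2.0,5.0" -> {"instances": [1.0, 2.0, 5.0]}
        values = [float(v) for v in data.read().decode("utf-8").split(",")]
        return json.dumps({"instances": values})
    raise ValueError("unsupported content type {}".format(context.request_content_type))


def output_handler(response, context):
    # Post-process the TF Serving response into (body, content_type).
    if response.status_code != 200:
        raise ValueError(response.content.decode("utf-8"))
    return response.content, "application/json"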

File tree: 6 files changed (+104, −26 lines)


docker/build_artifacts/sagemaker/python_service.py

Lines changed: 22 additions & 3 deletions
@@ -17,6 +17,7 @@
 import os
 import subprocess
 import grpc
+import sys
 
 import falcon
 import requests
@@ -143,6 +144,7 @@ def _handle_load_model_post(self, res, data):  # noqa: C901
         # validate model files are in the specified base_path
         if self.validate_model_dir(base_path):
             try:
+                self._import_custom_modules(model_name)
                 tfs_config = tfs_utils.create_tfs_config_individual_model(model_name, base_path)
                 tfs_config_file = "/sagemaker/tfs-config/{}/model-config.cfg".format(model_name)
                 log.info("tensorflow serving model config: \n%s\n", tfs_config)
@@ -221,6 +223,21 @@ def _handle_load_model_post(self, res, data):  # noqa: C901
             }
         )
 
+    def _import_custom_modules(self, model_name):
+        inference_script_path = "/opt/ml/models/{}/model/code/inference.py".format(model_name)
+        python_lib_path = "/opt/ml/models/{}/model/code/lib".format(model_name)
+
+        if os.path.exists(python_lib_path):
+            log.info("add Python code library path")
+            sys.path.append(python_lib_path)
+
+        if os.path.exists(inference_script_path):
+            handler, input_handler, output_handler = self._import_handlers(inference_script_path)
+            model_handlers = self._make_handler(handler, input_handler, output_handler)
+            self.model_handlers[model_name] = model_handlers
+        else:
+            self.model_handlers[model_name] = default_handler
+
     def _cleanup_config_file(self, config_file):
         if os.path.exists(config_file):
             os.remove(config_file)
@@ -264,8 +281,11 @@ def _handle_invocation_post(self, req, res, model_name=None):
 
         try:
            res.status = falcon.HTTP_200
+            handlers = self._handlers
+            if SAGEMAKER_MULTI_MODEL_ENABLED and self.model_handlers:
+                handlers = self.model_handlers[model_name]
+            res.body, res.content_type = handlers(data, context)
 
-            res.body, res.content_type = self._handlers(data, context)
         except Exception as e:  # pylint: disable=broad-except
             log.exception("exception handling request: {}".format(e))
             res.status = falcon.HTTP_500
@@ -276,8 +296,7 @@ def _setup_channel(self, grpc_port):
         log.info("Creating grpc channel for port: %s", grpc_port)
         self._channels[grpc_port] = grpc.insecure_channel("localhost:{}".format(grpc_port))
 
-    def _import_handlers(self):
-        inference_script = INFERENCE_SCRIPT_PATH
+    def _import_handlers(self, inference_script=INFERENCE_SCRIPT_PATH):
         spec = importlib.util.spec_from_file_location("inference", inference_script)
         inference = importlib.util.module_from_spec(spec)
         spec.loader.exec_module(inference)
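The new _import_custom_modules method hands the imported callables to the pre-existing _make_handler. As an illustrative sketch only (the exact body is an assumption and is not part of this commit's diff), that composition presumably works along these lines:

# Sketch of how _make_handler is expected to combine the imported callables.
import requests


def make_handler(custom_handler, custom_input_handler, custom_output_handler):
    # If inference.py defines a combined handler(data, context), use it directly.
    if custom_handler:
        return custom_handler

    # Otherwise wrap input_handler/output_handler around the TF Serving REST call.
    def handler(data, context):
        processed_input = custom_input_handler(data, context)
        response = requests.post(context.rest_uri, data=processed_input)
        return custom_output_handler(response, context)

    return handler

At invocation time, the per-model callable stored in self.model_handlers[model_name] is then used instead of the single shared self._handlers when multi-model mode is enabled.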

docker/build_artifacts/sagemaker/serve.py

Lines changed: 2 additions & 1 deletion
@@ -134,7 +134,8 @@ def __init__(self):
         os.environ["TFS_REST_PORTS"] = self._tfs_rest_concat_ports
 
     def _need_python_service(self):
-        if os.path.exists(INFERENCE_PATH):
+        if (os.path.exists(INFERENCE_PATH) or os.path.exists(REQUIREMENTS_PATH)
+                or os.path.exists(PYTHON_LIB_PATH)):
             self._enable_python_service = True
         if os.environ.get("SAGEMAKER_MULTI_MODEL_UNIVERSAL_BUCKET") and os.environ.get(
             "SAGEMAKER_MULTI_MODEL_UNIVERSAL_PREFIX"

test/integration/local/test_pre_post_processing_mme.py

Lines changed: 80 additions & 22 deletions
@@ -24,10 +24,10 @@
 
 from multi_model_endpoint_test_utils import make_load_model_request, make_headers
 
-
 PING_URL = "http://localhost:8080/ping"
 INVOCATION_URL = "http://localhost:8080/models/{}/invoke"
-MODEL_NAME = "half_plus_three"
+MODEL_NAME_1 = "half_plus_three"
+MODEL_NAME_2 = "half_plus_two"
 
 
 @pytest.fixture(scope="session", autouse=True)
@@ -74,13 +74,22 @@ def container(docker_base_name, tag, runtime_config):
 
 
 @pytest.fixture
-def model():
+def model1():
     model_data = {
-        "model_name": MODEL_NAME,
+        "model_name": MODEL_NAME_1,
         "url": "/opt/ml/models/half_plus_three/model/half_plus_three"
     }
     make_load_model_request(json.dumps(model_data))
-    return MODEL_NAME
+    return MODEL_NAME_1
+
+@pytest.fixture
+def model2():
+    model_data = {
+        "model_name": MODEL_NAME_2,
+        "url": "/opt/ml/models/half_plus_two/model/half_plus_two"
+    }
+    make_load_model_request(json.dumps(model_data))
+    return MODEL_NAME_2
 
 
 @pytest.mark.skip_gpu
@@ -90,20 +99,37 @@ def test_ping_service():
 
 
 @pytest.mark.skip_gpu
-def test_predict_json(model):
+def test_predict_json(model1, model2):
     headers = make_headers()
     data = "{\"instances\": [1.0, 2.0, 5.0]}"
-    response = requests.post(INVOCATION_URL.format(model), data=data, headers=headers).json()
-    assert response == {"predictions": [3.5, 4.0, 5.5]}
+    response1 = requests.post(INVOCATION_URL.format(model1), data=data, headers=headers).json()
+    print("Response 1:")
+    print(response1)
+    assert response1 == {"predictions": [3.5, 4.0, 5.5]}
+    response2 = requests.post(INVOCATION_URL.format(model2), data=data, headers=headers).json()
+    print("Response 2:")
+    print(response2)
+    assert response2 == {"predictions": [2.5, 3.0, 4.5]}
 
 
 @pytest.mark.skip_gpu
 def test_zero_content():
     headers = make_headers()
     x = ""
-    response = requests.post(INVOCATION_URL.format(MODEL_NAME), data=x, headers=headers)
-    assert 500 == response.status_code
-    assert "document is empty" in response.text
+    response1 = requests.post(INVOCATION_URL.format(MODEL_NAME_1), data=x, headers=headers)
+    print("Response 1 status code:")
+    print(response1.status_code)
+    print("Response 1 text:")
+    print(response1.text)
+    assert 500 == response1.status_code
+    assert "document is empty" in response1.text
+    response2 = requests.post(INVOCATION_URL.format(MODEL_NAME_2), data=x, headers=headers)
+    print("Response 2 status code:")
+    print(response2.status_code)
+    print("Response 2 text:")
+    print(response2.text)
+    assert 500 == response2.status_code
+    assert "document is empty" in response2.text
 
 
 @pytest.mark.skip_gpu
@@ -113,34 +139,66 @@ def test_large_input():
     with open(data_file, "r") as file:
         x = file.read()
     headers = make_headers(content_type="text/csv")
-    response = requests.post(INVOCATION_URL.format(MODEL_NAME), data=x, headers=headers).json()
-    predictions = response["predictions"]
-    assert len(predictions) == 753936
+    response1 = requests.post(INVOCATION_URL.format(MODEL_NAME_1), data=x, headers=headers).json()
+    predictions1 = response1["predictions"]
+    print("Response 1:")
+    print(response1)
+    assert len(predictions1) == 753936
+    response2 = requests.post(INVOCATION_URL.format(MODEL_NAME_2), data=x, headers=headers).json()
+    print("Response 2:")
+    print(response2)
+    predictions2 = response2["predictions"]
+    assert len(predictions2) == 753936
 
 
 @pytest.mark.skip_gpu
 def test_csv_input():
     headers = make_headers(content_type="text/csv")
     data = "1.0,2.0,5.0"
-    response = requests.post(INVOCATION_URL.format(MODEL_NAME), data=data, headers=headers).json()
-    assert response == {"predictions": [3.5, 4.0, 5.5]}
+    response1 = requests.post(INVOCATION_URL.format(MODEL_NAME_1), data=data, headers=headers).json()
+    print("Response 1:")
+    print(response1)
+    assert response1 == {"predictions": [3.5, 4.0, 5.5]}
+    response2 = requests.post(INVOCATION_URL.format(MODEL_NAME_2), data=data, headers=headers).json()
+    print("Response 2:")
+    print(response2)
+    assert response2 == {"predictions": [2.5, 3.0, 4.5]}
 
 
 @pytest.mark.skip_gpu
 def test_specific_versions():
     for version in ("123", "124"):
         headers = make_headers(content_type="text/csv", version=version)
         data = "1.0,2.0,5.0"
-        response = requests.post(
-            INVOCATION_URL.format(MODEL_NAME), data=data, headers=headers
+        response1 = requests.post(
+            INVOCATION_URL.format(MODEL_NAME_1), data=data, headers=headers
+        ).json()
+        print("Response 1")
+        print(response1)
+        assert response1 == {"predictions": [3.5, 4.0, 5.5]}
+        response2 = requests.post(
+            INVOCATION_URL.format(MODEL_NAME_2), data=data, headers=headers
         ).json()
-        assert response == {"predictions": [3.5, 4.0, 5.5]}
+        print("Response 2:")
+        print(response2)
+        assert response2 == {"predictions": [2.5, 3.0, 4.5]}
 
 
 @pytest.mark.skip_gpu
 def test_unsupported_content_type():
     headers = make_headers("unsupported-type", "predict")
     data = "aW1hZ2UgYnl0ZXM="
-    response = requests.post(INVOCATION_URL.format(MODEL_NAME), data=data, headers=headers)
-    assert 500 == response.status_code
-    assert "unsupported content type" in response.text
+    response1 = requests.post(INVOCATION_URL.format(MODEL_NAME_1), data=data, headers=headers)
+    print("Response 1 status code:")
+    print(response1.status_code)
+    print("Response 1 text:")
+    print(response1.text)
+    assert 500 == response1.status_code
+    assert "unsupported content type" in response1.text
+    response2 = requests.post(INVOCATION_URL.format(MODEL_NAME_2), data=data, headers=headers)
+    print("Response 2 status code:")
+    print(response2.status_code)
+    print("Response 2 text:")
+    print(response2.text)
+    assert 500 == response2.status_code
+    assert "unsupported content type" in response2.text
Binary file not shown.
Binary file not shown.
