This repository was archived by the owner on May 23, 2024. It is now read-only.

Commit b97ce67

feature: add model_version_policy to model config (#155)
1 parent: 5a9690d

File tree: 17 files changed (+85, -62 lines)


docker/build_artifacts/sagemaker/serve.py

Lines changed: 11 additions & 7 deletions
@@ -99,15 +99,23 @@ def _create_tfs_config(self):
         config = "model_config_list: {\n"
         for m in models:
             config += "  config: {\n"
-            config += "    name: '{}',\n".format(os.path.basename(m))
-            config += "    base_path: '{}',\n".format(m)
+            config += "    name: '{}'\n".format(os.path.basename(m))
+            config += "    base_path: '{}'\n".format(m)
             config += "    model_platform: 'tensorflow'\n"
+
+            config += "    model_version_policy: {\n"
+            config += "      specific: {\n"
+            for version in tfs_utils.find_model_versions(m):
+                config += "        versions: {}\n".format(version)
+            config += "      }\n"
+            config += "    }\n"
+
             config += "  }\n"
         config += "}\n"
 
         log.info("tensorflow serving model config: \n%s\n", config)
 
-        with open("/sagemaker/model-config.cfg", "w") as f:
+        with open(self._tfs_config_path, "w") as f:
             f.write(config)
 
     def _setup_gunicorn(self):

@@ -259,10 +267,6 @@ def start(self):
         if self._tfs_enable_multi_model_endpoint:
             log.info("multi-model endpoint is enabled, TFS model servers will be started later")
         else:
-            tfs_utils.create_tfs_config(
-                self._tfs_default_model_name,
-                self._tfs_config_path
-            )
             self._create_tfs_config()
             self._start_tfs()
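
For reference, a minimal sketch (not part of the commit) of the TFS model config text that the updated _create_tfs_config emits. The model name, base path, and hardcoded version list are illustrative stand-ins for a real model directory scanned by tfs_utils.find_model_versions:

import os

def sketch_tfs_config(model_path, versions):
    # Mirrors the string-building loop above for a single model entry.
    config = "model_config_list: {\n"
    config += "  config: {\n"
    config += "    name: '{}'\n".format(os.path.basename(model_path))
    config += "    base_path: '{}'\n".format(model_path)
    config += "    model_platform: 'tensorflow'\n"
    config += "    model_version_policy: {\n"
    config += "      specific: {\n"
    for version in versions:
        config += "        versions: {}\n".format(version)
    config += "      }\n"
    config += "    }\n"
    config += "  }\n"
    config += "}\n"
    return config

print(sketch_tfs_config("/opt/ml/model/half_plus_three", ["123", "124"]))
# model_config_list: {
#   config: {
#     name: 'half_plus_three'
#     base_path: '/opt/ml/model/half_plus_three'
#     model_platform: 'tensorflow'
#     model_version_policy: {
#       specific: {
#         versions: 123
#         versions: 124
#       }
#     }
#   }
# }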

docker/build_artifacts/sagemaker/tfs_utils.py

Lines changed: 14 additions & 32 deletions
@@ -77,42 +77,20 @@ def parse_tfs_custom_attributes(req):
 def create_tfs_config_individual_model(model_name, base_path):
     config = "model_config_list: {\n"
     config += "  config: {\n"
-    config += "    name: '{}',\n".format(model_name)
-    config += "    base_path: '{}',\n".format(base_path)
+    config += "    name: '{}'\n".format(model_name)
+    config += "    base_path: '{}'\n".format(base_path)
     config += "    model_platform: 'tensorflow'\n"
-    config += "  }\n"
-    config += "}\n"
-    return config
-
 
-def create_tfs_config(
-    tfs_default_model_name,
-    tfs_config_path,
-):
-    models = find_models()
-    if not models:
-        raise ValueError("no SavedModel bundles found!")
+    config += "    model_version_policy: {\n"
+    config += "      specific: {\n"
+    for version in find_model_versions(base_path):
+        config += "        versions: {}\n".format(version)
+    config += "      }\n"
+    config += "    }\n"
 
-    if tfs_default_model_name == "None":
-        default_model = os.path.basename(models[0])
-        if default_model:
-            tfs_default_model_name = default_model
-            log.info("using default model name: {}".format(tfs_default_model_name))
-        else:
-            log.info("no default model detected")
-
-    # config (may) include duplicate 'config' keys, so we can't just dump a dict
-    config = "model_config_list: {\n"
-    for m in models:
-        config += "  config: {\n"
-        config += "    name: '{}',\n".format(os.path.basename(m))
-        config += "    base_path: '{}',\n".format(m)
-        config += "    model_platform: 'tensorflow'\n"
-        config += "  }\n"
+    config += "  }\n"
     config += "}\n"
-
-    with open(tfs_config_path, 'w') as f:
-        f.write(config)
+    return config
 
 
 def tfs_command(tfs_grpc_port,

@@ -142,6 +120,10 @@ def find_models():
     return models
 
 
+def find_model_versions(model_path):
+    return [version.lstrip("0") for version in os.listdir(model_path)]
+
+
 def _find_saved_model_files(path):
     for e in os.scandir(path):
         if e.is_dir():
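
A small usage sketch (not part of the commit) of the new find_model_versions helper, using a temporary directory in place of a real SavedModel tree; the version directory names are illustrative:

import os
import tempfile

def find_model_versions(model_path):
    # Each subdirectory of a model's base_path is treated as a version number;
    # leading zeros are stripped so the value reads as a plain integer in the config.
    return [version.lstrip("0") for version in os.listdir(model_path)]

base_path = tempfile.mkdtemp()
for version_dir in ("123", "0124"):
    os.mkdir(os.path.join(base_path, version_dir))

print(sorted(find_model_versions(base_path)))  # ['123', '124']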

test/integration/local/multi_model_endpoint_test_utils.py

Lines changed: 9 additions & 10 deletions
@@ -11,27 +11,26 @@
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
 
-import json
 import requests
 
 INVOCATION_URL = "http://localhost:8080/models/{}/invoke"
 MODELS_URL = "http://localhost:8080/models"
 DELETE_MODEL_URL = "http://localhost:8080/models/{}"
 
 
-def make_headers(content_type="application/json", method="predict"):
-    headers = {
+def make_headers(content_type="application/json", method="predict", version=None):
+    custom_attributes = "tfs-method={}".format(method)
+    if version:
+        custom_attributes += ",tfs-model-version={}".format(version)
+
+    return {
         "Content-Type": content_type,
-        "X-Amzn-SageMaker-Custom-Attributes": "tfs-method=%s" % method
+        "X-Amzn-SageMaker-Custom-Attributes": custom_attributes,
     }
-    return headers
 
 
-def make_invocation_request(data, model_name, content_type="application/json"):
-    headers = {
-        "Content-Type": content_type,
-        "X-Amzn-SageMaker-Custom-Attributes": "tfs-method=predict"
-    }
+def make_invocation_request(data, model_name, content_type="application/json", version=None):
+    headers = make_headers(content_type=content_type, method="predict", version=version)
     response = requests.post(INVOCATION_URL.format(model_name), data=data, headers=headers)
     return response.status_code, response.content.decode("utf-8")
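
A quick sketch (not part of the commit) of the headers the updated make_headers produces when a version is passed; the version value is illustrative:

def make_headers(content_type="application/json", method="predict", version=None):
    custom_attributes = "tfs-method={}".format(method)
    if version:
        custom_attributes += ",tfs-model-version={}".format(version)

    return {
        "Content-Type": content_type,
        "X-Amzn-SageMaker-Custom-Attributes": custom_attributes,
    }

print(make_headers(version="123"))
# {'Content-Type': 'application/json',
#  'X-Amzn-SageMaker-Custom-Attributes': 'tfs-method=predict,tfs-model-version=123'}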

test/integration/local/test_container.py

Lines changed: 18 additions & 3 deletions
@@ -71,11 +71,14 @@ def container(request, docker_base_name, tag, runtime_config):
     subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split())
 
 
-def make_request(data, content_type="application/json", method="predict"):
+def make_request(data, content_type="application/json", method="predict", version=None):
+    custom_attributes = "tfs-model-name=half_plus_three,tfs-method={}".format(method)
+    if version:
+        custom_attributes += ",tfs-model-version={}".format(version)
+
     headers = {
         "Content-Type": content_type,
-        "X-Amzn-SageMaker-Custom-Attributes":
-            "tfs-model-name=half_plus_three,tfs-method=%s" % method
+        "X-Amzn-SageMaker-Custom-Attributes": custom_attributes,
     }
     response = requests.post(BASE_URL, data=data, headers=headers)
     return json.loads(response.content.decode("utf-8"))

@@ -101,6 +104,18 @@ def test_predict_twice():
     assert z == {"predictions": [3.5, 4.0, 5.5]}
 
 
+def test_predict_specific_versions():
+    x = {
+        "instances": [1.0, 2.0, 5.0]
+    }
+
+    y = make_request(json.dumps(x), version=123)
+    assert y == {"predictions": [3.5, 4.0, 5.5]}
+
+    y = make_request(json.dumps(x), version=124)
+    assert y == {"predictions": [3.5, 4.0, 5.5]}
+
+
 def test_predict_two_instances():
     x = {
         "instances": [[1.0, 2.0, 5.0], [1.0, 2.0, 5.0]]

test/integration/local/test_multi_model_endpoint.py

Lines changed: 5 additions & 4 deletions
@@ -165,10 +165,11 @@ def test_load_two_models():
     assert y1 == {"predictions": [2.5, 3.0, 4.5]}
 
     # make invocation request to the second model
-    code_invoke2, y2 = make_invocation_request(json.dumps(x), "half_plus_three")
-    y2 = json.loads(y2)
-    assert code_invoke2 == 200
-    assert y2 == {"predictions": [3.5, 4.0, 5.5]}
+    for ver in ("123", "124"):
+        code_invoke2, y2 = make_invocation_request(json.dumps(x), "half_plus_three", version=ver)
+        y2 = json.loads(y2)
+        assert code_invoke2 == 200
+        assert y2 == {"predictions": [3.5, 4.0, 5.5]}
 
     code_list, res3 = make_list_model_request()
     res3 = json.loads(res3)

test/integration/local/test_pre_post_processing.py

Lines changed: 15 additions & 4 deletions
@@ -77,12 +77,15 @@ def container(volume, docker_base_name, tag, runtime_config):
     subprocess.check_call("docker rm -f sagemaker-tensorflow-serving-test".split())
 
 
-def make_headers(content_type, method):
-    headers = {
+def make_headers(content_type, method, version=None):
+    custom_attributes = "tfs-model-name=half_plus_three,tfs-method={}".format(method)
+    if version:
+        custom_attributes += ",tfs-model-version={}".format(version)
+
+    return {
         "Content-Type": content_type,
-        "X-Amzn-SageMaker-Custom-Attributes": "tfs-model-name=half_plus_three,tfs-method=%s" % method
+        "X-Amzn-SageMaker-Custom-Attributes": custom_attributes,
     }
-    return headers
 
 
 def test_predict_json():

@@ -118,6 +121,14 @@ def test_csv_input():
     assert response == {"predictions": [3.5, 4.0, 5.5]}
 
 
+def test_predict_specific_versions():
+    for version in ("123", "124"):
+        headers = make_headers("application/json", "predict", version=version)
+        data = "{\"instances\": [1.0, 2.0, 5.0]}"
+        response = requests.post(INVOCATIONS_URL, data=data, headers=headers).json()
+        assert response == {"predictions": [3.5, 4.0, 5.5]}
+
+
 def test_unsupported_content_type():
     headers = make_headers("unsupported-type", "predict")
     data = "aW1hZ2UgYnl0ZXM="

test/integration/local/test_pre_post_processing_mme.py

Lines changed: 11 additions & 0 deletions
@@ -135,6 +135,17 @@ def test_csv_input():
     assert response == {"predictions": [3.5, 4.0, 5.5]}
 
 
+@pytest.mark.skip_gpu
+def test_specific_versions():
+    for version in ("123", "124"):
+        headers = make_headers(content_type="text/csv", version=version)
+        data = "1.0,2.0,5.0"
+        response = requests.post(
+            INVOCATION_URL.format(MODEL_NAME), data=data, headers=headers
+        ).json()
+        assert response == {"predictions": [3.5, 4.0, 5.5]}
+
+
 @pytest.mark.skip_gpu
 def test_unsupported_content_type():
     headers = make_headers("unsupported-type", "predict")

test/integration/local/test_tfs_batching.py

Lines changed: 0 additions & 2 deletions
@@ -13,8 +13,6 @@
 
 import os
 import subprocess
-import sys
-import time
 
 import pytest

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+asset-file-contents
Binary file not shown.
Binary file not shown.

test/resources/mme/half_plus_three/abcde/dummy.txt
Whitespace-only changes.

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+asset-file-contents
Binary file not shown.
Binary file not shown.
