Merge remote-tracking branch 'origin' into feat/jumpstart-instance-types

evakravi · evakravi · commit db599df0bb93 · 2023-03-25T23:24:18.000Z
diff --git a/src/sagemaker/jumpstart/artifacts.py b/src/sagemaker/jumpstart/artifacts.py
@@ -179,7 +179,7 @@ def _retrieve_image_uri(
 def _retrieve_model_uri(
     model_id: str,
     model_version: str,
-    model_scope: Optional[str],
+    model_scope: Optional[str] = None,
     region: Optional[str] = None,
     tolerate_vulnerable_model: bool = False,
     tolerate_deprecated_model: bool = False,
@@ -225,7 +225,11 @@ def _retrieve_model_uri(
     )
 
     if model_scope == JumpStartScriptScope.INFERENCE:
-        model_artifact_key = model_specs.hosting_artifact_key
+        model_artifact_key = (
+            getattr(model_specs, "hosting_prepacked_artifact_key", None)
+            or model_specs.hosting_artifact_key
+        )
+
     elif model_scope == JumpStartScriptScope.TRAINING:
         model_artifact_key = model_specs.training_artifact_key
 
diff --git a/src/sagemaker/jumpstart/types.py b/src/sagemaker/jumpstart/types.py
@@ -297,6 +297,7 @@ class JumpStartModelSpecs(JumpStartDataHolderType):
         "default_training_instance_type",
         "supported_training_instance_types",
         "metrics",
+        "hosting_prepacked_artifact_key",
     ]
 
     def __init__(self, spec: Dict[str, Any]):
@@ -346,6 +347,9 @@ def from_json(self, json_obj: Dict[str, Any]) -> None:
             "supported_training_instance_types"
         )
         self.metrics: Optional[List[Dict[str, str]]] = json_obj.get("metrics", None)
+        self.hosting_prepacked_artifact_key: Optional[str] = json_obj.get(
+            "hosting_prepacked_artifact_key", None
+        )
 
         if self.training_supported:
             self.training_ecr_specs: JumpStartECRSpecs = JumpStartECRSpecs(
diff --git a/tests/unit/sagemaker/jumpstart/constants.py b/tests/unit/sagemaker/jumpstart/constants.py
@@ -118,18 +118,110 @@
                 "scope": "container",
             },
         ],
+        "default_inference_instance_type": "",
+        "supported_inference_instance_types": None,
+        "default_training_instance_type": None,
+        "supported_training_instance_types": [],
+        "inference_vulnerable": False,
+        "inference_dependencies": [],
+        "inference_vulnerabilities": [],
+        "training_vulnerable": False,
+        "training_dependencies": [],
+        "training_vulnerabilities": [],
+        "deprecated": False,
+        "metrics": [],
+    },
+    "huggingface-text2text-flan-t5-xxl-fp16": {
+        "model_id": "huggingface-text2text-flan-t5-xxl-fp16",
+        "url": "https://huggingface.co/google/flan-t5-xxl",
+        "version": "1.0.0",
+        "min_sdk_version": "2.130.0",
+        "training_supported": False,
+        "incremental_training_supported": False,
+        "hosting_ecr_specs": {
+            "framework": "pytorch",
+            "framework_version": "1.12.0",
+            "py_version": "py38",
+            "huggingface_transformers_version": "4.17.0",
+        },
+        "hosting_artifact_key": "huggingface-infer/infer-huggingface-text2text-flan-t5-xxl-fp16.tar.gz",
+        "hosting_script_key": "source-directory-tarballs/huggingface/inference/text2text/v1.0.2/sourcedir.tar.gz",
+        "hosting_prepacked_artifact_key": "huggingface-infer/prepack/v1.0.0/infer-prepack-huggingface-"
+        "text2text-flan-t5-xxl-fp16.tar.gz",
+        "hosting_prepacked_artifact_version": "1.0.0",
+        "inference_vulnerable": False,
+        "inference_dependencies": [
+            "accelerate==0.16.0",
+            "bitsandbytes==0.37.0",
+            "filelock==3.9.0",
+            "huggingface-hub==0.12.0",
+            "regex==2022.7.9",
+            "tokenizers==0.13.2",
+            "transformers==4.26.0",
+        ],
+        "inference_vulnerabilities": [],
+        "training_vulnerable": False,
+        "training_dependencies": [],
+        "training_vulnerabilities": [],
+        "deprecated": False,
+        "inference_environment_variables": [
+            {
+                "name": "SAGEMAKER_PROGRAM",
+                "type": "text",
+                "default": "inference.py",
+                "scope": "container",
+            },
+            {
+                "name": "SAGEMAKER_SUBMIT_DIRECTORY",
+                "type": "text",
+                "default": "/opt/ml/model/code",
+                "scope": "container",
+            },
+            {
+                "name": "SAGEMAKER_CONTAINER_LOG_LEVEL",
+                "type": "text",
+                "default": "20",
+                "scope": "container",
+            },
+            {
+                "name": "MODEL_CACHE_ROOT",
+                "type": "text",
+                "default": "/opt/ml/model",
+                "scope": "container",
+            },
+            {"name": "SAGEMAKER_ENV", "type": "text", "default": "1", "scope": "container"},
+            {
+                "name": "SAGEMAKER_MODEL_SERVER_WORKERS",
+                "type": "text",
+                "default": "1",
+                "scope": "container",
+            },
+            {
+                "name": "SAGEMAKER_MODEL_SERVER_TIMEOUT",
+                "type": "text",
+                "default": "3600",
+                "scope": "container",
+            },
+        ],
         "inference_vulnerable": False,
         "inference_dependencies": [],
         "inference_vulnerabilities": [],
         "training_vulnerable": False,
         "training_dependencies": [],
         "training_vulnerabilities": [],
         "deprecated": False,
-        "default_inference_instance_type": "",
-        "supported_inference_instance_types": None,
         "default_training_instance_type": None,
         "supported_training_instance_types": [],
-    }
+        "metrics": [],
+        "default_inference_instance_type": "ml.g5.12xlarge",
+        "supported_inference_instance_types": [
+            "ml.g5.12xlarge",
+            "ml.g5.24xlarge",
+            "ml.p3.8xlarge",
+            "ml.p3.16xlarge",
+            "ml.g4dn.12xlarge",
+        ],
+    },
 }
 
 
@@ -1214,6 +1306,7 @@
     "training_artifact_key": "pytorch-training/train-pytorch-ic-mobilenet-v2.tar.gz",
     "hosting_script_key": "source-directory-tarballs/pytorch/inference/ic/v1.0.0/sourcedir.tar.gz",
     "training_script_key": "source-directory-tarballs/pytorch/transfer_learning/ic/v1.0.0/sourcedir.tar.gz",
+    "hosting_prepacked_artifact_key": None,
     "hyperparameters": [
         {
             "name": "epochs",
diff --git a/tests/unit/sagemaker/model_uris/jumpstart/test_combined_artifact.py b/tests/unit/sagemaker/model_uris/jumpstart/test_combined_artifact.py
@@ -0,0 +1,38 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+from mock.mock import patch
+
+from sagemaker import model_uris
+
+from tests.unit.sagemaker.jumpstart.utils import get_special_model_spec
+
+
+@patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs")
+def test_jumpstart_combined_artifacts(patched_get_model_specs):
+    """Inference model URI resolves to the prepacked artifact key when the spec has one."""
+    patched_get_model_specs.side_effect = get_special_model_spec
+    # NOTE(review): stubbed specs presumably come from the test constants -- confirm.
+    model_id_combined_model_artifact = "huggingface-text2text-flan-t5-xxl-fp16"
+
+    uri = model_uris.retrieve(
+        region="us-west-2",
+        model_scope="inference",
+        model_id=model_id_combined_model_artifact,
+        model_version="*",
+    )
+    assert (
+        uri == "s3://jumpstart-cache-prod-us-west-2/huggingface-infer/"
+        "prepack/v1.0.0/infer-prepack-huggingface-text2text-flan-t5-xxl-fp16.tar.gz"
+    )