Skip to content

ModelBuilder: Add functionalities to get and set deployment config. #4614

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Apr 25, 2024
32 changes: 29 additions & 3 deletions src/sagemaker/jumpstart/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from __future__ import absolute_import

from functools import lru_cache
from typing import Dict, List, Optional, Union, Any
from typing import Dict, List, Optional, Any, Union
import pandas as pd
from botocore.exceptions import ClientError

Expand Down Expand Up @@ -441,14 +441,23 @@ def set_deployment_config(self, config_name: Optional[str]) -> None:
model_id=self.model_id, model_version=self.model_version, config_name=config_name
)

@property
def deployment_config(self) -> Optional[Dict[str, Any]]:
    """Deployment config currently applied to this model.

    Returns:
        Union[Dict[str, Any], None]: The selected deployment config,
        or None if no config is applied.
    """
    # Delegate lookup to the cached retrieval helper keyed on the
    # currently selected config name.
    selected_config = self._retrieve_selected_deployment_config(self.config_name)
    return selected_config

@property
def benchmark_metrics(self) -> pd.DataFrame:
    """Benchmark metrics for this model's deployment configs.

    Returns:
        pd.DataFrame: Benchmark metrics tabulated per deployment config.
    """
    # Fix: drop the stale duplicate return that still called the
    # pre-rename ``_get_benchmark_data`` helper; only the renamed
    # ``_get_benchmarks_data`` exists.
    return pd.DataFrame(self._get_benchmarks_data(self.config_name))

def display_benchmark_metrics(self) -> None:
"""Display Benchmark Metrics for deployment configs."""
Expand Down Expand Up @@ -851,7 +860,7 @@ def register_deploy_wrapper(*args, **kwargs):
return model_package

@lru_cache
def _get_benchmark_data(self, config_name: str) -> Dict[str, List[str]]:
def _get_benchmarks_data(self, config_name: str) -> Dict[str, List[str]]:
"""Constructs deployment configs benchmark data.

Args:
Expand All @@ -864,6 +873,23 @@ def _get_benchmark_data(self, config_name: str) -> Dict[str, List[str]]:
config_name,
)

@lru_cache
def _retrieve_selected_deployment_config(self, config_name: str) -> Optional[Dict[str, Any]]:
"""Retrieve the deployment config to apply to the model.

Args:
config_name (str): The name of the selected deployment config.
Returns:
Union[Dict[str, Any], None]: The deployment config to apply to the model.
"""
if config_name is None:
return None

for deployment_config in self._deployment_configs:
if deployment_config.get("DeploymentConfigName") == config_name:
return deployment_config
return None

def _convert_to_deployment_config_metadata(
self, config_name: str, metadata_config: JumpStartMetadataConfig
) -> Dict[str, Any]:
Expand Down
20 changes: 12 additions & 8 deletions src/sagemaker/jumpstart/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -2249,17 +2249,17 @@ def to_json(self) -> Dict[str, Any]:
return json_obj


class DeploymentConfig(BaseDeploymentConfigDataHolder):
class DeploymentArgs(BaseDeploymentConfigDataHolder):
"""Dataclass representing a Deployment Config."""

__slots__ = [
"model_data_download_timeout",
"container_startup_health_check_timeout",
"image_uri",
"model_data",
"instance_type",
"environment",
"instance_type",
"compute_resource_requirements",
"model_data_download_timeout",
"container_startup_health_check_timeout",
]

def __init__(
Expand All @@ -2286,9 +2286,11 @@ class DeploymentConfigMetadata(BaseDeploymentConfigDataHolder):
"""Dataclass representing a Deployment Config Metadata"""

__slots__ = [
"config_name",
"version",
"deployment_config_name",
"deployment_args",
"acceleration_configs",
"benchmark_metrics",
"deployment_config",
]

def __init__(
Expand All @@ -2299,6 +2301,8 @@ def __init__(
deploy_kwargs: JumpStartModelDeployKwargs,
):
"""Instantiates DeploymentConfigMetadata object."""
self.config_name = config_name
self.version = "1.0.0"
self.deployment_config_name = config_name
self.deployment_args = DeploymentArgs(init_kwargs, deploy_kwargs)
self.acceleration_configs = None
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will complete this as a follow-up.

self.benchmark_metrics = benchmark_metrics
self.deployment_config = DeploymentConfig(init_kwargs, deploy_kwargs)
26 changes: 21 additions & 5 deletions src/sagemaker/jumpstart/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1011,24 +1011,40 @@ def extract_metrics_from_deployment_configs(
config_name (str): The name of the deployment config use by the model.
"""

data = {"Config Name": [], "Instance Type": [], "Selected": []}
data = {"Config Name": [], "Instance Type": [], "Selected": [], "Accelerated": []}

for index, deployment_config in enumerate(deployment_configs):
if deployment_config.get("DeploymentConfig") is None:
if deployment_config.get("DeploymentArgs") is None:
continue

benchmark_metrics = deployment_config.get("BenchmarkMetrics")
if benchmark_metrics is not None:
data["Config Name"].append(deployment_config.get("ConfigName"))
data["Config Name"].append(deployment_config.get("DeploymentConfigName"))
data["Instance Type"].append(
deployment_config.get("DeploymentConfig").get("InstanceType")
deployment_config.get("DeploymentArgs").get("InstanceType")
)
data["Selected"].append(
"Yes"
if (config_name is not None and config_name == deployment_config.get("ConfigName"))
if (
config_name is not None
and config_name == deployment_config.get("DeploymentConfigName")
)
else "No"
)

accelerated_configs = deployment_config.get("AccelerationConfigs")
if accelerated_configs is None:
data["Accelerated"].append("No")
else:
data["Accelerated"].append(
"Yes"
if (
len(accelerated_configs) > 0
and accelerated_configs[0].get("Enabled", False)
)
else "No"
)

if index == 0:
for benchmark_metric in benchmark_metrics:
column_name = f"{benchmark_metric.get('name')} ({benchmark_metric.get('unit')})"
Expand Down
30 changes: 26 additions & 4 deletions src/sagemaker/serve/builder/jumpstart_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import copy
from abc import ABC, abstractmethod
from datetime import datetime, timedelta
from typing import Type, Any, List, Dict
from typing import Type, Any, List, Dict, Optional
import logging

from sagemaker.model import Model
Expand Down Expand Up @@ -431,25 +431,47 @@ def tune_for_tgi_jumpstart(self, max_tuning_duration: int = 1800):
sharded_supported=sharded_supported, max_tuning_duration=max_tuning_duration
)

def set_deployment_config(self, config_name: Optional[str]) -> None:
    """Apply (or clear) a deployment config on the underlying model.

    Args:
        config_name (Optional[str]): Name of the deployment config to
            apply; pass None to clear any config currently applied.
    """
    # Forward directly to the already-built JumpStart model.
    model = self.pysdk_model
    model.set_deployment_config(config_name)

def get_deployment_config(self) -> Optional[Dict[str, Any]]:
    """Gets the deployment config applied to the model.

    Builds the pre-trained JumpStart model first if it does not exist yet.

    Returns:
        Union[Dict[str, Any], None]: Deployment config applied to this model.
    """
    # Fix: ``getattr(self, "pysdk_model", self._create_pre_trained_js_model())``
    # evaluates its default argument EAGERLY, so the model was rebuilt on
    # every call even when ``pysdk_model`` already existed. Fall back lazily.
    if hasattr(self, "pysdk_model"):
        pysdk_model = self.pysdk_model
    else:
        pysdk_model = self._create_pre_trained_js_model()
    return pysdk_model.deployment_config

def display_benchmark_metrics(self) -> None:
    """Display Markdown benchmark metrics for deployment configs.

    Builds the pre-trained JumpStart model first if it does not exist yet.
    """
    # Fix 1: drop the duplicated pre-refactor call left by the diff, which
    # would have rendered the metrics twice.
    # Fix 2: ``getattr``'s default argument is evaluated eagerly, so the
    # model was rebuilt on every call even when it already existed.
    if hasattr(self, "pysdk_model"):
        pysdk_model = self.pysdk_model
    else:
        pysdk_model = self._create_pre_trained_js_model()
    pysdk_model.display_benchmark_metrics()

def list_deployment_configs(self) -> List[Dict[str, Any]]:
    """List deployment configs for ``This`` model in the current region.

    Builds the pre-trained JumpStart model first if it does not exist yet.

    Returns:
        List[Dict[str, Any]]: A list of deployment configs.
    """
    # Fix 1: the diff left two return statements; the second (refactored)
    # one was unreachable. Keep a single return.
    # Fix 2: ``getattr``'s default argument is evaluated eagerly, so the
    # model was rebuilt on every call even when it already existed.
    if hasattr(self, "pysdk_model"):
        pysdk_model = self.pysdk_model
    else:
        pysdk_model = self._create_pre_trained_js_model()
    return pysdk_model.list_deployment_configs()

def _build_for_jumpstart(self):
"""Placeholder docstring"""
# we do not pickle for jumpstart. set to none
self.secret_key = None
self.jumpstart = True

pysdk_model = self._create_pre_trained_js_model()
pysdk_model = getattr(self, "pysdk_model", self._create_pre_trained_js_model())

image_uri = pysdk_model.image_uri

Expand Down
56 changes: 32 additions & 24 deletions tests/unit/sagemaker/jumpstart/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -7911,11 +7911,9 @@

DEPLOYMENT_CONFIGS = [
{
"ConfigName": "neuron-inference",
"BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
"DeploymentConfig": {
"ModelDataDownloadTimeout": None,
"ContainerStartupHealthCheckTimeout": None,
"Version": "1.0.0",
"DeploymentConfigName": "neuron-inference",
"DeploymentArgs": {
"ImageUri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4"
".0-gpu-py310-cu121-ubuntu20.04",
"ModelData": {
Expand All @@ -7926,7 +7924,6 @@
"CompressionType": "None",
}
},
"InstanceType": "ml.p2.xlarge",
"Environment": {
"SAGEMAKER_PROGRAM": "inference.py",
"ENDPOINT_SERVER_TIMEOUT": "3600",
Expand All @@ -7938,15 +7935,18 @@
"MAX_TOTAL_TOKENS": "2048",
"SAGEMAKER_MODEL_SERVER_WORKERS": "1",
},
"InstanceType": "ml.p2.xlarge",
"ComputeResourceRequirements": {"MinMemoryRequiredInMb": None},
"ModelDataDownloadTimeout": None,
"ContainerStartupHealthCheckTimeout": None,
},
"AccelerationConfigs": None,
"BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
},
{
"ConfigName": "neuron-inference-budget",
"BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
"DeploymentConfig": {
"ModelDataDownloadTimeout": None,
"ContainerStartupHealthCheckTimeout": None,
"Version": "1.0.0",
"DeploymentConfigName": "neuron-inference-budget",
"DeploymentArgs": {
"ImageUri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4"
".0-gpu-py310-cu121-ubuntu20.04",
"ModelData": {
Expand All @@ -7957,7 +7957,6 @@
"CompressionType": "None",
}
},
"InstanceType": "ml.p2.xlarge",
"Environment": {
"SAGEMAKER_PROGRAM": "inference.py",
"ENDPOINT_SERVER_TIMEOUT": "3600",
Expand All @@ -7969,15 +7968,18 @@
"MAX_TOTAL_TOKENS": "2048",
"SAGEMAKER_MODEL_SERVER_WORKERS": "1",
},
"InstanceType": "ml.p2.xlarge",
"ComputeResourceRequirements": {"MinMemoryRequiredInMb": None},
"ModelDataDownloadTimeout": None,
"ContainerStartupHealthCheckTimeout": None,
},
"AccelerationConfigs": None,
"BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
},
{
"ConfigName": "gpu-inference-budget",
"BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
"DeploymentConfig": {
"ModelDataDownloadTimeout": None,
"ContainerStartupHealthCheckTimeout": None,
"Version": "1.0.0",
"DeploymentConfigName": "gpu-inference-budget",
"DeploymentArgs": {
"ImageUri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4"
".0-gpu-py310-cu121-ubuntu20.04",
"ModelData": {
Expand All @@ -7988,7 +7990,6 @@
"CompressionType": "None",
}
},
"InstanceType": "ml.p2.xlarge",
"Environment": {
"SAGEMAKER_PROGRAM": "inference.py",
"ENDPOINT_SERVER_TIMEOUT": "3600",
Expand All @@ -8000,15 +8001,18 @@
"MAX_TOTAL_TOKENS": "2048",
"SAGEMAKER_MODEL_SERVER_WORKERS": "1",
},
"InstanceType": "ml.p2.xlarge",
"ComputeResourceRequirements": {"MinMemoryRequiredInMb": None},
"ModelDataDownloadTimeout": None,
"ContainerStartupHealthCheckTimeout": None,
},
"AccelerationConfigs": None,
"BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
},
{
"ConfigName": "gpu-inference",
"BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
"DeploymentConfig": {
"ModelDataDownloadTimeout": None,
"ContainerStartupHealthCheckTimeout": None,
"Version": "1.0.0",
"DeploymentConfigName": "gpu-inference",
"DeploymentArgs": {
"ImageUri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4"
".0-gpu-py310-cu121-ubuntu20.04",
"ModelData": {
Expand All @@ -8019,7 +8023,6 @@
"CompressionType": "None",
}
},
"InstanceType": "ml.p2.xlarge",
"Environment": {
"SAGEMAKER_PROGRAM": "inference.py",
"ENDPOINT_SERVER_TIMEOUT": "3600",
Expand All @@ -8031,8 +8034,13 @@
"MAX_TOTAL_TOKENS": "2048",
"SAGEMAKER_MODEL_SERVER_WORKERS": "1",
},
"InstanceType": "ml.p2.xlarge",
"ComputeResourceRequirements": {"MinMemoryRequiredInMb": None},
"ModelDataDownloadTimeout": None,
"ContainerStartupHealthCheckTimeout": None,
},
"AccelerationConfigs": None,
"BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
},
]

Expand Down
Loading