Skip to content

ModelBuilder: Add functionalities to get and set deployment config. #4614

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Apr 25, 2024
32 changes: 29 additions & 3 deletions src/sagemaker/jumpstart/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from __future__ import absolute_import

from functools import lru_cache
from typing import Dict, List, Optional, Union, Any
from typing import Dict, List, Optional, Any, Union
import pandas as pd
from botocore.exceptions import ClientError

Expand Down Expand Up @@ -441,14 +441,23 @@ def set_deployment_config(self, config_name: Optional[str]) -> None:
model_id=self.model_id, model_version=self.model_version, config_name=config_name
)

@property
def deployment_config(self) -> Optional[Dict[str, Any]]:
    """Deployment config currently applied to this model.

    Returns:
        Union[Dict[str, Any], None]: The selected deployment config,
        or None if no config is applied.
    """
    # Delegate lookup to the cached retrieval helper keyed on the
    # currently selected config name.
    selected_config = self._retrieve_selected_deployment_config(self.config_name)
    return selected_config

@property
def benchmark_metrics(self) -> pd.DataFrame:
    """Benchmark metrics for this model's deployment configs.

    Returns:
        pd.DataFrame: Benchmark metrics tabulated per deployment config.
    """
    # Fix: drop the stale duplicate return that still called the
    # pre-rename ``_get_benchmark_data`` helper; only the renamed
    # ``_get_benchmarks_data`` exists.
    return pd.DataFrame(self._get_benchmarks_data(self.config_name))

def display_benchmark_metrics(self) -> None:
"""Display Benchmark Metrics for deployment configs."""
Expand Down Expand Up @@ -851,7 +860,7 @@ def register_deploy_wrapper(*args, **kwargs):
return model_package

@lru_cache
def _get_benchmark_data(self, config_name: str) -> Dict[str, List[str]]:
def _get_benchmarks_data(self, config_name: str) -> Dict[str, List[str]]:
"""Constructs deployment configs benchmark data.

Args:
Expand All @@ -864,6 +873,23 @@ def _get_benchmark_data(self, config_name: str) -> Dict[str, List[str]]:
config_name,
)

@lru_cache
def _retrieve_selected_deployment_config(self, config_name: str) -> Optional[Dict[str, Any]]:
"""Retrieve the deployment config to apply to the model.

Args:
config_name (str): The name of the selected deployment config.
Returns:
Union[Dict[str, Any], None]: The deployment config to apply to the model.
"""
if config_name is None:
return None

for deployment_config in self._deployment_configs:
if deployment_config.get("DeploymentConfigName") == config_name:
return deployment_config
return None

def _convert_to_deployment_config_metadata(
self, config_name: str, metadata_config: JumpStartMetadataConfig
) -> Dict[str, Any]:
Expand Down
20 changes: 12 additions & 8 deletions src/sagemaker/jumpstart/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -2249,17 +2249,17 @@ def to_json(self) -> Dict[str, Any]:
return json_obj


class DeploymentConfig(BaseDeploymentConfigDataHolder):
class DeploymentArgs(BaseDeploymentConfigDataHolder):
"""Dataclass representing a Deployment Config."""

__slots__ = [
"model_data_download_timeout",
"container_startup_health_check_timeout",
"image_uri",
"model_data",
"instance_type",
"environment",
"instance_type",
"compute_resource_requirements",
"model_data_download_timeout",
"container_startup_health_check_timeout",
]

def __init__(
Expand All @@ -2286,9 +2286,11 @@ class DeploymentConfigMetadata(BaseDeploymentConfigDataHolder):
"""Dataclass representing a Deployment Config Metadata"""

__slots__ = [
"config_name",
"version",
"deployment_config_name",
"deployment_args",
"acceleration_configs",
"benchmark_metrics",
"deployment_config",
]

def __init__(
Expand All @@ -2299,6 +2301,8 @@ def __init__(
deploy_kwargs: JumpStartModelDeployKwargs,
):
"""Instantiates DeploymentConfigMetadata object."""
self.config_name = config_name
self.version = "1.0.0"
self.deployment_config_name = config_name
self.deployment_args = DeploymentArgs(init_kwargs, deploy_kwargs)
self.acceleration_configs = None
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will complete this as a follow-up.

self.benchmark_metrics = benchmark_metrics
self.deployment_config = DeploymentConfig(init_kwargs, deploy_kwargs)
26 changes: 21 additions & 5 deletions src/sagemaker/jumpstart/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1011,24 +1011,40 @@ def extract_metrics_from_deployment_configs(
config_name (str): The name of the deployment config use by the model.
"""

data = {"Config Name": [], "Instance Type": [], "Selected": []}
data = {"Config Name": [], "Instance Type": [], "Selected": [], "Accelerated": []}

for index, deployment_config in enumerate(deployment_configs):
if deployment_config.get("DeploymentConfig") is None:
if deployment_config.get("DeploymentArgs") is None:
continue

benchmark_metrics = deployment_config.get("BenchmarkMetrics")
if benchmark_metrics is not None:
data["Config Name"].append(deployment_config.get("ConfigName"))
data["Config Name"].append(deployment_config.get("DeploymentConfigName"))
data["Instance Type"].append(
deployment_config.get("DeploymentConfig").get("InstanceType")
deployment_config.get("DeploymentArgs").get("InstanceType")
)
data["Selected"].append(
"Yes"
if (config_name is not None and config_name == deployment_config.get("ConfigName"))
if (
config_name is not None
and config_name == deployment_config.get("DeploymentConfigName")
)
else "No"
)

accelerated_configs = deployment_config.get("AccelerationConfigs")
if accelerated_configs is None:
data["Accelerated"].append("No")
else:
data["Accelerated"].append(
"Yes"
if (
len(accelerated_configs) > 0
and accelerated_configs[0].get("Enabled", False)
)
else "No"
)

if index == 0:
for benchmark_metric in benchmark_metrics:
column_name = f"{benchmark_metric.get('name')} ({benchmark_metric.get('unit')})"
Expand Down
30 changes: 26 additions & 4 deletions src/sagemaker/serve/builder/jumpstart_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import copy
from abc import ABC, abstractmethod
from datetime import datetime, timedelta
from typing import Type, Any, List, Dict
from typing import Type, Any, List, Dict, Optional
import logging

from sagemaker.model import Model
Expand Down Expand Up @@ -431,25 +431,47 @@ def tune_for_tgi_jumpstart(self, max_tuning_duration: int = 1800):
sharded_supported=sharded_supported, max_tuning_duration=max_tuning_duration
)

def set_deployment_config(self, config_name: Optional[str]) -> None:
    """Apply (or clear) a deployment config on the underlying model.

    Args:
        config_name (Optional[str]): Name of the deployment config to
            apply; pass None to clear any config currently applied.
    """
    # Forward directly to the already-built JumpStart model.
    model = self.pysdk_model
    model.set_deployment_config(config_name)

def get_deployment_config(self) -> Optional[Dict[str, Any]]:
    """Gets the deployment config applied to the model.

    Builds the pre-trained JumpStart model first if it does not exist yet.

    Returns:
        Union[Dict[str, Any], None]: Deployment config applied to this model.
    """
    # Fix: ``getattr(self, "pysdk_model", self._create_pre_trained_js_model())``
    # evaluates its default argument EAGERLY, so the model was rebuilt on
    # every call even when ``pysdk_model`` already existed. Fall back lazily.
    if hasattr(self, "pysdk_model"):
        pysdk_model = self.pysdk_model
    else:
        pysdk_model = self._create_pre_trained_js_model()
    return pysdk_model.deployment_config

def display_benchmark_metrics(self) -> None:
    """Display Markdown benchmark metrics for deployment configs.

    Builds the pre-trained JumpStart model first if it does not exist yet.
    """
    # Fix 1: drop the duplicated pre-refactor call left by the diff, which
    # would have rendered the metrics twice.
    # Fix 2: ``getattr``'s default argument is evaluated eagerly, so the
    # model was rebuilt on every call even when it already existed.
    if hasattr(self, "pysdk_model"):
        pysdk_model = self.pysdk_model
    else:
        pysdk_model = self._create_pre_trained_js_model()
    pysdk_model.display_benchmark_metrics()

def list_deployment_configs(self) -> List[Dict[str, Any]]:
    """List deployment configs for ``This`` model in the current region.

    Builds the pre-trained JumpStart model first if it does not exist yet.

    Returns:
        List[Dict[str, Any]]: A list of deployment configs.
    """
    # Fix 1: the diff left two return statements; the second (refactored)
    # one was unreachable. Keep a single return.
    # Fix 2: ``getattr``'s default argument is evaluated eagerly, so the
    # model was rebuilt on every call even when it already existed.
    if hasattr(self, "pysdk_model"):
        pysdk_model = self.pysdk_model
    else:
        pysdk_model = self._create_pre_trained_js_model()
    return pysdk_model.list_deployment_configs()

def _build_for_jumpstart(self):
"""Placeholder docstring"""
# we do not pickle for jumpstart. set to none
self.secret_key = None
self.jumpstart = True

pysdk_model = self._create_pre_trained_js_model()
pysdk_model = getattr(self, "pysdk_model", self._create_pre_trained_js_model())

image_uri = pysdk_model.image_uri

Expand Down
56 changes: 32 additions & 24 deletions tests/unit/sagemaker/jumpstart/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -7911,11 +7911,9 @@

DEPLOYMENT_CONFIGS = [
{
"ConfigName": "neuron-inference",
"BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
"DeploymentConfig": {
"ModelDataDownloadTimeout": None,
"ContainerStartupHealthCheckTimeout": None,
"Version": "1.0.0",
"DeploymentConfigName": "neuron-inference",
"DeploymentArgs": {
"ImageUri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4"
".0-gpu-py310-cu121-ubuntu20.04",
"ModelData": {
Expand All @@ -7926,7 +7924,6 @@
"CompressionType": "None",
}
},
"InstanceType": "ml.p2.xlarge",
"Environment": {
"SAGEMAKER_PROGRAM": "inference.py",
"ENDPOINT_SERVER_TIMEOUT": "3600",
Expand All @@ -7938,15 +7935,18 @@
"MAX_TOTAL_TOKENS": "2048",
"SAGEMAKER_MODEL_SERVER_WORKERS": "1",
},
"InstanceType": "ml.p2.xlarge",
"ComputeResourceRequirements": {"MinMemoryRequiredInMb": None},
"ModelDataDownloadTimeout": None,
"ContainerStartupHealthCheckTimeout": None,
},
"AccelerationConfigs": None,
"BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
},
{
"ConfigName": "neuron-inference-budget",
"BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
"DeploymentConfig": {
"ModelDataDownloadTimeout": None,
"ContainerStartupHealthCheckTimeout": None,
"Version": "1.0.0",
"DeploymentConfigName": "neuron-inference-budget",
"DeploymentArgs": {
"ImageUri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4"
".0-gpu-py310-cu121-ubuntu20.04",
"ModelData": {
Expand All @@ -7957,7 +7957,6 @@
"CompressionType": "None",
}
},
"InstanceType": "ml.p2.xlarge",
"Environment": {
"SAGEMAKER_PROGRAM": "inference.py",
"ENDPOINT_SERVER_TIMEOUT": "3600",
Expand All @@ -7969,15 +7968,18 @@
"MAX_TOTAL_TOKENS": "2048",
"SAGEMAKER_MODEL_SERVER_WORKERS": "1",
},
"InstanceType": "ml.p2.xlarge",
"ComputeResourceRequirements": {"MinMemoryRequiredInMb": None},
"ModelDataDownloadTimeout": None,
"ContainerStartupHealthCheckTimeout": None,
},
"AccelerationConfigs": None,
"BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
},
{
"ConfigName": "gpu-inference-budget",
"BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
"DeploymentConfig": {
"ModelDataDownloadTimeout": None,
"ContainerStartupHealthCheckTimeout": None,
"Version": "1.0.0",
"DeploymentConfigName": "gpu-inference-budget",
"DeploymentArgs": {
"ImageUri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4"
".0-gpu-py310-cu121-ubuntu20.04",
"ModelData": {
Expand All @@ -7988,7 +7990,6 @@
"CompressionType": "None",
}
},
"InstanceType": "ml.p2.xlarge",
"Environment": {
"SAGEMAKER_PROGRAM": "inference.py",
"ENDPOINT_SERVER_TIMEOUT": "3600",
Expand All @@ -8000,15 +8001,18 @@
"MAX_TOTAL_TOKENS": "2048",
"SAGEMAKER_MODEL_SERVER_WORKERS": "1",
},
"InstanceType": "ml.p2.xlarge",
"ComputeResourceRequirements": {"MinMemoryRequiredInMb": None},
"ModelDataDownloadTimeout": None,
"ContainerStartupHealthCheckTimeout": None,
},
"AccelerationConfigs": None,
"BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
},
{
"ConfigName": "gpu-inference",
"BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
"DeploymentConfig": {
"ModelDataDownloadTimeout": None,
"ContainerStartupHealthCheckTimeout": None,
"Version": "1.0.0",
"DeploymentConfigName": "gpu-inference",
"DeploymentArgs": {
"ImageUri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4"
".0-gpu-py310-cu121-ubuntu20.04",
"ModelData": {
Expand All @@ -8019,7 +8023,6 @@
"CompressionType": "None",
}
},
"InstanceType": "ml.p2.xlarge",
"Environment": {
"SAGEMAKER_PROGRAM": "inference.py",
"ENDPOINT_SERVER_TIMEOUT": "3600",
Expand All @@ -8031,8 +8034,13 @@
"MAX_TOTAL_TOKENS": "2048",
"SAGEMAKER_MODEL_SERVER_WORKERS": "1",
},
"InstanceType": "ml.p2.xlarge",
"ComputeResourceRequirements": {"MinMemoryRequiredInMb": None},
"ModelDataDownloadTimeout": None,
"ContainerStartupHealthCheckTimeout": None,
},
"AccelerationConfigs": None,
"BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
},
]

Expand Down
Loading