From 63147a95537ce1b615d187b7a12b2be7d1ff898d Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Wed, 24 Apr 2024 14:37:50 -0700 Subject: [PATCH 01/12] Add funtionalities to get and set deployment config --- src/sagemaker/jumpstart/model.py | 32 ++++++- .../serve/builder/jumpstart_builder.py | 20 +++- src/sagemaker/utils.py | 2 +- .../sagemaker/jumpstart/model/test_model.py | 48 ++++++++++ .../serve/builder/test_js_builder.py | 91 ++++++++++++++++++- 5 files changed, 184 insertions(+), 9 deletions(-) diff --git a/src/sagemaker/jumpstart/model.py b/src/sagemaker/jumpstart/model.py index 2addb0a044..638bf7f1bd 100644 --- a/src/sagemaker/jumpstart/model.py +++ b/src/sagemaker/jumpstart/model.py @@ -441,6 +441,15 @@ def set_deployment_config(self, config_name: Optional[str]) -> None: model_id=self.model_id, model_version=self.model_version, config_name=config_name ) + @property + def deployment_config(self) -> Union[Dict[str, Any], None]: + """The deployment config to apply to the model. + + Returns: + Union[Dict[str, Any], None]: Deployment config to apply to this model. + """ + return self._retrieve_selected_deployment_config(self.config_name) + @property def benchmark_metrics(self) -> pd.DataFrame: """Benchmark Metrics for deployment configs @@ -448,7 +457,7 @@ def benchmark_metrics(self) -> pd.DataFrame: Returns: Metrics: Pandas DataFrame object. """ - return pd.DataFrame(self._get_benchmark_data(self.config_name)) + return pd.DataFrame(self._get_benchmarks_data(self.config_name)) def display_benchmark_metrics(self) -> None: """Display Benchmark Metrics for deployment configs.""" @@ -850,8 +859,8 @@ def register_deploy_wrapper(*args, **kwargs): return model_package - @lru_cache - def _get_benchmark_data(self, config_name: str) -> Dict[str, List[str]]: + @lru_cache(typed=True) + def _get_benchmarks_data(self, config_name: str) -> Dict[str, List[str]]: """Constructs deployment configs benchmark data. Args: @@ -864,6 +873,23 @@ def _get_benchmark_data(self, config_name: str) -> Dict[str, List[str]]: config_name, ) + @lru_cache(typed=True) + def _retrieve_selected_deployment_config(self, config_name: str) -> Union[Dict[str, Any], None]: + """Retrieve the deployment config to apply to the model. + + Args: + config_name (str): The name of the selected deployment config. + Returns: + Union[Dict[str, Any], None]: The deployment config to apply to the model. + """ + if config_name is None: + return None + + for deployment_config in self._deployment_configs: + if deployment_config.get("ConfigName") == config_name: + return deployment_config + return None + def _convert_to_deployment_config_metadata( self, config_name: str, metadata_config: JumpStartMetadataConfig ) -> Dict[str, Any]: diff --git a/src/sagemaker/serve/builder/jumpstart_builder.py b/src/sagemaker/serve/builder/jumpstart_builder.py index c1760311e7..78ec08a821 100644 --- a/src/sagemaker/serve/builder/jumpstart_builder.py +++ b/src/sagemaker/serve/builder/jumpstart_builder.py @@ -16,7 +16,7 @@ import copy from abc import ABC, abstractmethod from datetime import datetime, timedelta -from typing import Type, Any, List, Dict +from typing import Type, Any, List, Dict, Optional, Union import logging from sagemaker.model import Model @@ -431,6 +431,24 @@ def tune_for_tgi_jumpstart(self, max_tuning_duration: int = 1800): sharded_supported=sharded_supported, max_tuning_duration=max_tuning_duration ) + def set_deployment_config(self, config_name: Optional[str]) -> None: + """Sets the deployment config to apply to the model. + + Args: + config_name (Optional[str]): + The name of the deployment config. Set to None to unset + any existing config that is applied to the model. + """ + self.pysdk_model.set_deployment_config(config_name) + + def get_deployment_config(self) -> Union[Dict[str, Any], None]: + """Gets the deployment config to apply to the model. + + Returns: + Union[Dict[str, Any], None]: Deployment config to apply to this model. + """ + return self.pysdk_model.deployment_config + def display_benchmark_metrics(self): """Display Markdown Benchmark Metrics for deployment configs.""" self.pysdk_model.display_benchmark_metrics() diff --git a/src/sagemaker/utils.py b/src/sagemaker/utils.py index 35f60b37e1..8a572786bc 100644 --- a/src/sagemaker/utils.py +++ b/src/sagemaker/utils.py @@ -1660,7 +1660,7 @@ def deep_override_dict( return unflatten_dict(flattened_dict1) if flattened_dict1 else {} -@lru_cache +@lru_cache(typed=True) def get_instance_rate_per_hour( instance_type: str, region: str, diff --git a/tests/unit/sagemaker/jumpstart/model/test_model.py b/tests/unit/sagemaker/jumpstart/model/test_model.py index 2df904dce2..6145932cc8 100644 --- a/tests/unit/sagemaker/jumpstart/model/test_model.py +++ b/tests/unit/sagemaker/jumpstart/model/test_model.py @@ -1765,6 +1765,54 @@ def test_model_list_deployment_configs_empty( self.assertTrue(len(configs) == 0) + @mock.patch("sagemaker.jumpstart.model.get_init_kwargs") + @mock.patch("sagemaker.jumpstart.utils.verify_model_region_and_return_specs") + @mock.patch("sagemaker.jumpstart.model.get_instance_rate_per_hour") + @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor._get_manifest") + @mock.patch("sagemaker.jumpstart.factory.model.Session") + @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs") + @mock.patch("sagemaker.jumpstart.model.Model.deploy") + @mock.patch("sagemaker.jumpstart.factory.model.JUMPSTART_DEFAULT_REGION_NAME", region) + def test_model_retrieve_deployment_config( + self, + mock_model_deploy: mock.Mock, + mock_get_model_specs: mock.Mock, + mock_session: mock.Mock, + mock_get_manifest: mock.Mock, + mock_get_instance_rate_per_hour: mock.Mock, + mock_verify_model_region_and_return_specs: mock.Mock, + mock_get_init_kwargs: mock.Mock, + ): + model_id, _ = "pytorch-eqa-bert-base-cased", "*" + + mock_get_init_kwargs.side_effect = lambda *args, **kwargs: get_mock_init_kwargs(model_id) + mock_verify_model_region_and_return_specs.side_effect = ( + lambda *args, **kwargs: get_base_spec_with_prototype_configs() + ) + mock_get_instance_rate_per_hour.side_effect = lambda *args, **kwargs: { + "name": "Instance Rate", + "unit": "USD/Hrs", + "value": "0.0083000000", + } + mock_get_model_specs.side_effect = get_prototype_spec_with_configs + mock_get_manifest.side_effect = ( + lambda region, model_type, *args, **kwargs: get_prototype_manifest(region, model_type) + ) + mock_model_deploy.return_value = default_predictor + + mock_session.return_value = sagemaker_session + + model = JumpStartModel(model_id=model_id) + + expected = get_base_deployment_configs()[0] + model.set_deployment_config(expected.get("ConfigName")) + + self.assertEqual(model.deployment_config, expected) + + # Unset + model.set_deployment_config(None) + self.assertIsNone(model.deployment_config) + @mock.patch("sagemaker.jumpstart.model.get_init_kwargs") @mock.patch("sagemaker.jumpstart.utils.verify_model_region_and_return_specs") @mock.patch("sagemaker.jumpstart.model.get_instance_rate_per_hour") diff --git a/tests/unit/sagemaker/serve/builder/test_js_builder.py b/tests/unit/sagemaker/serve/builder/test_js_builder.py index 3d5148772e..c60726ccab 100644 --- a/tests/unit/sagemaker/serve/builder/test_js_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_js_builder.py @@ -676,13 +676,96 @@ def test_list_deployment_configs( lambda: DEPLOYMENT_CONFIGS ) - model = builder.build() + builder.build() builder.serve_settings.telemetry_opt_out = True - configs = model.list_deployment_configs() + configs = builder.list_deployment_configs() self.assertEqual(configs, DEPLOYMENT_CONFIGS) + @patch("sagemaker.serve.builder.jumpstart_builder._capture_telemetry", side_effect=None) + @patch( + "sagemaker.serve.builder.jumpstart_builder.JumpStart._is_jumpstart_model_id", + return_value=True, + ) + @patch( + "sagemaker.serve.builder.jumpstart_builder.JumpStart._create_pre_trained_js_model", + return_value=MagicMock(), + ) + @patch( + "sagemaker.serve.builder.jumpstart_builder.prepare_tgi_js_resources", + return_value=({"model_type": "t5", "n_head": 71}, True), + ) + @patch("sagemaker.serve.builder.jumpstart_builder._get_ram_usage_mb", return_value=1024) + @patch( + "sagemaker.serve.builder.jumpstart_builder._get_nb_instance", return_value="ml.g5.24xlarge" + ) + def test_get_deployment_config( + self, + mock_get_nb_instance, + mock_get_ram_usage_mb, + mock_prepare_for_tgi, + mock_pre_trained_model, + mock_is_jumpstart_model, + mock_telemetry, + ): + builder = ModelBuilder( + model="facebook/galactica-mock-model-id", + schema_builder=mock_schema_builder, + ) + + mock_pre_trained_model.return_value.image_uri = mock_tgi_image_uri + + expected = DEPLOYMENT_CONFIGS[0] + mock_pre_trained_model.return_value.deployment_config = expected + + builder.build() + builder.serve_settings.telemetry_opt_out = True + + self.assertEqual(builder.get_deployment_config(), expected) + + @patch("sagemaker.serve.builder.jumpstart_builder._capture_telemetry", side_effect=None) + @patch( + "sagemaker.serve.builder.jumpstart_builder.JumpStart._is_jumpstart_model_id", + return_value=True, + ) + @patch( + "sagemaker.serve.builder.jumpstart_builder.JumpStart._create_pre_trained_js_model", + return_value=MagicMock(), + ) + @patch( + "sagemaker.serve.builder.jumpstart_builder.prepare_tgi_js_resources", + return_value=({"model_type": "t5", "n_head": 71}, True), + ) + @patch("sagemaker.serve.builder.jumpstart_builder._get_ram_usage_mb", return_value=1024) + @patch( + "sagemaker.serve.builder.jumpstart_builder._get_nb_instance", return_value="ml.g5.24xlarge" + ) + def test_set_deployment_config( + self, + mock_get_nb_instance, + mock_get_ram_usage_mb, + mock_prepare_for_tgi, + mock_pre_trained_model, + mock_is_jumpstart_model, + mock_telemetry, + ): + builder = ModelBuilder( + model="facebook/galactica-mock-model-id", + schema_builder=mock_schema_builder, + ) + + mock_pre_trained_model.return_value.image_uri = mock_tgi_image_uri + + builder.build() + builder.serve_settings.telemetry_opt_out = True + + builder.set_deployment_config("config_name") + + mock_pre_trained_model.return_value.set_deployment_config.assert_called_once_with( + "config_name" + ) + @patch("sagemaker.serve.builder.jumpstart_builder._capture_telemetry", side_effect=None) @patch( "sagemaker.serve.builder.jumpstart_builder.JumpStart._is_jumpstart_model_id", @@ -719,7 +802,7 @@ def test_display_benchmark_metrics( lambda *args, **kwargs: "metric data" ) - model = builder.build() + builder.build() builder.serve_settings.telemetry_opt_out = True - model.display_benchmark_metrics() + builder.display_benchmark_metrics() From 6520a2b5d48caa7babd7d5802c1fe461ebb22182 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Wed, 24 Apr 2024 15:41:16 -0700 Subject: [PATCH 02/12] Resolve PR comments --- src/sagemaker/jumpstart/model.py | 10 +++++----- src/sagemaker/serve/builder/jumpstart_builder.py | 4 ++-- src/sagemaker/utils.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/sagemaker/jumpstart/model.py b/src/sagemaker/jumpstart/model.py index 638bf7f1bd..cd57ecac06 100644 --- a/src/sagemaker/jumpstart/model.py +++ b/src/sagemaker/jumpstart/model.py @@ -15,7 +15,7 @@ from __future__ import absolute_import from functools import lru_cache -from typing import Dict, List, Optional, Union, Any +from typing import Dict, List, Optional, Any, Union import pandas as pd from botocore.exceptions import ClientError @@ -442,7 +442,7 @@ def set_deployment_config(self, config_name: Optional[str]) -> None: ) @property - def deployment_config(self) -> Union[Dict[str, Any], None]: + def deployment_config(self) -> Optional[Dict[str, Any]]: """The deployment config to apply to the model. Returns: @@ -859,7 +859,7 @@ def register_deploy_wrapper(*args, **kwargs): return model_package - @lru_cache(typed=True) + @lru_cache def _get_benchmarks_data(self, config_name: str) -> Dict[str, List[str]]: """Constructs deployment configs benchmark data. @@ -873,8 +873,8 @@ def _get_benchmarks_data(self, config_name: str) -> Dict[str, List[str]]: config_name, ) - @lru_cache(typed=True) - def _retrieve_selected_deployment_config(self, config_name: str) -> Union[Dict[str, Any], None]: + @lru_cache + def _retrieve_selected_deployment_config(self, config_name: str) -> Optional[Dict[str, Any]]: """Retrieve the deployment config to apply to the model. Args: diff --git a/src/sagemaker/serve/builder/jumpstart_builder.py b/src/sagemaker/serve/builder/jumpstart_builder.py index 78ec08a821..ff2aa97b93 100644 --- a/src/sagemaker/serve/builder/jumpstart_builder.py +++ b/src/sagemaker/serve/builder/jumpstart_builder.py @@ -16,7 +16,7 @@ import copy from abc import ABC, abstractmethod from datetime import datetime, timedelta -from typing import Type, Any, List, Dict, Optional, Union +from typing import Type, Any, List, Dict, Optional import logging from sagemaker.model import Model @@ -441,7 +441,7 @@ def set_deployment_config(self, config_name: Optional[str]) -> None: """ self.pysdk_model.set_deployment_config(config_name) - def get_deployment_config(self) -> Union[Dict[str, Any], None]: + def get_deployment_config(self) -> Optional[Dict[str, Any]]: """Gets the deployment config to apply to the model. Returns: diff --git a/src/sagemaker/utils.py b/src/sagemaker/utils.py index 8a572786bc..35f60b37e1 100644 --- a/src/sagemaker/utils.py +++ b/src/sagemaker/utils.py @@ -1660,7 +1660,7 @@ def deep_override_dict( return unflatten_dict(flattened_dict1) if flattened_dict1 else {} -@lru_cache(typed=True) +@lru_cache def get_instance_rate_per_hour( instance_type: str, region: str, From a7fced733bf9d7d3079c92c24f8070b570213309 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Wed, 24 Apr 2024 16:42:47 -0700 Subject: [PATCH 03/12] ModelBuilder-JS --- .../serve/builder/jumpstart_builder.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/sagemaker/serve/builder/jumpstart_builder.py b/src/sagemaker/serve/builder/jumpstart_builder.py index ff2aa97b93..55a0601a5c 100644 --- a/src/sagemaker/serve/builder/jumpstart_builder.py +++ b/src/sagemaker/serve/builder/jumpstart_builder.py @@ -439,6 +439,9 @@ def set_deployment_config(self, config_name: Optional[str]) -> None: The name of the deployment config. Set to None to unset any existing config that is applied to the model. """ + if self.pysdk_model is None: + self.pysdk_model = self._create_pre_trained_js_model() + self.pysdk_model.set_deployment_config(config_name) def get_deployment_config(self) -> Optional[Dict[str, Any]]: @@ -447,10 +450,16 @@ def get_deployment_config(self) -> Optional[Dict[str, Any]]: Returns: Union[Dict[str, Any], None]: Deployment config to apply to this model. """ + if self.pysdk_model is None: + self.pysdk_model = self._create_pre_trained_js_model() + return self.pysdk_model.deployment_config def display_benchmark_metrics(self): """Display Markdown Benchmark Metrics for deployment configs.""" + if self.pysdk_model is None: + self.pysdk_model = self._create_pre_trained_js_model() + self.pysdk_model.display_benchmark_metrics() def list_deployment_configs(self) -> List[Dict[str, Any]]: @@ -459,6 +468,9 @@ def list_deployment_configs(self) -> List[Dict[str, Any]]: Returns: List[Dict[str, Any]]: A list of deployment configs. """ + if self.pysdk_model is None: + self.pysdk_model = self._create_pre_trained_js_model() + return self.pysdk_model.list_deployment_configs() def _build_for_jumpstart(self): @@ -467,7 +479,11 @@ def _build_for_jumpstart(self): self.secret_key = None self.jumpstart = True - pysdk_model = self._create_pre_trained_js_model() + pysdk_model = ( + self.pysdk_model + if self.pysdk_model is not None + else self._create_pre_trained_js_model() + ) image_uri = pysdk_model.image_uri From dd84775c5619a94e3fdee4e2a056926cc048cb90 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Wed, 24 Apr 2024 18:34:31 -0700 Subject: [PATCH 04/12] Add Unit tests --- src/sagemaker/jumpstart/model.py | 2 +- src/sagemaker/jumpstart/types.py | 20 ++++--- src/sagemaker/jumpstart/utils.py | 26 +++++++-- .../serve/builder/jumpstart_builder.py | 28 +++------- tests/unit/sagemaker/jumpstart/constants.py | 56 +++++++++++-------- .../sagemaker/jumpstart/model/test_model.py | 7 ++- tests/unit/sagemaker/jumpstart/test_utils.py | 9 ++- .../serve/builder/test_js_builder.py | 22 +------- 8 files changed, 91 insertions(+), 79 deletions(-) diff --git a/src/sagemaker/jumpstart/model.py b/src/sagemaker/jumpstart/model.py index cd57ecac06..37728cad1b 100644 --- a/src/sagemaker/jumpstart/model.py +++ b/src/sagemaker/jumpstart/model.py @@ -886,7 +886,7 @@ def _retrieve_selected_deployment_config(self, config_name: str) -> Optional[Dic return None for deployment_config in self._deployment_configs: - if deployment_config.get("ConfigName") == config_name: + if deployment_config.get("DeploymentConfigName") == config_name: return deployment_config return None diff --git a/src/sagemaker/jumpstart/types.py b/src/sagemaker/jumpstart/types.py index 07bd769054..3c643b79a2 100644 --- a/src/sagemaker/jumpstart/types.py +++ b/src/sagemaker/jumpstart/types.py @@ -2249,17 +2249,17 @@ def to_json(self) -> Dict[str, Any]: return json_obj -class DeploymentConfig(BaseDeploymentConfigDataHolder): +class DeploymentArgs(BaseDeploymentConfigDataHolder): """Dataclass representing a Deployment Config.""" __slots__ = [ - "model_data_download_timeout", - "container_startup_health_check_timeout", "image_uri", "model_data", - "instance_type", "environment", + "instance_type", "compute_resource_requirements", + "model_data_download_timeout", + "container_startup_health_check_timeout", ] def __init__( @@ -2286,9 +2286,11 @@ class DeploymentConfigMetadata(BaseDeploymentConfigDataHolder): """Dataclass representing a Deployment Config Metadata""" __slots__ = [ - "config_name", + "version", + "deployment_config_name", + "deployment_args", + "acceleration_configs", "benchmark_metrics", - "deployment_config", ] def __init__( @@ -2299,6 +2301,8 @@ def __init__( deploy_kwargs: JumpStartModelDeployKwargs, ): """Instantiates DeploymentConfigMetadata object.""" - self.config_name = config_name + self.version = "1.0.0" + self.deployment_config_name = config_name + self.deployment_args = DeploymentArgs(init_kwargs, deploy_kwargs) + self.acceleration_configs = None self.benchmark_metrics = benchmark_metrics - self.deployment_config = DeploymentConfig(init_kwargs, deploy_kwargs) diff --git a/src/sagemaker/jumpstart/utils.py b/src/sagemaker/jumpstart/utils.py index 905f2a18d5..4981678821 100644 --- a/src/sagemaker/jumpstart/utils.py +++ b/src/sagemaker/jumpstart/utils.py @@ -1011,24 +1011,40 @@ def extract_metrics_from_deployment_configs( config_name (str): The name of the deployment config use by the model. """ - data = {"Config Name": [], "Instance Type": [], "Selected": []} + data = {"Config Name": [], "Instance Type": [], "Selected": [], "Accelerated": []} for index, deployment_config in enumerate(deployment_configs): - if deployment_config.get("DeploymentConfig") is None: + if deployment_config.get("DeploymentArgs") is None: continue benchmark_metrics = deployment_config.get("BenchmarkMetrics") if benchmark_metrics is not None: - data["Config Name"].append(deployment_config.get("ConfigName")) + data["Config Name"].append(deployment_config.get("DeploymentConfigName")) data["Instance Type"].append( - deployment_config.get("DeploymentConfig").get("InstanceType") + deployment_config.get("DeploymentArgs").get("InstanceType") ) data["Selected"].append( "Yes" - if (config_name is not None and config_name == deployment_config.get("ConfigName")) + if ( + config_name is not None + and config_name == deployment_config.get("DeploymentConfigName") + ) else "No" ) + accelerated_configs = deployment_config.get("AccelerationConfigs") + if accelerated_configs is None: + data["Accelerated"].append("No") + else: + data["Accelerated"].append( + "Yes" + if ( + len(accelerated_configs) > 0 + and accelerated_configs[0].get("Enabled", False) + ) + else "No" + ) + if index == 0: for benchmark_metric in benchmark_metrics: column_name = f"{benchmark_metric.get('name')} ({benchmark_metric.get('unit')})" diff --git a/src/sagemaker/serve/builder/jumpstart_builder.py b/src/sagemaker/serve/builder/jumpstart_builder.py index 55a0601a5c..9e904fc2e7 100644 --- a/src/sagemaker/serve/builder/jumpstart_builder.py +++ b/src/sagemaker/serve/builder/jumpstart_builder.py @@ -439,9 +439,6 @@ def set_deployment_config(self, config_name: Optional[str]) -> None: The name of the deployment config. Set to None to unset any existing config that is applied to the model. """ - if self.pysdk_model is None: - self.pysdk_model = self._create_pre_trained_js_model() - self.pysdk_model.set_deployment_config(config_name) def get_deployment_config(self) -> Optional[Dict[str, Any]]: @@ -450,17 +447,13 @@ def get_deployment_config(self) -> Optional[Dict[str, Any]]: Returns: Union[Dict[str, Any], None]: Deployment config to apply to this model. """ - if self.pysdk_model is None: - self.pysdk_model = self._create_pre_trained_js_model() - - return self.pysdk_model.deployment_config + return getattr(self, "pysdk_model", self._create_pre_trained_js_model()).deployment_config def display_benchmark_metrics(self): """Display Markdown Benchmark Metrics for deployment configs.""" - if self.pysdk_model is None: - self.pysdk_model = self._create_pre_trained_js_model() - - self.pysdk_model.display_benchmark_metrics() + getattr( + self, "pysdk_model", self._create_pre_trained_js_model() + ).display_benchmark_metrics() def list_deployment_configs(self) -> List[Dict[str, Any]]: """List deployment configs for ``This`` model in the current region. @@ -468,10 +461,9 @@ def list_deployment_configs(self) -> List[Dict[str, Any]]: Returns: List[Dict[str, Any]]: A list of deployment configs. """ - if self.pysdk_model is None: - self.pysdk_model = self._create_pre_trained_js_model() - - return self.pysdk_model.list_deployment_configs() + return getattr( + self, "pysdk_model", self._create_pre_trained_js_model() + ).list_deployment_configs() def _build_for_jumpstart(self): """Placeholder docstring""" @@ -479,11 +471,7 @@ def _build_for_jumpstart(self): self.secret_key = None self.jumpstart = True - pysdk_model = ( - self.pysdk_model - if self.pysdk_model is not None - else self._create_pre_trained_js_model() - ) + pysdk_model = getattr(self, "pysdk_model", self._create_pre_trained_js_model()) image_uri = pysdk_model.image_uri diff --git a/tests/unit/sagemaker/jumpstart/constants.py b/tests/unit/sagemaker/jumpstart/constants.py index b83f85ffde..bc70b8af8d 100644 --- a/tests/unit/sagemaker/jumpstart/constants.py +++ b/tests/unit/sagemaker/jumpstart/constants.py @@ -7911,11 +7911,9 @@ DEPLOYMENT_CONFIGS = [ { - "ConfigName": "neuron-inference", - "BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}], - "DeploymentConfig": { - "ModelDataDownloadTimeout": None, - "ContainerStartupHealthCheckTimeout": None, + "Version": "1.0.0", + "DeploymentConfigName": "neuron-inference", + "DeploymentArgs": { "ImageUri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4" ".0-gpu-py310-cu121-ubuntu20.04", "ModelData": { @@ -7926,7 +7924,6 @@ "CompressionType": "None", } }, - "InstanceType": "ml.p2.xlarge", "Environment": { "SAGEMAKER_PROGRAM": "inference.py", "ENDPOINT_SERVER_TIMEOUT": "3600", @@ -7938,15 +7935,18 @@ "MAX_TOTAL_TOKENS": "2048", "SAGEMAKER_MODEL_SERVER_WORKERS": "1", }, + "InstanceType": "ml.p2.xlarge", "ComputeResourceRequirements": {"MinMemoryRequiredInMb": None}, + "ModelDataDownloadTimeout": None, + "ContainerStartupHealthCheckTimeout": None, }, + "AccelerationConfigs": None, + "BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}], }, { - "ConfigName": "neuron-inference-budget", - "BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}], - "DeploymentConfig": { - "ModelDataDownloadTimeout": None, - "ContainerStartupHealthCheckTimeout": None, + "Version": "1.0.0", + "DeploymentConfigName": "neuron-inference-budget", + "DeploymentArgs": { "ImageUri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4" ".0-gpu-py310-cu121-ubuntu20.04", "ModelData": { @@ -7957,7 +7957,6 @@ "CompressionType": "None", } }, - "InstanceType": "ml.p2.xlarge", "Environment": { "SAGEMAKER_PROGRAM": "inference.py", "ENDPOINT_SERVER_TIMEOUT": "3600", @@ -7969,15 +7968,18 @@ "MAX_TOTAL_TOKENS": "2048", "SAGEMAKER_MODEL_SERVER_WORKERS": "1", }, + "InstanceType": "ml.p2.xlarge", "ComputeResourceRequirements": {"MinMemoryRequiredInMb": None}, + "ModelDataDownloadTimeout": None, + "ContainerStartupHealthCheckTimeout": None, }, + "AccelerationConfigs": None, + "BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}], }, { - "ConfigName": "gpu-inference-budget", - "BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}], - "DeploymentConfig": { - "ModelDataDownloadTimeout": None, - "ContainerStartupHealthCheckTimeout": None, + "Version": "1.0.0", + "DeploymentConfigName": "gpu-inference-budget", + "DeploymentArgs": { "ImageUri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4" ".0-gpu-py310-cu121-ubuntu20.04", "ModelData": { @@ -7988,7 +7990,6 @@ "CompressionType": "None", } }, - "InstanceType": "ml.p2.xlarge", "Environment": { "SAGEMAKER_PROGRAM": "inference.py", "ENDPOINT_SERVER_TIMEOUT": "3600", @@ -8000,15 +8001,18 @@ "MAX_TOTAL_TOKENS": "2048", "SAGEMAKER_MODEL_SERVER_WORKERS": "1", }, + "InstanceType": "ml.p2.xlarge", "ComputeResourceRequirements": {"MinMemoryRequiredInMb": None}, + "ModelDataDownloadTimeout": None, + "ContainerStartupHealthCheckTimeout": None, }, + "AccelerationConfigs": None, + "BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}], }, { - "ConfigName": "gpu-inference", - "BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}], - "DeploymentConfig": { - "ModelDataDownloadTimeout": None, - "ContainerStartupHealthCheckTimeout": None, + "Version": "1.0.0", + "DeploymentConfigName": "gpu-inference", + "DeploymentArgs": { "ImageUri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4" ".0-gpu-py310-cu121-ubuntu20.04", "ModelData": { @@ -8019,7 +8023,6 @@ "CompressionType": "None", } }, - "InstanceType": "ml.p2.xlarge", "Environment": { "SAGEMAKER_PROGRAM": "inference.py", "ENDPOINT_SERVER_TIMEOUT": "3600", @@ -8031,8 +8034,13 @@ "MAX_TOTAL_TOKENS": "2048", "SAGEMAKER_MODEL_SERVER_WORKERS": "1", }, + "InstanceType": "ml.p2.xlarge", "ComputeResourceRequirements": {"MinMemoryRequiredInMb": None}, + "ModelDataDownloadTimeout": None, + "ContainerStartupHealthCheckTimeout": None, }, + "AccelerationConfigs": None, + "BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}], }, ] diff --git a/tests/unit/sagemaker/jumpstart/model/test_model.py b/tests/unit/sagemaker/jumpstart/model/test_model.py index 6145932cc8..c63b603564 100644 --- a/tests/unit/sagemaker/jumpstart/model/test_model.py +++ b/tests/unit/sagemaker/jumpstart/model/test_model.py @@ -1723,6 +1723,11 @@ def test_model_list_deployment_configs( configs = model.list_deployment_configs() + print("******************************") + for config in configs: + print(config) + print() + self.assertEqual(configs, get_base_deployment_configs()) @mock.patch("sagemaker.jumpstart.utils.verify_model_region_and_return_specs") @@ -1805,7 +1810,7 @@ def test_model_retrieve_deployment_config( model = JumpStartModel(model_id=model_id) expected = get_base_deployment_configs()[0] - model.set_deployment_config(expected.get("ConfigName")) + model.set_deployment_config(expected.get("DeploymentConfigName")) self.assertEqual(model.deployment_config, expected) diff --git a/tests/unit/sagemaker/jumpstart/test_utils.py b/tests/unit/sagemaker/jumpstart/test_utils.py index 85911a2854..367bd82053 100644 --- a/tests/unit/sagemaker/jumpstart/test_utils.py +++ b/tests/unit/sagemaker/jumpstart/test_utils.py @@ -1725,6 +1725,7 @@ def test_get_jumpstart_benchmark_stats_training( ], "Instance Type": ["ml.p2.xlarge", "ml.p2.xlarge", "ml.p2.xlarge", "ml.p2.xlarge"], "Selected": ["No", "No", "No", "No"], + "Accelerated": ["Yes", "No", "No", "No"], "Instance Rate (USD/Hrs)": [ "0.0083000000", "0.0083000000", @@ -1744,6 +1745,7 @@ def test_get_jumpstart_benchmark_stats_training( ], "Instance Type": ["ml.p2.xlarge", "ml.p2.xlarge", "ml.p2.xlarge", "ml.p2.xlarge"], "Selected": ["Yes", "No", "No", "No"], + "Accelerated": ["Yes", "No", "No", "No"], "Instance Rate (USD/Hrs)": [ "0.0083000000", "0.0083000000", @@ -1755,6 +1757,11 @@ def test_get_jumpstart_benchmark_stats_training( ], ) def test_extract_metrics_from_deployment_configs(config_name, expected): - data = utils.extract_metrics_from_deployment_configs(get_base_deployment_configs(), config_name) + configs = get_base_deployment_configs() + configs[0]["AccelerationConfigs"] = [ + {"Type": "Speculative-Decoding", "Enabled": True, "Spec": {"Version": "0.1"}} + ] + + data = utils.extract_metrics_from_deployment_configs(configs, config_name) assert data == expected diff --git a/tests/unit/sagemaker/serve/builder/test_js_builder.py b/tests/unit/sagemaker/serve/builder/test_js_builder.py index c60726ccab..8fc4faeff4 100644 --- a/tests/unit/sagemaker/serve/builder/test_js_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_js_builder.py @@ -676,9 +676,6 @@ def test_list_deployment_configs( lambda: DEPLOYMENT_CONFIGS ) - builder.build() - builder.serve_settings.telemetry_opt_out = True - configs = builder.list_deployment_configs() self.assertEqual(configs, DEPLOYMENT_CONFIGS) @@ -719,9 +716,6 @@ def test_get_deployment_config( expected = DEPLOYMENT_CONFIGS[0] mock_pre_trained_model.return_value.deployment_config = expected - builder.build() - builder.serve_settings.telemetry_opt_out = True - self.assertEqual(builder.get_deployment_config(), expected) @patch("sagemaker.serve.builder.jumpstart_builder._capture_telemetry", side_effect=None) @@ -771,10 +765,7 @@ def test_set_deployment_config( "sagemaker.serve.builder.jumpstart_builder.JumpStart._is_jumpstart_model_id", return_value=True, ) - @patch( - "sagemaker.serve.builder.jumpstart_builder.JumpStart._create_pre_trained_js_model", - return_value=MagicMock(), - ) + @patch("sagemaker.serve.builder.jumpstart_builder.JumpStart._create_pre_trained_js_model") @patch( "sagemaker.serve.builder.jumpstart_builder.prepare_tgi_js_resources", return_value=({"model_type": "t5", "n_head": 71}, True), @@ -796,13 +787,6 @@ def test_display_benchmark_metrics( model="facebook/galactica-mock-model-id", schema_builder=mock_schema_builder, ) - - mock_pre_trained_model.return_value.image_uri = mock_tgi_image_uri - mock_pre_trained_model.return_value.display_benchmark_metrics.side_effect = ( - lambda *args, **kwargs: "metric data" - ) - - builder.build() - builder.serve_settings.telemetry_opt_out = True - builder.display_benchmark_metrics() + + mock_pre_trained_model.return_value.display_benchmark_metrics.assert_called_once() From faba3c825f6994f04762031c0f12361379fe7627 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Wed, 24 Apr 2024 18:43:51 -0700 Subject: [PATCH 05/12] Refactoring --- tests/unit/sagemaker/jumpstart/model/test_model.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/unit/sagemaker/jumpstart/model/test_model.py b/tests/unit/sagemaker/jumpstart/model/test_model.py index c63b603564..6e47fd2791 100644 --- a/tests/unit/sagemaker/jumpstart/model/test_model.py +++ b/tests/unit/sagemaker/jumpstart/model/test_model.py @@ -1723,11 +1723,6 @@ def test_model_list_deployment_configs( configs = model.list_deployment_configs() - print("******************************") - for config in configs: - print(config) - print() - self.assertEqual(configs, get_base_deployment_configs()) @mock.patch("sagemaker.jumpstart.utils.verify_model_region_and_return_specs") From dc2f5b7189f083201f60d6fe8e4b908c1dfebd9b Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Wed, 24 Apr 2024 21:23:09 -0700 Subject: [PATCH 06/12] Testing with Notebook --- src/sagemaker/jumpstart/model.py | 10 ++-- src/sagemaker/jumpstart/types.py | 2 - .../serve/builder/jumpstart_builder.py | 47 +++++++++--------- tests/unit/sagemaker/jumpstart/constants.py | 4 -- .../serve/builder/test_js_builder.py | 49 +++++-------------- 5 files changed, 43 insertions(+), 69 deletions(-) diff --git a/src/sagemaker/jumpstart/model.py b/src/sagemaker/jumpstart/model.py index 37728cad1b..77a1a101b5 100644 --- a/src/sagemaker/jumpstart/model.py +++ b/src/sagemaker/jumpstart/model.py @@ -443,10 +443,10 @@ def set_deployment_config(self, config_name: Optional[str]) -> None: @property def deployment_config(self) -> Optional[Dict[str, Any]]: - """The deployment config to apply to the model. + """The deployment config that will be applied to the model. Returns: - Union[Dict[str, Any], None]: Deployment config to apply to this model. + Union[Dict[str, Any], None]: Deployment config that will be applied to the model. """ return self._retrieve_selected_deployment_config(self.config_name) @@ -861,7 +861,7 @@ def register_deploy_wrapper(*args, **kwargs): @lru_cache def _get_benchmarks_data(self, config_name: str) -> Dict[str, List[str]]: - """Constructs deployment configs benchmark data. + """Deployment configs benchmark metrics. Args: config_name (str): The name of the selected deployment config. @@ -878,9 +878,9 @@ def _retrieve_selected_deployment_config(self, config_name: str) -> Optional[Dic """Retrieve the deployment config to apply to the model. Args: - config_name (str): The name of the selected deployment config. + config_name (str): The name of the deployment config to retrieve. Returns: - Union[Dict[str, Any], None]: The deployment config to apply to the model. + Optional[Dict[str, Any]]: The retrieved deployment config. """ if config_name is None: return None diff --git a/src/sagemaker/jumpstart/types.py b/src/sagemaker/jumpstart/types.py index 3c643b79a2..0f67fa8c58 100644 --- a/src/sagemaker/jumpstart/types.py +++ b/src/sagemaker/jumpstart/types.py @@ -2286,7 +2286,6 @@ class DeploymentConfigMetadata(BaseDeploymentConfigDataHolder): """Dataclass representing a Deployment Config Metadata""" __slots__ = [ - "version", "deployment_config_name", "deployment_args", "acceleration_configs", @@ -2301,7 +2300,6 @@ def __init__( deploy_kwargs: JumpStartModelDeployKwargs, ): """Instantiates DeploymentConfigMetadata object.""" - self.version = "1.0.0" self.deployment_config_name = config_name self.deployment_args = DeploymentArgs(init_kwargs, deploy_kwargs) self.acceleration_configs = None diff --git a/src/sagemaker/serve/builder/jumpstart_builder.py b/src/sagemaker/serve/builder/jumpstart_builder.py index 9e904fc2e7..9241f9bc4f 100644 --- a/src/sagemaker/serve/builder/jumpstart_builder.py +++ b/src/sagemaker/serve/builder/jumpstart_builder.py @@ -439,6 +439,9 @@ def set_deployment_config(self, config_name: Optional[str]) -> None: The name of the deployment config. Set to None to unset any existing config that is applied to the model. """ + if not hasattr(self, "pysdk_model") or self.pysdk_model is None: + raise Exception("Cannot set deployment config to an uninitialized model") + self.pysdk_model.set_deployment_config(config_name) def get_deployment_config(self) -> Optional[Dict[str, Any]]: @@ -447,13 +450,17 @@ def get_deployment_config(self) -> Optional[Dict[str, Any]]: Returns: Union[Dict[str, Any], None]: Deployment config to apply to this model. """ - return getattr(self, "pysdk_model", self._create_pre_trained_js_model()).deployment_config + if not hasattr(self, "pysdk_model") or self.pysdk_model is None: + self._build_for_jumpstart() + + return self.pysdk_model.deployment_config def display_benchmark_metrics(self): """Display Markdown Benchmark Metrics for deployment configs.""" - getattr( - self, "pysdk_model", self._create_pre_trained_js_model() - ).display_benchmark_metrics() + if not hasattr(self, "pysdk_model") or self.pysdk_model is None: + self._build_for_jumpstart() + + self.pysdk_model.display_benchmark_metrics() def list_deployment_configs(self) -> List[Dict[str, Any]]: """List deployment configs for ``This`` model in the current region. @@ -461,9 +468,10 @@ def list_deployment_configs(self) -> List[Dict[str, Any]]: Returns: List[Dict[str, Any]]: A list of deployment configs. """ - return getattr( - self, "pysdk_model", self._create_pre_trained_js_model() - ).list_deployment_configs() + if not hasattr(self, "pysdk_model") or self.pysdk_model is None: + self._build_for_jumpstart() + + return self.pysdk_model.list_deployment_configs() def _build_for_jumpstart(self): """Placeholder docstring""" @@ -471,32 +479,29 @@ def _build_for_jumpstart(self): self.secret_key = None self.jumpstart = True - pysdk_model = getattr(self, "pysdk_model", self._create_pre_trained_js_model()) - - image_uri = pysdk_model.image_uri + if not hasattr(self, "pysdk_model") or self.pysdk_model is None: + self.pysdk_model = self._create_pre_trained_js_model() - logger.info("JumpStart ID %s is packaged with Image URI: %s", self.model, image_uri) + logger.info( + "JumpStart ID %s is packaged with Image URI: %s", self.model, self.pysdk_model.image_uri + ) - if self._is_gated_model(pysdk_model) and self.mode != Mode.SAGEMAKER_ENDPOINT: + if self._is_gated_model() and self.mode != Mode.SAGEMAKER_ENDPOINT: raise ValueError( "JumpStart Gated Models are only supported in SAGEMAKER_ENDPOINT mode." ) - if "djl-inference" in image_uri: + if "djl-inference" in self.pysdk_model.image_uri: logger.info("Building for DJL JumpStart Model ID...") self.model_server = ModelServer.DJL_SERVING - - self.pysdk_model = pysdk_model self.image_uri = self.pysdk_model.image_uri self._build_for_djl_jumpstart() self.pysdk_model.tune = self.tune_for_djl_jumpstart - elif "tgi-inference" in image_uri: + elif "tgi-inference" in self.pysdk_model.image_uri: logger.info("Building for TGI JumpStart Model ID...") self.model_server = ModelServer.TGI - - self.pysdk_model = pysdk_model self.image_uri = self.pysdk_model.image_uri self._build_for_tgi_jumpstart() @@ -509,15 +514,13 @@ def _build_for_jumpstart(self): return self.pysdk_model - def _is_gated_model(self, model) -> bool: + def _is_gated_model(self) -> bool: """Determine if ``this`` Model is Gated - Args: - model (Model): Jumpstart Model Returns: bool: ``True`` if ``this`` Model is Gated """ - s3_uri = model.model_data + s3_uri = self.pysdk_model.model_data if isinstance(s3_uri, dict): s3_uri = s3_uri.get("S3DataSource").get("S3Uri") diff --git a/tests/unit/sagemaker/jumpstart/constants.py b/tests/unit/sagemaker/jumpstart/constants.py index bc70b8af8d..90f037daea 100644 --- a/tests/unit/sagemaker/jumpstart/constants.py +++ b/tests/unit/sagemaker/jumpstart/constants.py @@ -7911,7 +7911,6 @@ DEPLOYMENT_CONFIGS = [ { - "Version": "1.0.0", "DeploymentConfigName": "neuron-inference", "DeploymentArgs": { "ImageUri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4" @@ -7944,7 +7943,6 @@ "BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}], }, { - "Version": "1.0.0", "DeploymentConfigName": "neuron-inference-budget", "DeploymentArgs": { "ImageUri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4" @@ -7977,7 +7975,6 @@ "BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}], }, { - "Version": "1.0.0", "DeploymentConfigName": "gpu-inference-budget", "DeploymentArgs": { "ImageUri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4" @@ -8010,7 +8007,6 @@ "BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}], }, { - "Version": "1.0.0", "DeploymentConfigName": "gpu-inference", "DeploymentArgs": { "ImageUri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4" diff --git a/tests/unit/sagemaker/serve/builder/test_js_builder.py b/tests/unit/sagemaker/serve/builder/test_js_builder.py index 8fc4faeff4..01d9915fb0 100644 --- a/tests/unit/sagemaker/serve/builder/test_js_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_js_builder.py @@ -735,7 +735,7 @@ def test_get_deployment_config( @patch( "sagemaker.serve.builder.jumpstart_builder._get_nb_instance", return_value="ml.g5.24xlarge" ) - def test_set_deployment_config( + def test_display_benchmark_metrics( self, mock_get_nb_instance, mock_get_ram_usage_mb, @@ -750,43 +750,20 @@ def test_set_deployment_config( ) mock_pre_trained_model.return_value.image_uri = mock_tgi_image_uri - - builder.build() - builder.serve_settings.telemetry_opt_out = True - - builder.set_deployment_config("config_name") - - mock_pre_trained_model.return_value.set_deployment_config.assert_called_once_with( - "config_name" + mock_pre_trained_model.return_value.list_deployment_configs.side_effect = ( + lambda: DEPLOYMENT_CONFIGS ) - @patch("sagemaker.serve.builder.jumpstart_builder._capture_telemetry", side_effect=None) - @patch( - "sagemaker.serve.builder.jumpstart_builder.JumpStart._is_jumpstart_model_id", - return_value=True, - ) - @patch("sagemaker.serve.builder.jumpstart_builder.JumpStart._create_pre_trained_js_model") - @patch( - "sagemaker.serve.builder.jumpstart_builder.prepare_tgi_js_resources", - return_value=({"model_type": "t5", "n_head": 71}, True), - ) - @patch("sagemaker.serve.builder.jumpstart_builder._get_ram_usage_mb", return_value=1024) - @patch( - "sagemaker.serve.builder.jumpstart_builder._get_nb_instance", return_value="ml.g5.24xlarge" - ) - def test_display_benchmark_metrics( - self, - mock_get_nb_instance, - mock_get_ram_usage_mb, - mock_prepare_for_tgi, - mock_pre_trained_model, - mock_is_jumpstart_model, - mock_telemetry, - ): - builder = ModelBuilder( - model="facebook/galactica-mock-model-id", - schema_builder=mock_schema_builder, - ) + builder.list_deployment_configs() + builder.display_benchmark_metrics() mock_pre_trained_model.return_value.display_benchmark_metrics.assert_called_once() + + def test_display_benchmark_metrics_ex(self): + self.assertRaises( + Exception, + lambda: ModelBuilder( + model="facebook/galactica-mock-model-id", schema_builder=mock_schema_builder + ).display_benchmark_metrics(), + ) From b4e983a4d1bb0d396fe77ca42f369e0bf14fbebb Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Wed, 24 Apr 2024 22:23:20 -0700 Subject: [PATCH 07/12] Test backward compatibility --- .../serve/builder/jumpstart_builder.py | 2 +- .../serve/builder/test_js_builder.py | 82 +++++++++++++++++-- 2 files changed, 75 insertions(+), 9 deletions(-) diff --git a/src/sagemaker/serve/builder/jumpstart_builder.py b/src/sagemaker/serve/builder/jumpstart_builder.py index 9241f9bc4f..a7382e9971 100644 --- a/src/sagemaker/serve/builder/jumpstart_builder.py +++ b/src/sagemaker/serve/builder/jumpstart_builder.py @@ -440,7 +440,7 @@ def set_deployment_config(self, config_name: Optional[str]) -> None: any existing config that is applied to the model. """ if not hasattr(self, "pysdk_model") or self.pysdk_model is None: - raise Exception("Cannot set deployment config to an uninitialized model") + raise Exception("Cannot set deployment config to an uninitialized model.") self.pysdk_model.set_deployment_config(config_name) diff --git a/tests/unit/sagemaker/serve/builder/test_js_builder.py b/tests/unit/sagemaker/serve/builder/test_js_builder.py index 01d9915fb0..8966668c7d 100644 --- a/tests/unit/sagemaker/serve/builder/test_js_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_js_builder.py @@ -718,6 +718,80 @@ def test_get_deployment_config( self.assertEqual(builder.get_deployment_config(), expected) + @patch("sagemaker.serve.builder.jumpstart_builder._capture_telemetry", side_effect=None) + @patch( + "sagemaker.serve.builder.jumpstart_builder.JumpStart._is_jumpstart_model_id", + return_value=True, + ) + @patch( + "sagemaker.serve.builder.jumpstart_builder.JumpStart._create_pre_trained_js_model", + return_value=MagicMock(), + ) + @patch( + "sagemaker.serve.builder.jumpstart_builder.prepare_tgi_js_resources", + return_value=({"model_type": "t5", "n_head": 71}, True), + ) + @patch("sagemaker.serve.builder.jumpstart_builder._get_ram_usage_mb", return_value=1024) + @patch( + "sagemaker.serve.builder.jumpstart_builder._get_nb_instance", return_value="ml.g5.24xlarge" + ) + def test_set_deployment_config( + self, + mock_get_nb_instance, + mock_get_ram_usage_mb, + mock_prepare_for_tgi, + mock_pre_trained_model, + mock_is_jumpstart_model, + mock_telemetry, + ): + builder = ModelBuilder( + model="facebook/galactica-mock-model-id", + schema_builder=mock_schema_builder, + ) + + mock_pre_trained_model.return_value.image_uri = mock_tgi_image_uri + + builder.build() + builder.set_deployment_config("config-1") + + mock_pre_trained_model.return_value.set_deployment_config.assert_called_with("config-1") + + @patch("sagemaker.serve.builder.jumpstart_builder._capture_telemetry", side_effect=None) + @patch( + "sagemaker.serve.builder.jumpstart_builder.JumpStart._is_jumpstart_model_id", + return_value=True, + ) + @patch( + "sagemaker.serve.builder.jumpstart_builder.JumpStart._create_pre_trained_js_model", + return_value=MagicMock(), + ) + @patch( + "sagemaker.serve.builder.jumpstart_builder.prepare_tgi_js_resources", + return_value=({"model_type": "t5", "n_head": 71}, True), + ) + @patch("sagemaker.serve.builder.jumpstart_builder._get_ram_usage_mb", return_value=1024) + @patch( + "sagemaker.serve.builder.jumpstart_builder._get_nb_instance", return_value="ml.g5.24xlarge" + ) + def test_set_deployment_config_ex( + self, + mock_get_nb_instance, + mock_get_ram_usage_mb, + mock_prepare_for_tgi, + mock_pre_trained_model, + mock_is_jumpstart_model, + mock_telemetry, + ): + mock_pre_trained_model.return_value.image_uri = mock_tgi_image_uri + + self.assertRaisesRegex( + Exception, + "Cannot set deployment config to an uninitialized model.", + lambda: ModelBuilder( + model="facebook/galactica-mock-model-id", schema_builder=mock_schema_builder + ).set_deployment_config("config-2"), + ) + @patch("sagemaker.serve.builder.jumpstart_builder._capture_telemetry", side_effect=None) @patch( "sagemaker.serve.builder.jumpstart_builder.JumpStart._is_jumpstart_model_id", @@ -759,11 +833,3 @@ def test_display_benchmark_metrics( builder.display_benchmark_metrics() mock_pre_trained_model.return_value.display_benchmark_metrics.assert_called_once() - - def test_display_benchmark_metrics_ex(self): - self.assertRaises( - Exception, - lambda: ModelBuilder( - model="facebook/galactica-mock-model-id", schema_builder=mock_schema_builder - ).display_benchmark_metrics(), - ) From 6f30cbf6d260def33dc1d4d18e3946b9b9f0cfdf Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Thu, 25 Apr 2024 10:01:49 -0700 Subject: [PATCH 08/12] Remove Accelerated column if all not enabled --- src/sagemaker/jumpstart/utils.py | 2 ++ tests/unit/sagemaker/jumpstart/test_utils.py | 13 +++++-------- tests/unit/sagemaker/jumpstart/utils.py | 8 ++++++++ 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/sagemaker/jumpstart/utils.py b/src/sagemaker/jumpstart/utils.py index 33f185f2bc..3fce6dd105 100644 --- a/src/sagemaker/jumpstart/utils.py +++ b/src/sagemaker/jumpstart/utils.py @@ -1084,4 +1084,6 @@ def extract_metrics_from_deployment_configs( if column_name in data.keys(): data[column_name].append(benchmark_metric.get("value")) + if "Yes" not in data["Accelerated"]: + del data["Accelerated"] return data diff --git a/tests/unit/sagemaker/jumpstart/test_utils.py b/tests/unit/sagemaker/jumpstart/test_utils.py index aed5555c73..210bd8e074 100644 --- a/tests/unit/sagemaker/jumpstart/test_utils.py +++ b/tests/unit/sagemaker/jumpstart/test_utils.py @@ -50,6 +50,7 @@ get_special_model_spec, get_prototype_manifest, get_base_deployment_configs, + get_base_deployment_configs_with_acceleration_configs, ) from mock import MagicMock @@ -1763,10 +1764,11 @@ def test_get_jumpstart_benchmark_stats_training( @pytest.mark.parametrize( - "config_name, expected", + "config_name, configs, expected", [ ( None, + get_base_deployment_configs(), { "Config Name": [ "neuron-inference", @@ -1776,7 +1778,6 @@ def test_get_jumpstart_benchmark_stats_training( ], "Instance Type": ["ml.p2.xlarge", "ml.p2.xlarge", "ml.p2.xlarge", "ml.p2.xlarge"], "Selected": ["No", "No", "No", "No"], - "Accelerated": ["Yes", "No", "No", "No"], "Instance Rate (USD/Hrs)": [ "0.0083000000", "0.0083000000", @@ -1787,6 +1788,7 @@ def test_get_jumpstart_benchmark_stats_training( ), ( "neuron-inference", + get_base_deployment_configs_with_acceleration_configs(), { "Config Name": [ "neuron-inference", @@ -1807,12 +1809,7 @@ def test_get_jumpstart_benchmark_stats_training( ), ], ) -def test_extract_metrics_from_deployment_configs(config_name, expected): - configs = get_base_deployment_configs() - configs[0]["AccelerationConfigs"] = [ - {"Type": "Speculative-Decoding", "Enabled": True, "Spec": {"Version": "0.1"}} - ] - +def test_extract_metrics_from_deployment_configs(config_name, configs, expected): data = utils.extract_metrics_from_deployment_configs(configs, config_name) assert data == expected diff --git a/tests/unit/sagemaker/jumpstart/utils.py b/tests/unit/sagemaker/jumpstart/utils.py index e0d6f645a8..96662837b4 100644 --- a/tests/unit/sagemaker/jumpstart/utils.py +++ b/tests/unit/sagemaker/jumpstart/utils.py @@ -307,6 +307,14 @@ def get_base_deployment_configs() -> List[Dict[str, Any]]: return DEPLOYMENT_CONFIGS +def get_base_deployment_configs_with_acceleration_configs() -> List[Dict[str, Any]]: + configs = copy.deepcopy(DEPLOYMENT_CONFIGS) + configs[0]["AccelerationConfigs"] = [ + {"Type": "Speculative-Decoding", "Enabled": True, "Spec": {"Version": "0.1"}} + ] + return configs + + def get_mock_init_kwargs(model_id) -> JumpStartModelInitKwargs: return JumpStartModelInitKwargs( model_id=model_id, From 5724ce1b7d98cd8a32372af852c7a8a77dda46ea Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Thu, 25 Apr 2024 10:13:59 -0700 Subject: [PATCH 09/12] Fix docstring --- src/sagemaker/jumpstart/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sagemaker/jumpstart/model.py b/src/sagemaker/jumpstart/model.py index 77a1a101b5..f939bc303b 100644 --- a/src/sagemaker/jumpstart/model.py +++ b/src/sagemaker/jumpstart/model.py @@ -446,7 +446,7 @@ def deployment_config(self) -> Optional[Dict[str, Any]]: """The deployment config that will be applied to the model. Returns: - Union[Dict[str, Any], None]: Deployment config that will be applied to the model. + Optional[Dict[str, Any]]: Deployment config that will be applied to the model. """ return self._retrieve_selected_deployment_config(self.config_name) From 4a61ee6c3b5c18962d184f58b34780bf93b3f946 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Thu, 25 Apr 2024 10:28:00 -0700 Subject: [PATCH 10/12] Resolved PR Review comments --- src/sagemaker/serve/builder/jumpstart_builder.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/sagemaker/serve/builder/jumpstart_builder.py b/src/sagemaker/serve/builder/jumpstart_builder.py index a7382e9971..dd0b9aeecf 100644 --- a/src/sagemaker/serve/builder/jumpstart_builder.py +++ b/src/sagemaker/serve/builder/jumpstart_builder.py @@ -451,14 +451,14 @@ def get_deployment_config(self) -> Optional[Dict[str, Any]]: Union[Dict[str, Any], None]: Deployment config to apply to this model. """ if not hasattr(self, "pysdk_model") or self.pysdk_model is None: - self._build_for_jumpstart() + self.pysdk_model = self._create_pre_trained_js_model() return self.pysdk_model.deployment_config def display_benchmark_metrics(self): """Display Markdown Benchmark Metrics for deployment configs.""" if not hasattr(self, "pysdk_model") or self.pysdk_model is None: - self._build_for_jumpstart() + self.pysdk_model = self._create_pre_trained_js_model() self.pysdk_model.display_benchmark_metrics() @@ -469,7 +469,7 @@ def list_deployment_configs(self) -> List[Dict[str, Any]]: List[Dict[str, Any]]: A list of deployment configs. """ if not hasattr(self, "pysdk_model") or self.pysdk_model is None: - self._build_for_jumpstart() + self.pysdk_model = self._create_pre_trained_js_model() return self.pysdk_model.list_deployment_configs() From d88ef4732f3dcf6022b19547c9ee1deb9035a0c2 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Thu, 25 Apr 2024 10:35:15 -0700 Subject: [PATCH 11/12] Docstring --- src/sagemaker/serve/builder/jumpstart_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sagemaker/serve/builder/jumpstart_builder.py b/src/sagemaker/serve/builder/jumpstart_builder.py index dd0b9aeecf..d3c2581885 100644 --- a/src/sagemaker/serve/builder/jumpstart_builder.py +++ b/src/sagemaker/serve/builder/jumpstart_builder.py @@ -448,7 +448,7 @@ def get_deployment_config(self) -> Optional[Dict[str, Any]]: """Gets the deployment config to apply to the model. Returns: - Union[Dict[str, Any], None]: Deployment config to apply to this model. + Optional[Dict[str, Any]]: Deployment config to apply to this model. """ if not hasattr(self, "pysdk_model") or self.pysdk_model is None: self.pysdk_model = self._create_pre_trained_js_model() From aaba1cbadf4c2158e6e4550c27bdae444b8ab571 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Thu, 25 Apr 2024 11:25:19 -0700 Subject: [PATCH 12/12] increase code coverage --- .../serve/builder/test_js_builder.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/unit/sagemaker/serve/builder/test_js_builder.py b/tests/unit/sagemaker/serve/builder/test_js_builder.py index 8966668c7d..b83b113209 100644 --- a/tests/unit/sagemaker/serve/builder/test_js_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_js_builder.py @@ -833,3 +833,37 @@ def test_display_benchmark_metrics( builder.display_benchmark_metrics() mock_pre_trained_model.return_value.display_benchmark_metrics.assert_called_once() + + @patch("sagemaker.serve.builder.jumpstart_builder._capture_telemetry", side_effect=None) + @patch( + "sagemaker.serve.builder.jumpstart_builder.JumpStart._is_jumpstart_model_id", + return_value=True, + ) + @patch( + "sagemaker.serve.builder.jumpstart_builder.JumpStart._create_pre_trained_js_model", + return_value=MagicMock(), + ) + @patch( + "sagemaker.serve.builder.jumpstart_builder.prepare_tgi_js_resources", + return_value=({"model_type": "t5", "n_head": 71}, True), + ) + @patch("sagemaker.serve.builder.jumpstart_builder._get_ram_usage_mb", return_value=1024) + @patch( + "sagemaker.serve.builder.jumpstart_builder._get_nb_instance", return_value="ml.g5.24xlarge" + ) + def test_display_benchmark_metrics_initial( + self, + mock_get_nb_instance, + mock_get_ram_usage_mb, + mock_prepare_for_tgi, + mock_pre_trained_model, + mock_is_jumpstart_model, + mock_telemetry, + ): + builder = ModelBuilder( + model="facebook/galactica-mock-model-id", + schema_builder=mock_schema_builder, + ) + builder.display_benchmark_metrics() + + mock_pre_trained_model.return_value.display_benchmark_metrics.assert_called_once()