From 2e002381d9d1c9738f309acd31744178b77e3171 Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Wed, 15 May 2024 15:29:01 -0700 Subject: [PATCH 01/30] Update: Pull latest tei container for sentence similiarity models --- .../serve/builder/transformers_builder.py | 24 ++++++++- .../builder/test_transformers_builder.py | 51 ++++++++++++++++++- 2 files changed, 72 insertions(+), 3 deletions(-) diff --git a/src/sagemaker/serve/builder/transformers_builder.py b/src/sagemaker/serve/builder/transformers_builder.py index ead9b7425f..d760ef594a 100644 --- a/src/sagemaker/serve/builder/transformers_builder.py +++ b/src/sagemaker/serve/builder/transformers_builder.py @@ -23,7 +23,7 @@ _get_nb_instance, ) from sagemaker.djl_inference.model import _get_model_config_properties_from_hf -from sagemaker.huggingface import HuggingFaceModel +from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri from sagemaker.serve.model_server.multi_model_server.prepare import ( _create_dir_structure, ) @@ -47,6 +47,7 @@ class Transformers(ABC): """Transformers build logic with ModelBuilder()""" def __init__(self): + self.model_metadata = None self.model = None self.serve_settings = None self.sagemaker_session = None @@ -99,7 +100,26 @@ def _create_transformers_model(self) -> Type[Model]: if hf_model_md is None: raise ValueError("Could not fetch HF metadata") - if "pytorch" in hf_model_md.get("tags"): + model_task = None + if self.model_metadata: + model_task = self.model_metadata.get("HF_TASK") + else: + model_task = hf_model_md.get("pipeline_tag") + + if model_task == "sentence-similarity" and not self.image_uri: + self.image_uri = \ + get_huggingface_llm_image_uri("huggingface-tei", session=self.sagemaker_session) + + logger.info("Auto detected %s. Proceeding with the the deployment.", self.image_uri) + + pysdk_model = HuggingFaceModel( + env=self.env_vars, + role=self.role_arn, + sagemaker_session=self.sagemaker_session, + image_uri=self.image_uri, + vpc_config=self.vpc_config, + ) + elif "pytorch" in hf_model_md.get("tags"): self.pytorch_version = self._get_supported_version( hf_config, base_hf_version, "pytorch" ) diff --git a/tests/unit/sagemaker/serve/builder/test_transformers_builder.py b/tests/unit/sagemaker/serve/builder/test_transformers_builder.py index b7e3db79d6..2e1552dcd7 100644 --- a/tests/unit/sagemaker/serve/builder/test_transformers_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_transformers_builder.py @@ -110,7 +110,7 @@ def test_build_deploy_for_transformers_local_container_and_remote_container( return_value="ml.g5.24xlarge", ) @patch("sagemaker.serve.builder.transformers_builder._capture_telemetry", side_effect=None) - def test_image_uri( + def test_image_uri_override( self, mock_get_nb_instance, mock_telemetry, @@ -144,3 +144,52 @@ def test_image_uri( with self.assertRaises(ValueError) as _: model.deploy(mode=Mode.IN_PROCESS) + + @patch( + "sagemaker.serve.builder.transformers_builder._get_nb_instance", + return_value="ml.g5.24xlarge", + ) + @patch( + "sagemaker.huggingface.llm_utils.get_huggingface_model_metadata", + return_value="sentence-similarity", + ) + @patch( + "from sagemaker.huggingface.get_huggingface_llm_image_uri", + return_value=MOCK_IMAGE_CONFIG + ) + @patch("sagemaker.serve.builder.transformers_builder._capture_telemetry", side_effect=None) + def test_sentence_similarity_support( + self, + mock_get_nb_instance, + mock_task, + mock_image, + mock_telemetry, + ): + builder = ModelBuilder( + model=mock_model_id, + schema_builder=mock_schema_builder, + mode=Mode.LOCAL_CONTAINER, + ) + + builder._prepare_for_mode = MagicMock() + builder._prepare_for_mode.side_effect = None + + model = builder.build() + builder.serve_settings.telemetry_opt_out = True + + builder.modes[str(Mode.LOCAL_CONTAINER)] = MagicMock() + predictor = model.deploy(model_data_download_timeout=1800) + + assert builder.image_uri == MOCK_IMAGE_CONFIG + assert builder.env_vars["MODEL_LOADING_TIMEOUT"] == "1800" + assert isinstance(predictor, TransformersLocalModePredictor) + + assert builder.nb_instance_type == "ml.g5.24xlarge" + + builder._original_deploy = MagicMock() + builder._prepare_for_mode.return_value = (None, {}) + predictor = model.deploy(mode=Mode.SAGEMAKER_ENDPOINT, role="mock_role_arn") + assert "HF_MODEL_ID" in model.env + + with self.assertRaises(ValueError) as _: + model.deploy(mode=Mode.IN_PROCESS) From 43ce1ba15ff6164e18c17127815694f0a71b78c4 Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Wed, 15 May 2024 16:06:42 -0700 Subject: [PATCH 02/30] Fix formatting --- src/sagemaker/serve/builder/transformers_builder.py | 5 +++-- .../serve/builder/test_transformers_builder.py | 13 ++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/sagemaker/serve/builder/transformers_builder.py b/src/sagemaker/serve/builder/transformers_builder.py index d760ef594a..602a0d3977 100644 --- a/src/sagemaker/serve/builder/transformers_builder.py +++ b/src/sagemaker/serve/builder/transformers_builder.py @@ -107,8 +107,9 @@ def _create_transformers_model(self) -> Type[Model]: model_task = hf_model_md.get("pipeline_tag") if model_task == "sentence-similarity" and not self.image_uri: - self.image_uri = \ - get_huggingface_llm_image_uri("huggingface-tei", session=self.sagemaker_session) + self.image_uri = get_huggingface_llm_image_uri( + "huggingface-tei", session=self.sagemaker_session + ) logger.info("Auto detected %s. Proceeding with the the deployment.", self.image_uri) diff --git a/tests/unit/sagemaker/serve/builder/test_transformers_builder.py b/tests/unit/sagemaker/serve/builder/test_transformers_builder.py index 2e1552dcd7..2f6321408f 100644 --- a/tests/unit/sagemaker/serve/builder/test_transformers_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_transformers_builder.py @@ -154,16 +154,15 @@ def test_image_uri_override( return_value="sentence-similarity", ) @patch( - "from sagemaker.huggingface.get_huggingface_llm_image_uri", - return_value=MOCK_IMAGE_CONFIG + "sagemaker.huggingface.get_huggingface_llm_image_uri", return_value=MOCK_IMAGE_CONFIG ) @patch("sagemaker.serve.builder.transformers_builder._capture_telemetry", side_effect=None) def test_sentence_similarity_support( - self, - mock_get_nb_instance, - mock_task, - mock_image, - mock_telemetry, + self, + mock_get_nb_instance, + mock_task, + mock_image, + mock_telemetry, ): builder = ModelBuilder( model=mock_model_id, From 6211227eb925daebc25548ed4086700561948a78 Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Wed, 15 May 2024 16:49:24 -0700 Subject: [PATCH 03/30] Address PR comments --- src/sagemaker/serve/builder/transformers_builder.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/sagemaker/serve/builder/transformers_builder.py b/src/sagemaker/serve/builder/transformers_builder.py index 602a0d3977..d4d10bb7a4 100644 --- a/src/sagemaker/serve/builder/transformers_builder.py +++ b/src/sagemaker/serve/builder/transformers_builder.py @@ -100,11 +100,9 @@ def _create_transformers_model(self) -> Type[Model]: if hf_model_md is None: raise ValueError("Could not fetch HF metadata") - model_task = None - if self.model_metadata: + model_task = hf_model_md.get("pipeline_tag") + if self.model_metadata.get("HF_TASK") is not None: model_task = self.model_metadata.get("HF_TASK") - else: - model_task = hf_model_md.get("pipeline_tag") if model_task == "sentence-similarity" and not self.image_uri: self.image_uri = get_huggingface_llm_image_uri( From 044143633c4cbc893906f959186977a286e4130d Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Wed, 15 May 2024 23:54:47 +0000 Subject: [PATCH 04/30] Fix formatting --- .../unit/sagemaker/serve/builder/test_transformers_builder.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/unit/sagemaker/serve/builder/test_transformers_builder.py b/tests/unit/sagemaker/serve/builder/test_transformers_builder.py index 2f6321408f..8cd48375d7 100644 --- a/tests/unit/sagemaker/serve/builder/test_transformers_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_transformers_builder.py @@ -153,9 +153,7 @@ def test_image_uri_override( "sagemaker.huggingface.llm_utils.get_huggingface_model_metadata", return_value="sentence-similarity", ) - @patch( - "sagemaker.huggingface.get_huggingface_llm_image_uri", return_value=MOCK_IMAGE_CONFIG - ) + @patch("sagemaker.huggingface.get_huggingface_llm_image_uri", return_value=MOCK_IMAGE_CONFIG) @patch("sagemaker.serve.builder.transformers_builder._capture_telemetry", side_effect=None) def test_sentence_similarity_support( self, From 4973f8fb69b4d60d373062cb93d9917ec296158f Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Wed, 15 May 2024 17:25:24 -0700 Subject: [PATCH 05/30] Fix check --- src/sagemaker/serve/builder/transformers_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sagemaker/serve/builder/transformers_builder.py b/src/sagemaker/serve/builder/transformers_builder.py index d4d10bb7a4..6cc0001f6d 100644 --- a/src/sagemaker/serve/builder/transformers_builder.py +++ b/src/sagemaker/serve/builder/transformers_builder.py @@ -101,7 +101,7 @@ def _create_transformers_model(self) -> Type[Model]: raise ValueError("Could not fetch HF metadata") model_task = hf_model_md.get("pipeline_tag") - if self.model_metadata.get("HF_TASK") is not None: + if self.model_metadata is not None and self.model_metadata.get("HF_TASK") is not None: model_task = self.model_metadata.get("HF_TASK") if model_task == "sentence-similarity" and not self.image_uri: From f8cd86480cb8e8aa7bffe4b428892a3b0ffc8e14 Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Thu, 16 May 2024 08:15:36 -0700 Subject: [PATCH 06/30] Switch sentence similarity to be deployed on tgi --- src/sagemaker/serve/builder/model_builder.py | 3 +- .../serve/builder/transformers_builder.py | 23 +--------- .../serve/builder/test_model_builder.py | 38 +++++++++++++++ .../builder/test_transformers_builder.py | 46 ------------------- 4 files changed, 42 insertions(+), 68 deletions(-) diff --git a/src/sagemaker/serve/builder/model_builder.py b/src/sagemaker/serve/builder/model_builder.py index 42a2b994a8..1ffa175f75 100644 --- a/src/sagemaker/serve/builder/model_builder.py +++ b/src/sagemaker/serve/builder/model_builder.py @@ -764,7 +764,8 @@ def build( # pylint: disable=R0911 model_task = hf_model_md.get("pipeline_tag") if self.schema_builder is None and model_task is not None: self._hf_schema_builder_init(model_task) - if model_task == "text-generation": # pylint: disable=R1705 + if model_task == "text-generation" or\ + model_task == "sentence-similarity": # pylint: disable=R1705 return self._build_for_tgi() elif self._can_fit_on_single_gpu(): return self._build_for_transformers() diff --git a/src/sagemaker/serve/builder/transformers_builder.py b/src/sagemaker/serve/builder/transformers_builder.py index 6cc0001f6d..ead9b7425f 100644 --- a/src/sagemaker/serve/builder/transformers_builder.py +++ b/src/sagemaker/serve/builder/transformers_builder.py @@ -23,7 +23,7 @@ _get_nb_instance, ) from sagemaker.djl_inference.model import _get_model_config_properties_from_hf -from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri +from sagemaker.huggingface import HuggingFaceModel from sagemaker.serve.model_server.multi_model_server.prepare import ( _create_dir_structure, ) @@ -47,7 +47,6 @@ class Transformers(ABC): """Transformers build logic with ModelBuilder()""" def __init__(self): - self.model_metadata = None self.model = None self.serve_settings = None self.sagemaker_session = None @@ -100,25 +99,7 @@ def _create_transformers_model(self) -> Type[Model]: if hf_model_md is None: raise ValueError("Could not fetch HF metadata") - model_task = hf_model_md.get("pipeline_tag") - if self.model_metadata is not None and self.model_metadata.get("HF_TASK") is not None: - model_task = self.model_metadata.get("HF_TASK") - - if model_task == "sentence-similarity" and not self.image_uri: - self.image_uri = get_huggingface_llm_image_uri( - "huggingface-tei", session=self.sagemaker_session - ) - - logger.info("Auto detected %s. Proceeding with the the deployment.", self.image_uri) - - pysdk_model = HuggingFaceModel( - env=self.env_vars, - role=self.role_arn, - sagemaker_session=self.sagemaker_session, - image_uri=self.image_uri, - vpc_config=self.vpc_config, - ) - elif "pytorch" in hf_model_md.get("tags"): + if "pytorch" in hf_model_md.get("tags"): self.pytorch_version = self._get_supported_version( hf_config, base_hf_version, "pytorch" ) diff --git a/tests/unit/sagemaker/serve/builder/test_model_builder.py b/tests/unit/sagemaker/serve/builder/test_model_builder.py index 3ffbdd7c03..2e0d74acde 100644 --- a/tests/unit/sagemaker/serve/builder/test_model_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_model_builder.py @@ -1476,6 +1476,44 @@ def test_text_generation( mock_build_for_tgi.assert_called_once() + @patch("sagemaker.serve.builder.model_builder.ModelBuilder._build_for_tgi") + @patch("sagemaker.image_uris.retrieve") + @patch("sagemaker.djl_inference.model.urllib") + @patch("sagemaker.djl_inference.model.json") + @patch("sagemaker.huggingface.llm_utils.urllib") + @patch("sagemaker.huggingface.llm_utils.json") + @patch("sagemaker.model_uris.retrieve") + @patch("sagemaker.serve.builder.model_builder._ServeSettings") + def test_sentence_similarity( + self, + mock_serveSettings, + mock_model_uris_retrieve, + mock_llm_utils_json, + mock_llm_utils_urllib, + mock_model_json, + mock_model_urllib, + mock_image_uris_retrieve, + mock_build_for_tgi, + ): + mock_setting_object = mock_serveSettings.return_value + mock_setting_object.role_arn = mock_role_arn + mock_setting_object.s3_model_data_url = mock_s3_model_data_url + + mock_model_uris_retrieve.side_effect = KeyError + mock_llm_utils_json.load.return_value = {"pipeline_tag": "sentence-similarity"} + mock_llm_utils_urllib.request.Request.side_effect = Mock() + + mock_model_json.load.return_value = {"some": "config"} + mock_model_urllib.request.Request.side_effect = Mock() + mock_build_for_tgi.side_effect = Mock() + + mock_image_uris_retrieve.return_value = "https://some-image-uri" + + model_builder = ModelBuilder(model="bloom-560m") + model_builder.build(sagemaker_session=mock_session) + + mock_build_for_tgi.assert_called_once() + @patch("sagemaker.serve.builder.model_builder.ModelBuilder._build_for_transformers", Mock()) @patch("sagemaker.serve.builder.model_builder.ModelBuilder._try_fetch_gpu_info") @patch("sagemaker.image_uris.retrieve") diff --git a/tests/unit/sagemaker/serve/builder/test_transformers_builder.py b/tests/unit/sagemaker/serve/builder/test_transformers_builder.py index 8cd48375d7..d63eabf2a3 100644 --- a/tests/unit/sagemaker/serve/builder/test_transformers_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_transformers_builder.py @@ -144,49 +144,3 @@ def test_image_uri_override( with self.assertRaises(ValueError) as _: model.deploy(mode=Mode.IN_PROCESS) - - @patch( - "sagemaker.serve.builder.transformers_builder._get_nb_instance", - return_value="ml.g5.24xlarge", - ) - @patch( - "sagemaker.huggingface.llm_utils.get_huggingface_model_metadata", - return_value="sentence-similarity", - ) - @patch("sagemaker.huggingface.get_huggingface_llm_image_uri", return_value=MOCK_IMAGE_CONFIG) - @patch("sagemaker.serve.builder.transformers_builder._capture_telemetry", side_effect=None) - def test_sentence_similarity_support( - self, - mock_get_nb_instance, - mock_task, - mock_image, - mock_telemetry, - ): - builder = ModelBuilder( - model=mock_model_id, - schema_builder=mock_schema_builder, - mode=Mode.LOCAL_CONTAINER, - ) - - builder._prepare_for_mode = MagicMock() - builder._prepare_for_mode.side_effect = None - - model = builder.build() - builder.serve_settings.telemetry_opt_out = True - - builder.modes[str(Mode.LOCAL_CONTAINER)] = MagicMock() - predictor = model.deploy(model_data_download_timeout=1800) - - assert builder.image_uri == MOCK_IMAGE_CONFIG - assert builder.env_vars["MODEL_LOADING_TIMEOUT"] == "1800" - assert isinstance(predictor, TransformersLocalModePredictor) - - assert builder.nb_instance_type == "ml.g5.24xlarge" - - builder._original_deploy = MagicMock() - builder._prepare_for_mode.return_value = (None, {}) - predictor = model.deploy(mode=Mode.SAGEMAKER_ENDPOINT, role="mock_role_arn") - assert "HF_MODEL_ID" in model.env - - with self.assertRaises(ValueError) as _: - model.deploy(mode=Mode.IN_PROCESS) From a5fa0e9a9786a57a0cdffb618a2787f71e808553 Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Thu, 16 May 2024 15:17:03 +0000 Subject: [PATCH 07/30] Fix formatting --- src/sagemaker/serve/builder/model_builder.py | 5 +++-- .../serve/builder/test_model_builder.py | 18 +++++++++--------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/sagemaker/serve/builder/model_builder.py b/src/sagemaker/serve/builder/model_builder.py index 1ffa175f75..7cb9b5e071 100644 --- a/src/sagemaker/serve/builder/model_builder.py +++ b/src/sagemaker/serve/builder/model_builder.py @@ -764,8 +764,9 @@ def build( # pylint: disable=R0911 model_task = hf_model_md.get("pipeline_tag") if self.schema_builder is None and model_task is not None: self._hf_schema_builder_init(model_task) - if model_task == "text-generation" or\ - model_task == "sentence-similarity": # pylint: disable=R1705 + if ( + model_task == "text-generation" or model_task == "sentence-similarity" + ): # pylint: disable=R1705 return self._build_for_tgi() elif self._can_fit_on_single_gpu(): return self._build_for_transformers() diff --git a/tests/unit/sagemaker/serve/builder/test_model_builder.py b/tests/unit/sagemaker/serve/builder/test_model_builder.py index 2e0d74acde..dda56c4bd4 100644 --- a/tests/unit/sagemaker/serve/builder/test_model_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_model_builder.py @@ -1485,15 +1485,15 @@ def test_text_generation( @patch("sagemaker.model_uris.retrieve") @patch("sagemaker.serve.builder.model_builder._ServeSettings") def test_sentence_similarity( - self, - mock_serveSettings, - mock_model_uris_retrieve, - mock_llm_utils_json, - mock_llm_utils_urllib, - mock_model_json, - mock_model_urllib, - mock_image_uris_retrieve, - mock_build_for_tgi, + self, + mock_serveSettings, + mock_model_uris_retrieve, + mock_llm_utils_json, + mock_llm_utils_urllib, + mock_model_json, + mock_model_urllib, + mock_image_uris_retrieve, + mock_build_for_tgi, ): mock_setting_object = mock_serveSettings.return_value mock_setting_object.role_arn = mock_role_arn From e524134db8159bc4c251a10f655a5034ae9f79e2 Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Thu, 16 May 2024 08:21:26 -0700 Subject: [PATCH 08/30] Fix formatting --- src/sagemaker/serve/builder/model_builder.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/sagemaker/serve/builder/model_builder.py b/src/sagemaker/serve/builder/model_builder.py index 7cb9b5e071..e29bbeb4d4 100644 --- a/src/sagemaker/serve/builder/model_builder.py +++ b/src/sagemaker/serve/builder/model_builder.py @@ -764,9 +764,8 @@ def build( # pylint: disable=R0911 model_task = hf_model_md.get("pipeline_tag") if self.schema_builder is None and model_task is not None: self._hf_schema_builder_init(model_task) - if ( - model_task == "text-generation" or model_task == "sentence-similarity" - ): # pylint: disable=R1705 + if model_task in ('text-generation', 'sentence-similarity'): # pylint: + # disable=R1705 return self._build_for_tgi() elif self._can_fit_on_single_gpu(): return self._build_for_transformers() From 4263a44a91c1dac2cd6121cfd97d58329af8990e Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Thu, 16 May 2024 15:22:20 +0000 Subject: [PATCH 09/30] Fix formatting --- src/sagemaker/serve/builder/model_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sagemaker/serve/builder/model_builder.py b/src/sagemaker/serve/builder/model_builder.py index e29bbeb4d4..be3fd633cd 100644 --- a/src/sagemaker/serve/builder/model_builder.py +++ b/src/sagemaker/serve/builder/model_builder.py @@ -764,7 +764,7 @@ def build( # pylint: disable=R0911 model_task = hf_model_md.get("pipeline_tag") if self.schema_builder is None and model_task is not None: self._hf_schema_builder_init(model_task) - if model_task in ('text-generation', 'sentence-similarity'): # pylint: + if model_task in ("text-generation", "sentence-similarity"): # pylint: # disable=R1705 return self._build_for_tgi() elif self._can_fit_on_single_gpu(): From eb3b6d3abf3f55ac34232377d82fabfc8bb3622e Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Thu, 16 May 2024 08:59:09 -0700 Subject: [PATCH 10/30] Fix formatting --- src/sagemaker/serve/builder/model_builder.py | 7 +++---- tests/unit/sagemaker/serve/builder/test_model_builder.py | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/sagemaker/serve/builder/model_builder.py b/src/sagemaker/serve/builder/model_builder.py index be3fd633cd..f32b339487 100644 --- a/src/sagemaker/serve/builder/model_builder.py +++ b/src/sagemaker/serve/builder/model_builder.py @@ -95,7 +95,7 @@ } -# pylint: disable=attribute-defined-outside-init, disable=E1101, disable=R0901 +# pylint: disable=attribute-defined-outside-init, disable=E1101, disable=R0901, disable=R1705 @dataclass class ModelBuilder(Triton, DJL, JumpStart, TGI, Transformers, TensorflowServing): """Class that builds a deployable model. @@ -753,7 +753,7 @@ def build( # pylint: disable=R0911 model_task = self.model_metadata.get("HF_TASK") if self._is_jumpstart_model_id(): return self._build_for_jumpstart() - if self._is_djl(): # pylint: disable=R1705 + if self._is_djl(): return self._build_for_djl() else: hf_model_md = get_huggingface_model_metadata( @@ -764,8 +764,7 @@ def build( # pylint: disable=R0911 model_task = hf_model_md.get("pipeline_tag") if self.schema_builder is None and model_task is not None: self._hf_schema_builder_init(model_task) - if model_task in ("text-generation", "sentence-similarity"): # pylint: - # disable=R1705 + if model_task in ("text-generation", "sentence-similarity"): return self._build_for_tgi() elif self._can_fit_on_single_gpu(): return self._build_for_transformers() diff --git a/tests/unit/sagemaker/serve/builder/test_model_builder.py b/tests/unit/sagemaker/serve/builder/test_model_builder.py index dda56c4bd4..ef05857628 100644 --- a/tests/unit/sagemaker/serve/builder/test_model_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_model_builder.py @@ -1509,7 +1509,7 @@ def test_sentence_similarity( mock_image_uris_retrieve.return_value = "https://some-image-uri" - model_builder = ModelBuilder(model="bloom-560m") + model_builder = ModelBuilder(model="bloom-560m", schema_builder=schema_builder) model_builder.build(sagemaker_session=mock_session) mock_build_for_tgi.assert_called_once() From 2b9ba2ae1cf0a844a6eed6b0e3d00af335a59a36 Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Thu, 16 May 2024 10:08:21 -0700 Subject: [PATCH 11/30] Introduce TEI builder with TGI server --- src/sagemaker/serve/builder/model_builder.py | 7 +- src/sagemaker/serve/builder/tei_builder.py | 218 ++++++++++++++++++ .../serve/builder/test_model_builder.py | 8 +- .../serve/builder/test_tei_builder.py | 152 ++++++++++++ 4 files changed, 379 insertions(+), 6 deletions(-) create mode 100644 src/sagemaker/serve/builder/tei_builder.py create mode 100644 tests/unit/sagemaker/serve/builder/test_tei_builder.py diff --git a/src/sagemaker/serve/builder/model_builder.py b/src/sagemaker/serve/builder/model_builder.py index f32b339487..b24f30fb3e 100644 --- a/src/sagemaker/serve/builder/model_builder.py +++ b/src/sagemaker/serve/builder/model_builder.py @@ -36,6 +36,7 @@ from sagemaker.serve.detector.pickler import save_pkl, save_xgboost from sagemaker.serve.builder.serve_settings import _ServeSettings from sagemaker.serve.builder.djl_builder import DJL +from sagemaker.serve.builder.tei_builder import TEI from sagemaker.serve.builder.tgi_builder import TGI from sagemaker.serve.builder.jumpstart_builder import JumpStart from sagemaker.serve.builder.transformers_builder import Transformers @@ -97,7 +98,7 @@ # pylint: disable=attribute-defined-outside-init, disable=E1101, disable=R0901, disable=R1705 @dataclass -class ModelBuilder(Triton, DJL, JumpStart, TGI, Transformers, TensorflowServing): +class ModelBuilder(Triton, DJL, JumpStart, TGI, Transformers, TensorflowServing, TEI): """Class that builds a deployable model. Args: @@ -764,8 +765,10 @@ def build( # pylint: disable=R0911 model_task = hf_model_md.get("pipeline_tag") if self.schema_builder is None and model_task is not None: self._hf_schema_builder_init(model_task) - if model_task in ("text-generation", "sentence-similarity"): + if model_task == "text-generation": return self._build_for_tgi() + if model_task == "sentence-similarity": + return self._build_for_tei() elif self._can_fit_on_single_gpu(): return self._build_for_transformers() elif ( diff --git a/src/sagemaker/serve/builder/tei_builder.py b/src/sagemaker/serve/builder/tei_builder.py new file mode 100644 index 0000000000..24f93be2fd --- /dev/null +++ b/src/sagemaker/serve/builder/tei_builder.py @@ -0,0 +1,218 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +"""Holds mixin logic to support deployment of Model ID""" +from __future__ import absolute_import +import logging +from typing import Type +from abc import ABC, abstractmethod + +from sagemaker.model import Model +from sagemaker.djl_inference.model import _get_model_config_properties_from_hf +from sagemaker.serve.model_server.djl_serving.utils import ( + _get_default_max_tokens, +) + +from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri +from sagemaker.serve.utils.local_hardware import ( + _get_nb_instance, +) +from sagemaker.serve.model_server.tgi.prepare import _create_dir_structure +from sagemaker.serve.utils.predictors import TgiLocalModePredictor +from sagemaker.serve.utils.types import ModelServer +from sagemaker.serve.mode.function_pointers import Mode +from sagemaker.serve.utils.telemetry_logger import _capture_telemetry +from sagemaker.base_predictor import PredictorBase + +logger = logging.getLogger(__name__) + +_CODE_FOLDER = "code" + + +class TEI(ABC): + """TEI build logic for ModelBuilder()""" + + def __init__(self): + self.model = None + self.serve_settings = None + self.sagemaker_session = None + self.model_path = None + self.dependencies = None + self.modes = None + self.mode = None + self.model_server = None + self.image_uri = None + self._is_custom_image_uri = False + self.image_config = None + self.vpc_config = None + self._original_deploy = None + self.hf_model_config = None + self._default_tensor_parallel_degree = None + self._default_data_type = None + self._default_max_tokens = None + self.pysdk_model = None + self.schema_builder = None + self.env_vars = None + self.nb_instance_type = None + self.ram_usage_model_load = None + self.secret_key = None + self.jumpstart = None + self.role_arn = None + + @abstractmethod + def _prepare_for_mode(self): + """Placeholder docstring""" + + @abstractmethod + def _get_client_translators(self): + """Placeholder docstring""" + + def _set_to_tgi(self): + """Placeholder docstring""" + if self.model_server != ModelServer.TGI: + messaging = ( + "HuggingFace Model ID support on model server: " + f"{self.model_server} is not currently supported. " + f"Defaulting to {ModelServer.TGI}" + ) + logger.warning(messaging) + self.model_server = ModelServer.TGI + + def _create_tei_model(self) -> Type[Model]: + """Placeholder docstring""" + if not self.image_uri: + self.image_uri = get_huggingface_llm_image_uri( + "huggingface-tei", session=self.sagemaker_session + ) + logger.info("Auto detected %s. Proceeding with the the deployment.", self.image_uri) + + pysdk_model = HuggingFaceModel( + image_uri=self.image_uri, + image_config=self.image_config, + vpc_config=self.vpc_config, + env=self.env_vars, + role=self.role_arn, + sagemaker_session=self.sagemaker_session, + ) + + self._original_deploy = pysdk_model.deploy + pysdk_model.deploy = self._tei_model_builder_deploy_wrapper + return pysdk_model + + @_capture_telemetry("tei.deploy") + def _tei_model_builder_deploy_wrapper(self, *args, **kwargs) -> Type[PredictorBase]: + """Placeholder docstring""" + timeout = kwargs.get("model_data_download_timeout") + if timeout: + self.pysdk_model.env.update({"MODEL_LOADING_TIMEOUT": str(timeout)}) + + if "mode" in kwargs and kwargs.get("mode") != self.mode: + overwrite_mode = kwargs.get("mode") + # mode overwritten by customer during model.deploy() + logger.warning( + "Deploying in %s Mode, overriding existing configurations set for %s mode", + overwrite_mode, + self.mode, + ) + + if overwrite_mode == Mode.SAGEMAKER_ENDPOINT: + self.mode = self.pysdk_model.mode = Mode.SAGEMAKER_ENDPOINT + elif overwrite_mode == Mode.LOCAL_CONTAINER: + self._prepare_for_mode() + self.mode = self.pysdk_model.mode = Mode.LOCAL_CONTAINER + else: + raise ValueError("Mode %s is not supported!" % overwrite_mode) + + serializer = self.schema_builder.input_serializer + deserializer = self.schema_builder._output_deserializer + if self.mode == Mode.LOCAL_CONTAINER: + timeout = kwargs.get("model_data_download_timeout") + + predictor = TgiLocalModePredictor( + self.modes[str(Mode.LOCAL_CONTAINER)], serializer, deserializer + ) + + self.modes[str(Mode.LOCAL_CONTAINER)].create_server( + self.image_uri, + timeout if timeout else 1800, + None, + predictor, + self.pysdk_model.env, + jumpstart=False, + ) + + return predictor + + if "mode" in kwargs: + del kwargs["mode"] + if "role" in kwargs: + self.pysdk_model.role = kwargs.get("role") + del kwargs["role"] + + # set model_data to uncompressed s3 dict + self.pysdk_model.model_data, env_vars = self._prepare_for_mode() + self.env_vars.update(env_vars) + self.pysdk_model.env.update(self.env_vars) + + + if "endpoint_logging" not in kwargs: + kwargs["endpoint_logging"] = True + + if self.nb_instance_type and "instance_type" not in kwargs: + kwargs.update({"instance_type": self.nb_instance_type}) + elif not self.nb_instance_type and "instance_type" not in kwargs: + raise ValueError( + "Instance type must be provided when deploying " "to SageMaker Endpoint mode." + ) + + if "initial_instance_count" not in kwargs: + kwargs.update({"initial_instance_count": 1}) + + predictor = self._original_deploy(*args, **kwargs) + + predictor.serializer = serializer + predictor.deserializer = deserializer + return predictor + + def _build_for_hf_tei(self): + """Placeholder docstring""" + self.nb_instance_type = _get_nb_instance() + + _create_dir_structure(self.model_path) + if not hasattr(self, "pysdk_model"): + self.env_vars.update({"HF_MODEL_ID": self.model}) + self.hf_model_config = _get_model_config_properties_from_hf( + self.model, self.env_vars.get("HUGGING_FACE_HUB_TOKEN") + ) + + _default_max_new_tokens = _get_default_max_tokens( + self.schema_builder.sample_input, self.schema_builder.sample_output + ) + + self.schema_builder.sample_input["parameters"][ + "max_new_tokens" + ] = _default_max_new_tokens + self.pysdk_model = self._create_tei_model() + + if self.mode == Mode.LOCAL_CONTAINER: + self._prepare_for_mode() + + return self.pysdk_model + + def _build_for_tei(self): + """Placeholder docstring""" + self.secret_key = None + + self._set_to_tgi() + + self.pysdk_model = self._build_for_hf_tei() + return self.pysdk_model diff --git a/tests/unit/sagemaker/serve/builder/test_model_builder.py b/tests/unit/sagemaker/serve/builder/test_model_builder.py index ef05857628..0c06b5ae8e 100644 --- a/tests/unit/sagemaker/serve/builder/test_model_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_model_builder.py @@ -1476,7 +1476,7 @@ def test_text_generation( mock_build_for_tgi.assert_called_once() - @patch("sagemaker.serve.builder.model_builder.ModelBuilder._build_for_tgi") + @patch("sagemaker.serve.builder.model_builder.ModelBuilder._build_for_tei") @patch("sagemaker.image_uris.retrieve") @patch("sagemaker.djl_inference.model.urllib") @patch("sagemaker.djl_inference.model.json") @@ -1493,7 +1493,7 @@ def test_sentence_similarity( mock_model_json, mock_model_urllib, mock_image_uris_retrieve, - mock_build_for_tgi, + mock_build_for_tei, ): mock_setting_object = mock_serveSettings.return_value mock_setting_object.role_arn = mock_role_arn @@ -1505,14 +1505,14 @@ def test_sentence_similarity( mock_model_json.load.return_value = {"some": "config"} mock_model_urllib.request.Request.side_effect = Mock() - mock_build_for_tgi.side_effect = Mock() + mock_build_for_tei.side_effect = Mock() mock_image_uris_retrieve.return_value = "https://some-image-uri" model_builder = ModelBuilder(model="bloom-560m", schema_builder=schema_builder) model_builder.build(sagemaker_session=mock_session) - mock_build_for_tgi.assert_called_once() + mock_build_for_tei.assert_called_once() @patch("sagemaker.serve.builder.model_builder.ModelBuilder._build_for_transformers", Mock()) @patch("sagemaker.serve.builder.model_builder.ModelBuilder._try_fetch_gpu_info") diff --git a/tests/unit/sagemaker/serve/builder/test_tei_builder.py b/tests/unit/sagemaker/serve/builder/test_tei_builder.py new file mode 100644 index 0000000000..99f4a7ea0d --- /dev/null +++ b/tests/unit/sagemaker/serve/builder/test_tei_builder.py @@ -0,0 +1,152 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from __future__ import absolute_import +from unittest.mock import MagicMock, patch + +import unittest +from sagemaker.serve.builder.model_builder import ModelBuilder +from sagemaker.serve.mode.function_pointers import Mode +from tests.unit.sagemaker.serve.constants import MOCK_VPC_CONFIG + +from sagemaker.serve.utils.predictors import TgiLocalModePredictor + +mock_model_id = "bert-base-uncased" +mock_prompt = "The man worked as a [MASK]." +mock_sample_input = {"inputs": mock_prompt} +mock_sample_output = [ + { + "score": 0.0974755585193634, + "token": 10533, + "token_str": "carpenter", + "sequence": "the man worked as a carpenter.", + }, + { + "score": 0.052383411675691605, + "token": 15610, + "token_str": "waiter", + "sequence": "the man worked as a waiter.", + }, + { + "score": 0.04962712526321411, + "token": 13362, + "token_str": "barber", + "sequence": "the man worked as a barber.", + }, + { + "score": 0.0378861166536808, + "token": 15893, + "token_str": "mechanic", + "sequence": "the man worked as a mechanic.", + }, + { + "score": 0.037680838257074356, + "token": 18968, + "token_str": "salesman", + "sequence": "the man worked as a salesman.", + }, +] +mock_schema_builder = MagicMock() +mock_schema_builder.sample_input = mock_sample_input +mock_schema_builder.sample_output = mock_sample_output +MOCK_IMAGE_CONFIG = ( + "763104351884.dkr.ecr.us-west-2.amazonaws.com/" + "huggingface-pytorch-inference:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04-v1.0" +) + + +class TestTEIBuilder(unittest.TestCase): + @patch( + "sagemaker.serve.builder.tei_builder._get_nb_instance", + return_value="ml.g5.24xlarge", + ) + @patch("sagemaker.serve.builder.tei_builder._capture_telemetry", side_effect=None) + def test_build_deploy_for_tei_local_container_and_remote_container( + self, + mock_get_nb_instance, + mock_telemetry, + ): + builder = ModelBuilder( + model=mock_model_id, + schema_builder=mock_schema_builder, + mode=Mode.LOCAL_CONTAINER, + vpc_config=MOCK_VPC_CONFIG, + env_vars={ + 'HF_TASK': 'sentence-similarity', + }, + ) + + builder._prepare_for_mode = MagicMock() + builder._prepare_for_mode.side_effect = None + + model = builder.build() + builder.serve_settings.telemetry_opt_out = True + + builder.modes[str(Mode.LOCAL_CONTAINER)] = MagicMock() + predictor = model.deploy(model_data_download_timeout=1800) + + assert model.vpc_config == MOCK_VPC_CONFIG + assert builder.env_vars["MODEL_LOADING_TIMEOUT"] == "1800" + assert isinstance(predictor, TgiLocalModePredictor) + + assert builder.nb_instance_type == "ml.g5.24xlarge" + + builder._original_deploy = MagicMock() + builder._prepare_for_mode.return_value = (None, {}) + predictor = model.deploy(mode=Mode.SAGEMAKER_ENDPOINT, role="mock_role_arn") + assert "HF_MODEL_ID" in model.env + + with self.assertRaises(ValueError) as _: + model.deploy(mode=Mode.IN_PROCESS) + + @patch( + "sagemaker.serve.builder.transformers_builder._get_nb_instance", + return_value="ml.g5.24xlarge", + ) + @patch("sagemaker.serve.builder.tei_builder._capture_telemetry", side_effect=None) + def test_image_uri_override( + self, + mock_get_nb_instance, + mock_telemetry, + ): + builder = ModelBuilder( + model=mock_model_id, + schema_builder=mock_schema_builder, + mode=Mode.LOCAL_CONTAINER, + image_uri=MOCK_IMAGE_CONFIG, + env_vars={ + 'HF_TASK': 'sentence-similarity', + }, + ) + + builder._prepare_for_mode = MagicMock() + builder._prepare_for_mode.side_effect = None + + model = builder.build() + builder.serve_settings.telemetry_opt_out = True + + builder.modes[str(Mode.LOCAL_CONTAINER)] = MagicMock() + predictor = model.deploy(model_data_download_timeout=1800) + + assert builder.image_uri == MOCK_IMAGE_CONFIG + assert builder.env_vars["MODEL_LOADING_TIMEOUT"] == "1800" + assert isinstance(predictor, TgiLocalModePredictor) + + assert builder.nb_instance_type == "ml.g5.24xlarge" + + builder._original_deploy = MagicMock() + builder._prepare_for_mode.return_value = (None, {}) + predictor = model.deploy(mode=Mode.SAGEMAKER_ENDPOINT, role="mock_role_arn") + assert "HF_MODEL_ID" in model.env + + with self.assertRaises(ValueError) as _: + model.deploy(mode=Mode.IN_PROCESS) From 33d5b0499a44cc791ccff27afaf40a4fd06ff082 Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Thu, 16 May 2024 17:09:57 +0000 Subject: [PATCH 12/30] Fix formmatting --- src/sagemaker/serve/builder/tei_builder.py | 1 - .../sagemaker/serve/builder/test_tei_builder.py | 16 ++++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/sagemaker/serve/builder/tei_builder.py b/src/sagemaker/serve/builder/tei_builder.py index 24f93be2fd..195a44afe0 100644 --- a/src/sagemaker/serve/builder/tei_builder.py +++ b/src/sagemaker/serve/builder/tei_builder.py @@ -163,7 +163,6 @@ def _tei_model_builder_deploy_wrapper(self, *args, **kwargs) -> Type[PredictorBa self.env_vars.update(env_vars) self.pysdk_model.env.update(self.env_vars) - if "endpoint_logging" not in kwargs: kwargs["endpoint_logging"] = True diff --git a/tests/unit/sagemaker/serve/builder/test_tei_builder.py b/tests/unit/sagemaker/serve/builder/test_tei_builder.py index 99f4a7ea0d..b01265f564 100644 --- a/tests/unit/sagemaker/serve/builder/test_tei_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_tei_builder.py @@ -71,9 +71,9 @@ class TestTEIBuilder(unittest.TestCase): ) @patch("sagemaker.serve.builder.tei_builder._capture_telemetry", side_effect=None) def test_build_deploy_for_tei_local_container_and_remote_container( - self, - mock_get_nb_instance, - mock_telemetry, + self, + mock_get_nb_instance, + mock_telemetry, ): builder = ModelBuilder( model=mock_model_id, @@ -81,7 +81,7 @@ def test_build_deploy_for_tei_local_container_and_remote_container( mode=Mode.LOCAL_CONTAINER, vpc_config=MOCK_VPC_CONFIG, env_vars={ - 'HF_TASK': 'sentence-similarity', + "HF_TASK": "sentence-similarity", }, ) @@ -114,9 +114,9 @@ def test_build_deploy_for_tei_local_container_and_remote_container( ) @patch("sagemaker.serve.builder.tei_builder._capture_telemetry", side_effect=None) def test_image_uri_override( - self, - mock_get_nb_instance, - mock_telemetry, + self, + mock_get_nb_instance, + mock_telemetry, ): builder = ModelBuilder( model=mock_model_id, @@ -124,7 +124,7 @@ def test_image_uri_override( mode=Mode.LOCAL_CONTAINER, image_uri=MOCK_IMAGE_CONFIG, env_vars={ - 'HF_TASK': 'sentence-similarity', + "HF_TASK": "sentence-similarity", }, ) From 20687f0f338dae72f69e1216dd3ec188370519ca Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Thu, 16 May 2024 10:14:49 -0700 Subject: [PATCH 13/30] Add integ test --- tests/integ/sagemaker/serve/test_serve_tei.py | 133 ++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 tests/integ/sagemaker/serve/test_serve_tei.py diff --git a/tests/integ/sagemaker/serve/test_serve_tei.py b/tests/integ/sagemaker/serve/test_serve_tei.py new file mode 100644 index 0000000000..3000078351 --- /dev/null +++ b/tests/integ/sagemaker/serve/test_serve_tei.py @@ -0,0 +1,133 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from __future__ import absolute_import + +import pytest +from sagemaker.serve.builder.schema_builder import SchemaBuilder +from sagemaker.serve.builder.model_builder import ModelBuilder, Mode +import tests.integ +from tests.integ.sagemaker.serve.constants import ( + HF_DIR, + PYTHON_VERSION_IS_NOT_310, + SERVE_SAGEMAKER_ENDPOINT_TIMEOUT, +) + +from tests.integ.timeout import timeout +from tests.integ.utils import cleanup_model_resources, gpu_list, retry_with_instance_list +import logging + +logger = logging.getLogger(__name__) + +sample_input = { + "inputs": "The man worked as a [MASK].", +} + +loaded_response = [ + { + "score": 0.0974755585193634, + "token": 10533, + "token_str": "carpenter", + "sequence": "the man worked as a carpenter.", + }, + { + "score": 0.052383411675691605, + "token": 15610, + "token_str": "waiter", + "sequence": "the man worked as a waiter.", + }, + { + "score": 0.04962712526321411, + "token": 13362, + "token_str": "barber", + "sequence": "the man worked as a barber.", + }, + { + "score": 0.0378861166536808, + "token": 15893, + "token_str": "mechanic", + "sequence": "the man worked as a mechanic.", + }, + { + "score": 0.037680838257074356, + "token": 18968, + "token_str": "salesman", + "sequence": "the man worked as a salesman.", + }, +] + + +@pytest.fixture +def model_input(): + return {"inputs": "The man worked as a [MASK]."} + + +@pytest.fixture +def model_builder_model_schema_builder(): + return ModelBuilder( + model_path=HF_DIR, + model="bert-base-uncased", + schema_builder=SchemaBuilder(sample_input, loaded_response), + env_vars={ + 'HF_TASK': 'sentence-similarity', + }, + ) + + +@pytest.fixture +def model_builder(request): + return request.getfixturevalue(request.param) + + +@pytest.mark.skipif( + PYTHON_VERSION_IS_NOT_310, + tests.integ.test_region() in tests.integ.TRAINING_NO_P2_REGIONS + and tests.integ.test_region() in tests.integ.TRAINING_NO_P3_REGIONS, + reason="no ml.p2 or ml.p3 instances in this region", + ) +@retry_with_instance_list(gpu_list(tests.integ.test_region())) +@pytest.mark.parametrize("model_builder", ["model_builder_model_schema_builder"], indirect=True) +def test_tei_sagemaker_endpoint( + sagemaker_session, model_builder, model_input, **kwargs +): + logger.info("Running in SAGEMAKER_ENDPOINT mode...") + caught_ex = None + + iam_client = sagemaker_session.boto_session.client("iam") + role_arn = iam_client.get_role(RoleName="SageMakerRole")["Role"]["Arn"] + + model = model_builder.build( + mode=Mode.SAGEMAKER_ENDPOINT, role_arn=role_arn, sagemaker_session=sagemaker_session + ) + + with timeout(minutes=SERVE_SAGEMAKER_ENDPOINT_TIMEOUT): + try: + logger.info("Deploying and predicting in SAGEMAKER_ENDPOINT mode...") + predictor = model.deploy( + instance_type=kwargs["instance_type"], initial_instance_count=2 + ) + logger.info("Endpoint successfully deployed.") + predictor.predict(model_input) + assert predictor is not None + except Exception as e: + caught_ex = e + finally: + cleanup_model_resources( + sagemaker_session=model_builder.sagemaker_session, + model_name=model.name, + endpoint_name=model.endpoint_name, + ) + if caught_ex: + logger.exception(caught_ex) + assert ( + False + ), f"{caught_ex} was thrown when running tei sagemaker endpoint test" \ No newline at end of file From d85425facbe5f6b95cc9df2c2bdcd8c061596cc0 Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Thu, 16 May 2024 17:22:58 +0000 Subject: [PATCH 14/30] Fix formatting --- tests/integ/sagemaker/serve/test_serve_tei.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/integ/sagemaker/serve/test_serve_tei.py b/tests/integ/sagemaker/serve/test_serve_tei.py index 3000078351..03efbb206f 100644 --- a/tests/integ/sagemaker/serve/test_serve_tei.py +++ b/tests/integ/sagemaker/serve/test_serve_tei.py @@ -78,7 +78,7 @@ def model_builder_model_schema_builder(): model="bert-base-uncased", schema_builder=SchemaBuilder(sample_input, loaded_response), env_vars={ - 'HF_TASK': 'sentence-similarity', + "HF_TASK": "sentence-similarity", }, ) @@ -93,12 +93,10 @@ def model_builder(request): tests.integ.test_region() in tests.integ.TRAINING_NO_P2_REGIONS and tests.integ.test_region() in tests.integ.TRAINING_NO_P3_REGIONS, reason="no ml.p2 or ml.p3 instances in this region", - ) +) @retry_with_instance_list(gpu_list(tests.integ.test_region())) @pytest.mark.parametrize("model_builder", ["model_builder_model_schema_builder"], indirect=True) -def test_tei_sagemaker_endpoint( - sagemaker_session, model_builder, model_input, **kwargs -): +def test_tei_sagemaker_endpoint(sagemaker_session, model_builder, model_input, **kwargs): logger.info("Running in SAGEMAKER_ENDPOINT mode...") caught_ex = None @@ -128,6 +126,4 @@ def test_tei_sagemaker_endpoint( ) if caught_ex: logger.exception(caught_ex) - assert ( - False - ), f"{caught_ex} was thrown when running tei sagemaker endpoint test" \ No newline at end of file + assert False, f"{caught_ex} was thrown when running tei sagemaker endpoint test" From bbdff4c2bfef9afee7023533c96ccecdbf6c3500 Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Thu, 16 May 2024 10:25:13 -0700 Subject: [PATCH 15/30] Add integ test --- src/sagemaker/serve/builder/tei_builder.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/sagemaker/serve/builder/tei_builder.py b/src/sagemaker/serve/builder/tei_builder.py index 195a44afe0..2dcc5ae291 100644 --- a/src/sagemaker/serve/builder/tei_builder.py +++ b/src/sagemaker/serve/builder/tei_builder.py @@ -193,13 +193,6 @@ def _build_for_hf_tei(self): self.model, self.env_vars.get("HUGGING_FACE_HUB_TOKEN") ) - _default_max_new_tokens = _get_default_max_tokens( - self.schema_builder.sample_input, self.schema_builder.sample_output - ) - - self.schema_builder.sample_input["parameters"][ - "max_new_tokens" - ] = _default_max_new_tokens self.pysdk_model = self._create_tei_model() if self.mode == Mode.LOCAL_CONTAINER: From a5264165d7cabc2cd8c0a645317ecbd1dc1331d7 Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Thu, 16 May 2024 10:37:35 -0700 Subject: [PATCH 16/30] Add integ test --- src/sagemaker/serve/builder/tei_builder.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/sagemaker/serve/builder/tei_builder.py b/src/sagemaker/serve/builder/tei_builder.py index 2dcc5ae291..eca304c5e9 100644 --- a/src/sagemaker/serve/builder/tei_builder.py +++ b/src/sagemaker/serve/builder/tei_builder.py @@ -18,10 +18,10 @@ from sagemaker.model import Model from sagemaker.djl_inference.model import _get_model_config_properties_from_hf -from sagemaker.serve.model_server.djl_serving.utils import ( - _get_default_max_tokens, -) +from sagemaker.serve.model_server.tgi.utils import ( + _get_default_tgi_configurations, +) from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri from sagemaker.serve.utils.local_hardware import ( _get_nb_instance, @@ -93,7 +93,6 @@ def _create_tei_model(self) -> Type[Model]: self.image_uri = get_huggingface_llm_image_uri( "huggingface-tei", session=self.sagemaker_session ) - logger.info("Auto detected %s. Proceeding with the the deployment.", self.image_uri) pysdk_model = HuggingFaceModel( image_uri=self.image_uri, @@ -104,6 +103,8 @@ def _create_tei_model(self) -> Type[Model]: sagemaker_session=self.sagemaker_session, ) + logger.info("Detected %s. Proceeding with the the deployment.", self.image_uri) + self._original_deploy = pysdk_model.deploy pysdk_model.deploy = self._tei_model_builder_deploy_wrapper return pysdk_model @@ -193,6 +194,14 @@ def _build_for_hf_tei(self): self.model, self.env_vars.get("HUGGING_FACE_HUB_TOKEN") ) + default_tgi_configurations, _default_max_new_tokens = _get_default_tgi_configurations( + self.model, self.hf_model_config, self.schema_builder + ) + + self.schema_builder.sample_input["parameters"][ + "max_new_tokens" + ] = _default_max_new_tokens + self.pysdk_model = self._create_tei_model() if self.mode == Mode.LOCAL_CONTAINER: From 1e49f889fcde05f9eddced1d5bc9c85767acf8d7 Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Thu, 16 May 2024 10:39:15 -0700 Subject: [PATCH 17/30] Add integ test --- src/sagemaker/serve/builder/tei_builder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sagemaker/serve/builder/tei_builder.py b/src/sagemaker/serve/builder/tei_builder.py index eca304c5e9..edda7a9f0f 100644 --- a/src/sagemaker/serve/builder/tei_builder.py +++ b/src/sagemaker/serve/builder/tei_builder.py @@ -38,6 +38,7 @@ _CODE_FOLDER = "code" +# pylint: disable=W0612 class TEI(ABC): """TEI build logic for ModelBuilder()""" From af78426238693dc64f1f6ba02f156b51ca727b84 Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Thu, 16 May 2024 10:42:32 -0700 Subject: [PATCH 18/30] Add integ test --- src/sagemaker/serve/builder/tei_builder.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/sagemaker/serve/builder/tei_builder.py b/src/sagemaker/serve/builder/tei_builder.py index edda7a9f0f..15e2c647ce 100644 --- a/src/sagemaker/serve/builder/tei_builder.py +++ b/src/sagemaker/serve/builder/tei_builder.py @@ -19,9 +19,6 @@ from sagemaker.model import Model from sagemaker.djl_inference.model import _get_model_config_properties_from_hf -from sagemaker.serve.model_server.tgi.utils import ( - _get_default_tgi_configurations, -) from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri from sagemaker.serve.utils.local_hardware import ( _get_nb_instance, @@ -38,7 +35,6 @@ _CODE_FOLDER = "code" -# pylint: disable=W0612 class TEI(ABC): """TEI build logic for ModelBuilder()""" @@ -195,14 +191,6 @@ def _build_for_hf_tei(self): self.model, self.env_vars.get("HUGGING_FACE_HUB_TOKEN") ) - default_tgi_configurations, _default_max_new_tokens = _get_default_tgi_configurations( - self.model, self.hf_model_config, self.schema_builder - ) - - self.schema_builder.sample_input["parameters"][ - "max_new_tokens" - ] = _default_max_new_tokens - self.pysdk_model = self._create_tei_model() if self.mode == Mode.LOCAL_CONTAINER: From a5e665ac1b818716bee6ab5103b37582a24d4db8 Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Thu, 16 May 2024 11:15:50 -0700 Subject: [PATCH 19/30] Add integ test --- tests/integ/sagemaker/serve/test_serve_tei.py | 4 ++-- tests/unit/sagemaker/serve/builder/test_tei_builder.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/integ/sagemaker/serve/test_serve_tei.py b/tests/integ/sagemaker/serve/test_serve_tei.py index 03efbb206f..478e2320f9 100644 --- a/tests/integ/sagemaker/serve/test_serve_tei.py +++ b/tests/integ/sagemaker/serve/test_serve_tei.py @@ -77,9 +77,9 @@ def model_builder_model_schema_builder(): model_path=HF_DIR, model="bert-base-uncased", schema_builder=SchemaBuilder(sample_input, loaded_response), - env_vars={ + model_metadata={ "HF_TASK": "sentence-similarity", - }, + } ) diff --git a/tests/unit/sagemaker/serve/builder/test_tei_builder.py b/tests/unit/sagemaker/serve/builder/test_tei_builder.py index b01265f564..79a8f23324 100644 --- a/tests/unit/sagemaker/serve/builder/test_tei_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_tei_builder.py @@ -80,7 +80,7 @@ def test_build_deploy_for_tei_local_container_and_remote_container( schema_builder=mock_schema_builder, mode=Mode.LOCAL_CONTAINER, vpc_config=MOCK_VPC_CONFIG, - env_vars={ + model_metadata={ "HF_TASK": "sentence-similarity", }, ) @@ -109,7 +109,7 @@ def test_build_deploy_for_tei_local_container_and_remote_container( model.deploy(mode=Mode.IN_PROCESS) @patch( - "sagemaker.serve.builder.transformers_builder._get_nb_instance", + "sagemaker.serve.builder.tei_builder._get_nb_instance", return_value="ml.g5.24xlarge", ) @patch("sagemaker.serve.builder.tei_builder._capture_telemetry", side_effect=None) @@ -123,7 +123,7 @@ def test_image_uri_override( schema_builder=mock_schema_builder, mode=Mode.LOCAL_CONTAINER, image_uri=MOCK_IMAGE_CONFIG, - env_vars={ + model_metadata={ "HF_TASK": "sentence-similarity", }, ) From e58f622d2a5f589c76c9874b531f9fe15ccb100b Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Thu, 16 May 2024 18:17:02 +0000 Subject: [PATCH 20/30] Fix formatting --- tests/integ/sagemaker/serve/test_serve_tei.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integ/sagemaker/serve/test_serve_tei.py b/tests/integ/sagemaker/serve/test_serve_tei.py index 478e2320f9..e90562e914 100644 --- a/tests/integ/sagemaker/serve/test_serve_tei.py +++ b/tests/integ/sagemaker/serve/test_serve_tei.py @@ -79,7 +79,7 @@ def model_builder_model_schema_builder(): schema_builder=SchemaBuilder(sample_input, loaded_response), model_metadata={ "HF_TASK": "sentence-similarity", - } + }, ) From ea900bf27036366cc3b06e4860c36c3343387ad8 Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Thu, 16 May 2024 13:12:54 -0700 Subject: [PATCH 21/30] Move to G5 for integ test --- tests/integ/sagemaker/serve/test_serve_tei.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tests/integ/sagemaker/serve/test_serve_tei.py b/tests/integ/sagemaker/serve/test_serve_tei.py index e90562e914..2bcdaa35e5 100644 --- a/tests/integ/sagemaker/serve/test_serve_tei.py +++ b/tests/integ/sagemaker/serve/test_serve_tei.py @@ -15,7 +15,7 @@ import pytest from sagemaker.serve.builder.schema_builder import SchemaBuilder from sagemaker.serve.builder.model_builder import ModelBuilder, Mode -import tests.integ + from tests.integ.sagemaker.serve.constants import ( HF_DIR, PYTHON_VERSION_IS_NOT_310, @@ -23,7 +23,7 @@ ) from tests.integ.timeout import timeout -from tests.integ.utils import cleanup_model_resources, gpu_list, retry_with_instance_list +from tests.integ.utils import cleanup_model_resources import logging logger = logging.getLogger(__name__) @@ -89,14 +89,10 @@ def model_builder(request): @pytest.mark.skipif( - PYTHON_VERSION_IS_NOT_310, - tests.integ.test_region() in tests.integ.TRAINING_NO_P2_REGIONS - and tests.integ.test_region() in tests.integ.TRAINING_NO_P3_REGIONS, - reason="no ml.p2 or ml.p3 instances in this region", + PYTHON_VERSION_IS_NOT_310 ) -@retry_with_instance_list(gpu_list(tests.integ.test_region())) @pytest.mark.parametrize("model_builder", ["model_builder_model_schema_builder"], indirect=True) -def test_tei_sagemaker_endpoint(sagemaker_session, model_builder, model_input, **kwargs): +def test_tei_sagemaker_endpoint(sagemaker_session, model_builder, model_input): logger.info("Running in SAGEMAKER_ENDPOINT mode...") caught_ex = None @@ -111,7 +107,7 @@ def test_tei_sagemaker_endpoint(sagemaker_session, model_builder, model_input, * try: logger.info("Deploying and predicting in SAGEMAKER_ENDPOINT mode...") predictor = model.deploy( - instance_type=kwargs["instance_type"], initial_instance_count=2 + instance_type="ml.g5.12xlarge", initial_instance_count=2 ) logger.info("Endpoint successfully deployed.") predictor.predict(model_input) From cffe46afb5cf16535a5f794ffa4e2fa78af8eb34 Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Thu, 16 May 2024 20:14:16 +0000 Subject: [PATCH 22/30] Fix formatting --- tests/integ/sagemaker/serve/test_serve_tei.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/integ/sagemaker/serve/test_serve_tei.py b/tests/integ/sagemaker/serve/test_serve_tei.py index 2bcdaa35e5..bf366e3510 100644 --- a/tests/integ/sagemaker/serve/test_serve_tei.py +++ b/tests/integ/sagemaker/serve/test_serve_tei.py @@ -88,9 +88,7 @@ def model_builder(request): return request.getfixturevalue(request.param) -@pytest.mark.skipif( - PYTHON_VERSION_IS_NOT_310 -) +@pytest.mark.skipif(PYTHON_VERSION_IS_NOT_310) @pytest.mark.parametrize("model_builder", ["model_builder_model_schema_builder"], indirect=True) def test_tei_sagemaker_endpoint(sagemaker_session, model_builder, model_input): logger.info("Running in SAGEMAKER_ENDPOINT mode...") @@ -106,9 +104,7 @@ def test_tei_sagemaker_endpoint(sagemaker_session, model_builder, model_input): with timeout(minutes=SERVE_SAGEMAKER_ENDPOINT_TIMEOUT): try: logger.info("Deploying and predicting in SAGEMAKER_ENDPOINT mode...") - predictor = model.deploy( - instance_type="ml.g5.12xlarge", initial_instance_count=2 - ) + predictor = model.deploy(instance_type="ml.g5.12xlarge", initial_instance_count=2) logger.info("Endpoint successfully deployed.") predictor.predict(model_input) assert predictor is not None From 48205ad63b34f2a4db4c337ed1af4174bd3bb64a Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Fri, 17 May 2024 07:28:59 -0700 Subject: [PATCH 23/30] Integ test updates --- src/sagemaker/serve/builder/tei_builder.py | 18 +++++++++++------- src/sagemaker/serve/model_server/tgi/server.py | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/sagemaker/serve/builder/tei_builder.py b/src/sagemaker/serve/builder/tei_builder.py index 15e2c647ce..6c67370b17 100644 --- a/src/sagemaker/serve/builder/tei_builder.py +++ b/src/sagemaker/serve/builder/tei_builder.py @@ -16,10 +16,11 @@ from typing import Type from abc import ABC, abstractmethod +from sagemaker import image_uris from sagemaker.model import Model from sagemaker.djl_inference.model import _get_model_config_properties_from_hf -from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri +from sagemaker.huggingface import HuggingFaceModel from sagemaker.serve.utils.local_hardware import ( _get_nb_instance, ) @@ -84,11 +85,16 @@ def _set_to_tgi(self): logger.warning(messaging) self.model_server = ModelServer.TGI - def _create_tei_model(self) -> Type[Model]: + def _create_tei_model(self, **kwargs) -> Type[Model]: """Placeholder docstring""" + if self.nb_instance_type and "instance_type" not in kwargs: + kwargs.update({"instance_type": self.nb_instance_type}) + if not self.image_uri: - self.image_uri = get_huggingface_llm_image_uri( - "huggingface-tei", session=self.sagemaker_session + self.image_uri = image_uris.retrieve( + "huggingface-tei", + image_scope="inference", + instance_type=kwargs.get("instance_type") ) pysdk_model = HuggingFaceModel( @@ -164,9 +170,7 @@ def _tei_model_builder_deploy_wrapper(self, *args, **kwargs) -> Type[PredictorBa if "endpoint_logging" not in kwargs: kwargs["endpoint_logging"] = True - if self.nb_instance_type and "instance_type" not in kwargs: - kwargs.update({"instance_type": self.nb_instance_type}) - elif not self.nb_instance_type and "instance_type" not in kwargs: + if not self.nb_instance_type and "instance_type" not in kwargs: raise ValueError( "Instance type must be provided when deploying " "to SageMaker Endpoint mode." ) diff --git a/src/sagemaker/serve/model_server/tgi/server.py b/src/sagemaker/serve/model_server/tgi/server.py index ef39e890c8..b71c7462c4 100644 --- a/src/sagemaker/serve/model_server/tgi/server.py +++ b/src/sagemaker/serve/model_server/tgi/server.py @@ -74,7 +74,7 @@ def _invoke_tgi_serving(self, request: object, content_type: str, accept: str): """Placeholder docstring""" try: response = requests.post( - f"http://{get_docker_host()}:8080/generate", + f"http://{get_docker_host()}:8080/invocations", data=request, headers={"Content-Type": content_type, "Accept": accept}, timeout=600, From 312d837ec50c914c63c80f9e04cc664cec803f78 Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Fri, 17 May 2024 07:32:48 -0700 Subject: [PATCH 24/30] Integ test updates --- src/sagemaker/serve/builder/tei_builder.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/sagemaker/serve/builder/tei_builder.py b/src/sagemaker/serve/builder/tei_builder.py index 6c67370b17..0aa8ea2810 100644 --- a/src/sagemaker/serve/builder/tei_builder.py +++ b/src/sagemaker/serve/builder/tei_builder.py @@ -94,7 +94,8 @@ def _create_tei_model(self, **kwargs) -> Type[Model]: self.image_uri = image_uris.retrieve( "huggingface-tei", image_scope="inference", - instance_type=kwargs.get("instance_type") + instance_type=kwargs.get("instance_type"), + region=self.sagemaker_session.boto_region_name ) pysdk_model = HuggingFaceModel( From 29ea1c5f6299609b2e3a04374c586d3dba41849f Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Fri, 17 May 2024 07:40:28 -0700 Subject: [PATCH 25/30] Integ test updates --- src/sagemaker/serve/builder/tei_builder.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/sagemaker/serve/builder/tei_builder.py b/src/sagemaker/serve/builder/tei_builder.py index 0aa8ea2810..7a831619b4 100644 --- a/src/sagemaker/serve/builder/tei_builder.py +++ b/src/sagemaker/serve/builder/tei_builder.py @@ -168,6 +168,15 @@ def _tei_model_builder_deploy_wrapper(self, *args, **kwargs) -> Type[PredictorBa self.env_vars.update(env_vars) self.pysdk_model.env.update(self.env_vars) + # if the weights have been cached via local container mode -> set to offline + if str(Mode.LOCAL_CONTAINER) in self.modes: + self.pysdk_model.env.update({"TRANSFORMERS_OFFLINE": "1"}) + else: + # if has not been built for local container we must use cache + # that hosting has write access to. + self.pysdk_model.env["TRANSFORMERS_CACHE"] = "/tmp" + self.pysdk_model.env["HUGGINGFACE_HUB_CACHE"] = "/tmp" + if "endpoint_logging" not in kwargs: kwargs["endpoint_logging"] = True From f6f81166cde80b48f6efa4007726831914444a57 Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Fri, 17 May 2024 14:40:58 +0000 Subject: [PATCH 26/30] Fix formatting --- src/sagemaker/serve/builder/tei_builder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sagemaker/serve/builder/tei_builder.py b/src/sagemaker/serve/builder/tei_builder.py index 7a831619b4..50d3866468 100644 --- a/src/sagemaker/serve/builder/tei_builder.py +++ b/src/sagemaker/serve/builder/tei_builder.py @@ -85,7 +85,7 @@ def _set_to_tgi(self): logger.warning(messaging) self.model_server = ModelServer.TGI - def _create_tei_model(self, **kwargs) -> Type[Model]: + def _create_tei_model(self, **kwargs) -> Type[Model]: """Placeholder docstring""" if self.nb_instance_type and "instance_type" not in kwargs: kwargs.update({"instance_type": self.nb_instance_type}) @@ -95,7 +95,7 @@ def _create_tei_model(self, **kwargs) -> Type[Model]: "huggingface-tei", image_scope="inference", instance_type=kwargs.get("instance_type"), - region=self.sagemaker_session.boto_region_name + region=self.sagemaker_session.boto_region_name, ) pysdk_model = HuggingFaceModel( From 166e5706aa575f652d89ca0d2900aaae85a72cfb Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Fri, 17 May 2024 08:54:20 -0700 Subject: [PATCH 27/30] Integ test updates --- tests/integ/sagemaker/serve/test_serve_tei.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/integ/sagemaker/serve/test_serve_tei.py b/tests/integ/sagemaker/serve/test_serve_tei.py index bf366e3510..c3a696d208 100644 --- a/tests/integ/sagemaker/serve/test_serve_tei.py +++ b/tests/integ/sagemaker/serve/test_serve_tei.py @@ -88,7 +88,10 @@ def model_builder(request): return request.getfixturevalue(request.param) -@pytest.mark.skipif(PYTHON_VERSION_IS_NOT_310) +@pytest.mark.skipif( + PYTHON_VERSION_IS_NOT_310, + reason="Testing feature needs latest metadata", +) @pytest.mark.parametrize("model_builder", ["model_builder_model_schema_builder"], indirect=True) def test_tei_sagemaker_endpoint(sagemaker_session, model_builder, model_input): logger.info("Running in SAGEMAKER_ENDPOINT mode...") From 4bb55224647c8a03c34a52d6481b5cb78e688b1b Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Fri, 17 May 2024 09:00:43 -0700 Subject: [PATCH 28/30] Move back to generate for ping --- src/sagemaker/serve/model_server/tgi/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sagemaker/serve/model_server/tgi/server.py b/src/sagemaker/serve/model_server/tgi/server.py index b71c7462c4..ef39e890c8 100644 --- a/src/sagemaker/serve/model_server/tgi/server.py +++ b/src/sagemaker/serve/model_server/tgi/server.py @@ -74,7 +74,7 @@ def _invoke_tgi_serving(self, request: object, content_type: str, accept: str): """Placeholder docstring""" try: response = requests.post( - f"http://{get_docker_host()}:8080/invocations", + f"http://{get_docker_host()}:8080/generate", data=request, headers={"Content-Type": content_type, "Accept": accept}, timeout=600, From 17645f77a783f8b80b5bd674b531db3287f75b23 Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Fri, 17 May 2024 10:31:06 -0700 Subject: [PATCH 29/30] Integ test updates --- tests/integ/sagemaker/serve/test_serve_tei.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integ/sagemaker/serve/test_serve_tei.py b/tests/integ/sagemaker/serve/test_serve_tei.py index c3a696d208..29aaa69d7f 100644 --- a/tests/integ/sagemaker/serve/test_serve_tei.py +++ b/tests/integ/sagemaker/serve/test_serve_tei.py @@ -107,8 +107,7 @@ def test_tei_sagemaker_endpoint(sagemaker_session, model_builder, model_input): with timeout(minutes=SERVE_SAGEMAKER_ENDPOINT_TIMEOUT): try: logger.info("Deploying and predicting in SAGEMAKER_ENDPOINT mode...") - predictor = model.deploy(instance_type="ml.g5.12xlarge", initial_instance_count=2) - logger.info("Endpoint successfully deployed.") + predictor = model.deploy(instance_type="ml.g5.2xlarge", initial_instance_count=1) predictor.predict(model_input) assert predictor is not None except Exception as e: From e8341c2bb8e4ede3e7dc33da173442982a400ac0 Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Fri, 17 May 2024 11:49:10 -0700 Subject: [PATCH 30/30] Integ test updates --- tests/integ/sagemaker/serve/test_serve_tei.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integ/sagemaker/serve/test_serve_tei.py b/tests/integ/sagemaker/serve/test_serve_tei.py index 29aaa69d7f..19ee0b57de 100644 --- a/tests/integ/sagemaker/serve/test_serve_tei.py +++ b/tests/integ/sagemaker/serve/test_serve_tei.py @@ -75,7 +75,7 @@ def model_input(): def model_builder_model_schema_builder(): return ModelBuilder( model_path=HF_DIR, - model="bert-base-uncased", + model="BAAI/bge-m3", schema_builder=SchemaBuilder(sample_input, loaded_response), model_metadata={ "HF_TASK": "sentence-similarity",