From 43d3eb13701273ad3ffad515083834947b0f0226 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Wed, 15 May 2024 21:58:14 -0700 Subject: [PATCH 1/9] Debug --- src/sagemaker/serve/mode/local_container_mode.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/sagemaker/serve/mode/local_container_mode.py b/src/sagemaker/serve/mode/local_container_mode.py index f940e2959c..3730e54251 100644 --- a/src/sagemaker/serve/mode/local_container_mode.py +++ b/src/sagemaker/serve/mode/local_container_mode.py @@ -215,6 +215,10 @@ def _pull_image(self, image: str): logger.warning("Unable to login to ecr: %s", e) self.client = docker.from_env() + print("*" * 80) + images = self.client.images.list() + print(images) + print("*" * 80) try: logger.info("Pulling image %s from repository...", image) self.client.images.pull(image) From 8af45e3a38851fe6cfdf2a0fcfcb3b7ad7d02cd9 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Wed, 15 May 2024 22:09:27 -0700 Subject: [PATCH 2/9] Debug --- src/sagemaker/serve/mode/local_container_mode.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/sagemaker/serve/mode/local_container_mode.py b/src/sagemaker/serve/mode/local_container_mode.py index 3730e54251..054d2dba91 100644 --- a/src/sagemaker/serve/mode/local_container_mode.py +++ b/src/sagemaker/serve/mode/local_container_mode.py @@ -217,7 +217,11 @@ def _pull_image(self, image: str): self.client = docker.from_env() print("*" * 80) images = self.client.images.list() - print(images) + for image in images: + print(image.id) + print(image.tags) + print(image.short_id) + print() print("*" * 80) try: logger.info("Pulling image %s from repository...", image) From 7e8d50c8819eaa6e3197b17a6504ffb29d951f7c Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Wed, 15 May 2024 22:15:02 -0700 Subject: [PATCH 3/9] Debug --- src/sagemaker/serve/mode/local_container_mode.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/sagemaker/serve/mode/local_container_mode.py b/src/sagemaker/serve/mode/local_container_mode.py index 054d2dba91..f61feff23f 100644 --- a/src/sagemaker/serve/mode/local_container_mode.py +++ b/src/sagemaker/serve/mode/local_container_mode.py @@ -217,10 +217,11 @@ def _pull_image(self, image: str): self.client = docker.from_env() print("*" * 80) images = self.client.images.list() - for image in images: - print(image.id) - print(image.tags) - print(image.short_id) + for img in images: + for tag in img.tags: + if tag == image: + print(tag) + break print() print("*" * 80) try: From 378e93b679dacc6eb9cec4cf31621b7f4b9d5e5d Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Wed, 15 May 2024 22:19:55 -0700 Subject: [PATCH 4/9] Debug --- src/sagemaker/serve/mode/local_container_mode.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sagemaker/serve/mode/local_container_mode.py b/src/sagemaker/serve/mode/local_container_mode.py index f61feff23f..3ea362ba28 100644 --- a/src/sagemaker/serve/mode/local_container_mode.py +++ b/src/sagemaker/serve/mode/local_container_mode.py @@ -220,8 +220,8 @@ def _pull_image(self, image: str): for img in images: for tag in img.tags: if tag == image: - print(tag) - break + print(f"Matched {tag}") + print(tag) print() print("*" * 80) try: From f29adeed21d53c744c18898c66c59c0da8de2676 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Wed, 15 May 2024 22:41:42 -0700 Subject: [PATCH 5/9] Debug --- src/sagemaker/serve/builder/jumpstart_builder.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/sagemaker/serve/builder/jumpstart_builder.py b/src/sagemaker/serve/builder/jumpstart_builder.py index 8cb42689fe..efeb7aec6d 100644 --- a/src/sagemaker/serve/builder/jumpstart_builder.py +++ b/src/sagemaker/serve/builder/jumpstart_builder.py @@ -470,9 +470,10 @@ def _build_for_jumpstart(self): self.pysdk_model = self._create_pre_trained_js_model() self.pysdk_model.tune = lambda *args, **kwargs: self._default_tune() + self.image_uri = self.pysdk_model.image_uri logger.info( - "JumpStart ID %s is packaged with Image URI: %s", self.model, self.pysdk_model.image_uri + "JumpStart ID %s is packaged with Image URI: %s", self.model, self.image_uri ) if self.mode != Mode.SAGEMAKER_ENDPOINT: @@ -484,7 +485,6 @@ def _build_for_jumpstart(self): if "djl-inference" in self.pysdk_model.image_uri: logger.info("Building for DJL JumpStart Model ID...") self.model_server = ModelServer.DJL_SERVING - self.image_uri = self.pysdk_model.image_uri self._build_for_djl_jumpstart() @@ -492,7 +492,6 @@ def _build_for_jumpstart(self): elif "tgi-inference" in self.pysdk_model.image_uri: logger.info("Building for TGI JumpStart Model ID...") self.model_server = ModelServer.TGI - self.image_uri = self.pysdk_model.image_uri self._build_for_tgi_jumpstart() @@ -500,7 +499,6 @@ def _build_for_jumpstart(self): elif "huggingface-pytorch-inference:" in self.pysdk_model.image_uri: logger.info("Building for MMS JumpStart Model ID...") self.model_server = ModelServer.MMS - self.image_uri = self.pysdk_model.image_uri self._build_for_mms_jumpstart() else: From cac47163e42813b6abcb5af08829e295bf62c781 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Wed, 15 May 2024 22:51:53 -0700 Subject: [PATCH 6/9] Debug --- .../serve/builder/jumpstart_builder.py | 75 +++++++++++-------- .../serve/mode/local_container_mode.py | 9 --- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/src/sagemaker/serve/builder/jumpstart_builder.py b/src/sagemaker/serve/builder/jumpstart_builder.py index efeb7aec6d..16c3bf04b2 100644 --- a/src/sagemaker/serve/builder/jumpstart_builder.py +++ b/src/sagemaker/serve/builder/jumpstart_builder.py @@ -300,6 +300,11 @@ def _tune_for_js(self, sharded_supported: bool, max_tuning_duration: int = 1800) returns: Tuned Model. """ + if self.mode == Mode.SAGEMAKER_ENDPOINT: + logger.warning( + "Tuning is only a %s capability. Returning original model.", Mode.LOCAL_CONTAINER + ) + return self.pysdk_model num_shard_env_var_name = "SM_NUM_GPUS" if "OPTION_TENSOR_PARALLEL_DEGREE" in self.pysdk_model.env.keys(): @@ -468,44 +473,48 @@ def _build_for_jumpstart(self): self.secret_key = None self.jumpstart = True - self.pysdk_model = self._create_pre_trained_js_model() - self.pysdk_model.tune = lambda *args, **kwargs: self._default_tune() - self.image_uri = self.pysdk_model.image_uri + pysdk_model = self._create_pre_trained_js_model() + image_uri = pysdk_model.image_uri logger.info( - "JumpStart ID %s is packaged with Image URI: %s", self.model, self.image_uri + "JumpStart ID %s is packaged with Image URI: %s", self.model, image_uri ) - if self.mode != Mode.SAGEMAKER_ENDPOINT: - if self._is_gated_model(self.pysdk_model): - raise ValueError( - "JumpStart Gated Models are only supported in SAGEMAKER_ENDPOINT mode." - ) - - if "djl-inference" in self.pysdk_model.image_uri: - logger.info("Building for DJL JumpStart Model ID...") - self.model_server = ModelServer.DJL_SERVING - - self._build_for_djl_jumpstart() - - self.pysdk_model.tune = self.tune_for_djl_jumpstart - elif "tgi-inference" in self.pysdk_model.image_uri: - logger.info("Building for TGI JumpStart Model ID...") - self.model_server = ModelServer.TGI - - self._build_for_tgi_jumpstart() - - self.pysdk_model.tune = self.tune_for_tgi_jumpstart - elif "huggingface-pytorch-inference:" in self.pysdk_model.image_uri: - logger.info("Building for MMS JumpStart Model ID...") - self.model_server = ModelServer.MMS + if self._is_gated_model(pysdk_model) and self.mode != Mode.SAGEMAKER_ENDPOINT: + raise ValueError( + "JumpStart Gated Models are only supported in SAGEMAKER_ENDPOINT mode." + ) - self._build_for_mms_jumpstart() - else: - raise ValueError( - "JumpStart Model ID was not packaged " - "with djl-inference, tgi-inference, or mms-inference container." - ) + if "djl-inference" in image_uri: + logger.info("Building for DJL JumpStart Model ID...") + self.model_server = ModelServer.DJL_SERVING + self.pysdk_model = pysdk_model + self.image_uri = self.pysdk_model.image_uri + + self._build_for_djl_jumpstart() + + self.pysdk_model.tune = self.tune_for_djl_jumpstart + elif "tgi-inference" in image_uri: + logger.info("Building for TGI JumpStart Model ID...") + self.model_server = ModelServer.TGI + self.pysdk_model = pysdk_model + self.image_uri = self.pysdk_model.image_uri + + self._build_for_tgi_jumpstart() + + self.pysdk_model.tune = self.tune_for_tgi_jumpstart + elif "huggingface-pytorch-inference:" in image_uri: + logger.info("Building for MMS JumpStart Model ID...") + self.model_server = ModelServer.MMS + self.pysdk_model = pysdk_model + self.image_uri = self.pysdk_model.image_uri + + self._build_for_mms_jumpstart() + elif self.mode != Mode.SAGEMAKER_ENDPOINT: + raise ValueError( + "JumpStart Model ID was not packaged " + "with djl-inference, tgi-inference, or mms-inference container." + ) return self.pysdk_model diff --git a/src/sagemaker/serve/mode/local_container_mode.py b/src/sagemaker/serve/mode/local_container_mode.py index 3ea362ba28..f940e2959c 100644 --- a/src/sagemaker/serve/mode/local_container_mode.py +++ b/src/sagemaker/serve/mode/local_container_mode.py @@ -215,15 +215,6 @@ def _pull_image(self, image: str): logger.warning("Unable to login to ecr: %s", e) self.client = docker.from_env() - print("*" * 80) - images = self.client.images.list() - for img in images: - for tag in img.tags: - if tag == image: - print(f"Matched {tag}") - print(tag) - print() - print("*" * 80) try: logger.info("Pulling image %s from repository...", image) self.client.images.pull(image) From 927be8c83dbf55b7ec9dfa22e7a8f587e85789ba Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Wed, 15 May 2024 23:12:54 -0700 Subject: [PATCH 7/9] fix docstyle --- src/sagemaker/serve/builder/jumpstart_builder.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/sagemaker/serve/builder/jumpstart_builder.py b/src/sagemaker/serve/builder/jumpstart_builder.py index 16c3bf04b2..193294d781 100644 --- a/src/sagemaker/serve/builder/jumpstart_builder.py +++ b/src/sagemaker/serve/builder/jumpstart_builder.py @@ -476,9 +476,7 @@ def _build_for_jumpstart(self): pysdk_model = self._create_pre_trained_js_model() image_uri = pysdk_model.image_uri - logger.info( - "JumpStart ID %s is packaged with Image URI: %s", self.model, image_uri - ) + logger.info("JumpStart ID %s is packaged with Image URI: %s", self.model, image_uri) if self._is_gated_model(pysdk_model) and self.mode != Mode.SAGEMAKER_ENDPOINT: raise ValueError( From 8509e3be3c6b60c12ea6e9f16fa3d655a607f882 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Wed, 15 May 2024 23:41:57 -0700 Subject: [PATCH 8/9] Refactoring --- src/sagemaker/serve/builder/jumpstart_builder.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/sagemaker/serve/builder/jumpstart_builder.py b/src/sagemaker/serve/builder/jumpstart_builder.py index 193294d781..bc31e8d323 100644 --- a/src/sagemaker/serve/builder/jumpstart_builder.py +++ b/src/sagemaker/serve/builder/jumpstart_builder.py @@ -516,17 +516,6 @@ def _build_for_jumpstart(self): return self.pysdk_model - def _default_tune(self): - """Logs a warning message if tune is invoked on endpoint mode. - - Returns: - Jumpstart Model: ``This`` model - """ - logger.warning( - "Tuning is only a %s capability. Returning original model.", Mode.LOCAL_CONTAINER - ) - return self.pysdk_model - def _is_gated_model(self, model) -> bool: """Determine if ``this`` Model is Gated From 93fee7edee49db11fe5d2a195a5276947c94597c Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Thu, 16 May 2024 10:12:43 -0700 Subject: [PATCH 9/9] Add Integ tests --- .../sagemaker/serve/test_serve_js_happy.py | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/tests/integ/sagemaker/serve/test_serve_js_happy.py b/tests/integ/sagemaker/serve/test_serve_js_happy.py index 7835c8ae3c..ad0527fcc0 100644 --- a/tests/integ/sagemaker/serve/test_serve_js_happy.py +++ b/tests/integ/sagemaker/serve/test_serve_js_happy.py @@ -34,6 +34,14 @@ JS_MODEL_ID = "huggingface-textgeneration1-gpt-neo-125m-fp16" ROLE_NAME = "SageMakerRole" +SAMPLE_MMS_PROMPT = [ + "How cute your dog is!", + "Your dog is so cute.", + "The mitochondria is the powerhouse of the cell.", +] +SAMPLE_MMS_RESPONSE = {"embedding": []} +JS_MMS_MODEL_ID = "huggingface-sentencesimilarity-bge-m3" + @pytest.fixture def happy_model_builder(sagemaker_session): @@ -46,6 +54,17 @@ def happy_model_builder(sagemaker_session): ) +@pytest.fixture +def happy_mms_model_builder(sagemaker_session): + iam_client = sagemaker_session.boto_session.client("iam") + return ModelBuilder( + model=JS_MMS_MODEL_ID, + schema_builder=SchemaBuilder(SAMPLE_MMS_PROMPT, SAMPLE_MMS_RESPONSE), + role_arn=iam_client.get_role(RoleName=ROLE_NAME)["Role"]["Arn"], + sagemaker_session=sagemaker_session, + ) + + @pytest.mark.skipif( PYTHON_VERSION_IS_NOT_310, reason="The goal of these test are to test the serving components of our feature", @@ -75,3 +94,34 @@ def test_happy_tgi_sagemaker_endpoint(happy_model_builder, gpu_instance_type): ) if caught_ex: raise caught_ex + + +@pytest.mark.skipif( + PYTHON_VERSION_IS_NOT_310, + reason="The goal of these test are to test the serving components of our feature", +) +@pytest.mark.slow_test +def test_happy_mms_sagemaker_endpoint(happy_mms_model_builder, gpu_instance_type): + logger.info("Running in SAGEMAKER_ENDPOINT mode...") + caught_ex = None + model = happy_mms_model_builder.build() + + with timeout(minutes=SERVE_SAGEMAKER_ENDPOINT_TIMEOUT): + try: + logger.info("Deploying and predicting in SAGEMAKER_ENDPOINT mode...") + predictor = model.deploy(instance_type=gpu_instance_type, endpoint_logging=False) + logger.info("Endpoint successfully deployed.") + + updated_sample_input = happy_mms_model_builder.schema_builder.sample_input + + predictor.predict(updated_sample_input) + except Exception as e: + caught_ex = e + finally: + cleanup_model_resources( + sagemaker_session=happy_mms_model_builder.sagemaker_session, + model_name=model.name, + endpoint_name=model.endpoint_name, + ) + if caught_ex: + raise caught_ex