diff --git a/src/sagemaker/serve/builder/djl_builder.py b/src/sagemaker/serve/builder/djl_builder.py index 9fe8d47734..e89c1b8e9c 100644 --- a/src/sagemaker/serve/builder/djl_builder.py +++ b/src/sagemaker/serve/builder/djl_builder.py @@ -83,6 +83,7 @@ def __init__(self): self.mode = None self.model_server = None self.image_uri = None + self._is_custom_image_uri = False self.image_config = None self.vpc_config = None self._original_deploy = None diff --git a/src/sagemaker/serve/builder/jumpstart_builder.py b/src/sagemaker/serve/builder/jumpstart_builder.py index b7dc5b1da5..56c5ec1a2f 100644 --- a/src/sagemaker/serve/builder/jumpstart_builder.py +++ b/src/sagemaker/serve/builder/jumpstart_builder.py @@ -57,6 +57,8 @@ def __init__(self): self.mode = None self.model_server = None self.image_uri = None + self._is_custom_image_uri = False + self.vpc_config = None self._original_deploy = None self.secret_key = None self.js_model_config = None @@ -94,7 +96,7 @@ def _is_jumpstart_model_id(self) -> bool: def _create_pre_trained_js_model(self) -> Type[Model]: """Placeholder docstring""" - pysdk_model = JumpStartModel(self.model) + pysdk_model = JumpStartModel(self.model, vpc_config=self.vpc_config) pysdk_model.sagemaker_session = self.sagemaker_session self._original_deploy = pysdk_model.deploy diff --git a/src/sagemaker/serve/builder/model_builder.py b/src/sagemaker/serve/builder/model_builder.py index 0ade8096f6..c7f2dc5633 100644 --- a/src/sagemaker/serve/builder/model_builder.py +++ b/src/sagemaker/serve/builder/model_builder.py @@ -605,6 +605,8 @@ def build( self.serve_settings = self._get_serve_setting() + self._is_custom_image_uri = self.image_uri is None + if isinstance(self.model, str): if self._is_jumpstart_model_id(): return self._build_for_jumpstart() diff --git a/src/sagemaker/serve/builder/tgi_builder.py b/src/sagemaker/serve/builder/tgi_builder.py index db87dcf4d0..23cc7e2202 100644 --- a/src/sagemaker/serve/builder/tgi_builder.py +++ b/src/sagemaker/serve/builder/tgi_builder.py @@ -76,6 +76,7 @@ def __init__(self): self.mode = None self.model_server = None self.image_uri = None + self._is_custom_image_uri = False self.image_config = None self.vpc_config = None self._original_deploy = None diff --git a/src/sagemaker/serve/builder/transformers_builder.py b/src/sagemaker/serve/builder/transformers_builder.py index 71f5d11f98..2d4cefcb4f 100644 --- a/src/sagemaker/serve/builder/transformers_builder.py +++ b/src/sagemaker/serve/builder/transformers_builder.py @@ -56,6 +56,8 @@ def __init__(self): self.mode = None self.model_server = None self.image_uri = None + self._is_custom_image_uri = False + self.vpc_config = None self._original_deploy = None self.hf_model_config = None self._default_data_type = None @@ -111,6 +113,7 @@ def _create_transformers_model(self) -> Type[Model]: py_version=self.py_version, transformers_version=base_hf_version, pytorch_version=self.pytorch_version, + vpc_config=self.vpc_config, ) elif "keras" in hf_model_md.get("tags") or "tensorflow" in hf_model_md.get("tags"): self.tensorflow_version = self._get_supported_version( @@ -126,6 +129,7 @@ def _create_transformers_model(self) -> Type[Model]: py_version=self.py_version, transformers_version=base_hf_version, tensorflow_version=self.tensorflow_version, + vpc_config=self.vpc_config, ) if self.mode == Mode.LOCAL_CONTAINER: diff --git a/src/sagemaker/serve/utils/telemetry_logger.py b/src/sagemaker/serve/utils/telemetry_logger.py index 84044c4d96..64cbce03e8 100644 --- a/src/sagemaker/serve/utils/telemetry_logger.py +++ b/src/sagemaker/serve/utils/telemetry_logger.py @@ -20,7 +20,8 @@ from sagemaker import Session, exceptions from sagemaker.serve.mode.function_pointers import Mode from sagemaker.serve.utils.exceptions import ModelBuilderException -from sagemaker.serve.utils.types import ModelServer +from sagemaker.serve.utils.types import ModelServer, ImageUriOption +from sagemaker.serve.validations.check_image_uri import is_1p_image_uri from sagemaker.user_agent import SDK_VERSION logger = logging.getLogger(__name__) @@ -62,11 +63,13 @@ def wrapper(self, *args, **kwargs): caught_ex = None image_uri_tail = self.image_uri.split("/")[1] + image_uri_option = _get_image_uri_option(self.image_uri, self._is_custom_image_uri) extra = ( f"{func_name}" f"&x-modelServer={MODEL_SERVER_TO_CODE[str(self.model_server)]}" f"&x-imageTag={image_uri_tail}" f"&x-sdkVersion={SDK_VERSION}" + f"&x-defaultImageUsage={image_uri_option}" ) if self.model_server == ModelServer.DJL_SERVING or self.model_server == ModelServer.TGI: @@ -201,3 +204,22 @@ def _get_region_or_default(session): return session.boto_session.region_name except Exception: # pylint: disable=W0703 return "us-west-2" + + +def _get_image_uri_option(image_uri: str, is_custom_image: bool) -> int: + """Detect whether default values are used for ModelBuilder + + Args: + image_uri (str): Image uri used by ModelBuilder. + is_custom_image: (bool): Boolean indicating whether customer provides with custom image. + Returns: + bool: Integer code of image option types. + """ + + if not is_custom_image: + return ImageUriOption.DEFAULT_IMAGE.value + + if is_1p_image_uri(image_uri): + return ImageUriOption.CUSTOM_1P_IMAGE.value + + return ImageUriOption.CUSTOM_IMAGE.value diff --git a/src/sagemaker/serve/utils/types.py b/src/sagemaker/serve/utils/types.py index b8657a19ca..60b7b5cd6f 100644 --- a/src/sagemaker/serve/utils/types.py +++ b/src/sagemaker/serve/utils/types.py @@ -43,3 +43,15 @@ def __str__(self) -> str: INFERENTIA_1 = 3 INFERENTIA_2 = 4 GRAVITON = 5 + + +class ImageUriOption(Enum): + """Enum type for image uri options""" + + def __str__(self) -> str: + """Convert enum to string""" + return str(self.name) + + CUSTOM_IMAGE = 1 + CUSTOM_1P_IMAGE = 2 + DEFAULT_IMAGE = 3 diff --git a/tests/unit/sagemaker/serve/builder/test_transformers_builder.py b/tests/unit/sagemaker/serve/builder/test_transformers_builder.py index 918ed6bf01..e17364f22d 100644 --- a/tests/unit/sagemaker/serve/builder/test_transformers_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_transformers_builder.py @@ -16,6 +16,7 @@ import unittest from sagemaker.serve.builder.model_builder import ModelBuilder from sagemaker.serve.mode.function_pointers import Mode +from tests.unit.sagemaker.serve.constants import MOCK_VPC_CONFIG from sagemaker.serve.utils.predictors import TransformersLocalModePredictor @@ -74,6 +75,7 @@ def test_build_deploy_for_transformers_local_container_and_remote_container( model=mock_model_id, schema_builder=mock_schema_builder, mode=Mode.LOCAL_CONTAINER, + vpc_config=MOCK_VPC_CONFIG, ) builder._prepare_for_mode = MagicMock() @@ -85,6 +87,7 @@ def test_build_deploy_for_transformers_local_container_and_remote_container( builder.modes[str(Mode.LOCAL_CONTAINER)] = MagicMock() predictor = model.deploy(model_data_download_timeout=1800) + assert model.vpc_config == MOCK_VPC_CONFIG assert builder.env_vars["MODEL_LOADING_TIMEOUT"] == "1800" assert isinstance(predictor, TransformersLocalModePredictor) diff --git a/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py b/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py index 73d24c9505..e8273dd9a1 100644 --- a/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py +++ b/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py @@ -20,6 +20,7 @@ _construct_url, ) from sagemaker.serve.utils.exceptions import ModelBuilderException, LocalModelOutOfMemoryException +from sagemaker.serve.utils.types import ImageUriOption from sagemaker.user_agent import SDK_VERSION MOCK_SESSION = Mock() @@ -71,6 +72,7 @@ def test_capture_telemetry_decorator_djl_success(self, mock_send_telemetry): mock_model_builder = ModelBuilderMock() mock_model_builder.serve_settings.telemetry_opt_out = False mock_model_builder.image_uri = MOCK_DJL_CONTAINER + mock_model_builder._is_custom_image_uri = False mock_model_builder.model = MOCK_HUGGINGFACE_ID mock_model_builder.mode = Mode.LOCAL_CONTAINER mock_model_builder.model_server = ModelServer.DJL_SERVING @@ -85,6 +87,37 @@ def test_capture_telemetry_decorator_djl_success(self, mock_send_telemetry): "&x-modelServer=4" "&x-imageTag=djl-inference:0.25.0-deepspeed0.11.0-cu118" f"&x-sdkVersion={SDK_VERSION}" + f"&x-defaultImageUsage={ImageUriOption.DEFAULT_IMAGE.value}" + f"&x-modelName={MOCK_HUGGINGFACE_ID}" + f"&x-endpointArn={MOCK_ENDPOINT_ARN}" + f"&x-latency={latency}" + ) + + mock_send_telemetry.assert_called_once_with( + "1", 2, MOCK_SESSION, None, None, expected_extra_str + ) + + @patch("sagemaker.serve.utils.telemetry_logger._send_telemetry") + def test_capture_telemetry_decorator_djl_success_with_custom_image(self, mock_send_telemetry): + mock_model_builder = ModelBuilderMock() + mock_model_builder.serve_settings.telemetry_opt_out = False + mock_model_builder.image_uri = MOCK_DJL_CONTAINER + mock_model_builder._is_custom_image_uri = True + mock_model_builder.model = MOCK_HUGGINGFACE_ID + mock_model_builder.mode = Mode.LOCAL_CONTAINER + mock_model_builder.model_server = ModelServer.DJL_SERVING + mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN + + mock_model_builder.mock_deploy() + + args = mock_send_telemetry.call_args.args + latency = str(args[5]).split("latency=")[1] + expected_extra_str = ( + f"{MOCK_FUNC_NAME}" + "&x-modelServer=4" + "&x-imageTag=djl-inference:0.25.0-deepspeed0.11.0-cu118" + f"&x-sdkVersion={SDK_VERSION}" + f"&x-defaultImageUsage={ImageUriOption.CUSTOM_1P_IMAGE.value}" f"&x-modelName={MOCK_HUGGINGFACE_ID}" f"&x-endpointArn={MOCK_ENDPOINT_ARN}" f"&x-latency={latency}" @@ -99,6 +132,7 @@ def test_capture_telemetry_decorator_tgi_success(self, mock_send_telemetry): mock_model_builder = ModelBuilderMock() mock_model_builder.serve_settings.telemetry_opt_out = False mock_model_builder.image_uri = MOCK_TGI_CONTAINER + mock_model_builder._is_custom_image_uri = False mock_model_builder.model = MOCK_HUGGINGFACE_ID mock_model_builder.mode = Mode.LOCAL_CONTAINER mock_model_builder.model_server = ModelServer.TGI @@ -113,6 +147,7 @@ def test_capture_telemetry_decorator_tgi_success(self, mock_send_telemetry): "&x-modelServer=6" "&x-imageTag=huggingface-pytorch-inference:2.0.0-transformers4.28.1-cpu-py310-ubuntu20.04" f"&x-sdkVersion={SDK_VERSION}" + f"&x-defaultImageUsage={ImageUriOption.DEFAULT_IMAGE.value}" f"&x-modelName={MOCK_HUGGINGFACE_ID}" f"&x-endpointArn={MOCK_ENDPOINT_ARN}" f"&x-latency={latency}" @@ -127,6 +162,7 @@ def test_capture_telemetry_decorator_no_call_when_disabled(self, mock_send_telem mock_model_builder = ModelBuilderMock() mock_model_builder.serve_settings.telemetry_opt_out = True mock_model_builder.image_uri = MOCK_DJL_CONTAINER + mock_model_builder._is_custom_image_uri = False mock_model_builder.model = MOCK_HUGGINGFACE_ID mock_model_builder.model_server = ModelServer.DJL_SERVING @@ -139,6 +175,7 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_te mock_model_builder = ModelBuilderMock() mock_model_builder.serve_settings.telemetry_opt_out = False mock_model_builder.image_uri = MOCK_DJL_CONTAINER + mock_model_builder._is_custom_image_uri = False mock_model_builder.model = MOCK_HUGGINGFACE_ID mock_model_builder.mode = Mode.LOCAL_CONTAINER mock_model_builder.model_server = ModelServer.DJL_SERVING @@ -158,6 +195,7 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_te "&x-modelServer=4" "&x-imageTag=djl-inference:0.25.0-deepspeed0.11.0-cu118" f"&x-sdkVersion={SDK_VERSION}" + f"&x-defaultImageUsage={ImageUriOption.DEFAULT_IMAGE.value}" f"&x-modelName={MOCK_HUGGINGFACE_ID}" f"&x-endpointArn={MOCK_ENDPOINT_ARN}" f"&x-latency={latency}"