From bdd6c63f044896453b7b7e4c29852ba881b96cbe Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Tue, 6 Feb 2024 17:10:31 -0800 Subject: [PATCH 01/10] Emit additional telemetry metrics --- .gitignore | 1 + setup.py | 15 +++++++++++++++ src/sagemaker/serve/utils/telemetry_logger.py | 13 +++++++++++-- src/sagemaker/session.py | 2 ++ src/sagemaker/utils.py | 7 +++++++ .../serve/utils/test_telemetry_logger.py | 14 ++++++++++++++ tests/unit/test_utils.py | 4 ++++ 7 files changed, 54 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index b7a858e341..a97b1283b9 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,4 @@ env/ tests/data/**/_repack_model.py tests/data/experiment/sagemaker-dev-1.0.tar.gz src/sagemaker/serve/tmp_workspace +src/sagemaker/image_uri_config/pysdk_version.json \ No newline at end of file diff --git a/setup.py b/setup.py index b1070319d3..2ead2c9b7f 100644 --- a/setup.py +++ b/setup.py @@ -32,6 +32,21 @@ def read_version(): return read("VERSION").strip() +def pysdk_version(): + """Persists Sagemaker Python SDK Version in Config""" + content = '{"version": "' + read_version() + '"}' + with open( + os.path.join( + os.path.dirname(__file__), "src", "sagemaker", "image_uri_config", "pysdk_version.json" + ), + "w", + ) as v: + v.write(content) + + +pysdk_version() + + def read_requirements(filename): """Reads requirements file which lists package dependencies. diff --git a/src/sagemaker/serve/utils/telemetry_logger.py b/src/sagemaker/serve/utils/telemetry_logger.py index cb57a9f0a7..0a4a33bcab 100644 --- a/src/sagemaker/serve/utils/telemetry_logger.py +++ b/src/sagemaker/serve/utils/telemetry_logger.py @@ -15,10 +15,11 @@ import logging import requests -from sagemaker import Session +from sagemaker import Session, exceptions from sagemaker.serve.mode.function_pointers import Mode from sagemaker.serve.utils.exceptions import ModelBuilderException from sagemaker.serve.utils.types import ModelServer +from sagemaker.utils import pysdk_version logger = logging.getLogger(__name__) @@ -63,11 +64,15 @@ def wrapper(self, *args, **kwargs): f"{func_name}" f"&x-modelServer={MODEL_SERVER_TO_CODE[str(self.model_server)]}" f"&x-imageTag={image_uri_tail}" + f"&x-pySdkVersion={pysdk_version()}" ) if self.model_server == ModelServer.DJL_SERVING or self.model_server == ModelServer.TGI: extra += f"&x-modelName={self.model}" + if self.sagemaker_session.endpoint_arn: + extra += f"&x-endpointArn={self.sagemaker_session.endpoint_arn}" + try: response = func(self, *args, **kwargs) if not self.serve_settings.telemetry_opt_out: @@ -79,7 +84,11 @@ def wrapper(self, *args, **kwargs): None, extra, ) - except ModelBuilderException as e: + except ( + ModelBuilderException, + exceptions.CapacityError, + exceptions.UnexpectedStatusException, + ) as e: if not self.serve_settings.telemetry_opt_out: _send_telemetry( "0", diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index 0cad3ab0da..eab27cc808 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -243,6 +243,7 @@ def __init__( # sagemaker_config is validated and initialized inside :func:`_initialize`, # so if default_bucket is None and the sagemaker_config has a default S3 bucket configured, # _default_bucket_name_override will be set again inside :func:`_initialize`. + self.endpoint_arn = None self._default_bucket = None self._default_bucket_name_override = default_bucket # this may also be set again inside :func:`_initialize` if it is None @@ -5054,6 +5055,7 @@ def wait_for_endpoint(self, endpoint, poll=DEFAULT_EP_POLL, live_logging=False): poll=EP_LOGGER_POLL, ) status = desc["EndpointStatus"] + self.endpoint_arn = desc["EndpointArn"] if status != "InService": reason = desc.get("FailureReason", None) diff --git a/src/sagemaker/utils.py b/src/sagemaker/utils.py index 15a3d128de..b518ea260b 100644 --- a/src/sagemaker/utils.py +++ b/src/sagemaker/utils.py @@ -1489,3 +1489,10 @@ def format_tags(tags: Tags) -> List[TagsDict]: return [{"Key": str(k), "Value": str(v)} for k, v in tags.items()] return tags + + +def pysdk_version() -> str: + """Returns the current Sagemaker Python SDK Version""" + v_path = os.path.join(os.path.dirname(__file__), "image_uri_config", "pysdk_version.json") + with open(v_path) as v: + return json.load(v).get("version") diff --git a/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py b/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py index 7c4b014e8a..20787eb31f 100644 --- a/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py +++ b/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py @@ -20,6 +20,7 @@ _construct_url, ) from sagemaker.serve.utils.exceptions import ModelBuilderException, LocalModelOutOfMemoryException +from sagemaker.utils import pysdk_version MOCK_SESSION = Mock() MOCK_FUNC_NAME = "Mock.deploy" @@ -32,6 +33,10 @@ ) MOCK_HUGGINGFACE_ID = "meta-llama/Llama-2-7b-hf" MOCK_EXCEPTION = LocalModelOutOfMemoryException("mock raise ex") +MOCK_ENDPOINT_ARN = ( + "arn:aws:sagemaker:us-west-2:123456789012:endpoint/huggingface-pytorch-tgi-inference-2024-02-06" + "-04-06-23-819" +) class ModelBuilderMock: @@ -72,6 +77,7 @@ def test_capture_telemetry_decorator_djl_success(self, mock_send_telemetry): mock_model_builder.model = MOCK_HUGGINGFACE_ID mock_model_builder.mode = Mode.LOCAL_CONTAINER mock_model_builder.model_server = ModelServer.DJL_SERVING + mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN mock_model_builder.mock_deploy() @@ -79,7 +85,9 @@ def test_capture_telemetry_decorator_djl_success(self, mock_send_telemetry): f"{MOCK_FUNC_NAME}" "&x-modelServer=4" "&x-imageTag=djl-inference:0.25.0-deepspeed0.11.0-cu118" + f"&x-pySdkVersion={pysdk_version()}" f"&x-modelName={MOCK_HUGGINGFACE_ID}" + f"&x-endpointArn={MOCK_ENDPOINT_ARN}" ) mock_send_telemetry.assert_called_once_with( "1", 2, MOCK_SESSION, None, None, expected_extra_str @@ -93,6 +101,7 @@ def test_capture_telemetry_decorator_tgi_success(self, mock_send_telemetry): mock_model_builder.model = MOCK_HUGGINGFACE_ID mock_model_builder.mode = Mode.LOCAL_CONTAINER mock_model_builder.model_server = ModelServer.TGI + mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN mock_model_builder.mock_deploy() @@ -100,7 +109,9 @@ def test_capture_telemetry_decorator_tgi_success(self, mock_send_telemetry): f"{MOCK_FUNC_NAME}" "&x-modelServer=6" "&x-imageTag=huggingface-pytorch-inference:2.0.0-transformers4.28.1-cpu-py310-ubuntu20.04" + f"&x-pySdkVersion={pysdk_version()}" f"&x-modelName={MOCK_HUGGINGFACE_ID}" + f"&x-endpointArn={MOCK_ENDPOINT_ARN}" ) mock_send_telemetry.assert_called_once_with( "1", 2, MOCK_SESSION, None, None, expected_extra_str @@ -126,6 +137,7 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_te mock_model_builder.model = MOCK_HUGGINGFACE_ID mock_model_builder.mode = Mode.LOCAL_CONTAINER mock_model_builder.model_server = ModelServer.DJL_SERVING + mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN mock_exception = Mock() mock_exception_obj = MOCK_EXCEPTION @@ -138,7 +150,9 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_te f"{MOCK_FUNC_NAME}" "&x-modelServer=4" "&x-imageTag=djl-inference:0.25.0-deepspeed0.11.0-cu118" + f"&x-pySdkVersion={pysdk_version()}" f"&x-modelName={MOCK_HUGGINGFACE_ID}" + f"&x-endpointArn={MOCK_ENDPOINT_ARN}" ) mock_send_telemetry.assert_called_once_with( "0", diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index d733752428..d397801823 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -42,6 +42,7 @@ resolve_nested_dict_value_from_config, update_list_of_dicts_with_values_from_config, volume_size_supported, + pysdk_version, ) from tests.unit.sagemaker.workflow.helpers import CustomStep from sagemaker.workflow.parameters import ParameterString, ParameterInteger @@ -1748,3 +1749,6 @@ def test_instance_family_from_full_instance_type(self): for instance_type, family in instance_type_to_family_test_dict.items(): self.assertEqual(family, get_instance_type_family(instance_type)) + + def test_pysdk_version(self): + self.assertIsNotNone(pysdk_version()) From 641bb9d2d3dea88dc2a9b35618db1f677d503591 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Wed, 7 Feb 2024 10:59:41 -0800 Subject: [PATCH 02/10] Fix unit tests --- tests/unit/test_session.py | 79 ++++++++++++++++++++++++++++---------- 1 file changed, 58 insertions(+), 21 deletions(-) diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py index 43768031b3..b59802c003 100644 --- a/tests/unit/test_session.py +++ b/tests/unit/test_session.py @@ -3192,7 +3192,9 @@ def test_create_model_from_job_with_vpc_config_override(sagemaker_session): def test_endpoint_from_production_variants(sagemaker_session): ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) + ims.sagemaker_client.describe_endpoint = Mock( + return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + ) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), sagemaker.production_variant("B", "p299.4096xlarge"), @@ -3487,7 +3489,7 @@ def test_endpoint_from_production_variants_with_sagemaker_config_injection( sagemaker_session.sagemaker_config = SAGEMAKER_CONFIG_ENDPOINT_CONFIG sagemaker_session.sagemaker_client.describe_endpoint = Mock( - return_value={"EndpointStatus": "InService"} + return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} ) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), @@ -3555,7 +3557,7 @@ def test_endpoint_from_production_variants_with_sagemaker_config_injection_parti sagemaker_session.sagemaker_config = SAGEMAKER_CONFIG_ENDPOINT_CONFIG sagemaker_session.sagemaker_client.describe_endpoint = Mock( - return_value={"EndpointStatus": "InService"} + return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} ) pvs = [ sagemaker.production_variant("A", "ml.g5.xlarge"), @@ -3619,7 +3621,7 @@ def test_endpoint_from_production_variants_with_sagemaker_config_injection_no_km sagemaker_session.sagemaker_config = SAGEMAKER_CONFIG_ENDPOINT_CONFIG sagemaker_session.sagemaker_client.describe_endpoint = Mock( - return_value={"EndpointStatus": "InService"} + return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} ) pvs = [ sagemaker.production_variant("A", "ml.g5.xlarge"), @@ -3726,7 +3728,9 @@ def test_create_endpoint_config_with_explainer_config(sagemaker_session): def test_endpoint_from_production_variants_with_tags(sagemaker_session): ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) + ims.sagemaker_client.describe_endpoint = Mock( + return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + ) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), sagemaker.production_variant("B", "p299.4096xlarge"), @@ -3757,7 +3761,9 @@ def test_endpoint_from_production_variants_with_combined_sagemaker_config_inject sagemaker_session.sagemaker_config = SAGEMAKER_CONFIG_ENDPOINT_ENDPOINT_CONFIG_COMBINED ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) + ims.sagemaker_client.describe_endpoint = Mock( + return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + ) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), sagemaker.production_variant("B", "p299.4096xlarge"), @@ -3801,7 +3807,9 @@ def test_endpoint_from_production_variants_with_sagemaker_config_injection_tags( sagemaker_session.sagemaker_config = SAGEMAKER_CONFIG_ENDPOINT ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) + ims.sagemaker_client.describe_endpoint = Mock( + return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + ) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), sagemaker.production_variant("B", "p299.4096xlarge"), @@ -3830,7 +3838,9 @@ def test_endpoint_from_production_variants_with_sagemaker_config_injection_tags( def test_endpoint_from_production_variants_with_accelerator_type(sagemaker_session): ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) + ims.sagemaker_client.describe_endpoint = Mock( + return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + ) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge", accelerator_type=ACCELERATOR_TYPE), sagemaker.production_variant("B", "p299.4096xlarge", accelerator_type=ACCELERATOR_TYPE), @@ -3861,7 +3871,9 @@ def test_endpoint_from_production_variants_with_accelerator_type_sagemaker_confi sagemaker_session.sagemaker_config = SAGEMAKER_CONFIG_ENDPOINT ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) + ims.sagemaker_client.describe_endpoint = Mock( + return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + ) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge", accelerator_type=ACCELERATOR_TYPE), sagemaker.production_variant("B", "p299.4096xlarge", accelerator_type=ACCELERATOR_TYPE), @@ -3892,7 +3904,9 @@ def test_endpoint_from_production_variants_with_serverless_inference_config( sagemaker_session, ): ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) + ims.sagemaker_client.describe_endpoint = Mock( + return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + ) pvs = [ sagemaker.production_variant( "A", "ml.p2.xlarge", serverless_inference_config=SERVERLESS_INFERENCE_CONFIG @@ -3929,7 +3943,9 @@ def test_endpoint_from_production_variants_with_serverless_inference_config_sage sagemaker_session.sagemaker_config = SAGEMAKER_CONFIG_ENDPOINT ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) + ims.sagemaker_client.describe_endpoint = Mock( + return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + ) pvs = [ sagemaker.production_variant( "A", "ml.p2.xlarge", serverless_inference_config=SERVERLESS_INFERENCE_CONFIG @@ -3964,7 +3980,9 @@ def test_endpoint_from_production_variants_with_serverless_inference_config_sage def test_endpoint_from_production_variants_with_async_config(sagemaker_session): ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) + ims.sagemaker_client.describe_endpoint = Mock( + return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + ) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), sagemaker.production_variant("B", "p299.4096xlarge"), @@ -4000,7 +4018,9 @@ def test_endpoint_from_production_variants_with_async_config_sagemaker_config_in sagemaker_session.sagemaker_config = SAGEMAKER_CONFIG_ENDPOINT ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) + ims.sagemaker_client.describe_endpoint = Mock( + return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + ) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), sagemaker.production_variant("B", "p299.4096xlarge"), @@ -4037,7 +4057,9 @@ def test_endpoint_from_production_variants_with_clarify_explainer_config( sagemaker_session, ): ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) + ims.sagemaker_client.describe_endpoint = Mock( + return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + ) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), sagemaker.production_variant("B", "p299.4096xlarge"), @@ -4069,7 +4091,7 @@ def test_endpoint_from_production_variants_with_clarify_explainer_config( def test_update_endpoint_succeed(sagemaker_session): sagemaker_session.sagemaker_client.describe_endpoint = Mock( - return_value={"EndpointStatus": "InService"} + return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} ) endpoint_name = "some-endpoint" endpoint_config = "some-endpoint-config" @@ -4079,7 +4101,7 @@ def test_update_endpoint_succeed(sagemaker_session): def test_update_endpoint_no_wait(sagemaker_session): sagemaker_session.sagemaker_client.describe_endpoint = Mock( - return_value={"EndpointStatus": "Updating"} + return_value={"EndpointStatus": "Updating", "EndpointArn": "arn:aws:sagemaker:"} ) endpoint_name = "some-endpoint" endpoint_config = "some-endpoint-config" @@ -6136,7 +6158,10 @@ def test_upload_data_default_bucket_and_prefix_combinations( def test_is_inference_component_based_endpoint_affirmative(sagemaker_session): - describe_endpoint_response = {"EndpointConfigName": "some-endpoint-config"} + describe_endpoint_response = { + "EndpointConfigName": "some-endpoint-config", + "EndpointArn": "arn:aws:sagemaker:", + } describe_endpoint_config_response = { "ExecutionRoleArn": "some-role-arn", "ProductionVariants": [{"VariantName": "AllTraffic"}], @@ -6160,7 +6185,10 @@ def test_is_inference_component_based_endpoint_affirmative(sagemaker_session): def test_is_inference_component_based_endpoint_negative_no_role(sagemaker_session): - describe_endpoint_response = {"EndpointConfigName": "some-endpoint-config"} + describe_endpoint_response = { + "EndpointConfigName": "some-endpoint-config", + "EndpointArn": "arn:aws:sagemaker:", + } describe_endpoint_config_response = { "ProductionVariants": [{"VariantName": "AllTraffic"}], } @@ -6183,7 +6211,10 @@ def test_is_inference_component_based_endpoint_negative_no_role(sagemaker_sessio def test_is_inference_component_based_endpoint_positive_multiple_variants(sagemaker_session): - describe_endpoint_response = {"EndpointConfigName": "some-endpoint-config"} + describe_endpoint_response = { + "EndpointConfigName": "some-endpoint-config", + "EndpointArn": "arn:aws:sagemaker:", + } describe_endpoint_config_response = { "ExecutionRoleArn": "some-role-arn", "ProductionVariants": [{"VariantName": "AllTraffic1"}, {"VariantName": "AllTraffic2"}], @@ -6207,7 +6238,10 @@ def test_is_inference_component_based_endpoint_positive_multiple_variants(sagema def test_is_inference_component_based_endpoint_negative_no_variants(sagemaker_session): - describe_endpoint_response = {"EndpointConfigName": "some-endpoint-config"} + describe_endpoint_response = { + "EndpointConfigName": "some-endpoint-config", + "EndpointArn": "arn:aws:sagemaker:", + } describe_endpoint_config_response = { "ExecutionRoleArn": "some-role-arn", "ProductionVariants": [], @@ -6231,7 +6265,10 @@ def test_is_inference_component_based_endpoint_negative_no_variants(sagemaker_se def test_is_inference_component_based_endpoint_negative_model_name_present(sagemaker_session): - describe_endpoint_response = {"EndpointConfigName": "some-endpoint-config"} + describe_endpoint_response = { + "EndpointConfigName": "some-endpoint-config", + "EndpointArn": "arn:aws:sagemaker:", + } describe_endpoint_config_response = { "ExecutionRoleArn": "some-role-arn", "ProductionVariants": [ From a616b6f8b16b05ed300da07309bc600aee5532ca Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Wed, 7 Feb 2024 17:33:48 -0800 Subject: [PATCH 03/10] Emit endpoint failure to telemetry --- setup.py | 15 ---- src/sagemaker/serve/utils/telemetry_logger.py | 17 ++-- src/sagemaker/session.py | 4 +- src/sagemaker/utils.py | 7 -- .../serve/utils/test_telemetry_logger.py | 20 ++--- tests/unit/test_session.py | 90 ++++++++++++++----- tests/unit/test_utils.py | 4 - 7 files changed, 91 insertions(+), 66 deletions(-) diff --git a/setup.py b/setup.py index 2ead2c9b7f..b1070319d3 100644 --- a/setup.py +++ b/setup.py @@ -32,21 +32,6 @@ def read_version(): return read("VERSION").strip() -def pysdk_version(): - """Persists Sagemaker Python SDK Version in Config""" - content = '{"version": "' + read_version() + '"}' - with open( - os.path.join( - os.path.dirname(__file__), "src", "sagemaker", "image_uri_config", "pysdk_version.json" - ), - "w", - ) as v: - v.write(content) - - -pysdk_version() - - def read_requirements(filename): """Reads requirements file which lists package dependencies. diff --git a/src/sagemaker/serve/utils/telemetry_logger.py b/src/sagemaker/serve/utils/telemetry_logger.py index 0a4a33bcab..7688332014 100644 --- a/src/sagemaker/serve/utils/telemetry_logger.py +++ b/src/sagemaker/serve/utils/telemetry_logger.py @@ -19,7 +19,7 @@ from sagemaker.serve.mode.function_pointers import Mode from sagemaker.serve.utils.exceptions import ModelBuilderException from sagemaker.serve.utils.types import ModelServer -from sagemaker.utils import pysdk_version +from sagemaker.user_agent import SDK_VERSION logger = logging.getLogger(__name__) @@ -64,23 +64,28 @@ def wrapper(self, *args, **kwargs): f"{func_name}" f"&x-modelServer={MODEL_SERVER_TO_CODE[str(self.model_server)]}" f"&x-imageTag={image_uri_tail}" - f"&x-pySdkVersion={pysdk_version()}" + f"&x-sdkVersion={SDK_VERSION}" ) if self.model_server == ModelServer.DJL_SERVING or self.model_server == ModelServer.TGI: extra += f"&x-modelName={self.model}" - if self.sagemaker_session.endpoint_arn: - extra += f"&x-endpointArn={self.sagemaker_session.endpoint_arn}" + status = "1" + failure_reason = None + if self.sagemaker_session.endpoint: + extra += f"&x-endpointArn={self.sagemaker_session.endpoint['EndpointArn']}" + if self.sagemaker_session.endpoint["EndpointStatus"] != "InService": + status = "0" + failure_reason = self.sagemaker_session.endpoint.get("FailureReason", None) try: response = func(self, *args, **kwargs) if not self.serve_settings.telemetry_opt_out: _send_telemetry( - "1", + status, MODE_TO_CODE[str(self.mode)], self.sagemaker_session, - None, + failure_reason, None, extra, ) diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index eab27cc808..70bfb7d562 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -243,7 +243,7 @@ def __init__( # sagemaker_config is validated and initialized inside :func:`_initialize`, # so if default_bucket is None and the sagemaker_config has a default S3 bucket configured, # _default_bucket_name_override will be set again inside :func:`_initialize`. - self.endpoint_arn = None + self.endpoint = None self._default_bucket = None self._default_bucket_name_override = default_bucket # this may also be set again inside :func:`_initialize` if it is None @@ -5055,7 +5055,7 @@ def wait_for_endpoint(self, endpoint, poll=DEFAULT_EP_POLL, live_logging=False): poll=EP_LOGGER_POLL, ) status = desc["EndpointStatus"] - self.endpoint_arn = desc["EndpointArn"] + self.endpoint = desc if status != "InService": reason = desc.get("FailureReason", None) diff --git a/src/sagemaker/utils.py b/src/sagemaker/utils.py index b518ea260b..15a3d128de 100644 --- a/src/sagemaker/utils.py +++ b/src/sagemaker/utils.py @@ -1489,10 +1489,3 @@ def format_tags(tags: Tags) -> List[TagsDict]: return [{"Key": str(k), "Value": str(v)} for k, v in tags.items()] return tags - - -def pysdk_version() -> str: - """Returns the current Sagemaker Python SDK Version""" - v_path = os.path.join(os.path.dirname(__file__), "image_uri_config", "pysdk_version.json") - with open(v_path) as v: - return json.load(v).get("version") diff --git a/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py b/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py index 20787eb31f..b2d66997c7 100644 --- a/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py +++ b/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py @@ -20,7 +20,7 @@ _construct_url, ) from sagemaker.serve.utils.exceptions import ModelBuilderException, LocalModelOutOfMemoryException -from sagemaker.utils import pysdk_version +from sagemaker.user_agent import SDK_VERSION MOCK_SESSION = Mock() MOCK_FUNC_NAME = "Mock.deploy" @@ -33,10 +33,8 @@ ) MOCK_HUGGINGFACE_ID = "meta-llama/Llama-2-7b-hf" MOCK_EXCEPTION = LocalModelOutOfMemoryException("mock raise ex") -MOCK_ENDPOINT_ARN = ( - "arn:aws:sagemaker:us-west-2:123456789012:endpoint/huggingface-pytorch-tgi-inference-2024-02-06" - "-04-06-23-819" -) +MOCK_ENDPOINT_ARN = "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test" +MOCK_ENDPOINT = {"EndpointStatus": "InService", "EndpointArn": MOCK_ENDPOINT_ARN} class ModelBuilderMock: @@ -77,7 +75,7 @@ def test_capture_telemetry_decorator_djl_success(self, mock_send_telemetry): mock_model_builder.model = MOCK_HUGGINGFACE_ID mock_model_builder.mode = Mode.LOCAL_CONTAINER mock_model_builder.model_server = ModelServer.DJL_SERVING - mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN + mock_model_builder.sagemaker_session.endpoint = MOCK_ENDPOINT mock_model_builder.mock_deploy() @@ -85,7 +83,7 @@ def test_capture_telemetry_decorator_djl_success(self, mock_send_telemetry): f"{MOCK_FUNC_NAME}" "&x-modelServer=4" "&x-imageTag=djl-inference:0.25.0-deepspeed0.11.0-cu118" - f"&x-pySdkVersion={pysdk_version()}" + f"&x-sdkVersion={SDK_VERSION}" f"&x-modelName={MOCK_HUGGINGFACE_ID}" f"&x-endpointArn={MOCK_ENDPOINT_ARN}" ) @@ -101,7 +99,7 @@ def test_capture_telemetry_decorator_tgi_success(self, mock_send_telemetry): mock_model_builder.model = MOCK_HUGGINGFACE_ID mock_model_builder.mode = Mode.LOCAL_CONTAINER mock_model_builder.model_server = ModelServer.TGI - mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN + mock_model_builder.sagemaker_session.endpoint = MOCK_ENDPOINT mock_model_builder.mock_deploy() @@ -109,7 +107,7 @@ def test_capture_telemetry_decorator_tgi_success(self, mock_send_telemetry): f"{MOCK_FUNC_NAME}" "&x-modelServer=6" "&x-imageTag=huggingface-pytorch-inference:2.0.0-transformers4.28.1-cpu-py310-ubuntu20.04" - f"&x-pySdkVersion={pysdk_version()}" + f"&x-sdkVersion={SDK_VERSION}" f"&x-modelName={MOCK_HUGGINGFACE_ID}" f"&x-endpointArn={MOCK_ENDPOINT_ARN}" ) @@ -137,7 +135,7 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_te mock_model_builder.model = MOCK_HUGGINGFACE_ID mock_model_builder.mode = Mode.LOCAL_CONTAINER mock_model_builder.model_server = ModelServer.DJL_SERVING - mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN + mock_model_builder.sagemaker_session.endpoint = MOCK_ENDPOINT mock_exception = Mock() mock_exception_obj = MOCK_EXCEPTION @@ -150,7 +148,7 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_te f"{MOCK_FUNC_NAME}" "&x-modelServer=4" "&x-imageTag=djl-inference:0.25.0-deepspeed0.11.0-cu118" - f"&x-pySdkVersion={pysdk_version()}" + f"&x-sdkVersion={SDK_VERSION}" f"&x-modelName={MOCK_HUGGINGFACE_ID}" f"&x-endpointArn={MOCK_ENDPOINT_ARN}" ) diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py index b59802c003..13f97287b3 100644 --- a/tests/unit/test_session.py +++ b/tests/unit/test_session.py @@ -3193,7 +3193,10 @@ def test_create_model_from_job_with_vpc_config_override(sagemaker_session): def test_endpoint_from_production_variants(sagemaker_session): ims = sagemaker_session ims.sagemaker_client.describe_endpoint = Mock( - return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + return_value={ + "EndpointStatus": "InService", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", + } ) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), @@ -3489,7 +3492,10 @@ def test_endpoint_from_production_variants_with_sagemaker_config_injection( sagemaker_session.sagemaker_config = SAGEMAKER_CONFIG_ENDPOINT_CONFIG sagemaker_session.sagemaker_client.describe_endpoint = Mock( - return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + return_value={ + "EndpointStatus": "InService", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", + } ) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), @@ -3557,7 +3563,10 @@ def test_endpoint_from_production_variants_with_sagemaker_config_injection_parti sagemaker_session.sagemaker_config = SAGEMAKER_CONFIG_ENDPOINT_CONFIG sagemaker_session.sagemaker_client.describe_endpoint = Mock( - return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + return_value={ + "EndpointStatus": "InService", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", + } ) pvs = [ sagemaker.production_variant("A", "ml.g5.xlarge"), @@ -3621,7 +3630,10 @@ def test_endpoint_from_production_variants_with_sagemaker_config_injection_no_km sagemaker_session.sagemaker_config = SAGEMAKER_CONFIG_ENDPOINT_CONFIG sagemaker_session.sagemaker_client.describe_endpoint = Mock( - return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + return_value={ + "EndpointStatus": "InService", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", + } ) pvs = [ sagemaker.production_variant("A", "ml.g5.xlarge"), @@ -3729,7 +3741,10 @@ def test_create_endpoint_config_with_explainer_config(sagemaker_session): def test_endpoint_from_production_variants_with_tags(sagemaker_session): ims = sagemaker_session ims.sagemaker_client.describe_endpoint = Mock( - return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + return_value={ + "EndpointStatus": "InService", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", + } ) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), @@ -3762,7 +3777,10 @@ def test_endpoint_from_production_variants_with_combined_sagemaker_config_inject ims = sagemaker_session ims.sagemaker_client.describe_endpoint = Mock( - return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + return_value={ + "EndpointStatus": "InService", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", + } ) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), @@ -3808,7 +3826,10 @@ def test_endpoint_from_production_variants_with_sagemaker_config_injection_tags( ims = sagemaker_session ims.sagemaker_client.describe_endpoint = Mock( - return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + return_value={ + "EndpointStatus": "InService", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", + } ) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), @@ -3839,7 +3860,10 @@ def test_endpoint_from_production_variants_with_sagemaker_config_injection_tags( def test_endpoint_from_production_variants_with_accelerator_type(sagemaker_session): ims = sagemaker_session ims.sagemaker_client.describe_endpoint = Mock( - return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + return_value={ + "EndpointStatus": "InService", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", + } ) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge", accelerator_type=ACCELERATOR_TYPE), @@ -3872,7 +3896,10 @@ def test_endpoint_from_production_variants_with_accelerator_type_sagemaker_confi ims = sagemaker_session ims.sagemaker_client.describe_endpoint = Mock( - return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + return_value={ + "EndpointStatus": "InService", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", + } ) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge", accelerator_type=ACCELERATOR_TYPE), @@ -3905,7 +3932,10 @@ def test_endpoint_from_production_variants_with_serverless_inference_config( ): ims = sagemaker_session ims.sagemaker_client.describe_endpoint = Mock( - return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + return_value={ + "EndpointStatus": "InService", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", + } ) pvs = [ sagemaker.production_variant( @@ -3944,7 +3974,10 @@ def test_endpoint_from_production_variants_with_serverless_inference_config_sage ims = sagemaker_session ims.sagemaker_client.describe_endpoint = Mock( - return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + return_value={ + "EndpointStatus": "InService", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", + } ) pvs = [ sagemaker.production_variant( @@ -3981,7 +4014,10 @@ def test_endpoint_from_production_variants_with_serverless_inference_config_sage def test_endpoint_from_production_variants_with_async_config(sagemaker_session): ims = sagemaker_session ims.sagemaker_client.describe_endpoint = Mock( - return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + return_value={ + "EndpointStatus": "InService", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", + } ) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), @@ -4019,7 +4055,10 @@ def test_endpoint_from_production_variants_with_async_config_sagemaker_config_in ims = sagemaker_session ims.sagemaker_client.describe_endpoint = Mock( - return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + return_value={ + "EndpointStatus": "InService", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", + } ) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), @@ -4058,7 +4097,10 @@ def test_endpoint_from_production_variants_with_clarify_explainer_config( ): ims = sagemaker_session ims.sagemaker_client.describe_endpoint = Mock( - return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + return_value={ + "EndpointStatus": "InService", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", + } ) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), @@ -4091,7 +4133,10 @@ def test_endpoint_from_production_variants_with_clarify_explainer_config( def test_update_endpoint_succeed(sagemaker_session): sagemaker_session.sagemaker_client.describe_endpoint = Mock( - return_value={"EndpointStatus": "InService", "EndpointArn": "arn:aws:sagemaker:"} + return_value={ + "EndpointStatus": "InService", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", + } ) endpoint_name = "some-endpoint" endpoint_config = "some-endpoint-config" @@ -4101,7 +4146,10 @@ def test_update_endpoint_succeed(sagemaker_session): def test_update_endpoint_no_wait(sagemaker_session): sagemaker_session.sagemaker_client.describe_endpoint = Mock( - return_value={"EndpointStatus": "Updating", "EndpointArn": "arn:aws:sagemaker:"} + return_value={ + "EndpointStatus": "Updating", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", + } ) endpoint_name = "some-endpoint" endpoint_config = "some-endpoint-config" @@ -6160,7 +6208,7 @@ def test_is_inference_component_based_endpoint_affirmative(sagemaker_session): describe_endpoint_response = { "EndpointConfigName": "some-endpoint-config", - "EndpointArn": "arn:aws:sagemaker:", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", } describe_endpoint_config_response = { "ExecutionRoleArn": "some-role-arn", @@ -6187,7 +6235,7 @@ def test_is_inference_component_based_endpoint_negative_no_role(sagemaker_sessio describe_endpoint_response = { "EndpointConfigName": "some-endpoint-config", - "EndpointArn": "arn:aws:sagemaker:", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", } describe_endpoint_config_response = { "ProductionVariants": [{"VariantName": "AllTraffic"}], @@ -6213,7 +6261,7 @@ def test_is_inference_component_based_endpoint_positive_multiple_variants(sagema describe_endpoint_response = { "EndpointConfigName": "some-endpoint-config", - "EndpointArn": "arn:aws:sagemaker:", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", } describe_endpoint_config_response = { "ExecutionRoleArn": "some-role-arn", @@ -6240,7 +6288,7 @@ def test_is_inference_component_based_endpoint_negative_no_variants(sagemaker_se describe_endpoint_response = { "EndpointConfigName": "some-endpoint-config", - "EndpointArn": "arn:aws:sagemaker:", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", } describe_endpoint_config_response = { "ExecutionRoleArn": "some-role-arn", @@ -6267,7 +6315,7 @@ def test_is_inference_component_based_endpoint_negative_model_name_present(sagem describe_endpoint_response = { "EndpointConfigName": "some-endpoint-config", - "EndpointArn": "arn:aws:sagemaker:", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", } describe_endpoint_config_response = { "ExecutionRoleArn": "some-role-arn", diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index d397801823..d733752428 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -42,7 +42,6 @@ resolve_nested_dict_value_from_config, update_list_of_dicts_with_values_from_config, volume_size_supported, - pysdk_version, ) from tests.unit.sagemaker.workflow.helpers import CustomStep from sagemaker.workflow.parameters import ParameterString, ParameterInteger @@ -1749,6 +1748,3 @@ def test_instance_family_from_full_instance_type(self): for instance_type, family in instance_type_to_family_test_dict.items(): self.assertEqual(family, get_instance_type_family(instance_type)) - - def test_pysdk_version(self): - self.assertIsNotNone(pysdk_version()) From 0fd648799a4391b5df2b9bce3fa4e60e840a0eca Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Wed, 7 Feb 2024 17:37:09 -0800 Subject: [PATCH 04/10] Address PR Comments --- .gitignore | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index a97b1283b9..ad6e488dbd 100644 --- a/.gitignore +++ b/.gitignore @@ -34,5 +34,4 @@ env/ **/_repack_script_launcher.sh tests/data/**/_repack_model.py tests/data/experiment/sagemaker-dev-1.0.tar.gz -src/sagemaker/serve/tmp_workspace -src/sagemaker/image_uri_config/pysdk_version.json \ No newline at end of file +src/sagemaker/serve/tmp_workspace \ No newline at end of file From 7d543a257276b1119bdab82e576bba5fb0a9fd0c Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Thu, 8 Feb 2024 23:30:02 -0800 Subject: [PATCH 05/10] Emit latency in telemetry --- src/sagemaker/serve/utils/telemetry_logger.py | 16 ++++--- .../serve/utils/test_telemetry_logger.py | 47 +++++++++++++------ 2 files changed, 42 insertions(+), 21 deletions(-) diff --git a/src/sagemaker/serve/utils/telemetry_logger.py b/src/sagemaker/serve/utils/telemetry_logger.py index 7688332014..ed57ea0840 100644 --- a/src/sagemaker/serve/utils/telemetry_logger.py +++ b/src/sagemaker/serve/utils/telemetry_logger.py @@ -13,6 +13,8 @@ """Placeholder docstring""" from __future__ import absolute_import import logging +from time import perf_counter + import requests from sagemaker import Session, exceptions @@ -70,22 +72,20 @@ def wrapper(self, *args, **kwargs): if self.model_server == ModelServer.DJL_SERVING or self.model_server == ModelServer.TGI: extra += f"&x-modelName={self.model}" - status = "1" - failure_reason = None if self.sagemaker_session.endpoint: extra += f"&x-endpointArn={self.sagemaker_session.endpoint['EndpointArn']}" - if self.sagemaker_session.endpoint["EndpointStatus"] != "InService": - status = "0" - failure_reason = self.sagemaker_session.endpoint.get("FailureReason", None) + start_timer = perf_counter() try: response = func(self, *args, **kwargs) + stop_timer = perf_counter() + extra += f"&x-latency={stop_timer - start_timer}" if not self.serve_settings.telemetry_opt_out: _send_telemetry( - status, + "1", MODE_TO_CODE[str(self.mode)], self.sagemaker_session, - failure_reason, + None, None, extra, ) @@ -94,6 +94,8 @@ def wrapper(self, *args, **kwargs): exceptions.CapacityError, exceptions.UnexpectedStatusException, ) as e: + stop_timer = perf_counter() + extra += f"&x-latency={stop_timer - start_timer}" if not self.serve_settings.telemetry_opt_out: _send_telemetry( "0", diff --git a/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py b/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py index b2d66997c7..3834a02ad0 100644 --- a/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py +++ b/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py @@ -86,10 +86,18 @@ def test_capture_telemetry_decorator_djl_success(self, mock_send_telemetry): f"&x-sdkVersion={SDK_VERSION}" f"&x-modelName={MOCK_HUGGINGFACE_ID}" f"&x-endpointArn={MOCK_ENDPOINT_ARN}" + f"&x-latency=" ) - mock_send_telemetry.assert_called_once_with( - "1", 2, MOCK_SESSION, None, None, expected_extra_str - ) + + mock_send_telemetry.assert_called_once() + args = mock_send_telemetry.call_args.args + assert args is not None + assert "1" == args[0] + assert 2 == args[1] + assert MOCK_SESSION == args[2] + assert args[3] is None + assert args[4] is None + assert expected_extra_str in args[5] @patch("sagemaker.serve.utils.telemetry_logger._send_telemetry") def test_capture_telemetry_decorator_tgi_success(self, mock_send_telemetry): @@ -110,10 +118,18 @@ def test_capture_telemetry_decorator_tgi_success(self, mock_send_telemetry): f"&x-sdkVersion={SDK_VERSION}" f"&x-modelName={MOCK_HUGGINGFACE_ID}" f"&x-endpointArn={MOCK_ENDPOINT_ARN}" + f"&x-latency=" ) - mock_send_telemetry.assert_called_once_with( - "1", 2, MOCK_SESSION, None, None, expected_extra_str - ) + + mock_send_telemetry.assert_called_once() + args = mock_send_telemetry.call_args.args + assert args is not None + assert "1" == args[0] + assert 2 == args[1] + assert MOCK_SESSION == args[2] + assert args[3] is None + assert args[4] is None + assert expected_extra_str in args[5] @patch("sagemaker.serve.utils.telemetry_logger._send_telemetry") def test_capture_telemetry_decorator_no_call_when_disabled(self, mock_send_telemetry): @@ -151,15 +167,18 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_te f"&x-sdkVersion={SDK_VERSION}" f"&x-modelName={MOCK_HUGGINGFACE_ID}" f"&x-endpointArn={MOCK_ENDPOINT_ARN}" + f"&x-latency=" ) - mock_send_telemetry.assert_called_once_with( - "0", - 2, - MOCK_SESSION, - str(mock_exception_obj), - mock_exception_obj.__class__.__name__, - expected_extra_str, - ) + + mock_send_telemetry.assert_called_once() + args = mock_send_telemetry.call_args.args + assert args is not None + assert "0" == args[0] + assert 2 == args[1] + assert MOCK_SESSION == args[2] + assert str(mock_exception_obj) == args[3] + assert mock_exception_obj.__class__.__name__ == args[4] + assert expected_extra_str in args[5] def test_construct_url_with_failure_reason_and_extra_info(self): mock_accountId = "12345678910" From ab068925cfbeb90a67cbb75c9d9e775887537a0c Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Fri, 9 Feb 2024 11:17:46 -0800 Subject: [PATCH 06/10] Address PR Comments --- src/sagemaker/serve/utils/telemetry_logger.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sagemaker/serve/utils/telemetry_logger.py b/src/sagemaker/serve/utils/telemetry_logger.py index ed57ea0840..96de36c2fa 100644 --- a/src/sagemaker/serve/utils/telemetry_logger.py +++ b/src/sagemaker/serve/utils/telemetry_logger.py @@ -93,6 +93,7 @@ def wrapper(self, *args, **kwargs): ModelBuilderException, exceptions.CapacityError, exceptions.UnexpectedStatusException, + exceptions.AsyncInferenceError, ) as e: stop_timer = perf_counter() extra += f"&x-latency={stop_timer - start_timer}" From e1615099c2ad0cdb2bd21b170f296d6b20411898 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Mon, 12 Feb 2024 06:30:06 -0800 Subject: [PATCH 07/10] Addressed PR Comments --- src/sagemaker/serve/utils/telemetry_logger.py | 6 +- .../serve/utils/test_telemetry_logger.py | 53 +++--- tests/unit/test_session.py | 155 +++--------------- 3 files changed, 48 insertions(+), 166 deletions(-) diff --git a/src/sagemaker/serve/utils/telemetry_logger.py b/src/sagemaker/serve/utils/telemetry_logger.py index 96de36c2fa..e63bb27a2b 100644 --- a/src/sagemaker/serve/utils/telemetry_logger.py +++ b/src/sagemaker/serve/utils/telemetry_logger.py @@ -79,7 +79,8 @@ def wrapper(self, *args, **kwargs): try: response = func(self, *args, **kwargs) stop_timer = perf_counter() - extra += f"&x-latency={stop_timer - start_timer}" + elapsed = stop_timer - start_timer + extra += f"&x-latency={round(elapsed, 2)}" if not self.serve_settings.telemetry_opt_out: _send_telemetry( "1", @@ -96,7 +97,8 @@ def wrapper(self, *args, **kwargs): exceptions.AsyncInferenceError, ) as e: stop_timer = perf_counter() - extra += f"&x-latency={stop_timer - start_timer}" + elapsed = stop_timer - start_timer + extra += f"&x-latency={round(elapsed, 2)}" if not self.serve_settings.telemetry_opt_out: _send_telemetry( "0", diff --git a/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py b/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py index 3834a02ad0..12e4bcca75 100644 --- a/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py +++ b/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py @@ -79,6 +79,8 @@ def test_capture_telemetry_decorator_djl_success(self, mock_send_telemetry): mock_model_builder.mock_deploy() + args = mock_send_telemetry.call_args.args + latency = str(args[5]).split("latency=")[1] expected_extra_str = ( f"{MOCK_FUNC_NAME}" "&x-modelServer=4" @@ -86,18 +88,12 @@ def test_capture_telemetry_decorator_djl_success(self, mock_send_telemetry): f"&x-sdkVersion={SDK_VERSION}" f"&x-modelName={MOCK_HUGGINGFACE_ID}" f"&x-endpointArn={MOCK_ENDPOINT_ARN}" - f"&x-latency=" + f"&x-latency={latency}" ) - mock_send_telemetry.assert_called_once() - args = mock_send_telemetry.call_args.args - assert args is not None - assert "1" == args[0] - assert 2 == args[1] - assert MOCK_SESSION == args[2] - assert args[3] is None - assert args[4] is None - assert expected_extra_str in args[5] + mock_send_telemetry.assert_called_once_with( + "1", 2, MOCK_SESSION, None, None, expected_extra_str + ) @patch("sagemaker.serve.utils.telemetry_logger._send_telemetry") def test_capture_telemetry_decorator_tgi_success(self, mock_send_telemetry): @@ -111,6 +107,8 @@ def test_capture_telemetry_decorator_tgi_success(self, mock_send_telemetry): mock_model_builder.mock_deploy() + args = mock_send_telemetry.call_args.args + latency = str(args[5]).split("latency=")[1] expected_extra_str = ( f"{MOCK_FUNC_NAME}" "&x-modelServer=6" @@ -118,18 +116,12 @@ def test_capture_telemetry_decorator_tgi_success(self, mock_send_telemetry): f"&x-sdkVersion={SDK_VERSION}" f"&x-modelName={MOCK_HUGGINGFACE_ID}" f"&x-endpointArn={MOCK_ENDPOINT_ARN}" - f"&x-latency=" + f"&x-latency={latency}" ) - mock_send_telemetry.assert_called_once() - args = mock_send_telemetry.call_args.args - assert args is not None - assert "1" == args[0] - assert 2 == args[1] - assert MOCK_SESSION == args[2] - assert args[3] is None - assert args[4] is None - assert expected_extra_str in args[5] + mock_send_telemetry.assert_called_once_with( + "1", 2, MOCK_SESSION, None, None, expected_extra_str + ) @patch("sagemaker.serve.utils.telemetry_logger._send_telemetry") def test_capture_telemetry_decorator_no_call_when_disabled(self, mock_send_telemetry): @@ -160,6 +152,8 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_te with self.assertRaises(ModelBuilderException) as _: mock_model_builder.mock_deploy(mock_exception) + args = mock_send_telemetry.call_args.args + latency = str(args[5]).split("latency=")[1] expected_extra_str = ( f"{MOCK_FUNC_NAME}" "&x-modelServer=4" @@ -167,18 +161,17 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_te f"&x-sdkVersion={SDK_VERSION}" f"&x-modelName={MOCK_HUGGINGFACE_ID}" f"&x-endpointArn={MOCK_ENDPOINT_ARN}" - f"&x-latency=" + f"&x-latency={latency}" ) - mock_send_telemetry.assert_called_once() - args = mock_send_telemetry.call_args.args - assert args is not None - assert "0" == args[0] - assert 2 == args[1] - assert MOCK_SESSION == args[2] - assert str(mock_exception_obj) == args[3] - assert mock_exception_obj.__class__.__name__ == args[4] - assert expected_extra_str in args[5] + mock_send_telemetry.assert_called_once_with( + "0", + 2, + MOCK_SESSION, + str(mock_exception_obj), + mock_exception_obj.__class__.__name__, + expected_extra_str, + ) def test_construct_url_with_failure_reason_and_extra_info(self): mock_accountId = "12345678910" diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py index 1ec6875dc3..c61d52faee 100644 --- a/tests/unit/test_session.py +++ b/tests/unit/test_session.py @@ -3192,12 +3192,7 @@ def test_create_model_from_job_with_vpc_config_override(sagemaker_session): def test_endpoint_from_production_variants(sagemaker_session): ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock( - return_value={ - "EndpointStatus": "InService", - "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", - } - ) + ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), sagemaker.production_variant("B", "p299.4096xlarge"), @@ -3492,10 +3487,7 @@ def test_endpoint_from_production_variants_with_sagemaker_config_injection( sagemaker_session.sagemaker_config = SAGEMAKER_CONFIG_ENDPOINT_CONFIG sagemaker_session.sagemaker_client.describe_endpoint = Mock( - return_value={ - "EndpointStatus": "InService", - "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", - } + return_value={"EndpointStatus": "InService"} ) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), @@ -3563,10 +3555,7 @@ def test_endpoint_from_production_variants_with_sagemaker_config_injection_parti sagemaker_session.sagemaker_config = SAGEMAKER_CONFIG_ENDPOINT_CONFIG sagemaker_session.sagemaker_client.describe_endpoint = Mock( - return_value={ - "EndpointStatus": "InService", - "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", - } + return_value={"EndpointStatus": "InService"} ) pvs = [ sagemaker.production_variant("A", "ml.g5.xlarge"), @@ -3630,10 +3619,7 @@ def test_endpoint_from_production_variants_with_sagemaker_config_injection_no_km sagemaker_session.sagemaker_config = SAGEMAKER_CONFIG_ENDPOINT_CONFIG sagemaker_session.sagemaker_client.describe_endpoint = Mock( - return_value={ - "EndpointStatus": "InService", - "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", - } + return_value={"EndpointStatus": "InService"} ) pvs = [ sagemaker.production_variant("A", "ml.g5.xlarge"), @@ -3740,12 +3726,7 @@ def test_create_endpoint_config_with_explainer_config(sagemaker_session): def test_endpoint_from_production_variants_with_tags(sagemaker_session): ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock( - return_value={ - "EndpointStatus": "InService", - "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", - } - ) + ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), sagemaker.production_variant("B", "p299.4096xlarge"), @@ -3776,12 +3757,7 @@ def test_endpoint_from_production_variants_with_combined_sagemaker_config_inject sagemaker_session.sagemaker_config = SAGEMAKER_CONFIG_ENDPOINT_ENDPOINT_CONFIG_COMBINED ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock( - return_value={ - "EndpointStatus": "InService", - "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", - } - ) + ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), sagemaker.production_variant("B", "p299.4096xlarge"), @@ -3825,12 +3801,7 @@ def test_endpoint_from_production_variants_with_sagemaker_config_injection_tags( sagemaker_session.sagemaker_config = SAGEMAKER_CONFIG_ENDPOINT ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock( - return_value={ - "EndpointStatus": "InService", - "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", - } - ) + ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), sagemaker.production_variant("B", "p299.4096xlarge"), @@ -3859,12 +3830,7 @@ def test_endpoint_from_production_variants_with_sagemaker_config_injection_tags( def test_endpoint_from_production_variants_with_accelerator_type(sagemaker_session): ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock( - return_value={ - "EndpointStatus": "InService", - "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", - } - ) + ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge", accelerator_type=ACCELERATOR_TYPE), sagemaker.production_variant("B", "p299.4096xlarge", accelerator_type=ACCELERATOR_TYPE), @@ -3895,12 +3861,7 @@ def test_endpoint_from_production_variants_with_accelerator_type_sagemaker_confi sagemaker_session.sagemaker_config = SAGEMAKER_CONFIG_ENDPOINT ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock( - return_value={ - "EndpointStatus": "InService", - "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", - } - ) + ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge", accelerator_type=ACCELERATOR_TYPE), sagemaker.production_variant("B", "p299.4096xlarge", accelerator_type=ACCELERATOR_TYPE), @@ -3931,12 +3892,7 @@ def test_endpoint_from_production_variants_with_serverless_inference_config( sagemaker_session, ): ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock( - return_value={ - "EndpointStatus": "InService", - "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", - } - ) + ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) pvs = [ sagemaker.production_variant( "A", "ml.p2.xlarge", serverless_inference_config=SERVERLESS_INFERENCE_CONFIG @@ -3973,12 +3929,7 @@ def test_endpoint_from_production_variants_with_serverless_inference_config_sage sagemaker_session.sagemaker_config = SAGEMAKER_CONFIG_ENDPOINT ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock( - return_value={ - "EndpointStatus": "InService", - "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", - } - ) + ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) pvs = [ sagemaker.production_variant( "A", "ml.p2.xlarge", serverless_inference_config=SERVERLESS_INFERENCE_CONFIG @@ -4013,12 +3964,7 @@ def test_endpoint_from_production_variants_with_serverless_inference_config_sage def test_endpoint_from_production_variants_with_async_config(sagemaker_session): ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock( - return_value={ - "EndpointStatus": "InService", - "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", - } - ) + ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), sagemaker.production_variant("B", "p299.4096xlarge"), @@ -4054,12 +4000,7 @@ def test_endpoint_from_production_variants_with_async_config_sagemaker_config_in sagemaker_session.sagemaker_config = SAGEMAKER_CONFIG_ENDPOINT ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock( - return_value={ - "EndpointStatus": "InService", - "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", - } - ) + ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), sagemaker.production_variant("B", "p299.4096xlarge"), @@ -4096,12 +4037,7 @@ def test_endpoint_from_production_variants_with_clarify_explainer_config( sagemaker_session, ): ims = sagemaker_session - ims.sagemaker_client.describe_endpoint = Mock( - return_value={ - "EndpointStatus": "InService", - "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", - } - ) + ims.sagemaker_client.describe_endpoint = Mock(return_value={"EndpointStatus": "InService"}) pvs = [ sagemaker.production_variant("A", "ml.p2.xlarge"), sagemaker.production_variant("B", "p299.4096xlarge"), @@ -4133,10 +4069,7 @@ def test_endpoint_from_production_variants_with_clarify_explainer_config( def test_update_endpoint_succeed(sagemaker_session): sagemaker_session.sagemaker_client.describe_endpoint = Mock( - return_value={ - "EndpointStatus": "InService", - "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", - } + return_value={"EndpointStatus": "InService"} ) endpoint_name = "some-endpoint" endpoint_config = "some-endpoint-config" @@ -4146,10 +4079,7 @@ def test_update_endpoint_succeed(sagemaker_session): def test_update_endpoint_no_wait(sagemaker_session): sagemaker_session.sagemaker_client.describe_endpoint = Mock( - return_value={ - "EndpointStatus": "Updating", - "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", - } + return_value={"EndpointStatus": "Updating"} ) endpoint_name = "some-endpoint" endpoint_config = "some-endpoint-config" @@ -5331,46 +5261,6 @@ def test_list_feature_groups(sagemaker_session): ) -@pytest.fixture() -def sagemaker_session_with_fs_runtime_client(): - boto_mock = MagicMock(name="boto_session") - sagemaker_session = sagemaker.Session( - boto_session=boto_mock, sagemaker_featurestore_runtime_client=MagicMock() - ) - return sagemaker_session - - -def test_feature_group_put_record(sagemaker_session_with_fs_runtime_client): - sagemaker_session_with_fs_runtime_client.put_record( - feature_group_name="MyFeatureGroup", - record=[{"FeatureName": "feature1", "ValueAsString": "value1"}], - ) - fs_client_mock = sagemaker_session_with_fs_runtime_client.sagemaker_featurestore_runtime_client - - assert fs_client_mock.put_record.called_with( - FeatureGroupName="MyFeatureGroup", - record=[{"FeatureName": "feature1", "ValueAsString": "value1"}], - ) - - -def test_feature_group_put_record_with_ttl_and_target_stores( - sagemaker_session_with_fs_runtime_client, -): - sagemaker_session_with_fs_runtime_client.put_record( - feature_group_name="MyFeatureGroup", - record=[{"FeatureName": "feature1", "ValueAsString": "value1"}], - ttl_duration={"Unit": "Seconds", "Value": 123}, - target_stores=["OnlineStore", "OfflineStore"], - ) - fs_client_mock = sagemaker_session_with_fs_runtime_client.sagemaker_featurestore_runtime_client - assert fs_client_mock.put_record.called_with( - FeatureGroupName="MyFeatureGroup", - record=[{"FeatureName": "feature1", "ValueAsString": "value1"}], - target_stores=["OnlineStore", "OfflineStore"], - ttl_duration={"Unit": "Seconds", "Value": 123}, - ) - - def test_start_query_execution(sagemaker_session): athena_mock = Mock() sagemaker_session.boto_session.client( @@ -6248,7 +6138,7 @@ def test_is_inference_component_based_endpoint_affirmative(sagemaker_session): describe_endpoint_response = { "EndpointConfigName": "some-endpoint-config", - "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", + "EndpointArn": "arn:aws:sagemaker:", } describe_endpoint_config_response = { "ExecutionRoleArn": "some-role-arn", @@ -6273,10 +6163,7 @@ def test_is_inference_component_based_endpoint_affirmative(sagemaker_session): def test_is_inference_component_based_endpoint_negative_no_role(sagemaker_session): - describe_endpoint_response = { - "EndpointConfigName": "some-endpoint-config", - "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", - } + describe_endpoint_response = {"EndpointConfigName": "some-endpoint-config"} describe_endpoint_config_response = { "ProductionVariants": [{"VariantName": "AllTraffic"}], } @@ -6301,7 +6188,7 @@ def test_is_inference_component_based_endpoint_positive_multiple_variants(sagema describe_endpoint_response = { "EndpointConfigName": "some-endpoint-config", - "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", + "EndpointArn": "arn:aws:sagemaker:", } describe_endpoint_config_response = { "ExecutionRoleArn": "some-role-arn", @@ -6328,7 +6215,7 @@ def test_is_inference_component_based_endpoint_negative_no_variants(sagemaker_se describe_endpoint_response = { "EndpointConfigName": "some-endpoint-config", - "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", + "EndpointArn": "arn:aws:sagemaker:", } describe_endpoint_config_response = { "ExecutionRoleArn": "some-role-arn", @@ -6355,7 +6242,7 @@ def test_is_inference_component_based_endpoint_negative_model_name_present(sagem describe_endpoint_response = { "EndpointConfigName": "some-endpoint-config", - "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test", + "EndpointArn": "arn:aws:sagemaker:", } describe_endpoint_config_response = { "ExecutionRoleArn": "some-role-arn", From 834a5a29c34670d0b0516ae559920f47dba0f336 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Mon, 12 Feb 2024 10:06:51 -0800 Subject: [PATCH 08/10] Address PR Comments --- src/sagemaker/serve/utils/telemetry_logger.py | 4 +- src/sagemaker/session.py | 10 ++-- .../serve/utils/test_telemetry_logger.py | 7 +-- tests/unit/test_session.py | 60 ++++++++++++++----- 4 files changed, 55 insertions(+), 26 deletions(-) diff --git a/src/sagemaker/serve/utils/telemetry_logger.py b/src/sagemaker/serve/utils/telemetry_logger.py index e63bb27a2b..31b5c5e771 100644 --- a/src/sagemaker/serve/utils/telemetry_logger.py +++ b/src/sagemaker/serve/utils/telemetry_logger.py @@ -72,8 +72,8 @@ def wrapper(self, *args, **kwargs): if self.model_server == ModelServer.DJL_SERVING or self.model_server == ModelServer.TGI: extra += f"&x-modelName={self.model}" - if self.sagemaker_session.endpoint: - extra += f"&x-endpointArn={self.sagemaker_session.endpoint['EndpointArn']}" + if self.sagemaker_session.endpoint_arn: + extra += f"&x-endpointArn={self.sagemaker_session.endpoint_arn}" start_timer = perf_counter() try: diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index 003cad43f9..428b5eed21 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -243,7 +243,7 @@ def __init__( # sagemaker_config is validated and initialized inside :func:`_initialize`, # so if default_bucket is None and the sagemaker_config has a default S3 bucket configured, # _default_bucket_name_override will be set again inside :func:`_initialize`. - self.endpoint = None + self.endpoint_arn = None self._default_bucket = None self._default_bucket_name_override = default_bucket # this may also be set again inside :func:`_initialize` if it is None @@ -4285,9 +4285,11 @@ def create_endpoint(self, endpoint_name, config_name, tags=None, wait=True, live tags, "{}.{}.{}".format(SAGEMAKER, ENDPOINT, TAGS) ) - self.sagemaker_client.create_endpoint( + res = self.sagemaker_client.create_endpoint( EndpointName=endpoint_name, EndpointConfigName=config_name, Tags=tags ) + self.endpoint_arn = res["EndpointArn"] + if wait: self.wait_for_endpoint(endpoint_name, live_logging=live_logging) return endpoint_name @@ -4345,9 +4347,10 @@ def update_endpoint(self, endpoint_name, endpoint_config_name, wait=True): "existing endpoint name".format(endpoint_name) ) - self.sagemaker_client.update_endpoint( + res = self.sagemaker_client.update_endpoint( EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name ) + self.endpoint_arn = res["EndpointArn"] if wait: self.wait_for_endpoint(endpoint_name) @@ -5055,7 +5058,6 @@ def wait_for_endpoint(self, endpoint, poll=DEFAULT_EP_POLL, live_logging=False): poll=EP_LOGGER_POLL, ) status = desc["EndpointStatus"] - self.endpoint = desc if status != "InService": reason = desc.get("FailureReason", None) diff --git a/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py b/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py index 12e4bcca75..73d24c9505 100644 --- a/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py +++ b/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py @@ -34,7 +34,6 @@ MOCK_HUGGINGFACE_ID = "meta-llama/Llama-2-7b-hf" MOCK_EXCEPTION = LocalModelOutOfMemoryException("mock raise ex") MOCK_ENDPOINT_ARN = "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test" -MOCK_ENDPOINT = {"EndpointStatus": "InService", "EndpointArn": MOCK_ENDPOINT_ARN} class ModelBuilderMock: @@ -75,7 +74,7 @@ def test_capture_telemetry_decorator_djl_success(self, mock_send_telemetry): mock_model_builder.model = MOCK_HUGGINGFACE_ID mock_model_builder.mode = Mode.LOCAL_CONTAINER mock_model_builder.model_server = ModelServer.DJL_SERVING - mock_model_builder.sagemaker_session.endpoint = MOCK_ENDPOINT + mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN mock_model_builder.mock_deploy() @@ -103,7 +102,7 @@ def test_capture_telemetry_decorator_tgi_success(self, mock_send_telemetry): mock_model_builder.model = MOCK_HUGGINGFACE_ID mock_model_builder.mode = Mode.LOCAL_CONTAINER mock_model_builder.model_server = ModelServer.TGI - mock_model_builder.sagemaker_session.endpoint = MOCK_ENDPOINT + mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN mock_model_builder.mock_deploy() @@ -143,7 +142,7 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_te mock_model_builder.model = MOCK_HUGGINGFACE_ID mock_model_builder.mode = Mode.LOCAL_CONTAINER mock_model_builder.model_server = ModelServer.DJL_SERVING - mock_model_builder.sagemaker_session.endpoint = MOCK_ENDPOINT + mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN mock_exception = Mock() mock_exception_obj = MOCK_EXCEPTION diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py index c61d52faee..de543b6f53 100644 --- a/tests/unit/test_session.py +++ b/tests/unit/test_session.py @@ -5261,6 +5261,46 @@ def test_list_feature_groups(sagemaker_session): ) +@pytest.fixture() +def sagemaker_session_with_fs_runtime_client(): + boto_mock = MagicMock(name="boto_session") + sagemaker_session = sagemaker.Session( + boto_session=boto_mock, sagemaker_featurestore_runtime_client=MagicMock() + ) + return sagemaker_session + + +def test_feature_group_put_record(sagemaker_session_with_fs_runtime_client): + sagemaker_session_with_fs_runtime_client.put_record( + feature_group_name="MyFeatureGroup", + record=[{"FeatureName": "feature1", "ValueAsString": "value1"}], + ) + fs_client_mock = sagemaker_session_with_fs_runtime_client.sagemaker_featurestore_runtime_client + + assert fs_client_mock.put_record.called_with( + FeatureGroupName="MyFeatureGroup", + record=[{"FeatureName": "feature1", "ValueAsString": "value1"}], + ) + + +def test_feature_group_put_record_with_ttl_and_target_stores( + sagemaker_session_with_fs_runtime_client, +): + sagemaker_session_with_fs_runtime_client.put_record( + feature_group_name="MyFeatureGroup", + record=[{"FeatureName": "feature1", "ValueAsString": "value1"}], + ttl_duration={"Unit": "Seconds", "Value": 123}, + target_stores=["OnlineStore", "OfflineStore"], + ) + fs_client_mock = sagemaker_session_with_fs_runtime_client.sagemaker_featurestore_runtime_client + assert fs_client_mock.put_record.called_with( + FeatureGroupName="MyFeatureGroup", + record=[{"FeatureName": "feature1", "ValueAsString": "value1"}], + target_stores=["OnlineStore", "OfflineStore"], + ttl_duration={"Unit": "Seconds", "Value": 123}, + ) + + def test_start_query_execution(sagemaker_session): athena_mock = Mock() sagemaker_session.boto_session.client( @@ -6136,10 +6176,7 @@ def test_upload_data_default_bucket_and_prefix_combinations( def test_is_inference_component_based_endpoint_affirmative(sagemaker_session): - describe_endpoint_response = { - "EndpointConfigName": "some-endpoint-config", - "EndpointArn": "arn:aws:sagemaker:", - } + describe_endpoint_response = {"EndpointConfigName": "some-endpoint-config"} describe_endpoint_config_response = { "ExecutionRoleArn": "some-role-arn", "ProductionVariants": [{"VariantName": "AllTraffic"}], @@ -6186,10 +6223,7 @@ def test_is_inference_component_based_endpoint_negative_no_role(sagemaker_sessio def test_is_inference_component_based_endpoint_positive_multiple_variants(sagemaker_session): - describe_endpoint_response = { - "EndpointConfigName": "some-endpoint-config", - "EndpointArn": "arn:aws:sagemaker:", - } + describe_endpoint_response = {"EndpointConfigName": "some-endpoint-config"} describe_endpoint_config_response = { "ExecutionRoleArn": "some-role-arn", "ProductionVariants": [{"VariantName": "AllTraffic1"}, {"VariantName": "AllTraffic2"}], @@ -6213,10 +6247,7 @@ def test_is_inference_component_based_endpoint_positive_multiple_variants(sagema def test_is_inference_component_based_endpoint_negative_no_variants(sagemaker_session): - describe_endpoint_response = { - "EndpointConfigName": "some-endpoint-config", - "EndpointArn": "arn:aws:sagemaker:", - } + describe_endpoint_response = {"EndpointConfigName": "some-endpoint-config"} describe_endpoint_config_response = { "ExecutionRoleArn": "some-role-arn", "ProductionVariants": [], @@ -6240,10 +6271,7 @@ def test_is_inference_component_based_endpoint_negative_no_variants(sagemaker_se def test_is_inference_component_based_endpoint_negative_model_name_present(sagemaker_session): - describe_endpoint_response = { - "EndpointConfigName": "some-endpoint-config", - "EndpointArn": "arn:aws:sagemaker:", - } + describe_endpoint_response = {"EndpointConfigName": "some-endpoint-config"} describe_endpoint_config_response = { "ExecutionRoleArn": "some-role-arn", "ProductionVariants": [ From 30169b375853e46b5fd5c28950ce9a23bd0b2ce6 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Mon, 12 Feb 2024 14:24:41 -0800 Subject: [PATCH 09/10] Fix tests --- src/sagemaker/serve/utils/telemetry_logger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sagemaker/serve/utils/telemetry_logger.py b/src/sagemaker/serve/utils/telemetry_logger.py index 31b5c5e771..84044c4d96 100644 --- a/src/sagemaker/serve/utils/telemetry_logger.py +++ b/src/sagemaker/serve/utils/telemetry_logger.py @@ -72,7 +72,7 @@ def wrapper(self, *args, **kwargs): if self.model_server == ModelServer.DJL_SERVING or self.model_server == ModelServer.TGI: extra += f"&x-modelName={self.model}" - if self.sagemaker_session.endpoint_arn: + if self.sagemaker_session and self.sagemaker_session.endpoint_arn: extra += f"&x-endpointArn={self.sagemaker_session.endpoint_arn}" start_timer = perf_counter() From 15669351f899fdfc6a797543778c3b61795d21b6 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Mon, 12 Feb 2024 17:34:55 -0800 Subject: [PATCH 10/10] Fix integ tests --- src/sagemaker/session.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index 428b5eed21..14b2e904e8 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -4288,7 +4288,8 @@ def create_endpoint(self, endpoint_name, config_name, tags=None, wait=True, live res = self.sagemaker_client.create_endpoint( EndpointName=endpoint_name, EndpointConfigName=config_name, Tags=tags ) - self.endpoint_arn = res["EndpointArn"] + if res: + self.endpoint_arn = res["EndpointArn"] if wait: self.wait_for_endpoint(endpoint_name, live_logging=live_logging) @@ -4350,7 +4351,8 @@ def update_endpoint(self, endpoint_name, endpoint_config_name, wait=True): res = self.sagemaker_client.update_endpoint( EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name ) - self.endpoint_arn = res["EndpointArn"] + if res: + self.endpoint_arn = res["EndpointArn"] if wait: self.wait_for_endpoint(endpoint_name)