Skip to content

Commit 2432696

Browse files
makungaj1, Jonathan Makunga, benieric
authored and
root
committed
feat: Telemetry metrics (aws#4414)
* Emit additional telemetry metrics
* Fix unit tests
* Emit endpoint failure to telemetry
* Address PR Comments
* Emit latency in telemetry
* Address PR Comments
* Addressed PR Comments
* Address PR Comments
* Fix tests
* Fix integ tests

---------

Co-authored-by: Jonathan Makunga <[email protected]>
Co-authored-by: Erick Benitez-Ramos <[email protected]>
1 parent af9212a commit 2432696

File tree

4 files changed

+53
-5
lines changed

4 files changed

+53
-5
lines changed

.gitignore

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,4 @@ env/
3434
**/_repack_script_launcher.sh
3535
tests/data/**/_repack_model.py
3636
tests/data/experiment/sagemaker-dev-1.0.tar.gz
37-
src/sagemaker/serve/tmp_workspace
37+
src/sagemaker/serve/tmp_workspace

src/sagemaker/serve/utils/telemetry_logger.py

+21-2
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,15 @@
1313
"""Placeholder docstring"""
1414
from __future__ import absolute_import
1515
import logging
16+
from time import perf_counter
17+
1618
import requests
1719

18-
from sagemaker import Session
20+
from sagemaker import Session, exceptions
1921
from sagemaker.serve.mode.function_pointers import Mode
2022
from sagemaker.serve.utils.exceptions import ModelBuilderException
2123
from sagemaker.serve.utils.types import ModelServer
24+
from sagemaker.user_agent import SDK_VERSION
2225

2326
logger = logging.getLogger(__name__)
2427

@@ -63,13 +66,21 @@ def wrapper(self, *args, **kwargs):
6366
f"{func_name}"
6467
f"&x-modelServer={MODEL_SERVER_TO_CODE[str(self.model_server)]}"
6568
f"&x-imageTag={image_uri_tail}"
69+
f"&x-sdkVersion={SDK_VERSION}"
6670
)
6771

6872
if self.model_server == ModelServer.DJL_SERVING or self.model_server == ModelServer.TGI:
6973
extra += f"&x-modelName={self.model}"
7074

75+
if self.sagemaker_session and self.sagemaker_session.endpoint_arn:
76+
extra += f"&x-endpointArn={self.sagemaker_session.endpoint_arn}"
77+
78+
start_timer = perf_counter()
7179
try:
7280
response = func(self, *args, **kwargs)
81+
stop_timer = perf_counter()
82+
elapsed = stop_timer - start_timer
83+
extra += f"&x-latency={round(elapsed, 2)}"
7384
if not self.serve_settings.telemetry_opt_out:
7485
_send_telemetry(
7586
"1",
@@ -79,7 +90,15 @@ def wrapper(self, *args, **kwargs):
7990
None,
8091
extra,
8192
)
82-
except ModelBuilderException as e:
93+
except (
94+
ModelBuilderException,
95+
exceptions.CapacityError,
96+
exceptions.UnexpectedStatusException,
97+
exceptions.AsyncInferenceError,
98+
) as e:
99+
stop_timer = perf_counter()
100+
elapsed = stop_timer - start_timer
101+
extra += f"&x-latency={round(elapsed, 2)}"
83102
if not self.serve_settings.telemetry_opt_out:
84103
_send_telemetry(
85104
"0",

src/sagemaker/session.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ def __init__(
243243
# sagemaker_config is validated and initialized inside :func:`_initialize`,
244244
# so if default_bucket is None and the sagemaker_config has a default S3 bucket configured,
245245
# _default_bucket_name_override will be set again inside :func:`_initialize`.
246+
self.endpoint_arn = None
246247
self._default_bucket = None
247248
self._default_bucket_name_override = default_bucket
248249
# this may also be set again inside :func:`_initialize` if it is None
@@ -4284,9 +4285,12 @@ def create_endpoint(self, endpoint_name, config_name, tags=None, wait=True, live
42844285
tags, "{}.{}.{}".format(SAGEMAKER, ENDPOINT, TAGS)
42854286
)
42864287

4287-
self.sagemaker_client.create_endpoint(
4288+
res = self.sagemaker_client.create_endpoint(
42884289
EndpointName=endpoint_name, EndpointConfigName=config_name, Tags=tags
42894290
)
4291+
if res:
4292+
self.endpoint_arn = res["EndpointArn"]
4293+
42904294
if wait:
42914295
self.wait_for_endpoint(endpoint_name, live_logging=live_logging)
42924296
return endpoint_name
@@ -4344,9 +4348,11 @@ def update_endpoint(self, endpoint_name, endpoint_config_name, wait=True):
43444348
"existing endpoint name".format(endpoint_name)
43454349
)
43464350

4347-
self.sagemaker_client.update_endpoint(
4351+
res = self.sagemaker_client.update_endpoint(
43484352
EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name
43494353
)
4354+
if res:
4355+
self.endpoint_arn = res["EndpointArn"]
43504356

43514357
if wait:
43524358
self.wait_for_endpoint(endpoint_name)

tests/unit/sagemaker/serve/utils/test_telemetry_logger.py

+23
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
_construct_url,
2121
)
2222
from sagemaker.serve.utils.exceptions import ModelBuilderException, LocalModelOutOfMemoryException
23+
from sagemaker.user_agent import SDK_VERSION
2324

2425
MOCK_SESSION = Mock()
2526
MOCK_FUNC_NAME = "Mock.deploy"
@@ -32,6 +33,7 @@
3233
)
3334
MOCK_HUGGINGFACE_ID = "meta-llama/Llama-2-7b-hf"
3435
MOCK_EXCEPTION = LocalModelOutOfMemoryException("mock raise ex")
36+
MOCK_ENDPOINT_ARN = "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test"
3537

3638

3739
class ModelBuilderMock:
@@ -72,15 +74,22 @@ def test_capture_telemetry_decorator_djl_success(self, mock_send_telemetry):
7274
mock_model_builder.model = MOCK_HUGGINGFACE_ID
7375
mock_model_builder.mode = Mode.LOCAL_CONTAINER
7476
mock_model_builder.model_server = ModelServer.DJL_SERVING
77+
mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN
7578

7679
mock_model_builder.mock_deploy()
7780

81+
args = mock_send_telemetry.call_args.args
82+
latency = str(args[5]).split("latency=")[1]
7883
expected_extra_str = (
7984
f"{MOCK_FUNC_NAME}"
8085
"&x-modelServer=4"
8186
"&x-imageTag=djl-inference:0.25.0-deepspeed0.11.0-cu118"
87+
f"&x-sdkVersion={SDK_VERSION}"
8288
f"&x-modelName={MOCK_HUGGINGFACE_ID}"
89+
f"&x-endpointArn={MOCK_ENDPOINT_ARN}"
90+
f"&x-latency={latency}"
8391
)
92+
8493
mock_send_telemetry.assert_called_once_with(
8594
"1", 2, MOCK_SESSION, None, None, expected_extra_str
8695
)
@@ -93,15 +102,22 @@ def test_capture_telemetry_decorator_tgi_success(self, mock_send_telemetry):
93102
mock_model_builder.model = MOCK_HUGGINGFACE_ID
94103
mock_model_builder.mode = Mode.LOCAL_CONTAINER
95104
mock_model_builder.model_server = ModelServer.TGI
105+
mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN
96106

97107
mock_model_builder.mock_deploy()
98108

109+
args = mock_send_telemetry.call_args.args
110+
latency = str(args[5]).split("latency=")[1]
99111
expected_extra_str = (
100112
f"{MOCK_FUNC_NAME}"
101113
"&x-modelServer=6"
102114
"&x-imageTag=huggingface-pytorch-inference:2.0.0-transformers4.28.1-cpu-py310-ubuntu20.04"
115+
f"&x-sdkVersion={SDK_VERSION}"
103116
f"&x-modelName={MOCK_HUGGINGFACE_ID}"
117+
f"&x-endpointArn={MOCK_ENDPOINT_ARN}"
118+
f"&x-latency={latency}"
104119
)
120+
105121
mock_send_telemetry.assert_called_once_with(
106122
"1", 2, MOCK_SESSION, None, None, expected_extra_str
107123
)
@@ -126,6 +142,7 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_te
126142
mock_model_builder.model = MOCK_HUGGINGFACE_ID
127143
mock_model_builder.mode = Mode.LOCAL_CONTAINER
128144
mock_model_builder.model_server = ModelServer.DJL_SERVING
145+
mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN
129146

130147
mock_exception = Mock()
131148
mock_exception_obj = MOCK_EXCEPTION
@@ -134,12 +151,18 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_te
134151
with self.assertRaises(ModelBuilderException) as _:
135152
mock_model_builder.mock_deploy(mock_exception)
136153

154+
args = mock_send_telemetry.call_args.args
155+
latency = str(args[5]).split("latency=")[1]
137156
expected_extra_str = (
138157
f"{MOCK_FUNC_NAME}"
139158
"&x-modelServer=4"
140159
"&x-imageTag=djl-inference:0.25.0-deepspeed0.11.0-cu118"
160+
f"&x-sdkVersion={SDK_VERSION}"
141161
f"&x-modelName={MOCK_HUGGINGFACE_ID}"
162+
f"&x-endpointArn={MOCK_ENDPOINT_ARN}"
163+
f"&x-latency={latency}"
142164
)
165+
143166
mock_send_telemetry.assert_called_once_with(
144167
"0",
145168
2,

0 commit comments

Comments
 (0)