Skip to content

Commit bdd6c63

Browse files
author
Jonathan Makunga
committed
Emit additional telemetry metrics
1 parent 0bbc884 commit bdd6c63

File tree

7 files changed

+54
-2
lines changed

7 files changed

+54
-2
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,4 @@ env/
3535
tests/data/**/_repack_model.py
3636
tests/data/experiment/sagemaker-dev-1.0.tar.gz
3737
src/sagemaker/serve/tmp_workspace
38+
src/sagemaker/image_uri_config/pysdk_version.json

setup.py

+15
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,21 @@ def read_version():
3232
return read("VERSION").strip()
3333

3434

35+
def pysdk_version():
    """Persist the SageMaker Python SDK version in the package config directory.

    Writes a JSON document of the form ``{"version": "<version>"}`` to
    ``src/sagemaker/image_uri_config/pysdk_version.json`` under the directory
    containing this file. The version value comes from ``read_version()``.
    """
    import json  # local import: only this build-time helper needs it

    target = os.path.join(
        os.path.dirname(__file__), "src", "sagemaker", "image_uri_config", "pysdk_version.json"
    )
    # json.dumps escapes quotes and backslashes, so the file is always valid
    # JSON; for ordinary version strings the output matches the previous
    # hand-built string exactly.
    with open(target, "w", encoding="utf-8") as v:
        v.write(json.dumps({"version": read_version()}))
45+
46+
47+
pysdk_version()
48+
49+
3550
def read_requirements(filename):
3651
"""Reads requirements file which lists package dependencies.
3752

src/sagemaker/serve/utils/telemetry_logger.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,11 @@
1515
import logging
1616
import requests
1717

18-
from sagemaker import Session
18+
from sagemaker import Session, exceptions
1919
from sagemaker.serve.mode.function_pointers import Mode
2020
from sagemaker.serve.utils.exceptions import ModelBuilderException
2121
from sagemaker.serve.utils.types import ModelServer
22+
from sagemaker.utils import pysdk_version
2223

2324
logger = logging.getLogger(__name__)
2425

@@ -63,11 +64,15 @@ def wrapper(self, *args, **kwargs):
6364
f"{func_name}"
6465
f"&x-modelServer={MODEL_SERVER_TO_CODE[str(self.model_server)]}"
6566
f"&x-imageTag={image_uri_tail}"
67+
f"&x-pySdkVersion={pysdk_version()}"
6668
)
6769

6870
if self.model_server == ModelServer.DJL_SERVING or self.model_server == ModelServer.TGI:
6971
extra += f"&x-modelName={self.model}"
7072

73+
if self.sagemaker_session.endpoint_arn:
74+
extra += f"&x-endpointArn={self.sagemaker_session.endpoint_arn}"
75+
7176
try:
7277
response = func(self, *args, **kwargs)
7378
if not self.serve_settings.telemetry_opt_out:
@@ -79,7 +84,11 @@ def wrapper(self, *args, **kwargs):
7984
None,
8085
extra,
8186
)
82-
except ModelBuilderException as e:
87+
except (
88+
ModelBuilderException,
89+
exceptions.CapacityError,
90+
exceptions.UnexpectedStatusException,
91+
) as e:
8392
if not self.serve_settings.telemetry_opt_out:
8493
_send_telemetry(
8594
"0",

src/sagemaker/session.py

+2
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ def __init__(
243243
# sagemaker_config is validated and initialized inside :func:`_initialize`,
244244
# so if default_bucket is None and the sagemaker_config has a default S3 bucket configured,
245245
# _default_bucket_name_override will be set again inside :func:`_initialize`.
246+
self.endpoint_arn = None
246247
self._default_bucket = None
247248
self._default_bucket_name_override = default_bucket
248249
# this may also be set again inside :func:`_initialize` if it is None
@@ -5054,6 +5055,7 @@ def wait_for_endpoint(self, endpoint, poll=DEFAULT_EP_POLL, live_logging=False):
50545055
poll=EP_LOGGER_POLL,
50555056
)
50565057
status = desc["EndpointStatus"]
5058+
self.endpoint_arn = desc["EndpointArn"]
50575059

50585060
if status != "InService":
50595061
reason = desc.get("FailureReason", None)

src/sagemaker/utils.py

+7
Original file line numberDiff line numberDiff line change
@@ -1489,3 +1489,10 @@ def format_tags(tags: Tags) -> List[TagsDict]:
14891489
return [{"Key": str(k), "Value": str(v)} for k, v in tags.items()]
14901490

14911491
return tags
1492+
1493+
1494+
def pysdk_version() -> str:
    """Return the current SageMaker Python SDK version.

    The version is read from ``image_uri_config/pysdk_version.json`` located in
    the same directory as this module.

    Returns:
        str: The value stored under the ``"version"`` key, or ``"unknown"`` when
            the file is missing, unreadable, not valid JSON, or has no usable
            ``"version"`` entry.
    """
    v_path = os.path.join(os.path.dirname(__file__), "image_uri_config", "pysdk_version.json")
    try:
        with open(v_path, encoding="utf-8") as v:
            data = json.load(v)
    except (OSError, ValueError):
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        # A missing or corrupt version file must not break callers that only
        # want the version as an informational field.
        return "unknown"
    version = data.get("version") if isinstance(data, dict) else None
    return str(version) if version else "unknown"

tests/unit/sagemaker/serve/utils/test_telemetry_logger.py

+14
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
_construct_url,
2121
)
2222
from sagemaker.serve.utils.exceptions import ModelBuilderException, LocalModelOutOfMemoryException
23+
from sagemaker.utils import pysdk_version
2324

2425
MOCK_SESSION = Mock()
2526
MOCK_FUNC_NAME = "Mock.deploy"
@@ -32,6 +33,10 @@
3233
)
3334
MOCK_HUGGINGFACE_ID = "meta-llama/Llama-2-7b-hf"
3435
MOCK_EXCEPTION = LocalModelOutOfMemoryException("mock raise ex")
36+
MOCK_ENDPOINT_ARN = (
37+
"arn:aws:sagemaker:us-west-2:123456789012:endpoint/huggingface-pytorch-tgi-inference-2024-02-06"
38+
"-04-06-23-819"
39+
)
3540

3641

3742
class ModelBuilderMock:
@@ -72,14 +77,17 @@ def test_capture_telemetry_decorator_djl_success(self, mock_send_telemetry):
7277
mock_model_builder.model = MOCK_HUGGINGFACE_ID
7378
mock_model_builder.mode = Mode.LOCAL_CONTAINER
7479
mock_model_builder.model_server = ModelServer.DJL_SERVING
80+
mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN
7581

7682
mock_model_builder.mock_deploy()
7783

7884
expected_extra_str = (
7985
f"{MOCK_FUNC_NAME}"
8086
"&x-modelServer=4"
8187
"&x-imageTag=djl-inference:0.25.0-deepspeed0.11.0-cu118"
88+
f"&x-pySdkVersion={pysdk_version()}"
8289
f"&x-modelName={MOCK_HUGGINGFACE_ID}"
90+
f"&x-endpointArn={MOCK_ENDPOINT_ARN}"
8391
)
8492
mock_send_telemetry.assert_called_once_with(
8593
"1", 2, MOCK_SESSION, None, None, expected_extra_str
@@ -93,14 +101,17 @@ def test_capture_telemetry_decorator_tgi_success(self, mock_send_telemetry):
93101
mock_model_builder.model = MOCK_HUGGINGFACE_ID
94102
mock_model_builder.mode = Mode.LOCAL_CONTAINER
95103
mock_model_builder.model_server = ModelServer.TGI
104+
mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN
96105

97106
mock_model_builder.mock_deploy()
98107

99108
expected_extra_str = (
100109
f"{MOCK_FUNC_NAME}"
101110
"&x-modelServer=6"
102111
"&x-imageTag=huggingface-pytorch-inference:2.0.0-transformers4.28.1-cpu-py310-ubuntu20.04"
112+
f"&x-pySdkVersion={pysdk_version()}"
103113
f"&x-modelName={MOCK_HUGGINGFACE_ID}"
114+
f"&x-endpointArn={MOCK_ENDPOINT_ARN}"
104115
)
105116
mock_send_telemetry.assert_called_once_with(
106117
"1", 2, MOCK_SESSION, None, None, expected_extra_str
@@ -126,6 +137,7 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_te
126137
mock_model_builder.model = MOCK_HUGGINGFACE_ID
127138
mock_model_builder.mode = Mode.LOCAL_CONTAINER
128139
mock_model_builder.model_server = ModelServer.DJL_SERVING
140+
mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN
129141

130142
mock_exception = Mock()
131143
mock_exception_obj = MOCK_EXCEPTION
@@ -138,7 +150,9 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_te
138150
f"{MOCK_FUNC_NAME}"
139151
"&x-modelServer=4"
140152
"&x-imageTag=djl-inference:0.25.0-deepspeed0.11.0-cu118"
153+
f"&x-pySdkVersion={pysdk_version()}"
141154
f"&x-modelName={MOCK_HUGGINGFACE_ID}"
155+
f"&x-endpointArn={MOCK_ENDPOINT_ARN}"
142156
)
143157
mock_send_telemetry.assert_called_once_with(
144158
"0",

tests/unit/test_utils.py

+4
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
resolve_nested_dict_value_from_config,
4343
update_list_of_dicts_with_values_from_config,
4444
volume_size_supported,
45+
pysdk_version,
4546
)
4647
from tests.unit.sagemaker.workflow.helpers import CustomStep
4748
from sagemaker.workflow.parameters import ParameterString, ParameterInteger
@@ -1748,3 +1749,6 @@ def test_instance_family_from_full_instance_type(self):
17481749

17491750
for instance_type, family in instance_type_to_family_test_dict.items():
17501751
self.assertEqual(family, get_instance_type_family(instance_type))
1752+
1753+
def test_pysdk_version(self):
    """The SDK version lookup should yield a value rather than None."""
    version = pysdk_version()
    self.assertIsNotNone(version)

0 commit comments

Comments
 (0)