     _construct_url,
 )
 from sagemaker.serve.utils.exceptions import ModelBuilderException, LocalModelOutOfMemoryException
+from sagemaker.user_agent import SDK_VERSION
 
 MOCK_SESSION = Mock()
 MOCK_FUNC_NAME = "Mock.deploy"
 )
 MOCK_HUGGINGFACE_ID = "meta-llama/Llama-2-7b-hf"
 MOCK_EXCEPTION = LocalModelOutOfMemoryException("mock raise ex")
+MOCK_ENDPOINT_ARN = "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test"
 
 
 class ModelBuilderMock:
@@ -72,15 +74,22 @@ def test_capture_telemetry_decorator_djl_success(self, mock_send_telemetry):
         mock_model_builder.model = MOCK_HUGGINGFACE_ID
         mock_model_builder.mode = Mode.LOCAL_CONTAINER
         mock_model_builder.model_server = ModelServer.DJL_SERVING
+        mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN
 
         mock_model_builder.mock_deploy()
 
+        args = mock_send_telemetry.call_args.args
+        latency = str(args[5]).split("latency=")[1]
         expected_extra_str = (
             f"{MOCK_FUNC_NAME}"
             "&x-modelServer=4"
             "&x-imageTag=djl-inference:0.25.0-deepspeed0.11.0-cu118"
+            f"&x-sdkVersion={SDK_VERSION}"
             f"&x-modelName={MOCK_HUGGINGFACE_ID}"
+            f"&x-endpointArn={MOCK_ENDPOINT_ARN}"
+            f"&x-latency={latency}"
         )
+
         mock_send_telemetry.assert_called_once_with(
             "1", 2, MOCK_SESSION, None, None, expected_extra_str
         )
@@ -93,15 +102,22 @@ def test_capture_telemetry_decorator_tgi_success(self, mock_send_telemetry):
         mock_model_builder.model = MOCK_HUGGINGFACE_ID
         mock_model_builder.mode = Mode.LOCAL_CONTAINER
         mock_model_builder.model_server = ModelServer.TGI
+        mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN
 
         mock_model_builder.mock_deploy()
 
+        args = mock_send_telemetry.call_args.args
+        latency = str(args[5]).split("latency=")[1]
         expected_extra_str = (
             f"{MOCK_FUNC_NAME}"
             "&x-modelServer=6"
             "&x-imageTag=huggingface-pytorch-inference:2.0.0-transformers4.28.1-cpu-py310-ubuntu20.04"
+            f"&x-sdkVersion={SDK_VERSION}"
             f"&x-modelName={MOCK_HUGGINGFACE_ID}"
+            f"&x-endpointArn={MOCK_ENDPOINT_ARN}"
+            f"&x-latency={latency}"
         )
+
         mock_send_telemetry.assert_called_once_with(
             "1", 2, MOCK_SESSION, None, None, expected_extra_str
         )
@@ -126,6 +142,7 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_telemetry):
         mock_model_builder.model = MOCK_HUGGINGFACE_ID
         mock_model_builder.mode = Mode.LOCAL_CONTAINER
         mock_model_builder.model_server = ModelServer.DJL_SERVING
+        mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN
 
         mock_exception = Mock()
         mock_exception_obj = MOCK_EXCEPTION
@@ -134,12 +151,18 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_telemetry):
         with self.assertRaises(ModelBuilderException) as _:
             mock_model_builder.mock_deploy(mock_exception)
 
+        args = mock_send_telemetry.call_args.args
+        latency = str(args[5]).split("latency=")[1]
         expected_extra_str = (
             f"{MOCK_FUNC_NAME}"
             "&x-modelServer=4"
             "&x-imageTag=djl-inference:0.25.0-deepspeed0.11.0-cu118"
+            f"&x-sdkVersion={SDK_VERSION}"
             f"&x-modelName={MOCK_HUGGINGFACE_ID}"
+            f"&x-endpointArn={MOCK_ENDPOINT_ARN}"
+            f"&x-latency={latency}"
        )
+
         mock_send_telemetry.assert_called_once_with(
             "0",
             2,
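The pattern shared by all three tests is that the decorator measures latency at runtime, so the expected payload cannot be fully hard-coded: each test reads the actual string from mock_send_telemetry.call_args.args[5], takes everything after "latency=", and splices that value back into expected_extra_str before asserting. Below is a minimal, self-contained sketch of that parsing step; the SDK_VERSION placeholder and the 0.1234 latency are illustrative stand-ins, not values from this PR.

# Standalone sketch of the latency-recovery pattern used in the tests above.
# SDK_VERSION and the 0.1234 latency are placeholders; in the real test they
# come from sagemaker.user_agent.SDK_VERSION and the decorator's own timer.
SDK_VERSION = "2.x.x"
MOCK_FUNC_NAME = "Mock.deploy"
MOCK_HUGGINGFACE_ID = "meta-llama/Llama-2-7b-hf"
MOCK_ENDPOINT_ARN = "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test"

# Payload in the shape the tests expect the decorator to emit
# (fields joined with "&x-", latency appended last).
emitted_extra = (
    f"{MOCK_FUNC_NAME}"
    "&x-modelServer=4"
    "&x-imageTag=djl-inference:0.25.0-deepspeed0.11.0-cu118"
    f"&x-sdkVersion={SDK_VERSION}"
    f"&x-modelName={MOCK_HUGGINGFACE_ID}"
    f"&x-endpointArn={MOCK_ENDPOINT_ARN}"
    "&x-latency=0.1234"
)

# Everything after "latency=" is the runtime-measured value; the tests splice
# it back into the expected string so the comparison can stay exact.
latency = emitted_extra.split("latency=")[1]
expected_extra_str = (
    f"{MOCK_FUNC_NAME}"
    "&x-modelServer=4"
    "&x-imageTag=djl-inference:0.25.0-deepspeed0.11.0-cu118"
    f"&x-sdkVersion={SDK_VERSION}"
    f"&x-modelName={MOCK_HUGGINGFACE_ID}"
    f"&x-endpointArn={MOCK_ENDPOINT_ARN}"
    f"&x-latency={latency}"
)
assert expected_extra_str == emitted_extra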