Skip to content

Commit ab52225

Browse files
authored
fix: account for EI and version-based ECR repo naming in serving_image_uri() (aws#1273)
1 parent 41f80a2 commit ab52225

File tree

5 files changed

+81
-71
lines changed

5 files changed

+81
-71
lines changed

src/sagemaker/mxnet/model.py

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717

1818
import packaging.version
1919

20-
from sagemaker import fw_utils
21-
2220
import sagemaker
2321
from sagemaker.fw_utils import (
2422
create_image_uri,
@@ -143,29 +141,15 @@ def prepare_container_def(self, instance_type, accelerator_type=None):
143141
dict[str, str]: A container definition object usable with the
144142
CreateModel API.
145143
"""
146-
is_mms_version = packaging.version.Version(
147-
self.framework_version
148-
) >= packaging.version.Version(self._LOWEST_MMS_VERSION)
149-
150144
deploy_image = self.image
151145
if not deploy_image:
152146
region_name = self.sagemaker_session.boto_session.region_name
153-
154-
framework_name = self.__framework_name__
155-
if is_mms_version:
156-
framework_name += "-serving"
157-
158-
deploy_image = create_image_uri(
159-
region_name,
160-
framework_name,
161-
instance_type,
162-
self.framework_version,
163-
self.py_version,
164-
accelerator_type=accelerator_type,
147+
deploy_image = self.serving_image_uri(
148+
region_name, instance_type, accelerator_type=accelerator_type
165149
)
166150

167151
deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
168-
self._upload_code(deploy_key_prefix, is_mms_version)
152+
self._upload_code(deploy_key_prefix, self._is_mms_version())
169153
deploy_env = dict(self.env)
170154
deploy_env.update(self._framework_env_vars())
171155

@@ -175,22 +159,41 @@ def prepare_container_def(self, instance_type, accelerator_type=None):
175159
deploy_image, self.repacked_model_data or self.model_data, deploy_env
176160
)
177161

178-
def serving_image_uri(self, region_name, instance_type):
162+
def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
179163
"""Create a URI for the serving image.
180164
181165
Args:
182166
region_name (str): AWS region where the image is uploaded.
183167
instance_type (str): SageMaker instance type. Used to determine device type
184168
(cpu/gpu/family-specific optimized).
169+
accelerator_type (str): The Elastic Inference accelerator type to
170+
deploy to the instance for loading and making inferences to the
171+
model (default: None). For example, 'ml.eia1.medium'.
185172
186173
Returns:
187174
str: The appropriate image URI based on the given parameters.
188175
189176
"""
190-
return fw_utils.create_image_uri(
177+
framework_name = self.__framework_name__
178+
if self._is_mms_version():
179+
framework_name = "{}-serving".format(framework_name)
180+
181+
return create_image_uri(
191182
region_name,
192-
"-".join([self.__framework_name__, "serving"]),
183+
framework_name,
193184
instance_type,
194185
self.framework_version,
195186
self.py_version,
187+
accelerator_type=accelerator_type,
196188
)
189+
190+
def _is_mms_version(self):
191+
"""Whether the framework version corresponds to an inference image using
192+
the Multi-Model Server (https://github.com/awslabs/multi-model-server).
193+
194+
Returns:
195+
bool: If the framework version corresponds to an image using MMS.
196+
"""
197+
lowest_mms_version = packaging.version.Version(self._LOWEST_MMS_VERSION)
198+
framework_version = packaging.version.Version(self.framework_version)
199+
return framework_version >= lowest_mms_version

src/sagemaker/pytorch/model.py

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515

1616
import logging
1717
import packaging.version
18-
from sagemaker import fw_utils
1918

2019
import sagemaker
2120
from sagemaker.fw_utils import (
@@ -137,34 +136,21 @@ def prepare_container_def(self, instance_type, accelerator_type=None):
137136
For example, 'ml.p2.xlarge'.
138137
accelerator_type (str): The Elastic Inference accelerator type to
139138
deploy to the instance for loading and making inferences to the
140-
model. For example, 'ml.eia1.medium'.
139+
model. Currently unsupported with PyTorch.
141140
142141
Returns:
143142
dict[str, str]: A container definition object usable with the
144143
CreateModel API.
145144
"""
146-
lowest_mms_version = packaging.version.Version(self._LOWEST_MMS_VERSION)
147-
framework_version = packaging.version.Version(self.framework_version)
148-
is_mms_version = framework_version >= lowest_mms_version
149-
150145
deploy_image = self.image
151146
if not deploy_image:
152147
region_name = self.sagemaker_session.boto_session.region_name
153-
154-
framework_name = self.__framework_name__
155-
if is_mms_version:
156-
framework_name += "-serving"
157-
158-
deploy_image = create_image_uri(
159-
region_name,
160-
framework_name,
161-
instance_type,
162-
self.framework_version,
163-
self.py_version,
164-
accelerator_type=accelerator_type,
148+
deploy_image = self.serving_image_uri(
149+
region_name, instance_type, accelerator_type=accelerator_type
165150
)
151+
166152
deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
167-
self._upload_code(deploy_key_prefix, repack=is_mms_version)
153+
self._upload_code(deploy_key_prefix, repack=self._is_mms_version())
168154
deploy_env = dict(self.env)
169155
deploy_env.update(self._framework_env_vars())
170156

@@ -174,22 +160,41 @@ def prepare_container_def(self, instance_type, accelerator_type=None):
174160
deploy_image, self.repacked_model_data or self.model_data, deploy_env
175161
)
176162

177-
def serving_image_uri(self, region_name, instance_type):
163+
def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
178164
"""Create a URI for the serving image.
179165
180166
Args:
181167
region_name (str): AWS region where the image is uploaded.
182168
instance_type (str): SageMaker instance type. Used to determine device type
183169
(cpu/gpu/family-specific optimized).
170+
accelerator_type (str): The Elastic Inference accelerator type to
171+
deploy to the instance for loading and making inferences to the
172+
model. Currently unsupported with PyTorch.
184173
185174
Returns:
186175
str: The appropriate image URI based on the given parameters.
187176
188177
"""
189-
return fw_utils.create_image_uri(
178+
framework_name = self.__framework_name__
179+
if self._is_mms_version():
180+
framework_name = "{}-serving".format(framework_name)
181+
182+
return create_image_uri(
190183
region_name,
191-
"-".join([self.__framework_name__, "serving"]),
184+
framework_name,
192185
instance_type,
193186
self.framework_version,
194187
self.py_version,
188+
accelerator_type=accelerator_type,
195189
)
190+
191+
def _is_mms_version(self):
192+
"""Whether the framework version corresponds to an inference image using
193+
the Multi-Model Server (https://github.com/awslabs/multi-model-server).
194+
195+
Returns:
196+
bool: If the framework version corresponds to an image using MMS.
197+
"""
198+
lowest_mms_version = packaging.version.Version(self._LOWEST_MMS_VERSION)
199+
framework_version = packaging.version.Version(self.framework_version)
200+
return framework_version >= lowest_mms_version

src/sagemaker/tensorflow/model.py

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@
1515

1616
import logging
1717

18-
from sagemaker import fw_utils
19-
2018
import sagemaker
2119
from sagemaker.fw_utils import (
2220
create_image_uri,
@@ -146,13 +144,8 @@ def prepare_container_def(self, instance_type, accelerator_type=None):
146144
deploy_image = self.image
147145
if not deploy_image:
148146
region_name = self.sagemaker_session.boto_region_name
149-
deploy_image = create_image_uri(
150-
region_name,
151-
self.__framework_name__,
152-
instance_type,
153-
self.framework_version,
154-
self.py_version,
155-
accelerator_type=accelerator_type,
147+
deploy_image = self.serving_image_uri(
148+
region_name, instance_type, accelerator_type=accelerator_type
156149
)
157150

158151
deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
@@ -165,22 +158,26 @@ def prepare_container_def(self, instance_type, accelerator_type=None):
165158

166159
return sagemaker.container_def(deploy_image, self.model_data, deploy_env)
167160

168-
def serving_image_uri(self, region_name, instance_type):
161+
def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
169162
"""Create a URI for the serving image.
170163
171164
Args:
172165
region_name (str): AWS region where the image is uploaded.
173166
instance_type (str): SageMaker instance type. Used to determine device type
174167
(cpu/gpu/family-specific optimized).
168+
accelerator_type (str): The Elastic Inference accelerator type to
169+
deploy to the instance for loading and making inferences to the
170+
model (default: None). For example, 'ml.eia1.medium'.
175171
176172
Returns:
177173
str: The appropriate image URI based on the given parameters.
178174
179175
"""
180-
return fw_utils.create_image_uri(
176+
return create_image_uri(
181177
region_name,
182-
"-".join([self.__framework_name__, "serving"]),
178+
self.__framework_name__,
183179
instance_type,
184180
self.framework_version,
185181
self.py_version,
182+
accelerator_type=accelerator_type,
186183
)

src/sagemaker/tensorflow/serving.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,16 +276,21 @@ def _get_image_uri(self, instance_type, accelerator_type=None):
276276
accelerator_type=accelerator_type,
277277
)
278278

279-
def serving_image_uri(self, region_name, instance_type): # pylint: disable=unused-argument
279+
def serving_image_uri(
280+
self, region_name, instance_type, accelerator_type=None
281+
): # pylint: disable=unused-argument
280282
"""Create a URI for the serving image.
281283
282284
Args:
283285
region_name (str): AWS region where the image is uploaded.
284286
instance_type (str): SageMaker instance type. Used to determine device type
285287
(cpu/gpu/family-specific optimized).
288+
accelerator_type (str): The Elastic Inference accelerator type to
289+
deploy to the instance for loading and making inferences to the
290+
model (default: None). For example, 'ml.eia1.medium'.
286291
287292
Returns:
288293
str: The appropriate image URI based on the given parameters.
289294
290295
"""
291-
return self._get_image_uri(instance_type=instance_type)
296+
return self._get_image_uri(instance_type=instance_type, accelerator_type=accelerator_type)

tests/unit/test_airflow.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1030,7 +1030,7 @@ def test_model_config_from_framework_estimator(sagemaker_session):
10301030
entry_point="{{ entry_point }}",
10311031
source_dir="{{ source_dir }}",
10321032
py_version="py3",
1033-
framework_version="1.3.0",
1033+
framework_version="1.6.0",
10341034
role="{{ role }}",
10351035
train_instance_count=1,
10361036
train_instance_type="ml.m4.xlarge",
@@ -1051,9 +1051,9 @@ def test_model_config_from_framework_estimator(sagemaker_session):
10511051
task_type="training",
10521052
)
10531053
expected_config = {
1054-
"ModelName": "sagemaker-mxnet-serving-%s" % TIME_STAMP,
1054+
"ModelName": "mxnet-inference-%s" % TIME_STAMP,
10551055
"PrimaryContainer": {
1056-
"Image": "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-serving:1.3.0-cpu-py3",
1056+
"Image": "763104351884.dkr.ecr.us-west-2.amazonaws.com/mxnet-inference:1.6.0-cpu-py3",
10571057
"Environment": {
10581058
"SAGEMAKER_PROGRAM": "{{ entry_point }}",
10591059
"SAGEMAKER_SUBMIT_DIRECTORY": "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Training']"
@@ -1184,7 +1184,7 @@ def test_transform_config_from_framework_estimator(sagemaker_session):
11841184
entry_point="{{ entry_point }}",
11851185
source_dir="{{ source_dir }}",
11861186
py_version="py3",
1187-
framework_version="1.3.0",
1187+
framework_version="1.6.0",
11881188
role="{{ role }}",
11891189
train_instance_count=1,
11901190
train_instance_type="ml.m4.xlarge",
@@ -1209,9 +1209,9 @@ def test_transform_config_from_framework_estimator(sagemaker_session):
12091209
)
12101210
expected_config = {
12111211
"Model": {
1212-
"ModelName": "sagemaker-mxnet-serving-%s" % TIME_STAMP,
1212+
"ModelName": "mxnet-inference-%s" % TIME_STAMP,
12131213
"PrimaryContainer": {
1214-
"Image": "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-serving:1.3.0-gpu-py3",
1214+
"Image": "763104351884.dkr.ecr.us-west-2.amazonaws.com/mxnet-inference:1.6.0-gpu-py3",
12151215
"Environment": {
12161216
"SAGEMAKER_PROGRAM": "{{ entry_point }}",
12171217
"SAGEMAKER_SUBMIT_DIRECTORY": "s3://output/{{ ti.xcom_pull(task_ids='task_id')"
@@ -1228,7 +1228,7 @@ def test_transform_config_from_framework_estimator(sagemaker_session):
12281228
},
12291229
"Transform": {
12301230
"TransformJobName": "{{ base_job_name }}-%s" % TIME_STAMP,
1231-
"ModelName": "sagemaker-mxnet-serving-%s" % TIME_STAMP,
1231+
"ModelName": "mxnet-inference-%s" % TIME_STAMP,
12321232
"TransformInput": {
12331233
"DataSource": {
12341234
"S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": "{{ transform_data }}"}
@@ -1425,7 +1425,7 @@ def test_deploy_config_from_framework_estimator(sagemaker_session):
14251425
entry_point="{{ entry_point }}",
14261426
source_dir="{{ source_dir }}",
14271427
py_version="py3",
1428-
framework_version="1.3.0",
1428+
framework_version="1.6.0",
14291429
role="{{ role }}",
14301430
train_instance_count=1,
14311431
train_instance_type="ml.m4.xlarge",
@@ -1449,9 +1449,9 @@ def test_deploy_config_from_framework_estimator(sagemaker_session):
14491449
)
14501450
expected_config = {
14511451
"Model": {
1452-
"ModelName": "sagemaker-mxnet-serving-%s" % TIME_STAMP,
1452+
"ModelName": "mxnet-inference-%s" % TIME_STAMP,
14531453
"PrimaryContainer": {
1454-
"Image": "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-serving:1.3.0-cpu-py3",
1454+
"Image": "763104351884.dkr.ecr.us-west-2.amazonaws.com/mxnet-inference:1.6.0-cpu-py3",
14551455
"Environment": {
14561456
"SAGEMAKER_PROGRAM": "{{ entry_point }}",
14571457
"SAGEMAKER_SUBMIT_DIRECTORY": "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Training']"
@@ -1466,20 +1466,20 @@ def test_deploy_config_from_framework_estimator(sagemaker_session):
14661466
"ExecutionRoleArn": "{{ role }}",
14671467
},
14681468
"EndpointConfig": {
1469-
"EndpointConfigName": "sagemaker-mxnet-serving-%s" % TIME_STAMP,
1469+
"EndpointConfigName": "mxnet-inference-%s" % TIME_STAMP,
14701470
"ProductionVariants": [
14711471
{
14721472
"InstanceType": "ml.c4.large",
14731473
"InitialInstanceCount": "{{ instance_count}}",
1474-
"ModelName": "sagemaker-mxnet-serving-%s" % TIME_STAMP,
1474+
"ModelName": "mxnet-inference-%s" % TIME_STAMP,
14751475
"VariantName": "AllTraffic",
14761476
"InitialVariantWeight": 1,
14771477
}
14781478
],
14791479
},
14801480
"Endpoint": {
14811481
"EndpointName": "mxnet-endpoint",
1482-
"EndpointConfigName": "sagemaker-mxnet-serving-%s" % TIME_STAMP,
1482+
"EndpointConfigName": "mxnet-inference-%s" % TIME_STAMP,
14831483
},
14841484
}
14851485

0 commit comments

Comments (0)