Skip to content

Commit 338fd76

Browse files
authored
feature: add serverless inference image_uri retrieve support (#3035)
1 parent 34a0381 commit 338fd76

29 files changed

+859
-113
lines changed

doc/overview.rst

+8-9
Original file line numberDiff line numberDiff line change
@@ -1226,28 +1226,28 @@ to configure or manage the underlying infrastructure. After you trained a model,
12261226
Serverless endpoint and then invoke the endpoint with the model to get inference results back. More information about
12271227
SageMaker Serverless Inference can be found in the `AWS documentation <https://docs.aws.amazon.com/sagemaker/latest/dg/serverless-endpoints.html>`__.
12281228

1229-
For using SageMaker Serverless Inference, if you plan to use any of the SageMaker-provided container or Bring Your Own Container
1230-
model, you will need to pass ``image_uri``. An example to use ``image_uri`` for creating MXNet model:
1229+
For using SageMaker Serverless Inference, you can use either a SageMaker-provided container or a Bring Your Own Container model.
1230+
A step-by-step example for using Serverless Inference with an MXNet image:
1231+
1232+
First, create an MXNet model
12311233

12321234
.. code:: python
12331235
12341236
from sagemaker.mxnet import MXNetModel
1237+
from sagemaker.serverless import ServerlessInferenceConfig
12351238
import sagemaker
12361239
12371240
role = sagemaker.get_execution_role()
12381241
12391242
# create MXNet Model Class
1240-
mxnet_model = MXNetModel(
1243+
model = MXNetModel(
12411244
model_data="s3://my_bucket/pretrained_model/model.tar.gz", # path to your trained sagemaker model
12421245
role=role, # iam role with permissions to create an Endpoint
12431246
entry_point="inference.py",
1244-
image_uri="763104351884.dkr.ecr.us-west-2.amazonaws.com/mxnet-inference:1.4.1-cpu-py3" # image wanted to use
1247+
py_version="py3", # Python version
1248+
framework_version="1.6.0", # MXNet framework version
12451249
)
12461250
1247-
For more Amazon SageMaker provided algorithms and containers image paths, please check this page: `Amazon SageMaker provided
1248-
algorithms and Deep Learning Containers <https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html>`_.
1249-
After creating model using ``image_uri``, you can then follow the steps below to create serverless endpoint.
1250-
12511251
To deploy serverless endpoint, you will need to create a ``ServerlessInferenceConfig``.
12521252
If you create ``ServerlessInferenceConfig`` without specifying its arguments, the default ``MemorySizeInMB`` will be **2048** and
12531253
the default ``MaxConcurrency`` will be **5** :
@@ -1283,7 +1283,6 @@ Or directly using model's ``deploy()`` method to deploy a serverless endpoint:
12831283
# Deploys the model to a SageMaker serverless endpoint
12841284
serverless_predictor = model.deploy(serverless_inference_config=serverless_config)
12851285
1286-
12871286
After deployment is complete, you can use predictor's ``predict()`` method to invoke the serverless endpoint just like
12881287
real-time endpoints:
12891288

src/sagemaker/chainer/model.py

+22-11
Original file line numberDiff line numberDiff line change
@@ -99,13 +99,10 @@ def __init__(
9999
file which should be executed as the entry point to model
100100
hosting. If ``source_dir`` is specified, then ``entry_point``
101101
must point to a file located at the root of ``source_dir``.
102-
image_uri (str): A Docker image URI (default: None). In serverless
103-
inference, it is required. More image information can be found in
104-
`Amazon SageMaker provided algorithms and Deep Learning Containers
105-
<https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html>`_.
106-
In instance based inference, if not specified, a default image for
107-
Chainer will be used. If ``framework_version`` or ``py_version``
108-
are ``None``, then ``image_uri`` is required. If also ``None``,
102+
image_uri (str): A Docker image URI (default: None). If not specified,
103+
a default image for Chainer will be used.
104+
If ``framework_version`` or ``py_version``
105+
are ``None``, then ``image_uri`` is required. If ``image_uri`` is also ``None``,
109106
then a ``ValueError`` will be raised.
110107
framework_version (str): Chainer version you want to use for
111108
executing your model training code. Defaults to ``None``. Required
@@ -143,7 +140,9 @@ def __init__(
143140

144141
self.model_server_workers = model_server_workers
145142

146-
def prepare_container_def(self, instance_type=None, accelerator_type=None):
143+
def prepare_container_def(
144+
self, instance_type=None, accelerator_type=None, serverless_inference_config=None
145+
):
147146
"""Return a container definition with framework configuration set in model environment.
148147
149148
Args:
@@ -152,21 +151,27 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None):
152151
accelerator_type (str): The Elastic Inference accelerator type to
153152
deploy to the instance for loading and making inferences to the
154153
model. For example, 'ml.eia1.medium'.
154+
serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
155+
Specifies configuration related to serverless endpoint. Instance type is
156+
not provided in serverless inference. So this is used to find image URIs.
155157
156158
Returns:
157159
dict[str, str]: A container definition object usable with the
158160
CreateModel API.
159161
"""
160162
deploy_image = self.image_uri
161163
if not deploy_image:
162-
if instance_type is None:
164+
if instance_type is None and serverless_inference_config is None:
163165
raise ValueError(
164166
"Must supply either an instance type (for choosing CPU vs GPU) or an image URI."
165167
)
166168

167169
region_name = self.sagemaker_session.boto_session.region_name
168170
deploy_image = self.serving_image_uri(
169-
region_name, instance_type, accelerator_type=accelerator_type
171+
region_name,
172+
instance_type,
173+
accelerator_type=accelerator_type,
174+
serverless_inference_config=serverless_inference_config,
170175
)
171176

172177
deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
@@ -178,13 +183,18 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None):
178183
deploy_env[MODEL_SERVER_WORKERS_PARAM_NAME.upper()] = str(self.model_server_workers)
179184
return sagemaker.container_def(deploy_image, self.model_data, deploy_env)
180185

181-
def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
186+
def serving_image_uri(
187+
self, region_name, instance_type, accelerator_type=None, serverless_inference_config=None
188+
):
182189
"""Create a URI for the serving image.
183190
184191
Args:
185192
region_name (str): AWS region where the image is uploaded.
186193
instance_type (str): SageMaker instance type. Used to determine device type
187194
(cpu/gpu/family-specific optimized).
195+
serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
196+
Specifies configuration related to serverless endpoint. Instance type is
197+
not provided in serverless inference. So this is used to determine device type.
188198
189199
Returns:
190200
str: The appropriate image URI based on the given parameters.
@@ -198,4 +208,5 @@ def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
198208
instance_type=instance_type,
199209
accelerator_type=accelerator_type,
200210
image_scope="inference",
211+
serverless_inference_config=serverless_inference_config,
201212
)

src/sagemaker/huggingface/model.py

+24-10
Original file line numberDiff line numberDiff line change
@@ -133,11 +133,7 @@ def __init__(
133133
py_version (str): Python version you want to use for executing your
134134
model training code. Defaults to ``None``. Required unless
135135
``image_uri`` is provided.
136-
image_uri (str): A Docker image URI. Defaults to None. For serverless
137-
inference, it is required. More image information can be found in
138-
`Amazon SageMaker provided algorithms and Deep Learning Containers
139-
<https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html>`_.
140-
For instance based inference, if not specified, a
136+
image_uri (str): A Docker image URI. Defaults to None. If not specified, a
141137
default image for PyTorch will be used. If ``framework_version``
142138
or ``py_version`` are ``None``, then ``image_uri`` is required. If
143139
also ``None``, then a ``ValueError`` will be raised.
@@ -272,7 +268,7 @@ def deploy(
272268
is not None. Otherwise, return None.
273269
"""
274270

275-
if not self.image_uri and instance_type.startswith("ml.inf"):
271+
if not self.image_uri and instance_type is not None and instance_type.startswith("ml.inf"):
276272
self.image_uri = self.serving_image_uri(
277273
region_name=self.sagemaker_session.boto_session.region_name,
278274
instance_type=instance_type,
@@ -365,7 +361,9 @@ def register(
365361
drift_check_baselines=drift_check_baselines,
366362
)
367363

368-
def prepare_container_def(self, instance_type=None, accelerator_type=None):
364+
def prepare_container_def(
365+
self, instance_type=None, accelerator_type=None, serverless_inference_config=None
366+
):
369367
"""A container definition with framework configuration set in model environment variables.
370368
371369
Args:
@@ -374,21 +372,27 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None):
374372
accelerator_type (str): The Elastic Inference accelerator type to
375373
deploy to the instance for loading and making inferences to the
376374
model.
375+
serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
376+
Specifies configuration related to serverless endpoint. Instance type is
377+
not provided in serverless inference. So this is used to find image URIs.
377378
378379
Returns:
379380
dict[str, str]: A container definition object usable with the
380381
CreateModel API.
381382
"""
382383
deploy_image = self.image_uri
383384
if not deploy_image:
384-
if instance_type is None:
385+
if instance_type is None and serverless_inference_config is None:
385386
raise ValueError(
386387
"Must supply either an instance type (for choosing CPU vs GPU) or an image URI."
387388
)
388389

389390
region_name = self.sagemaker_session.boto_session.region_name
390391
deploy_image = self.serving_image_uri(
391-
region_name, instance_type, accelerator_type=accelerator_type
392+
region_name,
393+
instance_type,
394+
accelerator_type=accelerator_type,
395+
serverless_inference_config=serverless_inference_config,
392396
)
393397

394398
deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
@@ -402,7 +406,13 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None):
402406
deploy_image, self.repacked_model_data or self.model_data, deploy_env
403407
)
404408

405-
def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
409+
def serving_image_uri(
410+
self,
411+
region_name,
412+
instance_type=None,
413+
accelerator_type=None,
414+
serverless_inference_config=None,
415+
):
406416
"""Create a URI for the serving image.
407417
408418
Args:
@@ -412,6 +422,9 @@ def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
412422
accelerator_type (str): The Elastic Inference accelerator type to
413423
deploy to the instance for loading and making inferences to the
414424
model.
425+
serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
426+
Specifies configuration related to serverless endpoint. Instance type is
427+
not provided in serverless inference. So this is used to determine device type.
415428
416429
Returns:
417430
str: The appropriate image URI based on the given parameters.
@@ -432,4 +445,5 @@ def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
432445
accelerator_type=accelerator_type,
433446
image_scope="inference",
434447
base_framework_version=base_framework_version,
448+
serverless_inference_config=serverless_inference_config,
435449
)

src/sagemaker/image_uris.py

+15-3
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def retrieve(
4848
tolerate_deprecated_model=False,
4949
sdk_version=None,
5050
inference_tool=None,
51+
serverless_inference_config=None,
5152
) -> str:
5253
"""Retrieves the ECR URI for the Docker image matching the given arguments.
5354
@@ -94,6 +95,9 @@ def retrieve(
9495
inference_tool (str): the tool that will be used to aid in the inference.
9596
Valid values: "neuron, None"
9697
(default: None).
98+
serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
99+
Specifies configuration related to serverless endpoint. Instance type is
100+
not provided in serverless inference. So this is used to determine processor type.
97101
98102
Returns:
99103
str: The ECR URI for the corresponding SageMaker Docker image.
@@ -159,7 +163,9 @@ def retrieve(
159163
repo = version_config["repository"]
160164

161165
processor = _processor(
162-
instance_type, config.get("processors") or version_config.get("processors")
166+
instance_type,
167+
config.get("processors") or version_config.get("processors"),
168+
serverless_inference_config,
163169
)
164170

165171
# if container version is available in .json file, utilize that
@@ -202,7 +208,9 @@ def retrieve(
202208

203209
tag = _format_tag(tag_prefix, processor, py_version, container_version, inference_tool)
204210

205-
if _should_auto_select_container_version(instance_type, distribution):
211+
if instance_type is not None and _should_auto_select_container_version(
212+
instance_type, distribution
213+
):
206214
container_versions = {
207215
"tensorflow-2.3-gpu-py37": "cu110-ubuntu18.04-v3",
208216
"tensorflow-2.3.1-gpu-py37": "cu110-ubuntu18.04",
@@ -327,7 +335,7 @@ def _registry_from_region(region, registry_dict):
327335
return registry_dict[region]
328336

329337

330-
def _processor(instance_type, available_processors):
338+
def _processor(instance_type, available_processors, serverless_inference_config=None):
331339
"""Returns the processor type for the given instance type."""
332340
if not available_processors:
333341
logger.info("Ignoring unnecessary instance type: %s.", instance_type)
@@ -337,6 +345,10 @@ def _processor(instance_type, available_processors):
337345
logger.info("Defaulting to only supported image scope: %s.", available_processors[0])
338346
return available_processors[0]
339347

348+
if serverless_inference_config is not None:
349+
logger.info("Defaulting to CPU type when using serverless inference")
350+
return "cpu"
351+
340352
if not instance_type:
341353
raise ValueError(
342354
"Empty SageMaker instance type. For options, see: "

src/sagemaker/model.py

+21-4
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,10 @@ def _init_sagemaker_session_if_does_not_exist(self, instance_type=None):
383383
self.sagemaker_session = session.Session()
384384

385385
def prepare_container_def(
386-
self, instance_type=None, accelerator_type=None
386+
self,
387+
instance_type=None,
388+
accelerator_type=None,
389+
serverless_inference_config=None,
387390
): # pylint: disable=unused-argument
388391
"""Return a dict created by ``sagemaker.container_def()``.
389392
@@ -398,6 +401,9 @@ def prepare_container_def(
398401
accelerator_type (str): The Elastic Inference accelerator type to
399402
deploy to the instance for loading and making inferences to the
400403
model. For example, 'ml.eia1.medium'.
404+
serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
405+
Specifies configuration related to serverless endpoint. Instance type is
406+
not provided in serverless inference. So this is used to find image URIs.
401407
402408
Returns:
403409
dict: A container definition object usable with the CreateModel API.
@@ -498,7 +504,9 @@ def enable_network_isolation(self):
498504
"""
499505
return self._enable_network_isolation
500506

501-
def _create_sagemaker_model(self, instance_type=None, accelerator_type=None, tags=None):
507+
def _create_sagemaker_model(
508+
self, instance_type=None, accelerator_type=None, tags=None, serverless_inference_config=None
509+
):
502510
"""Create a SageMaker Model Entity
503511
504512
Args:
@@ -514,8 +522,15 @@ def _create_sagemaker_model(self, instance_type=None, accelerator_type=None, tag
514522
'tagvalue'}] For more information about tags, see
515523
https://boto3.amazonaws.com/v1/documentation
516524
/api/latest/reference/services/sagemaker.html#SageMaker.Client.add_tags
525+
serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
526+
Specifies configuration related to serverless endpoint. Instance type is
527+
not provided in serverless inference. So this is used to find image URIs.
517528
"""
518-
container_def = self.prepare_container_def(instance_type, accelerator_type=accelerator_type)
529+
container_def = self.prepare_container_def(
530+
instance_type,
531+
accelerator_type=accelerator_type,
532+
serverless_inference_config=serverless_inference_config,
533+
)
519534

520535
self._ensure_base_name_if_needed(
521536
image_uri=container_def["Image"], script_uri=self.source_dir, model_uri=self.model_data
@@ -983,7 +998,9 @@ def deploy(
983998
if self._base_name is not None:
984999
self._base_name = "-".join((self._base_name, compiled_model_suffix))
9851000

986-
self._create_sagemaker_model(instance_type, accelerator_type, tags)
1001+
self._create_sagemaker_model(
1002+
instance_type, accelerator_type, tags, serverless_inference_config
1003+
)
9871004

9881005
serverless_inference_config_dict = (
9891006
serverless_inference_config._to_request_dict() if is_serverless else None

0 commit comments

Comments
 (0)