Skip to content

Commit d7ea758

Browse files
committed
feature: add serverless inference image_uri retrieve support
1 parent 64ce910 commit d7ea758

30 files changed

+858
-108
lines changed

doc/overview.rst

+8-9
Original file line numberDiff line numberDiff line change
@@ -1226,28 +1226,28 @@ to configure or manage the underlying infrastructure. After you trained a model,
12261226
Serverless endpoint and then invoke the endpoint with the model to get inference results back. More information about
12271227
SageMaker Serverless Inference can be found in the `AWS documentation <https://docs.aws.amazon.com/sagemaker/latest/dg/serverless-endpoints.html>`__.
12281228

1229-
For using SageMaker Serverless Inference, if you plan to use any of the SageMaker-provided container or Bring Your Own Container
1230-
model, you will need to pass ``image_uri``. An example to use ``image_uri`` for creating MXNet model:
1229+
For using SageMaker Serverless Inference, you can either use a SageMaker-provided container or a Bring Your Own Container model.
1230+
A step-by-step example for using Serverless Inference with an MXNet image:
1231+
1232+
First, create an MXNet model
12311233

12321234
.. code:: python
12331235
12341236
from sagemaker.mxnet import MXNetModel
1237+
from sagemaker.serverless import ServerlessInferenceConfig
12351238
import sagemaker
12361239
12371240
role = sagemaker.get_execution_role()
12381241
12391242
# create MXNet Model Class
1240-
mxnet_model = MXNetModel(
1243+
model = MXNetModel(
12411244
model_data="s3://my_bucket/pretrained_model/model.tar.gz", # path to your trained sagemaker model
12421245
role=role, # iam role with permissions to create an Endpoint
12431246
entry_point="inference.py",
1244-
image_uri="763104351884.dkr.ecr.us-west-2.amazonaws.com/mxnet-inference:1.4.1-cpu-py3" # image wanted to use
1247+
py_version="py3", # Python version
1248+
framework_version="1.6.0", # MXNet framework version
12451249
)
12461250
1247-
For more Amazon SageMaker provided algorithms and containers image paths, please check this page: `Amazon SageMaker provided
1248-
algorithms and Deep Learning Containers <https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html>`_.
1249-
After creating model using ``image_uri``, you can then follow the steps below to create serverless endpoint.
1250-
12511251
To deploy serverless endpoint, you will need to create a ``ServerlessInferenceConfig``.
12521252
If you create ``ServerlessInferenceConfig`` without specifying its arguments, the default ``MemorySizeInMB`` will be **2048** and
12531253
the default ``MaxConcurrency`` will be **5** :
@@ -1283,7 +1283,6 @@ Or directly using model's ``deploy()`` method to deploy a serverless endpoint:
12831283
# Deploys the model to a SageMaker serverless endpoint
12841284
serverless_predictor = model.deploy(serverless_inference_config=serverless_config)
12851285
1286-
12871286
After deployment is complete, you can use predictor's ``predict()`` method to invoke the serverless endpoint just like
12881287
real-time endpoints:
12891288

src/sagemaker/chainer/model.py

+21-10
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,9 @@ def __init__(
9999
file which should be executed as the entry point to model
100100
hosting. If ``source_dir`` is specified, then ``entry_point``
101101
must point to a file located at the root of ``source_dir``.
102-
image_uri (str): A Docker image URI (default: None). In serverless
103-
inferece, it is required. More image information can be found in
104-
`Amazon SageMaker provided algorithms and Deep Learning Containers
105-
<https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html>`_.
106-
In instance based inference, if not specified, a default image for
107-
Chainer will be used. If ``framework_version`` or ``py_version``
102+
image_uri (str): A Docker image URI (default: None). If not specified,
103+
a default image for Chainer will be used.
104+
If ``framework_version`` or ``py_version``
108105
are ``None``, then ``image_uri`` is required. If also ``None``,
109106
then a ``ValueError`` will be raised.
110107
framework_version (str): Chainer version you want to use for
@@ -143,7 +140,9 @@ def __init__(
143140

144141
self.model_server_workers = model_server_workers
145142

146-
def prepare_container_def(self, instance_type=None, accelerator_type=None):
143+
def prepare_container_def(
144+
self, instance_type=None, accelerator_type=None, serverless_inference_config=None
145+
):
147146
"""Return a container definition with framework configuration set in model environment.
148147
149148
Args:
@@ -152,21 +151,27 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None):
152151
accelerator_type (str): The Elastic Inference accelerator type to
153152
deploy to the instance for loading and making inferences to the
154153
model. For example, 'ml.eia1.medium'.
154+
serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
155+
Specifies configuration related to serverless endpoint. Instance type is
156+
not provided in serverless inference. So this is used to find image URIs.
155157
156158
Returns:
157159
dict[str, str]: A container definition object usable with the
158160
CreateModel API.
159161
"""
160162
deploy_image = self.image_uri
161163
if not deploy_image:
162-
if instance_type is None:
164+
if instance_type is None and serverless_inference_config is None:
163165
raise ValueError(
164166
"Must supply either an instance type (for choosing CPU vs GPU) or an image URI."
165167
)
166168

167169
region_name = self.sagemaker_session.boto_session.region_name
168170
deploy_image = self.serving_image_uri(
169-
region_name, instance_type, accelerator_type=accelerator_type
171+
region_name,
172+
instance_type,
173+
accelerator_type=accelerator_type,
174+
serverless_inference_config=serverless_inference_config,
170175
)
171176

172177
deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
@@ -178,13 +183,18 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None):
178183
deploy_env[MODEL_SERVER_WORKERS_PARAM_NAME.upper()] = str(self.model_server_workers)
179184
return sagemaker.container_def(deploy_image, self.model_data, deploy_env)
180185

181-
def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
186+
def serving_image_uri(
187+
self, region_name, instance_type, accelerator_type=None, serverless_inference_config=None
188+
):
182189
"""Create a URI for the serving image.
183190
184191
Args:
185192
region_name (str): AWS region where the image is uploaded.
186193
instance_type (str): SageMaker instance type. Used to determine device type
187194
(cpu/gpu/family-specific optimized).
195+
serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
196+
Specifies configuration related to serverless endpoint. Instance type is
197+
not provided in serverless inference. So this is used to determine device type.
188198
189199
Returns:
190200
str: The appropriate image URI based on the given parameters.
@@ -198,4 +208,5 @@ def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
198208
instance_type=instance_type,
199209
accelerator_type=accelerator_type,
200210
image_scope="inference",
211+
serverless_inference_config=serverless_inference_config,
201212
)

src/sagemaker/huggingface/model.py

+24-10
Original file line numberDiff line numberDiff line change
@@ -133,11 +133,7 @@ def __init__(
133133
py_version (str): Python version you want to use for executing your
134134
model training code. Defaults to ``None``. Required unless
135135
``image_uri`` is provided.
136-
image_uri (str): A Docker image URI. Defaults to None. For serverless
137-
inferece, it is required. More image information can be found in
138-
`Amazon SageMaker provided algorithms and Deep Learning Containers
139-
<https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html>`_.
140-
For instance based inference, if not specified, a
136+
image_uri (str): A Docker image URI. Defaults to None. If not specified, a
141137
default image for PyTorch will be used. If ``framework_version``
142138
or ``py_version`` are ``None``, then ``image_uri`` is required. If
143139
also ``None``, then a ``ValueError`` will be raised.
@@ -272,7 +268,7 @@ def deploy(
272268
is not None. Otherwise, return None.
273269
"""
274270

275-
if not self.image_uri and instance_type.startswith("ml.inf"):
271+
if not self.image_uri and instance_type is not None and instance_type.startswith("ml.inf"):
276272
self.image_uri = self.serving_image_uri(
277273
region_name=self.sagemaker_session.boto_session.region_name,
278274
instance_type=instance_type,
@@ -365,7 +361,9 @@ def register(
365361
drift_check_baselines=drift_check_baselines,
366362
)
367363

368-
def prepare_container_def(self, instance_type=None, accelerator_type=None):
364+
def prepare_container_def(
365+
self, instance_type=None, accelerator_type=None, serverless_inference_config=None
366+
):
369367
"""A container definition with framework configuration set in model environment variables.
370368
371369
Args:
@@ -374,21 +372,27 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None):
374372
accelerator_type (str): The Elastic Inference accelerator type to
375373
deploy to the instance for loading and making inferences to the
376374
model.
375+
serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
376+
Specifies configuration related to serverless endpoint. Instance type is
377+
not provided in serverless inference. So this is used to find image URIs.
377378
378379
Returns:
379380
dict[str, str]: A container definition object usable with the
380381
CreateModel API.
381382
"""
382383
deploy_image = self.image_uri
383384
if not deploy_image:
384-
if instance_type is None:
385+
if instance_type is None and serverless_inference_config is None:
385386
raise ValueError(
386387
"Must supply either an instance type (for choosing CPU vs GPU) or an image URI."
387388
)
388389

389390
region_name = self.sagemaker_session.boto_session.region_name
390391
deploy_image = self.serving_image_uri(
391-
region_name, instance_type, accelerator_type=accelerator_type
392+
region_name,
393+
instance_type,
394+
accelerator_type=accelerator_type,
395+
serverless_inference_config=serverless_inference_config,
392396
)
393397

394398
deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
@@ -402,7 +406,13 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None):
402406
deploy_image, self.repacked_model_data or self.model_data, deploy_env
403407
)
404408

405-
def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
409+
def serving_image_uri(
410+
self,
411+
region_name,
412+
instance_type=None,
413+
accelerator_type=None,
414+
serverless_inference_config=None,
415+
):
406416
"""Create a URI for the serving image.
407417
408418
Args:
@@ -412,6 +422,9 @@ def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
412422
accelerator_type (str): The Elastic Inference accelerator type to
413423
deploy to the instance for loading and making inferences to the
414424
model.
425+
serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
426+
Specifies configuration related to serverless endpoint. Instance type is
427+
not provided in serverless inference. So this is used to determine device type.
415428
416429
Returns:
417430
str: The appropriate image URI based on the given parameters.
@@ -432,4 +445,5 @@ def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
432445
accelerator_type=accelerator_type,
433446
image_scope="inference",
434447
base_framework_version=base_framework_version,
448+
serverless_inference_config=serverless_inference_config,
435449
)

src/sagemaker/image_uris.py

+15-3
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def retrieve(
4848
tolerate_deprecated_model=False,
4949
sdk_version=None,
5050
inference_tool=None,
51+
serverless_inference_config=None,
5152
) -> str:
5253
"""Retrieves the ECR URI for the Docker image matching the given arguments.
5354
@@ -94,6 +95,9 @@ def retrieve(
9495
inference_tool (str): the tool that will be used to aid in the inference.
9596
Valid values: "neuron, None"
9697
(default: None).
98+
serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
99+
Specifies configuration related to serverless endpoint. Instance type is
100+
not provided in serverless inference. So this is used to determine processor type.
97101
98102
Returns:
99103
str: The ECR URI for the corresponding SageMaker Docker image.
@@ -159,7 +163,9 @@ def retrieve(
159163
repo = version_config["repository"]
160164

161165
processor = _processor(
162-
instance_type, config.get("processors") or version_config.get("processors")
166+
instance_type,
167+
config.get("processors") or version_config.get("processors"),
168+
serverless_inference_config,
163169
)
164170

165171
# if container version is available in .json file, utilize that
@@ -202,7 +208,9 @@ def retrieve(
202208

203209
tag = _format_tag(tag_prefix, processor, py_version, container_version, inference_tool)
204210

205-
if _should_auto_select_container_version(instance_type, distribution):
211+
if instance_type is not None and _should_auto_select_container_version(
212+
instance_type, distribution
213+
):
206214
container_versions = {
207215
"tensorflow-2.3-gpu-py37": "cu110-ubuntu18.04-v3",
208216
"tensorflow-2.3.1-gpu-py37": "cu110-ubuntu18.04",
@@ -327,7 +335,7 @@ def _registry_from_region(region, registry_dict):
327335
return registry_dict[region]
328336

329337

330-
def _processor(instance_type, available_processors):
338+
def _processor(instance_type, available_processors, serverless_inference_config=None):
331339
"""Returns the processor type for the given instance type."""
332340
if not available_processors:
333341
logger.info("Ignoring unnecessary instance type: %s.", instance_type)
@@ -337,6 +345,10 @@ def _processor(instance_type, available_processors):
337345
logger.info("Defaulting to only supported image scope: %s.", available_processors[0])
338346
return available_processors[0]
339347

348+
if serverless_inference_config is not None:
349+
logger.info("Defaulting to CPU type when using serverless inference")
350+
return "cpu"
351+
340352
if not instance_type:
341353
raise ValueError(
342354
"Empty SageMaker instance type. For options, see: "

src/sagemaker/model.py

+21-4
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,10 @@ def _init_sagemaker_session_if_does_not_exist(self, instance_type=None):
384384
self.sagemaker_session = session.Session()
385385

386386
def prepare_container_def(
387-
self, instance_type=None, accelerator_type=None
387+
self,
388+
instance_type=None,
389+
accelerator_type=None,
390+
serverless_inference_config=None,
388391
): # pylint: disable=unused-argument
389392
"""Return a dict created by ``sagemaker.container_def()``.
390393
@@ -399,6 +402,9 @@ def prepare_container_def(
399402
accelerator_type (str): The Elastic Inference accelerator type to
400403
deploy to the instance for loading and making inferences to the
401404
model. For example, 'ml.eia1.medium'.
405+
serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
406+
Specifies configuration related to serverless endpoint. Instance type is
407+
not provided in serverless inference. So this is used to find image URIs.
402408
403409
Returns:
404410
dict: A container definition object usable with the CreateModel API.
@@ -499,7 +505,9 @@ def enable_network_isolation(self):
499505
"""
500506
return self._enable_network_isolation
501507

502-
def _create_sagemaker_model(self, instance_type=None, accelerator_type=None, tags=None):
508+
def _create_sagemaker_model(
509+
self, instance_type=None, accelerator_type=None, tags=None, serverless_inference_config=None
510+
):
503511
"""Create a SageMaker Model Entity
504512
505513
Args:
@@ -515,8 +523,15 @@ def _create_sagemaker_model(self, instance_type=None, accelerator_type=None, tag
515523
'tagvalue'}] For more information about tags, see
516524
https://boto3.amazonaws.com/v1/documentation
517525
/api/latest/reference/services/sagemaker.html#SageMaker.Client.add_tags
526+
serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
527+
Specifies configuration related to serverless endpoint. Instance type is
528+
not provided in serverless inference. So this is used to find image URIs.
518529
"""
519-
container_def = self.prepare_container_def(instance_type, accelerator_type=accelerator_type)
530+
container_def = self.prepare_container_def(
531+
instance_type,
532+
accelerator_type=accelerator_type,
533+
serverless_inference_config=serverless_inference_config,
534+
)
520535

521536
self._ensure_base_name_if_needed(
522537
image_uri=container_def["Image"], script_uri=self.source_dir, model_uri=self.model_data
@@ -984,7 +999,9 @@ def deploy(
984999
if self._base_name is not None:
9851000
self._base_name = "-".join((self._base_name, compiled_model_suffix))
9861001

987-
self._create_sagemaker_model(instance_type, accelerator_type, tags)
1002+
self._create_sagemaker_model(
1003+
instance_type, accelerator_type, tags, serverless_inference_config
1004+
)
9881005

9891006
serverless_inference_config_dict = (
9901007
serverless_inference_config._to_request_dict() if is_serverless else None

0 commit comments

Comments
 (0)