@@ -99,10 +99,11 @@ def __init__(
99
99
file which should be executed as the entry point to model
100
100
hosting. If ``source_dir`` is specified, then ``entry_point``
101
101
must point to a file located at the root of ``source_dir``.
102
- image_uri (str): A Docker image URI (default: None). If not specified, a
103
- default image for Chainer will be used. If ``framework_version``
104
- or ``py_version`` are ``None``, then ``image_uri`` is required. If
105
- also ``None``, then a ``ValueError`` will be raised.
102
+ image_uri (str): A Docker image URI (default: None). If not specified,
103
+ a default image for Chainer will be used.
104
+ If ``framework_version`` or ``py_version``
105
+ are ``None``, then ``image_uri`` is required. If ``image_uri`` is also ``None``,
106
+ then a ``ValueError`` will be raised.
106
107
framework_version (str): Chainer version you want to use for
107
108
executing your model training code. Defaults to ``None``. Required
108
109
unless ``image_uri`` is provided.
@@ -139,7 +140,9 @@ def __init__(
139
140
140
141
self .model_server_workers = model_server_workers
141
142
142
- def prepare_container_def (self , instance_type = None , accelerator_type = None ):
143
+ def prepare_container_def (
144
+ self , instance_type = None , accelerator_type = None , serverless_inference_config = None
145
+ ):
143
146
"""Return a container definition with framework configuration set in model environment.
144
147
145
148
Args:
@@ -148,21 +151,27 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None):
148
151
accelerator_type (str): The Elastic Inference accelerator type to
149
152
deploy to the instance for loading and making inferences to the
150
153
model. For example, 'ml.eia1.medium'.
154
+ serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
155
+ Specifies configuration related to a serverless endpoint. The instance type is
156
+ not provided for serverless inference, so this is used to find image URIs.
151
157
152
158
Returns:
153
159
dict[str, str]: A container definition object usable with the
154
160
CreateModel API.
155
161
"""
156
162
deploy_image = self .image_uri
157
163
if not deploy_image :
158
- if instance_type is None :
164
+ if instance_type is None and serverless_inference_config is None :
159
165
raise ValueError (
160
166
"Must supply either an instance type (for choosing CPU vs GPU) or an image URI."
161
167
)
162
168
163
169
region_name = self .sagemaker_session .boto_session .region_name
164
170
deploy_image = self .serving_image_uri (
165
- region_name , instance_type , accelerator_type = accelerator_type
171
+ region_name ,
172
+ instance_type ,
173
+ accelerator_type = accelerator_type ,
174
+ serverless_inference_config = serverless_inference_config ,
166
175
)
167
176
168
177
deploy_key_prefix = model_code_key_prefix (self .key_prefix , self .name , deploy_image )
@@ -174,13 +183,18 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None):
174
183
deploy_env [MODEL_SERVER_WORKERS_PARAM_NAME .upper ()] = str (self .model_server_workers )
175
184
return sagemaker .container_def (deploy_image , self .model_data , deploy_env )
176
185
177
- def serving_image_uri (self , region_name , instance_type , accelerator_type = None ):
186
+ def serving_image_uri (
187
+ self , region_name , instance_type , accelerator_type = None , serverless_inference_config = None
188
+ ):
178
189
"""Create a URI for the serving image.
179
190
180
191
Args:
181
192
region_name (str): AWS region where the image is uploaded.
182
193
instance_type (str): SageMaker instance type. Used to determine device type
183
194
(cpu/gpu/family-specific optimized).
195
+ serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
196
+ Specifies configuration related to a serverless endpoint. The instance type is
197
+ not provided for serverless inference, so this is used to determine the device type.
184
198
185
199
Returns:
186
200
str: The appropriate image URI based on the given parameters.
@@ -194,4 +208,5 @@ def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
194
208
instance_type = instance_type ,
195
209
accelerator_type = accelerator_type ,
196
210
image_scope = "inference" ,
211
+ serverless_inference_config = serverless_inference_config ,
197
212
)
0 commit comments