This repository was archived by the owner on May 23, 2024. It is now read-only.

Commit 76113eb

Merge branch 'master' into mme-fix
2 parents: 0b30a38 + a58583d

10 files changed: +278 −170 lines

.gitignore (+3)

@@ -2,3 +2,6 @@ __pycache__
 .tox/
 log.txt
 .idea/
+node_modules/
+package.json
+package-lock.json

README.md (+19 −1)

@@ -42,7 +42,9 @@ For notebook examples, see: [Amazon SageMaker Examples](https://github.com/awsla
 3. [Running the tests](#running-the-tests)
 4. [Pre/Post-Processing](#pre/post-processing)
 5. [Deploying a TensorFlow Serving Model](#deploying-a-tensorflow-serving-model)
-6. [Deploying to Multi-Model Endpoint](#deploying-to-multi-model-endpoint)
+6. [Enable Batching](#enabling-batching)
+7. [Configurable SageMaker Environment Variables](#configurable-sagemaker-environment-variables)
+8. [Deploying to Multi-Model Endpoint](#deploying-to-multi-model-endpoint)
 
 ## Getting Started
 
@@ -612,6 +614,22 @@ SAGEMAKER_TFS_NUM_BATCH_THREADS="16"
 SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES="10000"
 ```
 
+## Configurable SageMaker Environment Variables
+The following environment variables can be set on a SageMaker Model or Transform Job if further configuration is required:
+
+[Configures](https://docs.gunicorn.org/en/stable/settings.html#loglevel)
+the logging level for Gunicorn.
+```bash
+# Defaults to "info"
+SAGEMAKER_GUNICORN_LOGLEVEL="debug"
+```
+[Configures](https://docs.gunicorn.org/en/stable/settings.html#timeout)
+how long a Gunicorn worker may be silent before it is killed and restarted.
+```bash
+# Defaults to 30.
+SAGEMAKER_GUNICORN_TIMEOUT_SECONDS="60"
+```
+
 ## Deploying to Multi-Model Endpoint
 
 SageMaker TensorFlow Serving container (version 1.5.0 and 2.1.0, CPU) now supports Multi-Model Endpoint. With this feature, you can deploy different models (not just different versions of a model) to a single endpoint.
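
The two Gunicorn variables documented in the new README section are ordinary container environment variables. Below is a minimal sketch of setting them when creating a SageMaker Model with boto3; the model name, role ARN, image URI, and artifact location are placeholders, not values from this commit:

```python
import boto3

sm = boto3.client("sagemaker")

sm.create_model(
    ModelName="tfs-example-model",  # placeholder
    ExecutionRoleArn="arn:aws:iam::111122223333:role/ExampleRole",  # placeholder
    PrimaryContainer={
        "Image": "<account>.dkr.ecr.<region>.amazonaws.com/sagemaker-tensorflow-serving:2.1-cpu",  # placeholder
        "ModelDataUrl": "s3://example-bucket/model.tar.gz",  # placeholder
        # Environment variables read by the serving container at startup.
        "Environment": {
            "SAGEMAKER_GUNICORN_LOGLEVEL": "debug",
            "SAGEMAKER_GUNICORN_TIMEOUT_SECONDS": "60",
        },
    },
)
```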
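
For the Multi-Model Endpoint support referenced at the end of the README diff, here is a hedged sketch of routing a request to one model on such an endpoint with the SageMaker runtime client; the endpoint name, payload, and TargetModel archive name are assumptions for illustration:

```python
import boto3

runtime = boto3.client("sagemaker-runtime")

response = runtime.invoke_endpoint(
    EndpointName="tfs-multi-model-endpoint",  # placeholder
    ContentType="application/json",
    TargetModel="model-a.tar.gz",  # which archive under the endpoint's S3 prefix to invoke
    Body=b'{"instances": [[1.0, 2.0, 5.0]]}',  # placeholder payload
)
print(response["Body"].read().decode("utf-8"))
```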

docker/1.15/Dockerfile.cpu (+1 −1)

@@ -1,4 +1,4 @@
-FROM ubuntu:18.04
+FROM public.ecr.aws/ubuntu/ubuntu:18.04
 
 LABEL maintainer="Amazon AI"
 # Specify LABEL for inference pipelines to use SAGEMAKER_BIND_TO_PORT

docker/2.1/Dockerfile.cpu (+1 −1)

@@ -1,4 +1,4 @@
-FROM ubuntu:18.04
+FROM public.ecr.aws/ubuntu/ubuntu:18.04
 
 LABEL maintainer="Amazon AI"
 LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true

docker/build_artifacts/deep_learning_container.py (+25 −9)

@@ -49,19 +49,33 @@ def _retrieve_instance_region():
     Retrieve instance region from instance metadata service
     """
     region = None
-    valid_regions = ['ap-northeast-1', 'ap-northeast-2', 'ap-southeast-1', 'ap-southeast-2',
-                     'ap-south-1', 'ca-central-1', 'eu-central-1', 'eu-north-1',
-                     'eu-west-1', 'eu-west-2', 'eu-west-3', 'sa-east-1',
-                     'us-east-1', 'us-east-2', 'us-west-1', 'us-west-2']
+    valid_regions = [
+        "ap-northeast-1",
+        "ap-northeast-2",
+        "ap-southeast-1",
+        "ap-southeast-2",
+        "ap-south-1",
+        "ca-central-1",
+        "eu-central-1",
+        "eu-north-1",
+        "eu-west-1",
+        "eu-west-2",
+        "eu-west-3",
+        "sa-east-1",
+        "us-east-1",
+        "us-east-2",
+        "us-west-1",
+        "us-west-2",
+    ]
 
     url = "http://169.254.169.254/latest/dynamic/instance-identity/document"
     response = requests_helper(url, timeout=0.1)
 
     if response is not None:
         response_json = json.loads(response.text)
 
-        if response_json['region'] in valid_regions:
-            region = response_json['region']
+        if response_json["region"] in valid_regions:
+            region = response_json["region"]
 
     return region
 
@@ -75,8 +89,10 @@ def query_bucket():
     region = _retrieve_instance_region()
 
     if instance_id is not None and region is not None:
-        url = ("https://aws-deep-learning-containers-{0}.s3.{0}.amazonaws.com"
-               "/dlc-containers.txt?x-instance-id={1}".format(region, instance_id))
+        url = (
+            "https://aws-deep-learning-containers-{0}.s3.{0}.amazonaws.com"
+            "/dlc-containers.txt?x-instance-id={1}".format(region, instance_id)
+        )
         response = requests_helper(url, timeout=0.2)
 
     logging.debug("Query bucket finished: {}".format(response))
@@ -105,5 +121,5 @@ def main():
     query_bucket()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
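
The hunks above only change formatting, but the multi-line URL that query_bucket() builds can be hard to read in diff form. A standalone sketch of the same string construction, with made-up region and instance-id values (in the container they come from the EC2 instance metadata service):

```python
# Illustrative values only.
region = "us-west-2"
instance_id = "i-0123456789abcdef0"

# Same construction as in the hunk above: the region is interpolated twice
# (bucket name and S3 regional endpoint), the instance id once.
url = (
    "https://aws-deep-learning-containers-{0}.s3.{0}.amazonaws.com"
    "/dlc-containers.txt?x-instance-id={1}".format(region, instance_id)
)
print(url)
# https://aws-deep-learning-containers-us-west-2.s3.us-west-2.amazonaws.com/dlc-containers.txt?x-instance-id=i-0123456789abcdef0
```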

docker/build_artifacts/dockerd-entrypoint.py (+1 −1)

@@ -19,4 +19,4 @@
 if not os.path.exists("/opt/ml/input/config"):
     subprocess.call(["python", "/usr/local/bin/deep_learning_container.py", "&>/dev/null", "&"])
 
-subprocess.check_call(shlex.split(' '.join(sys.argv[1:])))
+subprocess.check_call(shlex.split(" ".join(sys.argv[1:])))

docker/build_artifacts/sagemaker/python_service.py (+67 −68)

@@ -58,7 +58,6 @@ def default_handler(data, context):
 
 
 class PythonServiceResource:
-
     def __init__(self):
         if SAGEMAKER_MULTI_MODEL_ENABLED:
             self._model_tfs_rest_port = {}
@@ -83,9 +82,9 @@ def __init__(self):
             log.info("Inference script exists, importing handlers.")
             # Single-Model Mode & Multi-Model Mode both use one inference.py
             self._handler, self._input_handler, self._output_handler = self._import_handlers()
-            self._handlers = self._make_handler(self._handler,
-                                                self._input_handler,
-                                                self._output_handler)
+            self._handlers = self._make_handler(
+                self._handler, self._input_handler, self._output_handler
+            )
         else:
             log.info("Inference script does not exist, using default handlers.")
             self._handlers = default_handler
@@ -108,7 +107,7 @@ def _pick_port(self, ports):
         return random.choice(ports)
 
     def _parse_sagemaker_port_range_mme(self, port_range):
-        lower, upper = port_range.split('-')
+        lower, upper = port_range.split("-")
         lower = int(lower)
         upper = lower + int((int(upper) - lower) * 0.9)  # only utilizing 90% of the ports
         rest_port = lower
@@ -132,16 +131,14 @@ def _handle_load_model_post(self, res, data):  # noqa: C901
         # model is already loaded
         if model_name in self._model_tfs_pid:
             res.status = falcon.HTTP_409
-            res.body = json.dumps({
-                "error": "Model {} is already loaded.".format(model_name)
-            })
+            res.body = json.dumps({"error": "Model {} is already loaded.".format(model_name)})
 
         # check if there are available ports
         if not self._ports_available():
             res.status = falcon.HTTP_507
-            res.body = json.dumps({
-                "error": "Memory exhausted: no available ports to load the model."
-            })
+            res.body = json.dumps(
+                {"error": "Memory exhausted: no available ports to load the model."}
+            )
         with lock():
             self._model_tfs_rest_port[model_name] = self._tfs_ports["rest_port"].pop()
             self._model_tfs_grpc_port[model_name] = self._tfs_ports["grpc_port"].pop()
@@ -157,7 +154,8 @@ def _handle_load_model_post(self, res, data):  # noqa: C901
                     f.write(tfs_config)
 
                 batching_config_file = "/sagemaker/batching/{}/batching-config.cfg".format(
-                    model_name)
+                    model_name
+                )
                 if self._tfs_enable_batching:
                     tfs_utils.create_batching_config(batching_config_file)
 
@@ -170,22 +168,26 @@ def _handle_load_model_post(self, res, data):  # noqa: C901
                 )
                 p = subprocess.Popen(cmd.split())
 
-                tfs_utils.wait_for_model(self._model_tfs_rest_port[model_name], model_name,
-                                         self._tfs_wait_time_seconds)
+                tfs_utils.wait_for_model(
+                    self._model_tfs_rest_port[model_name], model_name, self._tfs_wait_time_seconds
+                )
 
                 log.info("started tensorflow serving (pid: %d)", p.pid)
                 # update model name <-> tfs pid map
                 self._model_tfs_pid[model_name] = p
 
                 res.status = falcon.HTTP_200
-                res.body = json.dumps({
-                    "success":
-                        "Successfully loaded model {}, "
+                res.body = json.dumps(
+                    {
+                        "success": "Successfully loaded model {}, "
                         "listening on rest port {} "
-                        "and grpc port {}.".format(model_name,
-                                                   self._model_tfs_rest_port,
-                                                   self._model_tfs_grpc_port,)
-                })
+                        "and grpc port {}.".format(
+                            model_name,
+                            self._model_tfs_rest_port,
+                            self._model_tfs_grpc_port,
+                        )
+                    }
+                )
             except MultiModelException as multi_model_exception:
                 self._cleanup_config_file(tfs_config_file)
                 self._cleanup_config_file(batching_config_file)
@@ -199,25 +201,28 @@ def _handle_load_model_post(self, res, data):  # noqa: C901
                 raise MultiModelException(falcon.HTTP_500, multi_model_exception.msg)
             except FileExistsError as e:
                 res.status = falcon.HTTP_409
-                res.body = json.dumps({
-                    "error": "Model {} is already loaded. {}".format(model_name, str(e))
-                })
+                res.body = json.dumps(
+                    {"error": "Model {} is already loaded. {}".format(model_name, str(e))}
+                )
             except OSError as os_error:
                 self._cleanup_config_file(tfs_config_file)
                 self._cleanup_config_file(batching_config_file)
                 if os_error.errno == 12:
-                    raise MultiModelException(falcon.HTTP_507,
-                                              "Memory exhausted: "
-                                              "not enough memory to start TFS instance")
+                    raise MultiModelException(
+                        falcon.HTTP_507,
+                        "Memory exhausted: " "not enough memory to start TFS instance",
+                    )
                 else:
                     raise MultiModelException(falcon.HTTP_500, os_error.strerror)
         else:
            res.status = falcon.HTTP_404
-            res.body = json.dumps({
-                "error":
-                    "Could not find valid base path {} for servable {}".format(base_path,
-                                                                               model_name)
-            })
+            res.body = json.dumps(
+                {
+                    "error": "Could not find valid base path {} for servable {}".format(
+                        base_path, model_name
+                    )
+                }
+            )
 
     def _cleanup_config_file(self, config_file):
         if os.path.exists(config_file):
@@ -228,31 +233,37 @@ def _handle_invocation_post(self, req, res, model_name=None):
             if model_name:
                 if model_name not in self._model_tfs_rest_port:
                     res.status = falcon.HTTP_404
-                    res.body = json.dumps({
-                        "error": "Model {} is not loaded yet.".format(model_name)
-                    })
+                    res.body = json.dumps(
+                        {"error": "Model {} is not loaded yet.".format(model_name)}
+                    )
                     return
                 else:
                     log.info("model name: {}".format(model_name))
                     rest_port = self._model_tfs_rest_port[model_name]
                     log.info("rest port: {}".format(str(self._model_tfs_rest_port[model_name])))
                     grpc_port = self._model_tfs_grpc_port[model_name]
                     log.info("grpc port: {}".format(str(self._model_tfs_grpc_port[model_name])))
-                    data, context = tfs_utils.parse_request(req, rest_port, grpc_port,
-                                                            self._tfs_default_model_name,
-                                                            model_name=model_name)
+                    data, context = tfs_utils.parse_request(
+                        req,
+                        rest_port,
+                        grpc_port,
+                        self._tfs_default_model_name,
+                        model_name=model_name,
+                    )
             else:
                 res.status = falcon.HTTP_400
-                res.body = json.dumps({
-                    "error": "Invocation request does not contain model name."
-                })
+                res.body = json.dumps({"error": "Invocation request does not contain model name."})
         else:
             # Randomly pick port used for routing incoming request.
             grpc_port = self._pick_port(self._tfs_grpc_ports)
             rest_port = self._pick_port(self._tfs_rest_ports)
-            data, context = tfs_utils.parse_request(req, rest_port, grpc_port,
-                                                    self._tfs_default_model_name,
-                                                    channel=self._channels[grpc_port])
+            data, context = tfs_utils.parse_request(
+                req,
+                rest_port,
+                grpc_port,
+                self._tfs_default_model_name,
+                channel=self._channels[grpc_port],
+            )
 
         try:
             res.status = falcon.HTTP_200
@@ -261,9 +272,7 @@ def _handle_invocation_post(self, req, res, model_name=None):
         except Exception as e:  # pylint: disable=broad-except
             log.exception("exception handling request: {}".format(e))
             res.status = falcon.HTTP_500
-            res.body = json.dumps({
-                "error": str(e)
-            }).encode("utf-8")  # pylint: disable=E1101
+            res.body = json.dumps({"error": str(e)}).encode("utf-8")  # pylint: disable=E1101
 
     def _setup_channel(self, grpc_port):
         if grpc_port not in self._channels:
@@ -309,39 +318,31 @@ def on_get(self, req, res, model_name=None):  # pylint: disable=W0613
             except ValueError as e:
                 log.exception("exception handling request: {}".format(e))
                 res.status = falcon.HTTP_500
-                res.body = json.dumps({
-                    "error": str(e)
-                }).encode("utf-8")
+                res.body = json.dumps({"error": str(e)}).encode("utf-8")
             res.status = falcon.HTTP_200
             res.body = json.dumps(models_info)
         else:
             if model_name not in self._model_tfs_rest_port:
                 res.status = falcon.HTTP_404
-                res.body = json.dumps({
-                    "error": "Model {} is loaded yet.".format(model_name)
-                }).encode("utf-8")
+                res.body = json.dumps(
+                    {"error": "Model {} is loaded yet.".format(model_name)}
+                ).encode("utf-8")
            else:
                port = self._model_tfs_rest_port[model_name]
                uri = "http://localhost:{}/v1/models/{}".format(port, model_name)
                try:
                    info = requests.get(uri)
                    res.status = falcon.HTTP_200
-                    res.body = json.dumps({
-                        "model": info
-                    }).encode("utf-8")
+                    res.body = json.dumps({"model": info}).encode("utf-8")
                except ValueError as e:
                    log.exception("exception handling GET models request.")
                    res.status = falcon.HTTP_500
-                    res.body = json.dumps({
-                        "error": str(e)
-                    }).encode("utf-8")
+                    res.body = json.dumps({"error": str(e)}).encode("utf-8")
 
     def on_delete(self, req, res, model_name):  # pylint: disable=W0613
         if model_name not in self._model_tfs_pid:
             res.status = falcon.HTTP_404
-            res.body = json.dumps({
-                "error": "Model {} is not loaded yet".format(model_name)
-            })
+            res.body = json.dumps({"error": "Model {} is not loaded yet".format(model_name)})
         else:
             try:
                 self._model_tfs_pid[model_name].kill()
@@ -356,14 +357,12 @@ def on_delete(self, req, res, model_name):  # pylint: disable=W0613
                 del self._model_tfs_grpc_port[model_name]
                 del self._model_tfs_pid[model_name]
                 res.status = falcon.HTTP_200
-                res.body = json.dumps({
-                    "success": "Successfully unloaded model {}.".format(model_name)
-                })
+                res.body = json.dumps(
+                    {"success": "Successfully unloaded model {}.".format(model_name)}
+                )
             except OSError as error:
                 res.status = falcon.HTTP_500
-                res.body = json.dumps({
-                    "error": str(error)
-                }).encode("utf-8")
+                res.body = json.dumps({"error": str(error)}).encode("utf-8")
 
     def validate_model_dir(self, model_path):
         # model base path doesn't exits
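
Most of this file's diff is mechanical black-style reformatting, but _parse_sagemaker_port_range_mme is worth a second look: it deliberately keeps only 90% of the configured port range for TensorFlow Serving instances. Below is a minimal standalone sketch of just that arithmetic; the example range string is an assumption, and the real helper does more than this:

```python
def parse_port_range(port_range):
    """Mirrors only the arithmetic shown in the diff, not the full helper."""
    lower, upper = port_range.split("-")
    lower = int(lower)
    # Only 90% of the configured range is handed out to TFS instances.
    upper = lower + int((int(upper) - lower) * 0.9)
    rest_port = lower
    return lower, upper, rest_port


# Example with an assumed port range string:
print(parse_port_range("9000-9999"))  # (9000, 9899, 9000)
```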
