
Commit 3ef74b0

Feature: Support multiple inference.py files and a universal inference.py file, along with a universal requirements.txt file

1 parent 1a265db

File tree

6 files changed: +25 −5 lines


docker/build_artifacts/sagemaker/python_service.py

Lines changed: 1 addition & 1 deletion
@@ -407,4 +407,4 @@ def add_routes(self, application):
 
 app = falcon.API()
 resources = ServiceResources()
-resources.add_routes(app)
+resources.add_routes(app)

(The removed and added lines are textually identical; the change here appears to be only a missing trailing newline at end of file. The same pattern recurs in the final hunk of serve.py and in the test file below.)

docker/build_artifacts/sagemaker/serve.py

Lines changed: 23 additions & 3 deletions
@@ -308,6 +308,14 @@ def _enable_per_process_gpu_memory_fraction(self):
 
         return False
 
+    def _get_number_of_gpu_on_host(self):
+        nvidia_smi_exist = os.path.exists("/usr/bin/nvidia-smi")
+        if nvidia_smi_exist:
+            return len(subprocess.check_output(['nvidia-smi', '-L'])
+                       .decode('utf-8').strip().split('\n'))
+
+        return 0
+
     def _calculate_per_process_gpu_memory_fraction(self):
         return round((1 - self._tfs_gpu_margin) / float(self._tfs_instance_count), 4)
 

@@ -420,8 +428,20 @@ def _start_single_tfs(self, instance_id):
             tfs_gpu_memory_fraction=self._calculate_per_process_gpu_memory_fraction(),
         )
         log.info("tensorflow serving command: {}".format(cmd))
-        p = subprocess.Popen(cmd.split())
-        log.info("started tensorflow serving (pid: %d)", p.pid)
+
+        num_gpus = self._get_number_of_gpu_on_host()
+        if num_gpus > 1:
+            # utilizing multi-gpu
+            worker_env = os.environ.copy()
+            worker_env["CUDA_VISIBLE_DEVICES"] = str(instance_id % num_gpus)
+            p = subprocess.Popen(cmd.split(), env=worker_env)
+            log.info("started tensorflow serving (pid: {}) on GPU {}"
+                     .format(p.pid, instance_id % num_gpus))
+        else:
+            # cpu and single gpu
+            p = subprocess.Popen(cmd.split())
+            log.info("started tensorflow serving (pid: {})".format(p.pid))
+
         return p
 
     def _monitor(self):

@@ -480,4 +500,4 @@ def start(self):
 
 
 if __name__ == "__main__":
-    ServiceManager().start()
+    ServiceManager().start()
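The substantive change here is round-robin GPU assignment: _get_number_of_gpu_on_host counts devices by parsing nvidia-smi -L (one output line per GPU), and _start_single_tfs then pins each TensorFlow Serving worker to a single device by setting CUDA_VISIBLE_DEVICES to instance_id % num_gpus. A minimal standalone sketch of that mapping follows; the worker count and the printed summary are illustrative assumptions, not part of the container code.

import os
import subprocess

def count_gpus():
    # Same approach as _get_number_of_gpu_on_host: `nvidia-smi -L`
    # prints one line per device, e.g. "GPU 0: Tesla T4 (UUID: ...)".
    if not os.path.exists("/usr/bin/nvidia-smi"):
        return 0
    listing = subprocess.check_output(["nvidia-smi", "-L"]).decode("utf-8").strip()
    return len(listing.split("\n"))

num_gpus = count_gpus()
num_workers = 4  # hypothetical TFS instance count, for illustration only

for instance_id in range(num_workers):
    worker_env = os.environ.copy()
    if num_gpus > 1:
        # Round-robin: worker ids wrap around the available devices,
        # so each TFS process sees exactly one GPU.
        worker_env["CUDA_VISIBLE_DEVICES"] = str(instance_id % num_gpus)
    print("worker", instance_id, "->", worker_env.get("CUDA_VISIBLE_DEVICES", "default"))

Because CUDA_VISIBLE_DEVICES is set per child process rather than globally, each TFS worker enumerates only its assigned device as GPU 0, while the parent ServiceManager environment is left untouched.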

test/integration/local/test_pre_post_processing_mme.py

Lines changed: 1 addition & 1 deletion
@@ -143,4 +143,4 @@ def test_unsupported_content_type():
     data = "aW1hZ2UgYnl0ZXM="
     response = requests.post(INVOCATION_URL.format(MODEL_NAME), data=data, headers=headers)
     assert 500 == response.status_code
-    assert "unsupported content type" in response.text
+    assert "unsupported content type" in response.text
