From 741f68795893e61d72f14d625a395e7ccd0f5ffb Mon Sep 17 00:00:00 2001 From: Chuyang Deng Date: Fri, 12 Jun 2020 11:18:11 -0700 Subject: [PATCH 1/3] update warning message --- src/sagemaker/fw_utils.py | 10 +++++----- tests/unit/test_fw_utils.py | 10 ++++++++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py index 9141ae8c72..ea3a4ac171 100644 --- a/src/sagemaker/fw_utils.py +++ b/src/sagemaker/fw_utils.py @@ -49,8 +49,8 @@ "Please set the argument \"py_version='py3'\" to use the Python 3 {framework} image." ) PARAMETER_SERVER_MULTI_GPU_WARNING = ( - "You have selected a multi-GPU training instance type. " - "You have also enabled parameter server for distributed training. " + "If you have selected a multi-GPU training instance type, " + "and have also enabled parameter server for distributed training. " "Distributed training with the default parameter server configuration will not " "fully leverage all GPU cores; the parameter server will be configured to run " "only one worker per host regardless of the number of GPUs." @@ -617,9 +617,9 @@ def warn_if_parameter_server_with_multi_gpu(training_instance_type, distribution return is_multi_gpu_instance = ( - training_instance_type.split(".")[1].startswith("p") - and training_instance_type not in SINGLE_GPU_INSTANCE_TYPES - ) + training_instance_type == "local_gpu" + or training_instance_type.split(".")[1].startswith("p") + ) and training_instance_type not in SINGLE_GPU_INSTANCE_TYPES ps_enabled = "parameter_server" in distributions and distributions["parameter_server"].get( "enabled", False diff --git a/tests/unit/test_fw_utils.py b/tests/unit/test_fw_utils.py index 1e211193ca..a015e5595e 100644 --- a/tests/unit/test_fw_utils.py +++ b/tests/unit/test_fw_utils.py @@ -1272,3 +1272,13 @@ def test_warn_if_parameter_server_with_multi_gpu(caplog): training_instance_type=train_instance_type, distributions=distributions ) assert fw_utils.PARAMETER_SERVER_MULTI_GPU_WARNING in caplog.text + + +def test_war_if_parameter_server_with_multi_gpu(caplog): + train_instance_type = "local_gpu" + distributions = {"parameter_server": {"enabled": True}} + + fw_utils.warn_if_parameter_server_with_multi_gpu( + training_instance_type=train_instance_type, distributions=distributions + ) + assert fw_utils.PARAMETER_SERVER_MULTI_GPU_WARNING in caplog.text From c269990b45249b68df7c8cf4ec6d99d1fb3b87d5 Mon Sep 17 00:00:00 2001 From: Chuyang Date: Fri, 12 Jun 2020 11:26:33 -0700 Subject: [PATCH 2/3] Update tests/unit/test_fw_utils.py Co-authored-by: Chaitanya Prakash Bapat --- tests/unit/test_fw_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_fw_utils.py b/tests/unit/test_fw_utils.py index a015e5595e..ac3119413c 100644 --- a/tests/unit/test_fw_utils.py +++ b/tests/unit/test_fw_utils.py @@ -1274,7 +1274,7 @@ def test_warn_if_parameter_server_with_multi_gpu(caplog): assert fw_utils.PARAMETER_SERVER_MULTI_GPU_WARNING in caplog.text -def test_war_if_parameter_server_with_multi_gpu(caplog): +def test_warn_if_parameter_server_with_multi_gpu(caplog): train_instance_type = "local_gpu" distributions = {"parameter_server": {"enabled": True}} From b4d915ba2db42b869a9842190abba4adf25c06af Mon Sep 17 00:00:00 2001 From: Chuyang Deng Date: Fri, 12 Jun 2020 11:49:12 -0700 Subject: [PATCH 3/3] update test name --- tests/unit/test_fw_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_fw_utils.py b/tests/unit/test_fw_utils.py index a015e5595e..ed1aee3ba8 100644 --- a/tests/unit/test_fw_utils.py +++ b/tests/unit/test_fw_utils.py @@ -1274,7 +1274,7 @@ def test_warn_if_parameter_server_with_multi_gpu(caplog): assert fw_utils.PARAMETER_SERVER_MULTI_GPU_WARNING in caplog.text -def test_war_if_parameter_server_with_multi_gpu(caplog): +def test_warn_if_parameter_server_with_local_multi_gpu(caplog): train_instance_type = "local_gpu" distributions = {"parameter_server": {"enabled": True}}