     "worker": ["{}:8890".format(HOST2)],
     "ps": ["{}:2223".format(HOST1), "{}:2223".format(HOST2)],
 }
-CLUSTER_WITH_MWMS = {
-    "worker": ["{}:8890".format(HOST) for HOST in (HOST1, HOST2)],
-}
+CLUSTER_WITH_MWMS = {"worker": ["{}:8890".format(HOST) for HOST in HOST_LIST]}
 
 MASTER_TASK = {"index": 0, "type": "master"}
 WORKER_TASK = {"index": 0, "type": "worker"}
@@ -54,7 +52,9 @@ def distributed_training_env():
     env = simple_training_env()
 
     env.hosts = HOST_LIST
-    env.additional_framework_parameters = {training.SAGEMAKER_PARAMETER_SERVER_ENABLED: True}
+    env.additional_framework_parameters = {
+        training.SAGEMAKER_PARAMETER_SERVER_ENABLED: True
+    }
     return env
 
 
@@ -98,7 +98,9 @@ def test_single_machine(run_module, single_machine_training_env):
 
 @patch("sagemaker_training.entry_point.run")
 def test_train_horovod(run_module, single_machine_training_env):
-    single_machine_training_env.additional_framework_parameters["sagemaker_mpi_enabled"] = True
+    single_machine_training_env.additional_framework_parameters[
+        "sagemaker_mpi_enabled"
+    ] = True
 
     training.train(single_machine_training_env, MODEL_DIR_CMD_LIST)
 
     run_module.assert_called_with(
@@ -113,22 +115,32 @@ def test_train_horovod(run_module, single_machine_training_env):
 
 @pytest.mark.skip_on_pipeline
 @pytest.mark.skipif(
-    sys.version_info.major != 3, reason="Skip this for python 2 because of dict key order mismatch"
+    sys.version_info.major != 3,
+    reason="Skip this for python 2 because of dict key order mismatch",
 )
 @patch("tensorflow.train.ClusterSpec")
 @patch("tensorflow.train.Server")
 @patch("sagemaker_training.entry_point.run")
 @patch("multiprocessing.Process", lambda target: target())
 @patch("time.sleep", MagicMock())
-def test_train_distributed_master(run, tf_server, cluster_spec, distributed_training_env):
+def test_train_distributed_master(
+    run, tf_server, cluster_spec, distributed_training_env
+):
     training.train(distributed_training_env, MODEL_DIR_CMD_LIST)
 
     cluster_spec.assert_called_with(
-        {"worker": ["host2:2222"], "master": ["host1:2222"], "ps": ["host1:2223", "host2:2223"]}
+        {
+            "worker": ["host2:2222"],
+            "master": ["host1:2222"],
+            "ps": ["host1:2223", "host2:2223"],
+        }
     )
 
     tf_server.assert_called_with(
-        cluster_spec(), job_name="ps", task_index=0, config=tf.ConfigProto(device_count={"GPU": 0})
+        cluster_spec(),
+        job_name="ps",
+        task_index=0,
+        config=tf.ConfigProto(device_count={"GPU": 0}),
     )
     tf_server().join.assert_called_with()
 
@@ -152,24 +164,34 @@ def test_train_distributed_master(run, tf_server, cluster_spec, distributed_trai
 
 @pytest.mark.skip_on_pipeline
 @pytest.mark.skipif(
-    sys.version_info.major != 3, reason="Skip this for python 2 because of dict key order mismatch"
+    sys.version_info.major != 3,
+    reason="Skip this for python 2 because of dict key order mismatch",
 )
 @patch("tensorflow.train.ClusterSpec")
 @patch("tensorflow.train.Server")
 @patch("sagemaker_training.entry_point.run")
 @patch("multiprocessing.Process", lambda target: target())
 @patch("time.sleep", MagicMock())
-def test_train_distributed_worker(run, tf_server, cluster_spec, distributed_training_env):
+def test_train_distributed_worker(
+    run, tf_server, cluster_spec, distributed_training_env
+):
     distributed_training_env.current_host = HOST2
 
     training.train(distributed_training_env, MODEL_DIR_CMD_LIST)
 
     cluster_spec.assert_called_with(
-        {"worker": ["host2:2222"], "master": ["host1:2222"], "ps": ["host1:2223", "host2:2223"]}
+        {
+            "worker": ["host2:2222"],
+            "master": ["host1:2222"],
+            "ps": ["host1:2223", "host2:2223"],
+        }
     )
 
     tf_server.assert_called_with(
-        cluster_spec(), job_name="ps", task_index=1, config=tf.ConfigProto(device_count={"GPU": 0})
+        cluster_spec(),
+        job_name="ps",
+        task_index=1,
+        config=tf.ConfigProto(device_count={"GPU": 0}),
     )
     tf_server().join.assert_called_with()
 
@@ -248,8 +270,9 @@ def test_build_tf_config_for_ps():
 def test_build_tf_config_for_ps_error():
     with pytest.raises(ValueError) as error:
         training._build_tf_config_for_ps([HOST1], HOST1, ps_task=True)
-    assert "Cannot have a ps task if there are no parameter servers in the cluster" in str(
-        error.value
+    assert (
+        "Cannot have a ps task if there are no parameter servers in the cluster"
+        in str(error.value)
     )
 
 
@@ -271,7 +294,9 @@ def test_log_model_missing_warning_no_model(logger):
 
 @patch("sagemaker_tensorflow_container.training.logger")
 def test_log_model_missing_warning_wrong_format(logger):
-    training._log_model_missing_warning(os.path.join(RESOURCE_PATH, "test_dir_wrong_model"))
+    training._log_model_missing_warning(
+        os.path.join(RESOURCE_PATH, "test_dir_wrong_model")
+    )
     logger.warn.assert_called_with(
         "Your model will NOT be servable with SageMaker TensorFlow Serving container. "
         "The model artifact was not saved in the TensorFlow "
@@ -282,16 +307,22 @@ def test_log_model_missing_warning_wrong_format(logger):
 
 @patch("sagemaker_tensorflow_container.training.logger")
 def test_log_model_missing_warning_wrong_parent_dir(logger):
-    training._log_model_missing_warning(os.path.join(RESOURCE_PATH, "test_dir_wrong_parent_dir"))
+    training._log_model_missing_warning(
+        os.path.join(RESOURCE_PATH, "test_dir_wrong_parent_dir")
+    )
     logger.warn.assert_called_with(
         "Your model will NOT be servable with SageMaker TensorFlow Serving containers. "
-        'The SavedModel bundle is under directory "{}", not a numeric name.'.format("not-digit")
+        'The SavedModel bundle is under directory "{}", not a numeric name.'.format(
+            "not-digit"
+        )
     )
 
 
 @patch("sagemaker_tensorflow_container.training.logger")
 def test_log_model_missing_warning_correct(logger):
-    training._log_model_missing_warning(os.path.join(RESOURCE_PATH, "test_dir_correct_model"))
+    training._log_model_missing_warning(
+        os.path.join(RESOURCE_PATH, "test_dir_correct_model")
+    )
     logger.warn.assert_not_called()
 
 
@@ -323,7 +354,10 @@ def test_main(
 @patch("sagemaker_tensorflow_container.training.train")
 @patch("logging.Logger.setLevel")
 @patch("sagemaker_training.environment.Environment")
-@patch("sagemaker_training.environment.read_hyperparameters", return_value={"model_dir": MODEL_DIR})
+@patch(
+    "sagemaker_training.environment.read_hyperparameters",
+    return_value={"model_dir": MODEL_DIR},
+)
 @patch("sagemaker_tensorflow_container.s3_utils.configure")
 def test_main_simple_training_model_dir(
     configure_s3_env,
@@ -361,7 +395,9 @@ def test_main_tuning_model_dir(
     training_env.return_value = single_machine_training_env
     os.environ["SAGEMAKER_REGION"] = REGION
     training.main()
-    expected_model_dir = "{}/{}/model".format(MODEL_DIR, single_machine_training_env.job_name)
+    expected_model_dir = "{}/{}/model".format(
+        MODEL_DIR, single_machine_training_env.job_name
+    )
     configure_s3_env.assert_called_once_with(expected_model_dir, REGION)
 
 