@@ -1282,25 +1282,13 @@ def logs_for_job( # noqa: C901 - suppress complexity warning for this method
        """

        description = self.sagemaker_client.describe_training_job(TrainingJobName=job_name)
-        print(secondary_training_status_message(description, None), end="")
-        instance_count = description["ResourceConfig"]["InstanceCount"]
-        status = description["TrainingJobStatus"]
+        print(secondary_training_status_message(description, None), end='')

-        stream_names = []  # The list of log streams
-        positions = {}  # The current position in each stream, map of stream name -> position
-
-        # Increase retries allowed (from default of 4), as we don't want waiting for a training job
-        # to be interrupted by a transient exception.
-        config = botocore.config.Config(retries={"max_attempts": 15})
-        client = self.boto_session.client("logs", config=config)
-        log_group = "/aws/sagemaker/TrainingJobs"
+        instance_count, stream_names, positions, client, log_group, dot, color_wrap = \
+            _logs_initializer(self, description, job='Training')

        state = _get_initial_job_state(description, 'TrainingJobStatus', wait)

-        dot = False
-
-        color_wrap = sagemaker.logs.ColorWrap()
-
        # The loop below implements a state machine that alternates between checking the job status and
        # reading whatever is available in the logs at this point. Note, that if we were called with
        # wait == False, we never check the job status.
@@ -1371,23 +1359,12 @@ def logs_for_transform_job(self, job_name, wait=False, poll=10): # noqa: C901 -
        """

        description = self.sagemaker_client.describe_transform_job(TransformJobName=job_name)
-        instance_count = description['TransformResources']['InstanceCount']

-        stream_names = []  # The list of log streams
-        positions = {}  # The current position in each stream, map of stream name -> position
-
-        # Increase retries allowed (from default of 4), as we don't want waiting for a training job
-        # to be interrupted by a transient exception.
-        config = botocore.config.Config(retries={'max_attempts': 15})
-        client = self.boto_session.client('logs', config=config)
-        log_group = '/aws/sagemaker/TransformJobs'
+        instance_count, stream_names, positions, client, log_group, dot, color_wrap = \
+            _logs_initializer(self, description, job='Transform')

        state = _get_initial_job_state(description, 'TransformJobStatus', wait)

-        dot = False
-
-        color_wrap = sagemaker.logs.ColorWrap()
-
        # The loop below implements a state machine that alternates between checking the job status and
        # reading whatever is available in the logs at this point. Note, that if we were called with
        # wait == False, we never check the job status.
@@ -1831,6 +1808,28 @@ def _get_initial_job_state(description, status_key, wait):
    return LogState.TAILING if wait and not job_already_completed else LogState.COMPLETE


+def _logs_initializer(sagemaker_session, description, job):
+    if job == 'Training':
+        instance_count = description['ResourceConfig']['InstanceCount']
+    elif job == 'Transform':
+        instance_count = description['TransformResources']['InstanceCount']
+
+    stream_names = []  # The list of log streams
+    positions = {}  # The current position in each stream, map of stream name -> position
+
+    # Increase retries allowed (from default of 4), as we don't want waiting for a training job
+    # to be interrupted by a transient exception.
+    config = botocore.config.Config(retries={'max_attempts': 15})
+    client = sagemaker_session.boto_session.client('logs', config=config)
+    log_group = '/aws/sagemaker/' + job + 'Jobs'
+
+    dot = False
+
+    color_wrap = sagemaker.logs.ColorWrap()
+
+    return instance_count, stream_names, positions, client, log_group, dot, color_wrap
+
+
def _flush_log_streams(stream_names, instance_count, client, log_group, job_name, positions, dot, color_wrap):
    if len(stream_names) < instance_count:
        # Log streams are created whenever a container starts writing to stdout/err, so this list
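
Below is a minimal usage sketch (not part of the diff) of how the consolidated helper is meant to be consumed by both log-tailing paths. It assumes it runs inside sagemaker/session.py where _logs_initializer is defined; the Session construction and `job_name` are hypothetical placeholders that need real AWS credentials and an existing training job.

import sagemaker

# Placeholder setup: a real Session and the name of an existing training job.
session = sagemaker.session.Session()
description = session.sagemaker_client.describe_training_job(TrainingJobName=job_name)

# One unpacking replaces the per-method setup removed above: the CloudWatch Logs client,
# stream bookkeeping, and the colorized output helper all come from _logs_initializer.
instance_count, stream_names, positions, client, log_group, dot, color_wrap = \
    _logs_initializer(session, description, job='Training')

# log_group is '/aws/sagemaker/TrainingJobs' here; passing job='Transform' together with a
# describe_transform_job() description yields '/aws/sagemaker/TransformJobs' instead.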