@@ -1123,23 +1123,12 @@ def logs_for_job(self, job_name, wait=False, poll=10):  # noqa: C901 - suppress

         description = self.sagemaker_client.describe_training_job(TrainingJobName=job_name)
         print(secondary_training_status_message(description, None), end='')
-        instance_count = description['ResourceConfig']['InstanceCount']
-
-        stream_names = []  # The list of log streams
-        positions = {}  # The current position in each stream, map of stream name -> position

-        # Increase retries allowed (from default of 4), as we don't want waiting for a training job
-        # to be interrupted by a transient exception.
-        config = botocore.config.Config(retries={'max_attempts': 15})
-        client = self.boto_session.client('logs', config=config)
-        log_group = '/aws/sagemaker/TrainingJobs'
+        instance_count, stream_names, positions, client, log_group, dot, color_wrap = \
+            _logs_initializer(self, description, job='Training')

         state = _get_initial_job_state(description, 'TrainingJobStatus', wait)

-        dot = False
-
-        color_wrap = sagemaker.logs.ColorWrap()
-
         # The loop below implements a state machine that alternates between checking the job status and
         # reading whatever is available in the logs at this point. Note, that if we were called with
         # wait == False, we never check the job status.
@@ -1208,23 +1197,12 @@ def logs_for_transform_job(self, job_name, wait=False, poll=10):  # noqa: C901 -
         """

         description = self.sagemaker_client.describe_transform_job(TransformJobName=job_name)
-        instance_count = description['TransformResources']['InstanceCount']

-        stream_names = []  # The list of log streams
-        positions = {}  # The current position in each stream, map of stream name -> position
-
-        # Increase retries allowed (from default of 4), as we don't want waiting for a training job
-        # to be interrupted by a transient exception.
-        config = botocore.config.Config(retries={'max_attempts': 15})
-        client = self.boto_session.client('logs', config=config)
-        log_group = '/aws/sagemaker/TransformJobs'
+        instance_count, stream_names, positions, client, log_group, dot, color_wrap = \
+            _logs_initializer(self, description, job='Transform')

         state = _get_initial_job_state(description, 'TransformJobStatus', wait)

-        dot = False
-
-        color_wrap = sagemaker.logs.ColorWrap()
-
         # The loop below implements a state machine that alternates between checking the job status and
         # reading whatever is available in the logs at this point. Note, that if we were called with
         # wait == False, we never check the job status.
@@ -1643,6 +1621,28 @@ def _get_initial_job_state(description, status_key, wait):
     return LogState.TAILING if wait and not job_already_completed else LogState.COMPLETE


+def _logs_initializer(sagemaker_session, description, job):
+    if job == 'Training':
+        instance_count = description['ResourceConfig']['InstanceCount']
+    elif job == 'Transform':
+        instance_count = description['TransformResources']['InstanceCount']
+
+    stream_names = []  # The list of log streams
+    positions = {}  # The current position in each stream, map of stream name -> position
+
+    # Increase retries allowed (from default of 4), as we don't want waiting for a job
+    # to be interrupted by a transient exception.
+    config = botocore.config.Config(retries={'max_attempts': 15})
+    client = sagemaker_session.boto_session.client('logs', config=config)
+    log_group = '/aws/sagemaker/' + job + 'Jobs'
+
+    dot = False
+
+    color_wrap = sagemaker.logs.ColorWrap()
+
+    return instance_count, stream_names, positions, client, log_group, dot, color_wrap
+
+
 def _flush_log_streams(stream_names, instance_count, client, log_group, job_name, positions, dot, color_wrap):
     if len(stream_names) < instance_count:
         # Log streams are created whenever a container starts writing to stdout/err, so this list
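
For reference, a minimal sketch (not part of the diff) of how the new helper would be exercised once this change lands in `sagemaker.session`; the region, describe-job response, and asserted values below are made-up placeholders for illustration:

    import boto3
    import sagemaker
    from sagemaker.session import _logs_initializer  # helper added by this change

    # Hypothetical session and DescribeTrainingJob response, for illustration only.
    session = sagemaker.Session(boto_session=boto3.Session(region_name='us-west-2'))
    description = {'ResourceConfig': {'InstanceCount': 2}, 'TrainingJobStatus': 'InProgress'}

    # One call now replaces the per-method setup of the CloudWatch Logs client,
    # stream bookkeeping, and color wrapper in both log-tailing methods.
    instance_count, stream_names, positions, client, log_group, dot, color_wrap = \
        _logs_initializer(session, description, job='Training')

    assert instance_count == 2
    assert log_group == '/aws/sagemaker/TrainingJobs'
    assert stream_names == [] and positions == {} and dot is False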