Skip to content

change: enable line-too-long Pylint check #948

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ disable=
too-many-arguments,
invalid-name,
too-many-instance-attributes,
line-too-long, # We let Flake8 take care of this # TODO: Fix these and stop relying on flake8
len-as-condition, # TODO: Enable this check once pylint 2.4.0 is released and consumed due to the fix in https://github.com/PyCQA/pylint/issues/2684
import-error, # Since we run Pylint before any of our builds in tox, this will always fail
protected-access, # TODO: Fix access
Expand Down Expand Up @@ -228,8 +227,9 @@ max-nested-blocks=5
# Maximum number of characters on a single line.
max-line-length=100

# Regexp for a line that is allowed to be longer than the limit.
ignore-long-lines=^\s*(# )?<?https?://\S+>?$
# Regexp for a line that is allowed to be longer than the limit. Can only be a single regex.
# The following matches any semblance of a url of any sort.
ignore-long-lines=^\s*.*(# )?(<?https?:\/\/)?[-a-zA-Z0-9@:%%._\+~#=><]{1,256}\.[a-zA-Z0-9()]{1,6}\b[-a-zA-Z0-9()@:%%_\+.~#?&\/\/=<>]*\S*>?$

# Allow the body of an if to be on the same line as the test if there is no
# else.
Expand Down
2 changes: 1 addition & 1 deletion src/sagemaker/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def __init__(
(default: None). If specified, the estimator will create a channel pointing to
the model so the training job can download it. This model
can be a 'model.tar.gz' from a previous training job, or
other artifacts coming from a different source. More
other artifacts coming from a different source.
More information:
https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html#td-deserialization

Expand Down
7 changes: 4 additions & 3 deletions src/sagemaker/amazon/linear_learner.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ class LinearLearner(AmazonAlgorithmEstimatorBase):
"softmax_loss",
"auto",
),
'"logistic", "squared_loss", "absolute_loss", "hinge_loss", "eps_insensitive_squared_loss", '
'"eps_insensitive_absolute_loss", "quantile_loss", "huber_loss", "softmax_loss" or "auto"',
'"logistic", "squared_loss", "absolute_loss", "hinge_loss", "eps_insensitive_squared_loss",'
' "eps_insensitive_absolute_loss", "quantile_loss", "huber_loss", "softmax_loss" or "auto"',
str,
)
wd = hp("wd", ge(0), "A float greater-than or equal to 0", float)
Expand Down Expand Up @@ -369,7 +369,8 @@ def __init__(
num_classes is None or num_classes < 3
):
raise ValueError(
"For predictor_type 'multiclass_classifier', 'num_classes' should be set to a value greater than 2."
"For predictor_type 'multiclass_classifier', 'num_classes' should be set to a "
"value greater than 2."
)

def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT):
Expand Down
10 changes: 6 additions & 4 deletions src/sagemaker/analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def description(self, force_refresh=False):
if force_refresh:
self.clear_cache()
if not self._tuning_job_describe_result:
self._tuning_job_describe_result = self._sage_client.describe_hyper_parameter_tuning_job(
self._tuning_job_describe_result = self._sage_client.describe_hyper_parameter_tuning_job( # noqa: E501 # pylint: disable=line-too-long
HyperParameterTuningJobName=self.name
)
return self._tuning_job_describe_result
Expand Down Expand Up @@ -288,10 +288,12 @@ def _determine_timeinterval(self):
description = self._sage_client.describe_training_job(TrainingJobName=self.name)
start_time = self._start_time or description[u"TrainingStartTime"] # datetime object
# Incrementing end time by 1 min since CloudWatch drops seconds before finding the logs.
# This results in logs being searched in the time range in which the correct log line was not present.
# This results in logs being searched in the time range in which the correct log line was
# not present.
# Example - Log time - 2018-10-22 08:25:55
# Here calculated end time would also be 2018-10-22 08:25:55 (without 1 min addition)
# CW will consider end time as 2018-10-22 08:25 and will not be able to search the correct log.
# Here calculated end time would also be 2018-10-22 08:25:55 (without 1 min addition)
# CW will consider end time as 2018-10-22 08:25 and will not be able to search the
# correct log.
end_time = self._end_time or description.get(
u"TrainingEndTime", datetime.datetime.utcnow()
) + datetime.timedelta(minutes=1)
Expand Down
5 changes: 3 additions & 2 deletions src/sagemaker/chainer/estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,9 @@ def create_model(
the model. Default: use subnets and security groups from this Estimator.
* 'Subnets' (list[str]): List of subnet ids.
* 'SecurityGroupIds' (list[str]): List of security group ids.
entry_point (str): Path (absolute or relative) to the local Python source file which should be executed
as the entry point to training. If not specified, the training entry point is used.
entry_point (str): Path (absolute or relative) to the local Python source file which
should be executed as the entry point to training. If not specified, the training
entry point is used.
source_dir (str): Path (absolute or relative) to a directory with any other serving
source code dependencies aside from the entry point file.
If not specified, the model source directory from training is used.
Expand Down
27 changes: 13 additions & 14 deletions src/sagemaker/estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,15 +279,13 @@ def fit(self, inputs=None, wait=True, logs=True, job_name=None):

* (str) the S3 location where training data is saved.

* (dict[str, str] or dict[str, sagemaker.session.s3_input]) If using multiple channels for
training data, you can specify a dict mapping channel
names to strings or :func:`~sagemaker.session.s3_input`
objects.

* (sagemaker.session.s3_input) - channel configuration for S3 data sources that can provide
additional information as well as the path to the training
dataset. See :func:`sagemaker.session.s3_input` for full
details.
* (dict[str, str] or dict[str, sagemaker.session.s3_input]) If using multiple
channels for training data, you can specify a dict mapping channel names to
strings or :func:`~sagemaker.session.s3_input` objects.

* (sagemaker.session.s3_input) - channel configuration for S3 data sources that can
provide additional information as well as the path to the training dataset.
See :func:`sagemaker.session.s3_input` for full details.
wait (bool): Whether the call should wait until the job completes
(default: True).
logs (bool): Whether to show the logs produced by the job. Only
Expand Down Expand Up @@ -990,9 +988,9 @@ def create_model(
):
"""Create a model to deploy.

The serializer, deserializer, content_type, and accept arguments are only used to define a default
RealTimePredictor. They are ignored if an explicit predictor class is passed in. Other arguments
are passed through to the Model class.
The serializer, deserializer, content_type, and accept arguments are only used to define a
default RealTimePredictor. They are ignored if an explicit predictor class is passed in.
Other arguments are passed through to the Model class.

Args:
role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``,
Expand Down Expand Up @@ -1562,8 +1560,9 @@ def transformer(
worker per vCPU.
volume_kms_key (str): Optional. KMS key ID for encrypting the volume
attached to the ML compute instance (default: None).
entry_point (str): Path (absolute or relative) to the local Python source file which should be executed
as the entry point to training. If not specified, the training entry point is used.
entry_point (str): Path (absolute or relative) to the local Python source file which
should be executed as the entry point to training. If not specified, the training
entry point is used.

Returns:
sagemaker.transformer.Transformer: a ``Transformer`` object that can be used to start a
Expand Down
10 changes: 6 additions & 4 deletions src/sagemaker/fw_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,9 @@ def create_image_uri(
else:
family = instance_type.split(".")[1]

# For some frameworks, we have optimized images for specific families, e.g c5 or p3. In those cases,
# we use the family name in the image tag. In other cases, we use 'cpu' or 'gpu'.
# For some frameworks, we have optimized images for specific families, e.g. c5 or p3.
# In those cases, we use the family name in the image tag. In other cases, we use
# 'cpu' or 'gpu'.
if family in optimized_families:
device_type = family
elif family[0] in ["g", "p"]:
Expand Down Expand Up @@ -340,6 +341,7 @@ def _list_files_to_compress(script, directory):


def framework_name_from_image(image_name):
# noinspection LongLine
"""Extract the framework and Python version from the image name.

Args:
Expand All @@ -357,15 +359,15 @@ def framework_name_from_image(image_name):
tuple: A tuple containing:
str: The framework name str: The Python version str: The image tag
str: If the image is script mode
"""
"""
sagemaker_pattern = re.compile(ECR_URI_PATTERN)
sagemaker_match = sagemaker_pattern.match(image_name)
if sagemaker_match is None:
return None, None, None, None
# extract framework, python version and image tag
# We must support both the legacy and current image name format.
name_pattern = re.compile(
r"^(?:sagemaker(?:-rl)?-)?(tensorflow|mxnet|chainer|pytorch|scikit-learn)(?:-)?(scriptmode|training)?:(.*)-(.*?)-(py2|py3)$" # noqa: E501
r"^(?:sagemaker(?:-rl)?-)?(tensorflow|mxnet|chainer|pytorch|scikit-learn)(?:-)?(scriptmode|training)?:(.*)-(.*?)-(py2|py3)$" # noqa: E501 # pylint: disable=line-too-long
)
legacy_name_pattern = re.compile(r"^sagemaker-(tensorflow|mxnet)-(py2|py3)-(cpu|gpu):(.*)$")

Expand Down
12 changes: 7 additions & 5 deletions src/sagemaker/local/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,8 @@ def _get_container_environment(self, **kwargs):
# we only do 1 max concurrent transform in Local Mode
if "MaxConcurrentTransforms" in kwargs and int(kwargs["MaxConcurrentTransforms"]) > 1:
logger.warning(
"Local Mode only supports 1 ConcurrentTransform. Setting MaxConcurrentTransforms to 1"
"Local Mode only supports 1 ConcurrentTransform. Setting MaxConcurrentTransforms "
"to 1"
)
environment["SAGEMAKER_MAX_CONCURRENT_TRANSFORMS"] = "1"

Expand Down Expand Up @@ -287,8 +288,9 @@ def _get_required_defaults(self, **kwargs):

def _get_working_directory(self):
"""Placeholder docstring"""
# Root dir to use for intermediate data location. To make things simple we will write here regardless
# of the final destination. At the end the files will either be moved or uploaded to S3 and deleted.
# Root dir to use for intermediate data location. To make things simple we will write here
# regardless of the final destination. At the end the files will either be moved or
# uploaded to S3 and deleted.
root_dir = get_config_value("local.container_root", self.local_session.config)
if root_dir:
root_dir = os.path.abspath(root_dir)
Expand All @@ -313,8 +315,8 @@ def _prepare_data_transformation(self, input_data, batch_strategy):

def _perform_batch_inference(self, input_data, output_data, **kwargs):
# Transform the input data to feed the serving container. We need to first gather the files
# from S3 or Local FileSystem. Split them as required (Line, RecordIO, None) and finally batch them
# according to the batch strategy and limit the request size.
# from S3 or Local FileSystem. Split them as required (Line, RecordIO, None) and finally
# batch them according to the batch strategy and limit the request size.

"""
Args:
Expand Down
33 changes: 18 additions & 15 deletions src/sagemaker/local/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ def __init__(self, instance_type, instance_count, image, sagemaker_session=None)
self.instance_type = instance_type
self.instance_count = instance_count
self.image = image
# Since we are using a single docker network, Generate a random suffix to attach to the container names.
# This way multiple jobs can run in parallel.
# Since we are using a single docker network, generate a random suffix to attach to the
# container names. This way multiple jobs can run in parallel.
suffix = "".join(random.choice(string.ascii_lowercase + string.digits) for _ in range(5))
self.hosts = [
"{}-{}-{}".format(CONTAINER_PREFIX, i, suffix)
Expand Down Expand Up @@ -160,8 +160,8 @@ def train(self, input_data_config, output_data_config, hyperparameters, job_name
dirs_to_delete = [data_dir, shared_dir]
self._cleanup(dirs_to_delete)

# Print our Job Complete line to have a similar experience to training on SageMaker where you
# see this line at the end.
# Print our Job Complete line to have a similar experience to training on SageMaker where
# you see this line at the end.
print("===== Job Complete =====")
return artifacts

Expand Down Expand Up @@ -702,10 +702,10 @@ def _delete_tree(path):
try:
shutil.rmtree(path)
except OSError as exc:
# on Linux, when docker writes to any mounted volume, it uses the container's user. In most cases
# this is root. When the container exits and we try to delete them we can't because root owns those
# files. We expect this to happen, so we handle EACCESS. Any other error we will raise the
# exception up.
# on Linux, when docker writes to any mounted volume, it uses the container's user. In most
# cases this is root. When the container exits and we try to delete them we can't because
# root owns those files. We expect this to happen, so we handle EACCESS. Any other error
# we will raise the exception up.
if exc.errno == errno.EACCES:
logger.warning("Failed to delete: %s Please remove it manually.", path)
else:
Expand All @@ -724,13 +724,14 @@ def _aws_credentials(session):
secret_key = creds.secret_key
token = creds.token

# The presence of a token indicates the credentials are short-lived and as such are risky to be used as they
# might expire while running.
# The presence of a token indicates the credentials are short-lived and as such are risky
# to be used as they might expire while running.
# Long-lived credentials are available either through
# 1. boto session
# 2. EC2 Metadata Service (SageMaker Notebook instances or EC2 instances with roles attached them)
# Short-lived credentials available via boto session are permitted to support running on machines with no
# EC2 Metadata Service but a warning is provided about their danger
# 2. EC2 Metadata Service (SageMaker Notebook instances or EC2 instances with roles
# attached to them)
# Short-lived credentials available via boto session are permitted to support running on
# machines with no EC2 Metadata Service but a warning is provided about their danger
if token is None:
logger.info("Using the long-lived AWS credentials found in session")
return [
Expand All @@ -739,15 +740,17 @@ def _aws_credentials(session):
]
if not _aws_credentials_available_in_metadata_service():
logger.warning(
"Using the short-lived AWS credentials found in session. They might expire while running."
"Using the short-lived AWS credentials found in session. They might expire while "
"running."
)
return [
"AWS_ACCESS_KEY_ID=%s" % (str(access_key)),
"AWS_SECRET_ACCESS_KEY=%s" % (str(secret_key)),
"AWS_SESSION_TOKEN=%s" % (str(token)),
]
logger.info(
"No AWS credentials found in session but credentials from EC2 Metadata Service are available."
"No AWS credentials found in session but credentials from EC2 Metadata Service are "
"available."
)
return None
except Exception as e: # pylint: disable=broad-except
Expand Down
5 changes: 2 additions & 3 deletions src/sagemaker/local/local_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,8 @@ def create_training_job(
OutputDataConfig(dict): Identifies the location where you want to save the results of
model training.
ResourceConfig(dict): Identifies the resources to use for local model training.
HyperParameters(dict) [optional]: Specifies these algorithm-specific parameters to influence the
quality of
the final model.
HyperParameters(dict) [optional]: Specifies these algorithm-specific parameters to
influence the quality of the final model.
**kwargs:

Returns:
Expand Down
6 changes: 3 additions & 3 deletions src/sagemaker/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,9 +406,9 @@ def deploy(
this model completes (default: True).

Returns:
callable[string, sagemaker.session.Session] or None: Invocation of ``self.predictor_cls`` on
the created endpoint name, if ``self.predictor_cls`` is not
None. Otherwise, return None.
callable[string, sagemaker.session.Session] or None: Invocation of
``self.predictor_cls`` on the created endpoint name, if ``self.predictor_cls``
is not None. Otherwise, return None.
"""
if not self.sagemaker_session:
if instance_type in ("local", "local_gpu"):
Expand Down
Loading