aws · mvsusp · Nov 16, 2018 · Nov 16, 2018 · Nov 16, 2018 · Nov 16, 2018
@@ -14,6 +14,7 @@ CHANGELOG
 * feature: HyperparameterTuner: Make input channels optional
 * feature: Add support for Chainer 5.0
 * feature: Estimator: add support for MetricDefinitions
+* feature: source_dir accepts a list of directories
 
 1.14.2
 ======

@@ -145,10 +145,10 @@ Optional arguments
 
 The following are optional arguments. When you create a ``Chainer`` object, you can specify these as keyword arguments.
 
--  ``source_dir`` Path (absolute or relative) to a directory with any
-   other training source code dependencies including the entry point
-   file. Structure within this directory will be preserved when training
-   on SageMaker.
+-  ``source_dir`` Single path (absolute or relative) or a list of paths
+   to directories with any other training source code dependencies
+   aside from the entry point file (default: None). The structures
+   within these directories are preserved when training on Amazon SageMaker.
 -  ``hyperparameters`` Hyperparameters that will be used for training.
    Will be made accessible as a dict[str, str] to the training code on
    SageMaker. For convenience, accepts other types besides str, but

@@ -64,9 +64,9 @@ def __init__(self, entry_point, use_mpi=None, num_processes=None, process_slots_
                 set to the number of GPUs on the instance (on GPU instances), or one (on CPU instances).
             additional_mpi_options (str): String of options to the 'mpirun' command used to run the entry point.
                 For example, '-X NCCL_DEBUG=WARN' will pass that option string to the mpirun command.
-            source_dir (str): Path (absolute or relative) to a directory with any other training
-                source code dependencies aside from tne entry point file (default: None). Structure within this
-                directory are preserved when training on Amazon SageMaker.
+            source_dir (str or [str]): Single path (absolute or relative) or a list of paths to directories with
+                any other training source code dependencies aside from the entry point file (default: None).
+                The structures within these directories are preserved when training on Amazon SageMaker.
             hyperparameters (dict): Hyperparameters that will be used for training (default: None).
                 The hyperparameters are made accessible as a dict[str, str] to the training code on SageMaker.
                 For convenience, this accepts other types for keys and values, but ``str()`` will be called

@@ -632,9 +632,9 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl
         Args:
             entry_point (str): Path (absolute or relative) to the Python source file which should be executed
                 as the entry point to training. This should be compatible with either Python 2.7 or Python 3.5.
-            source_dir (str): Path (absolute or relative) to a directory with any other training
-                source code dependencies aside from tne entry point file (default: None). Structure within this
-                directory are preserved when training on Amazon SageMaker.
+            source_dir (str or [str]): Single path (absolute or relative) or a list of paths to directories with
+                any other training source code dependencies aside from the entry point file (default: None).
+                The structures within these directories are preserved when training on Amazon SageMaker.
             hyperparameters (dict): Hyperparameters that will be used for training (default: None).
                 The hyperparameters are made accessible as a dict[str, str] to the training code on SageMaker.
                 For convenience, this accepts other types for keys and values, but ``str()`` will be called
@@ -651,6 +651,14 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl
             **kwargs: Additional kwargs passed to the ``EstimatorBase`` constructor.
         """
         super(Framework, self).__init__(**kwargs)
+
+        if isinstance(source_dir, list):
+            self.source_dir = source_dir[0]
+            self._additional_files = source_dir[1:]
+        else:
+            self.source_dir = source_dir
+            self._additional_files = []
+
         self.source_dir = source_dir
         self.entry_point = entry_point
         if enable_cloudwatch_metrics:
@@ -718,7 +726,8 @@ def _stage_user_code_in_s3(self):
                                   bucket=code_bucket,
                                   s3_key_prefix=code_s3_prefix,
                                   script=self.entry_point,
-                                  directory=self.source_dir)
+                                  directory=self.source_dir,
+                                  additional_files=self._additional_files)
 
     def _model_source_dir(self):
         """Get the appropriate value to pass as source_dir to model constructor on deploying

@@ -107,7 +107,7 @@ def validate_source_dir(script, directory):
     return True
 
 
-def tar_and_upload_dir(session, bucket, s3_key_prefix, script, directory):
+def tar_and_upload_dir(session, bucket, s3_key_prefix, script, directory, additional_files=None):
     """Pack and upload source files to S3 only if directory is empty or local.
 
     Note:
@@ -118,31 +118,43 @@ def tar_and_upload_dir(session, bucket, s3_key_prefix, script, directory):
         bucket (str): S3 bucket to which the compressed file is uploaded.
         s3_key_prefix (str): Prefix for the S3 key.
         script (str): Script filename.
-        directory (str): Directory containing the source file. If it starts with "s3://", no action is taken.
+        directory (str or None): Directory containing the source file. If it starts with "s3://", no action is taken.
 
     Returns:
         sagemaker.fw_utils.UserCode: An object with the S3 bucket and key (S3 prefix) and script name.
     """
-    if directory:
-        if directory.lower().startswith("s3://"):
-            return UploadedCode(s3_prefix=directory, script_name=os.path.basename(script))
-        else:
-            script_name = script
-            source_files = [os.path.join(directory, name) for name in os.listdir(directory)]
+    key = '%s/sourcedir.tar.gz' % s3_key_prefix
+
+    if directory and directory.lower().startswith("s3://"):
+        return UploadedCode(s3_prefix=directory, script_name=os.path.basename(script))
     else:
-        # If no directory is specified, the script parameter needs to be a valid relative path.
-        os.path.exists(script)
-        script_name = os.path.basename(script)
-        source_files = [script]
+        source_files = _list_root_files(script, directory, additional_files)
+        _upload_code(session, bucket, key, source_files)
+
+        script_name = script if directory else os.path.basename(script)
+        return UploadedCode(s3_prefix='s3://%s/%s' % (bucket, key), script_name=script_name)
 
-    s3 = session.resource('s3')
-    key = '{}/{}'.format(s3_key_prefix, 'sourcedir.tar.gz')
 
+def _upload_code(session, bucket, key, source_files):
     tar_file = sagemaker.utils.create_tar_file(source_files)
-    s3.Object(bucket, key).upload_file(tar_file)
-    os.remove(tar_file)
 
-    return UploadedCode(s3_prefix='s3://{}/{}'.format(bucket, key), script_name=script_name)
+    try:
+        session.resource('s3').Object(bucket, key).upload_file(tar_file)
+    finally:
+        os.remove(tar_file)
+
+
+def _list_root_files(script, directory, additional_files):
+    additional_files = additional_files or []
+    basedir = directory if directory else os.path.dirname(script)
+    files = [basedir] + additional_files
+
+    for file in files:
+        if os.path.isfile(file):
+            yield file
+        else:
+            for name in os.listdir(file):
+                yield os.path.join(file, name)
 
 
 def framework_name_from_image(image_name):

@@ -136,9 +136,8 @@ def __init__(self, model_data, image, role, entry_point, source_dir=None, predic
             role (str): An IAM role name or ARN for SageMaker to access AWS resources on your behalf.
             entry_point (str): Path (absolute or relative) to the Python source file which should be executed
                 as the entry point to model hosting. This should be compatible with either Python 2.7 or Python 3.5.
-            source_dir (str): Path (absolute or relative) to a directory with any other training
-                source code dependencies aside from tne entry point file (default: None). Structure within this
-                directory will be preserved when training on SageMaker.
+            source_dir (str or [str]): Single path (absolute or relative) or a list of paths to directories with
+                any other training source code dependencies aside from the entry point file (default: None).
                 If the directory points to S3, no code will be uploaded and the S3 location will be used instead.
             predictor_cls (callable[string, sagemaker.session.Session]): A function to call to create
                a predictor (default: None). If not None, ``deploy`` will return the result of invoking
@@ -158,8 +157,14 @@ def __init__(self, model_data, image, role, entry_point, source_dir=None, predic
         """
         super(FrameworkModel, self).__init__(model_data, image, role, predictor_cls=predictor_cls, env=env, name=name,
                                              sagemaker_session=sagemaker_session, **kwargs)
+        if isinstance(source_dir, list):
+            self.source_dir = source_dir[0]
+            self._additional_files = source_dir[1:]
+        else:
+            self.source_dir = source_dir
+            self._additional_files = []
+
         self.entry_point = entry_point
-        self.source_dir = source_dir
         self.enable_cloudwatch_metrics = enable_cloudwatch_metrics
         self.container_log_level = container_log_level
         if code_location:

@@ -267,10 +267,10 @@ Optional arguments
 
 The following are optional arguments. When you create an ``MXNet`` object, you can specify these as keyword arguments.
 
--  ``source_dir`` Path (absolute or relative) to a directory with any
-   other training source code dependencies including the entry point
-   file. Structure within this directory will be preserved when training
-   on SageMaker.
+-  ``source_dir`` Single path (absolute or relative) or a list of paths
+   to directories with any other training source code dependencies
+   aside from the entry point file (default: None). The structures
+   within these directories are preserved when training on Amazon SageMaker.
 -  ``hyperparameters`` Hyperparameters that will be used for training.
    Will be made accessible as a dict[str, str] to the training code on
    SageMaker. For convenience, accepts other types besides str, but

@@ -50,9 +50,9 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, py_versio
         Args:
             entry_point (str): Path (absolute or relative) to the Python source file which should be executed
                 as the entry point to training. This should be compatible with either Python 2.7 or Python 3.5.
-            source_dir (str): Path (absolute or relative) to a directory with any other training
-                source code dependencies aside from tne entry point file (default: None). Structure within this
-                directory are preserved when training on Amazon SageMaker.
+            source_dir (str or [str]): Single path (absolute or relative) or a list of paths to directories with
+                any other training source code dependencies aside from the entry point file (default: None).
+                The structures within these directories are preserved when training on Amazon SageMaker.
             hyperparameters (dict): Hyperparameters that will be used for training (default: None).
                 The hyperparameters are made accessible as a dict[str, str] to the training code on SageMaker.
                 For convenience, this accepts other types for keys and values, but ``str()`` will be called

@@ -171,10 +171,10 @@ Optional arguments
 
 The following are optional arguments. When you create a ``PyTorch`` object, you can specify these as keyword arguments.
 
--  ``source_dir`` Path (absolute or relative) to a directory with any
-   other training source code dependencies including the entry point
-   file. Structure within this directory will be preserved when training
-   on SageMaker.
+-  ``source_dir`` Single path (absolute or relative) or a list of paths
+   to directories with any other training source code dependencies
+   aside from the entry point file (default: None). The structures
+   within these directories are preserved when training on Amazon SageMaker.
 -  ``hyperparameters`` Hyperparameters that will be used for training.
    Will be made accessible as a dict[str, str] to the training code on
    SageMaker. For convenience, accepts other types besides strings, but

@@ -47,9 +47,9 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, py_versio
         Args:
             entry_point (str): Path (absolute or relative) to the Python source file which should be executed
                 as the entry point to training. This should be compatible with either Python 2.7 or Python 3.5.
-            source_dir (str): Path (absolute or relative) to a directory with any other training
-                source code dependencies aside from tne entry point file (default: None). Structure within this
-                directory are preserved when training on Amazon SageMaker.
+            source_dir (str or [str]): Single path (absolute or relative) or a list of paths to directories with
+                any other training source code dependencies aside from the entry point file (default: None).
+                The structures within these directories are preserved when training on Amazon SageMaker.
             hyperparameters (dict): Hyperparameters that will be used for training (default: None).
                 The hyperparameters are made accessible as a dict[str, str] to the training code on SageMaker.
                 For convenience, this accepts other types for keys and values, but ``str()`` will be called

@@ -405,10 +405,10 @@ Optional Arguments
 The following are optional arguments. When you create a ``TensorFlow`` object,
 you can specify these as keyword arguments.
 
--  ``source_dir (str)`` Path (absolute or relative) to a directory with any
-   other training source code dependencies including the entry point
-   file. Structure within this directory will be preserved when training
-   on SageMaker.
+-  ``source_dir (str or [str])`` Single path (absolute or relative) or a list of paths
+   to directories with any other training source code dependencies
+   aside from the entry point file (default: None). The structures
+   within these directories are preserved when training on Amazon SageMaker.
 -  ``requirements_file (str)`` Path to a ``requirements.txt`` file. The path should
    be within and relative to ``source_dir``. This is a file containing a list of items to be
    installed using pip install. Details on the format can be found in the