12
12
# language governing permissions and limitations under the License.
13
13
from __future__ import absolute_import
14
14
15
+ from collections import namedtuple
16
+
15
17
import os
16
18
import re
19
+ import sagemaker .utils
17
20
import shutil
18
21
import tempfile
19
- from collections import namedtuple
20
22
from six .moves .urllib .parse import urlparse
21
23
22
- import sagemaker .utils
23
-
24
24
# File name of the tar archive that tar_and_upload_dir creates locally (joined onto its
# ``tmp`` path) before the archive is uploaded to S3.
_TAR_SOURCE_FILENAME = 'source.tar.gz'
25
25
26
26
# Value returned by tar_and_upload_dir:
#   s3_prefix   - S3 location of the code (the S3 URI passed in as ``directory`` when the
#                 code is already in S3).
#   script_name - name of the entry-point script.
# NOTE(review): the namedtuple typename is 'UserCode' while the module-level name is
# UploadedCode, so repr() of an instance shows ``UserCode(...)`` - confirm this is intended.
UploadedCode = namedtuple ('UserCode' , ['s3_prefix' , 'script_name' ])
@@ -112,24 +112,32 @@ def validate_source_dir(script, directory):
112
112
113
113
114
114
def tar_and_upload_dir (session , bucket , s3_key_prefix , script , directory , dependencies = None ):
115
- """Pack and upload source files to S3 only if directory is empty or local.
115
+ """Package source files and upload a compressed tar file to S3. The S3 location will be
116
+ ``s3://<bucket>/s3_key_prefix/sourcedir.tar.gz``.
117
+
118
+ If directory is an S3 URI, an UploadedCode object will be returned, but nothing will be
119
+ uploaded to S3 (this allows reuse of code already in S3).
120
+
121
+ If directory is None, the script will be added to the archive at ``./<basename of script>``.
116
122
117
- Note:
118
- If the directory points to S3 no action is taken.
123
+ If directory is not None, the (recursive) contents of the directory will be added to
124
+ the archive. directory is treated as the base path of the archive, and the script name is
125
+ assumed to be a filename or relative path inside the directory.
119
126
120
127
Args:
121
128
session (boto3.Session): Boto session used to access S3.
122
129
bucket (str): S3 bucket to which the compressed file is uploaded.
123
130
s3_key_prefix (str): Prefix for the S3 key.
124
- script (str): Script filename.
125
- directory (str or None ): Directory containing the source file. If it starts with
126
- "s3://", no action is taken.
127
- dependencies (List[str]): A list of paths to directories (absolute or relative)
131
+ script (str): Script filename or path .
132
+ directory (str): Optional. Directory containing the source file. If it starts with "s3://",
133
+ no action is taken.
134
+ dependencies (List[str]): Optional. A list of paths to directories (absolute or relative)
128
135
containing additional libraries that will be copied into
129
136
/opt/ml/lib
130
137
131
138
Returns:
132
- sagemaker.fw_utils.UserCode: An object with the S3 bucket and key (S3 prefix) and script name.
139
+ sagemaker.fw_utils.UploadedCode: An object with the S3 bucket and key (S3 prefix) and
140
+ script name.
133
141
"""
134
142
if directory and directory .lower ().startswith ('s3://' ):
135
143
return UploadedCode (s3_prefix = directory , script_name = os .path .basename (script ))
@@ -141,7 +149,8 @@ def tar_and_upload_dir(session, bucket, s3_key_prefix, script, directory, depend
141
149
142
150
try :
143
151
source_files = _list_files_to_compress (script , directory ) + dependencies
144
- tar_file = sagemaker .utils .create_tar_file (source_files , os .path .join (tmp , _TAR_SOURCE_FILENAME ))
152
+ tar_file = sagemaker .utils .create_tar_file (source_files ,
153
+ os .path .join (tmp , _TAR_SOURCE_FILENAME ))
145
154
146
155
session .resource ('s3' ).Object (bucket , key ).upload_file (tar_file )
147
156
finally :
0 commit comments