Skip to content

Pass kms id as parameter for uploading code with Server side encryption #693

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Mar 13, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
CHANGELOG
=========

1.18.5dev
======
1.18.5.dev
==========

* bug-fix: pass kms id as parameter for uploading code with Server side encryption
* feature: ``PipelineModel``: Create a Transformer from a PipelineModel

1.18.4
Expand Down
9 changes: 7 additions & 2 deletions src/sagemaker/estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -863,18 +863,23 @@ def _stage_user_code_in_s3(self):

"""
if self.code_location is None:
code_bucket = self.sagemaker_session.default_bucket()
code_bucket, _ = parse_s3_url(self.output_path)
code_s3_prefix = '{}/source'.format(self._current_job_name)
else:
code_bucket, key_prefix = parse_s3_url(self.code_location)
code_s3_prefix = '/'.join(filter(None, [key_prefix, self._current_job_name, 'source']))

output_bucket, _ = parse_s3_url(self.output_path)

kms_key = self.output_kms_key if code_bucket == output_bucket else None

return tar_and_upload_dir(session=self.sagemaker_session.boto_session,
bucket=code_bucket,
s3_key_prefix=code_s3_prefix,
script=self.entry_point,
directory=self.source_dir,
dependencies=self.dependencies)
dependencies=self.dependencies,
kms_key=kms_key)

def _model_source_dir(self):
"""Get the appropriate value to pass as source_dir to model constructor on deploying
Expand Down
11 changes: 9 additions & 2 deletions src/sagemaker/fw_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,8 @@ def validate_source_dir(script, directory):
return True


def tar_and_upload_dir(session, bucket, s3_key_prefix, script, directory, dependencies=None):
def tar_and_upload_dir(session, bucket, s3_key_prefix, script,
directory=None, dependencies=None, kms_key=None):
"""Package source files and upload a compress tar file to S3. The S3 location will be
``s3://<bucket>/s3_key_prefix/sourcedir.tar.gz``.

Expand All @@ -159,6 +160,7 @@ def tar_and_upload_dir(session, bucket, s3_key_prefix, script, directory, depend
dependencies (List[str]): Optional. A list of paths to directories (absolute or relative)
containing additional libraries that will be copied into
/opt/ml/lib
kms_key (str): Optional. KMS key ID used to upload objects to the bucket (default: None).

Returns:
sagemaker.fw_utils.UserCode: An object with the S3 bucket and key (S3 prefix) and
Expand All @@ -177,7 +179,12 @@ def tar_and_upload_dir(session, bucket, s3_key_prefix, script, directory, depend
tar_file = sagemaker.utils.create_tar_file(source_files,
os.path.join(tmp, _TAR_SOURCE_FILENAME))

session.resource('s3').Object(bucket, key).upload_file(tar_file)
if kms_key:
extra_args = {'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': kms_key}
else:
extra_args = None

session.resource('s3').Object(bucket, key).upload_file(tar_file, ExtraArgs=extra_args)
finally:
shutil.rmtree(tmp)

Expand Down
77 changes: 77 additions & 0 deletions tests/integ/kms_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
# language governing permissions and limitations under the License.
from __future__ import absolute_import

from botocore import exceptions

KEY_ALIAS = "SageMakerIntegTestKmsKey"
KEY_POLICY = '''
{{
Expand Down Expand Up @@ -56,3 +58,78 @@ def get_or_create_kms_key(kms_client, account_id):
return kms_key_arn
else:
return _create_kms_key(kms_client, account_id)


KMS_BUCKET_POLICY = """{
"Version": "2012-10-17",
"Id": "PutObjPolicy",
"Statement": [
{
"Sid": "DenyIncorrectEncryptionHeader",
"Effect": "Deny",
"Principal": "*",
"Action": "s3:PutObject",
"Resource": "arn:aws:s3:::%s/*",
"Condition": {
"StringNotEquals": {
"s3:x-amz-server-side-encryption": "aws:kms"
}
}
},
{
"Sid": "DenyUnEncryptedObjectUploads",
"Effect": "Deny",
"Principal": "*",
"Action": "s3:PutObject",
"Resource": "arn:aws:s3:::%s/*",
"Condition": {
"Null": {
"s3:x-amz-server-side-encryption": "true"
}
}
}
]
}"""


def get_or_create_bucket_with_encryption(boto_session):
account = boto_session.client('sts').get_caller_identity()['Account']
kms_key_arn = get_or_create_kms_key(boto_session.client('kms'), account)

region = boto_session.region_name
bucket_name = 'sagemaker-{}-{}-with-kms'.format(region, account)

s3 = boto_session.client('s3')
try:
# 'us-east-1' cannot be specified because it is the default region:
# https://github.com/boto/boto3/issues/125
if region == 'us-east-1':
s3.create_bucket(Bucket=bucket_name)
else:
s3.create_bucket(Bucket=bucket_name,
CreateBucketConfiguration={'LocationConstraint': region})

except exceptions.ClientError as e:
if e.response['Error']['Code'] != 'BucketAlreadyOwnedByYou':
raise

s3.put_bucket_encryption(
Bucket=bucket_name,
ServerSideEncryptionConfiguration={
'Rules': [
{
'ApplyServerSideEncryptionByDefault': {
'SSEAlgorithm': 'aws:kms',
'KMSMasterKeyID': kms_key_arn
}
},
]
}
)

s3.put_bucket_policy(
Bucket=bucket_name,
Policy=KMS_BUCKET_POLICY % (bucket_name, bucket_name)
)

return 's3://' + bucket_name, kms_key_arn
30 changes: 30 additions & 0 deletions tests/integ/test_tf_script_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,15 @@
from __future__ import absolute_import

import os
import time

import pytest

import boto3
from sagemaker.tensorflow import TensorFlow
from six.moves.urllib.parse import urlparse
import tests.integ as integ
from tests.integ import kms_utils
import tests.integ.timeout as timeout

RESOURCE_PATH = os.path.join(os.path.dirname(__file__), '..', 'data', 'tensorflow_mnist')
Expand Down Expand Up @@ -52,6 +55,33 @@ def test_mnist(sagemaker_session, instance_type):
['graph.pbtxt', 'model.ckpt-0.index', 'model.ckpt-0.meta', 'saved_model.pb'])


def test_server_side_encryption(sagemaker_session):

bucket_with_kms, kms_key = kms_utils.get_or_create_bucket_with_encryption(sagemaker_session.boto_session)

output_path = os.path.join(bucket_with_kms, 'test-server-side-encryption', time.strftime('%y%m%d-%H%M'))

estimator = TensorFlow(entry_point=SCRIPT,
role='SageMakerRole',
train_instance_count=1,
train_instance_type='ml.c5.xlarge',
sagemaker_session=sagemaker_session,
py_version='py3',
framework_version='1.11',
base_job_name='test-server-side-encryption',
code_location=output_path,
output_path=output_path,
model_dir='/opt/ml/model',
output_kms_key=kms_key)

inputs = estimator.sagemaker_session.upload_data(
path=os.path.join(RESOURCE_PATH, 'data'),
key_prefix='scriptmode/mnist')

with timeout.timeout(minutes=integ.TRAINING_DEFAULT_TIMEOUT_MINUTES):
estimator.fit(inputs)


@pytest.mark.canary_quick
@pytest.mark.skipif(integ.PYTHON_VERSION != 'py3', reason="Script Mode tests are only configured to run with Python 3")
def test_mnist_distributed(sagemaker_session, instance_type):
Expand Down
58 changes: 58 additions & 0 deletions tests/unit/test_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -761,6 +761,64 @@ def test_container_log_level(sagemaker_session):
assert train_kwargs['hyperparameters']['sagemaker_container_log_level'] == '10'


@patch('sagemaker.utils')
def test_same_code_location_keeps_kms_key(utils, sagemaker_session):
fw = DummyFramework(entry_point=SCRIPT_PATH,
role='DummyRole',
sagemaker_session=sagemaker_session,
train_instance_count=INSTANCE_COUNT,
train_instance_type=INSTANCE_TYPE,
output_kms_key='kms-key')

fw.fit(wait=False)

extra_args = {'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'kms-key'}
obj = sagemaker_session.boto_session.resource('s3').Object

obj.assert_called_with('mybucket', '%s/source/sourcedir.tar.gz' % fw._current_job_name)

obj().upload_file.assert_called_with(utils.create_tar_file(), ExtraArgs=extra_args)


@patch('sagemaker.utils')
def test_different_code_location_kms_key(utils, sagemaker_session):
fw = DummyFramework(entry_point=SCRIPT_PATH,
role='DummyRole',
sagemaker_session=sagemaker_session,
code_location='s3://another-location',
train_instance_count=INSTANCE_COUNT,
train_instance_type=INSTANCE_TYPE,
output_kms_key='kms-key')

fw.fit(wait=False)

obj = sagemaker_session.boto_session.resource('s3').Object

obj.assert_called_with('another-location', '%s/source/sourcedir.tar.gz' % fw._current_job_name)

obj().upload_file.assert_called_with(utils.create_tar_file(), ExtraArgs=None)


@patch('sagemaker.utils')
def test_default_code_location_uses_output_path(utils, sagemaker_session):
fw = DummyFramework(entry_point=SCRIPT_PATH,
role='DummyRole',
sagemaker_session=sagemaker_session,
output_path='s3://output_path',
train_instance_count=INSTANCE_COUNT,
train_instance_type=INSTANCE_TYPE,
output_kms_key='kms-key')

fw.fit(wait=False)

obj = sagemaker_session.boto_session.resource('s3').Object

obj.assert_called_with('output_path', '%s/source/sourcedir.tar.gz' % fw._current_job_name)

extra_args = {'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'kms-key'}
obj().upload_file.assert_called_with(utils.create_tar_file(), ExtraArgs=extra_args)


def test_wait_without_logs(sagemaker_session):
training_job = _TrainingJob(sagemaker_session, JOB_NAME)

Expand Down
17 changes: 17 additions & 0 deletions tests/unit/test_fw_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,23 @@ def test_tar_and_upload_dir_s3(sagemaker_session):
assert result == fw_utils.UploadedCode('s3://m', 'mnist.py')


@patch('sagemaker.utils')
def test_tar_and_upload_dir_s3_with_kms(utils, sagemaker_session):

result = fw_utils.tar_and_upload_dir(sagemaker_session,
'mybucker',
'something/source',
'mnist.py',
kms_key='kms-key')

assert result == fw_utils.UploadedCode('s3://mybucker/something/source/sourcedir.tar.gz',
'mnist.py')

extra_args = {'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'kms-key'}
obj = sagemaker_session.resource('s3').Object('', '')
obj.upload_file.assert_called_with(utils.create_tar_file(), ExtraArgs=extra_args)


def test_validate_source_dir_does_not_exits(sagemaker_session):
script = 'mnist.py'
directory = ' !@#$%^&*()path probably in not there.!@#$%^&*()'
Expand Down