From 8dd4153f8fc6cd42861b9f87b38060d1e68a2158 Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Tue, 28 Jan 2020 17:05:03 -0800 Subject: [PATCH 1/8] modify session and kms_utils to check for S3 bucket before creation Previously, the code would create the bucket and enact business logic based on the exceptions thrown. This commit makes it such that the code checks that the bucket exists before trying to create it. In addition to being cleaner, this also avoids issues if customers have no S3 bucket creation permissions. --- src/sagemaker/session.py | 53 ++++++++++++++++--------------- tests/integ/kms_utils.py | 38 ++++++++++++++-------- tests/unit/test_default_bucket.py | 5 +-- 3 files changed, 55 insertions(+), 41 deletions(-) diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index 9eab2222df..5c34966b95 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -343,34 +343,35 @@ def default_bucket(self): default_bucket = "sagemaker-{}-{}".format(region, account) s3 = self.boto_session.resource("s3") - try: - # 'us-east-1' cannot be specified because it is the default region: - # https://github.com/boto/boto3/issues/125 - if region == "us-east-1": - s3.create_bucket(Bucket=default_bucket) - else: - s3.create_bucket( - Bucket=default_bucket, CreateBucketConfiguration={"LocationConstraint": region} - ) + bucket = self.boto_session.resource("s3").Bucket(name=default_bucket) + if bucket.creation_date is None: + try: + if region == "us-east-1": + # 'us-east-1' cannot be specified because it is the default region: + # https://github.com/boto/boto3/issues/125 + s3.create_bucket(Bucket=default_bucket) + else: + s3.create_bucket( + Bucket=default_bucket, + CreateBucketConfiguration={"LocationConstraint": region}, + ) - LOGGER.info("Created S3 bucket: %s", default_bucket) - except ClientError as e: - error_code = e.response["Error"]["Code"] - message = e.response["Error"]["Message"] + LOGGER.info("Created S3 bucket: %s", default_bucket) + except ClientError as e: + error_code = e.response["Error"]["Code"] + message = e.response["Error"]["Message"] - if error_code == "BucketAlreadyOwnedByYou": - pass - elif ( - error_code == "OperationAborted" and "conflicting conditional operation" in message - ): - # If this bucket is already being concurrently created, we don't need to create it - # again. - pass - elif error_code == "TooManyBuckets": - # Succeed if the default bucket exists - s3.meta.client.head_bucket(Bucket=default_bucket) - else: - raise + if error_code == "BucketAlreadyOwnedByYou": + pass + elif ( + error_code == "OperationAborted" + and "conflicting conditional operation" in message + ): + # If this bucket is already being concurrently created, we don't need to create + # it again. + pass + else: + raise self._default_bucket = default_bucket diff --git a/tests/integ/kms_utils.py b/tests/integ/kms_utils.py index f41123c73c..04aed3b876 100644 --- a/tests/integ/kms_utils.py +++ b/tests/integ/kms_utils.py @@ -174,19 +174,31 @@ def bucket_with_encryption(boto_session, sagemaker_role): bucket_name = "sagemaker-{}-{}-with-kms".format(region, account) s3 = boto_session.client("s3") - try: - # 'us-east-1' cannot be specified because it is the default region: - # https://github.com/boto/boto3/issues/125 - if region == "us-east-1": - s3.create_bucket(Bucket=bucket_name) - else: - s3.create_bucket( - Bucket=bucket_name, CreateBucketConfiguration={"LocationConstraint": region} - ) - - except exceptions.ClientError as e: - if e.response["Error"]["Code"] != "BucketAlreadyOwnedByYou": - raise + bucket = boto_session.resource("s3").Bucket(name=bucket_name) + if bucket.creation_date is None: + try: + if region == "us-east-1": + # 'us-east-1' cannot be specified because it is the default region: + # https://github.com/boto/boto3/issues/125 + s3.create_bucket(Bucket=bucket_name) + else: + s3.create_bucket( + Bucket=bucket_name, CreateBucketConfiguration={"LocationConstraint": region} + ) + except exceptions.ClientError as e: + error_code = e.response["Error"]["Code"] + message = e.response["Error"]["Message"] + + if error_code == "BucketAlreadyOwnedByYou": + pass + elif ( + error_code == "OperationAborted" and "conflicting conditional operation" in message + ): + # If this bucket is already being concurrently created, we don't need to create it + # again. + pass + else: + raise s3.put_bucket_encryption( Bucket=bucket_name, diff --git a/tests/unit/test_default_bucket.py b/tests/unit/test_default_bucket.py index 6b72a21e90..50ceda7c56 100644 --- a/tests/unit/test_default_bucket.py +++ b/tests/unit/test_default_bucket.py @@ -26,8 +26,9 @@ def sagemaker_session(): boto_mock = Mock(name="boto_session", region_name=REGION) boto_mock.client("sts").get_caller_identity.return_value = {"Account": ACCOUNT_ID} - ims = sagemaker.Session(boto_session=boto_mock) - return ims + sagemaker_session = sagemaker.Session(boto_session=boto_mock) + sagemaker_session.boto_session.resource("s3").Bucket().creation_date = None + return sagemaker_session def test_default_bucket_s3_create_call(sagemaker_session): From c6f0aa7720ae753ff961751a35c9e1d3cf74c402 Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Tue, 28 Jan 2020 17:21:57 -0800 Subject: [PATCH 2/8] add region name to s3 clients and resources --- src/sagemaker/session.py | 4 ++-- tests/integ/kms_utils.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index 5c34966b95..f340ba2c6d 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -342,8 +342,8 @@ def default_bucket(self): ).get_caller_identity()["Account"] default_bucket = "sagemaker-{}-{}".format(region, account) - s3 = self.boto_session.resource("s3") - bucket = self.boto_session.resource("s3").Bucket(name=default_bucket) + s3 = self.boto_session.resource("s3", region_name=region) + bucket = self.boto_session.resource("s3", region_name=region).Bucket(name=default_bucket) if bucket.creation_date is None: try: if region == "us-east-1": diff --git a/tests/integ/kms_utils.py b/tests/integ/kms_utils.py index 04aed3b876..d8dd2677b7 100644 --- a/tests/integ/kms_utils.py +++ b/tests/integ/kms_utils.py @@ -173,8 +173,8 @@ def bucket_with_encryption(boto_session, sagemaker_role): region = boto_session.region_name bucket_name = "sagemaker-{}-{}-with-kms".format(region, account) - s3 = boto_session.client("s3") - bucket = boto_session.resource("s3").Bucket(name=bucket_name) + s3 = boto_session.client("s3", region_name=region) + bucket = boto_session.resource("s3", region_name=region).Bucket(name=bucket_name) if bucket.creation_date is None: try: if region == "us-east-1": From 7454f043bb7cdd81c4ac731acb193c27492ee2d7 Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Tue, 28 Jan 2020 17:51:11 -0800 Subject: [PATCH 3/8] refactor s3 bucket creation --- src/sagemaker/session.py | 29 ++++++++++++++------- tests/integ/kms_utils.py | 41 +++++++----------------------- tests/integ/test_tf_script_mode.py | 3 +-- 3 files changed, 30 insertions(+), 43 deletions(-) diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index f340ba2c6d..b4f1d4d0ea 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -342,21 +342,36 @@ def default_bucket(self): ).get_caller_identity()["Account"] default_bucket = "sagemaker-{}-{}".format(region, account) + self.create_s3_bucket_if_it_does_not_exist(bucket_name=default_bucket, region=region) + + self._default_bucket = default_bucket + + return self._default_bucket + + def create_s3_bucket_if_it_does_not_exist(self, bucket_name, region): + """Creates an S3 Bucket if it does not exist. + Also swallows a few common exceptions that indicate that the bucket already exists, or + that it is being created. + + Args: + bucket_name (str): Name of the S3 bucket to be created. + region (str): The region in which to create the bucket. + + """ s3 = self.boto_session.resource("s3", region_name=region) - bucket = self.boto_session.resource("s3", region_name=region).Bucket(name=default_bucket) + bucket = self.boto_session.resource("s3", region_name=region).Bucket(name=bucket_name) if bucket.creation_date is None: try: if region == "us-east-1": # 'us-east-1' cannot be specified because it is the default region: # https://github.com/boto/boto3/issues/125 - s3.create_bucket(Bucket=default_bucket) + s3.create_bucket(Bucket=bucket_name) else: s3.create_bucket( - Bucket=default_bucket, - CreateBucketConfiguration={"LocationConstraint": region}, + Bucket=bucket_name, CreateBucketConfiguration={"LocationConstraint": region} ) - LOGGER.info("Created S3 bucket: %s", default_bucket) + LOGGER.info("Created S3 bucket: %s", bucket_name) except ClientError as e: error_code = e.response["Error"]["Code"] message = e.response["Error"]["Message"] @@ -373,10 +388,6 @@ def default_bucket(self): else: raise - self._default_bucket = default_bucket - - return self._default_bucket - def train( # noqa: C901 self, input_mode, diff --git a/tests/integ/kms_utils.py b/tests/integ/kms_utils.py index d8dd2677b7..d3e890edc1 100644 --- a/tests/integ/kms_utils.py +++ b/tests/integ/kms_utils.py @@ -15,8 +15,6 @@ import contextlib import json -from botocore import exceptions - from sagemaker import utils PRINCIPAL_TEMPLATE = ( @@ -158,7 +156,8 @@ def get_or_create_kms_key( @contextlib.contextmanager -def bucket_with_encryption(boto_session, sagemaker_role): +def bucket_with_encryption(sagemaker_session, sagemaker_role): + boto_session = sagemaker_session.boto_session region = boto_session.region_name sts_client = boto_session.client( "sts", region_name=region, endpoint_url=utils.sts_regional_endpoint(region) @@ -173,34 +172,10 @@ def bucket_with_encryption(boto_session, sagemaker_role): region = boto_session.region_name bucket_name = "sagemaker-{}-{}-with-kms".format(region, account) - s3 = boto_session.client("s3", region_name=region) - bucket = boto_session.resource("s3", region_name=region).Bucket(name=bucket_name) - if bucket.creation_date is None: - try: - if region == "us-east-1": - # 'us-east-1' cannot be specified because it is the default region: - # https://github.com/boto/boto3/issues/125 - s3.create_bucket(Bucket=bucket_name) - else: - s3.create_bucket( - Bucket=bucket_name, CreateBucketConfiguration={"LocationConstraint": region} - ) - except exceptions.ClientError as e: - error_code = e.response["Error"]["Code"] - message = e.response["Error"]["Message"] - - if error_code == "BucketAlreadyOwnedByYou": - pass - elif ( - error_code == "OperationAborted" and "conflicting conditional operation" in message - ): - # If this bucket is already being concurrently created, we don't need to create it - # again. - pass - else: - raise - - s3.put_bucket_encryption( + sagemaker_session.create_bucket_if_it_does_not_exist(bucket_name=bucket_name, region=region) + + s3_client = boto_session.client("s3", region_name=region) + s3_client.put_bucket_encryption( Bucket=bucket_name, ServerSideEncryptionConfiguration={ "Rules": [ @@ -214,7 +189,9 @@ def bucket_with_encryption(boto_session, sagemaker_role): }, ) - s3.put_bucket_policy(Bucket=bucket_name, Policy=KMS_BUCKET_POLICY % (bucket_name, bucket_name)) + s3_client.put_bucket_policy( + Bucket=bucket_name, Policy=KMS_BUCKET_POLICY % (bucket_name, bucket_name) + ) yield "s3://" + bucket_name, kms_key_arn diff --git a/tests/integ/test_tf_script_mode.py b/tests/integ/test_tf_script_mode.py index d5facb371e..792f09b5bb 100644 --- a/tests/integ/test_tf_script_mode.py +++ b/tests/integ/test_tf_script_mode.py @@ -83,8 +83,7 @@ def test_mnist_with_checkpoint_config(sagemaker_session, instance_type, tf_full_ def test_server_side_encryption(sagemaker_session, tf_full_version): - boto_session = sagemaker_session.boto_session - with kms_utils.bucket_with_encryption(boto_session, ROLE) as (bucket_with_kms, kms_key): + with kms_utils.bucket_with_encryption(sagemaker_session, ROLE) as (bucket_with_kms, kms_key): output_path = os.path.join( bucket_with_kms, "test-server-side-encryption", time.strftime("%y%m%d-%H%M") ) From 93db859bb63dac49805bd0545b3d1fbeb1ac83db Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Wed, 29 Jan 2020 08:59:43 -0800 Subject: [PATCH 4/8] fix method name typo --- tests/integ/kms_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integ/kms_utils.py b/tests/integ/kms_utils.py index d3e890edc1..29140f8b75 100644 --- a/tests/integ/kms_utils.py +++ b/tests/integ/kms_utils.py @@ -172,7 +172,7 @@ def bucket_with_encryption(sagemaker_session, sagemaker_role): region = boto_session.region_name bucket_name = "sagemaker-{}-{}-with-kms".format(region, account) - sagemaker_session.create_bucket_if_it_does_not_exist(bucket_name=bucket_name, region=region) + sagemaker_session.create_s3_bucket_if_it_does_not_exist(bucket_name=bucket_name, region=region) s3_client = boto_session.client("s3", region_name=region) s3_client.put_bucket_encryption( From 7fffe7811d4966deaf8db0a5c6925bca0c41e01b Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Wed, 29 Jan 2020 11:40:22 -0800 Subject: [PATCH 5/8] address comments --- src/sagemaker/session.py | 11 +++++++---- tests/integ/kms_utils.py | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index b4f1d4d0ea..bfa5c85baf 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -342,26 +342,29 @@ def default_bucket(self): ).get_caller_identity()["Account"] default_bucket = "sagemaker-{}-{}".format(region, account) - self.create_s3_bucket_if_it_does_not_exist(bucket_name=default_bucket, region=region) + self._create_s3_bucket_if_it_does_not_exist(bucket_name=default_bucket, region=region) self._default_bucket = default_bucket return self._default_bucket - def create_s3_bucket_if_it_does_not_exist(self, bucket_name, region): + def _create_s3_bucket_if_it_does_not_exist(self, bucket_name, region): """Creates an S3 Bucket if it does not exist. - Also swallows a few common exceptions that indicate that the bucket already exists, or + Also swallows a few common exceptions that indicate that the bucket already exists or that it is being created. Args: bucket_name (str): Name of the S3 bucket to be created. region (str): The region in which to create the bucket. + Raises: + ClientError: If boto S3 throws an unexpected exception. + """ - s3 = self.boto_session.resource("s3", region_name=region) bucket = self.boto_session.resource("s3", region_name=region).Bucket(name=bucket_name) if bucket.creation_date is None: try: + s3 = self.boto_session.resource("s3", region_name=region) if region == "us-east-1": # 'us-east-1' cannot be specified because it is the default region: # https://github.com/boto/boto3/issues/125 diff --git a/tests/integ/kms_utils.py b/tests/integ/kms_utils.py index 29140f8b75..95b0edc859 100644 --- a/tests/integ/kms_utils.py +++ b/tests/integ/kms_utils.py @@ -172,7 +172,7 @@ def bucket_with_encryption(sagemaker_session, sagemaker_role): region = boto_session.region_name bucket_name = "sagemaker-{}-{}-with-kms".format(region, account) - sagemaker_session.create_s3_bucket_if_it_does_not_exist(bucket_name=bucket_name, region=region) + sagemaker_session._create_s3_bucket_if_it_does_not_exist(bucket_name=bucket_name, region=region) s3_client = boto_session.client("s3", region_name=region) s3_client.put_bucket_encryption( From 6d2786ffe91559fbbaa6d56419d43371fb5c36ab Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Wed, 29 Jan 2020 11:45:08 -0800 Subject: [PATCH 6/8] fix docstring --- src/sagemaker/session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index bfa5c85baf..a4177eb0f4 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -358,7 +358,7 @@ def _create_s3_bucket_if_it_does_not_exist(self, bucket_name, region): region (str): The region in which to create the bucket. Raises: - ClientError: If boto S3 throws an unexpected exception. + ClientError: If S3 throws an unexpected exception. """ bucket = self.boto_session.resource("s3", region_name=region).Bucket(name=bucket_name) From 4380bd63049287b0c4777123ebf2ad547353dc36 Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Wed, 29 Jan 2020 11:51:04 -0800 Subject: [PATCH 7/8] update docstring Co-Authored-By: Lauren Yu <6631887+laurenyu@users.noreply.github.com> --- src/sagemaker/session.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index a4177eb0f4..da374fb69a 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -358,7 +358,9 @@ def _create_s3_bucket_if_it_does_not_exist(self, bucket_name, region): region (str): The region in which to create the bucket. Raises: - ClientError: If S3 throws an unexpected exception. + botocore.exceptions.ClientError: If S3 throws an unexpected exception during bucket creation. + If the exception is due to the bucket already existing or + already being created, no exception is raised. """ bucket = self.boto_session.resource("s3", region_name=region).Bucket(name=bucket_name) From ab9b497e0c9e4cce1d46753045829c4a4b5a848e Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Wed, 29 Jan 2020 12:48:37 -0800 Subject: [PATCH 8/8] fix docstring --- src/sagemaker/session.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index da374fb69a..854c431e87 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -358,7 +358,8 @@ def _create_s3_bucket_if_it_does_not_exist(self, bucket_name, region): region (str): The region in which to create the bucket. Raises: - botocore.exceptions.ClientError: If S3 throws an unexpected exception during bucket creation. + botocore.exceptions.ClientError: If S3 throws an unexpected exception during bucket + creation. If the exception is due to the bucket already existing or already being created, no exception is raised.