Skip to content

Commit fe4092a

Browse files
committed
Merge remote-tracking branch 'origin' into fix/intelligent-defaults-volume-kms
2 parents 55e93d9 + e2624af commit fe4092a

File tree

106 files changed

+2227
-261
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

106 files changed

+2227
-261
lines changed

src/sagemaker/amazon/amazon_estimator.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
from six.moves.urllib.parse import urlparse
2222

23-
from sagemaker import image_uris
23+
from sagemaker import image_uris, s3_utils
2424
from sagemaker.amazon import validation
2525
from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa
2626
from sagemaker.amazon.common import write_numpy_to_dense_tensor
@@ -93,8 +93,15 @@ def __init__(
9393
enable_network_isolation=enable_network_isolation,
9494
**kwargs
9595
)
96-
data_location = data_location or "s3://{}/sagemaker-record-sets/".format(
97-
self.sagemaker_session.default_bucket()
96+
97+
data_location = data_location or (
98+
s3_utils.s3_path_join(
99+
"s3://",
100+
self.sagemaker_session.default_bucket(),
101+
self.sagemaker_session.default_bucket_prefix,
102+
"sagemaker-record-sets",
103+
with_end_slash=True,
104+
)
98105
)
99106
self._data_location = data_location
100107

src/sagemaker/automl/automl.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from typing import Optional, List, Dict
1818
from six import string_types
1919

20-
from sagemaker import Model, PipelineModel
20+
from sagemaker import Model, PipelineModel, s3
2121
from sagemaker.automl.candidate_estimator import CandidateEstimator
2222
from sagemaker.config import (
2323
AUTO_ML_ROLE_ARN_PATH,
@@ -676,7 +676,12 @@ def _prepare_for_auto_ml_job(self, job_name=None):
676676
self.current_job_name = name_from_base(base_name, max_length=32)
677677

678678
if self.output_path is None:
679-
self.output_path = "s3://{}/".format(self.sagemaker_session.default_bucket())
679+
self.output_path = s3.s3_path_join(
680+
"s3://",
681+
self.sagemaker_session.default_bucket(),
682+
self.sagemaker_session.default_bucket_prefix,
683+
with_end_slash=True,
684+
)
680685

681686
@classmethod
682687
def _get_supported_inference_keys(cls, container, default=None):

src/sagemaker/config/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,8 @@
9393
AUTO_ML_VOLUME_KMS_KEY_ID_PATH,
9494
AUTO_ML_INTER_CONTAINER_ENCRYPTION_PATH,
9595
ENDPOINT_CONFIG_DATA_CAPTURE_KMS_KEY_ID_PATH,
96+
SESSION_DEFAULT_S3_BUCKET_PATH,
97+
SESSION_DEFAULT_S3_OBJECT_KEY_PREFIX_PATH,
9698
MONITORING_SCHEDULE_CONFIG,
9799
MONITORING_JOB_DEFINITION,
98100
MONITORING_OUTPUT_CONFIG,
@@ -131,4 +133,9 @@
131133
EXECUTION_ROLE_ARN,
132134
ASYNC_INFERENCE_CONFIG,
133135
SCHEMA_VERSION,
136+
PYTHON_SDK,
137+
MODULES,
138+
DEFAULT_S3_BUCKET,
139+
DEFAULT_S3_OBJECT_KEY_PREFIX,
140+
SESSION,
134141
)

src/sagemaker/config/config_schema.py

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -89,13 +89,17 @@
8989
OBJECT = "object"
9090
ADDITIONAL_PROPERTIES = "additionalProperties"
9191
ENABLE_INTER_CONTAINER_TRAFFIC_ENCRYPTION = "EnableInterContainerTrafficEncryption"
92+
SESSION = "Session"
93+
DEFAULT_S3_BUCKET = "DefaultS3Bucket"
94+
DEFAULT_S3_OBJECT_KEY_PREFIX = "DefaultS3ObjectKeyPrefix"
9295

9396

9497
def _simple_path(*args: str):
9598
"""Appends an arbitrary number of strings to use as path constants"""
9699
return ".".join(args)
97100

98101

102+
# Paths for reference elsewhere in the code.
99103
COMPILATION_JOB_VPC_CONFIG_PATH = _simple_path(SAGEMAKER, COMPILATION_JOB, VPC_CONFIG)
100104
COMPILATION_JOB_KMS_KEY_ID_PATH = _simple_path(
101105
SAGEMAKER, COMPILATION_JOB, OUTPUT_CONFIG, KMS_KEY_ID
@@ -231,7 +235,6 @@ def _simple_path(*args: str):
231235
MODEL_PACKAGE_VALIDATION_PROFILES_PATH = _simple_path(
232236
SAGEMAKER, MODEL_PACKAGE, VALIDATION_SPECIFICATION, VALIDATION_PROFILES
233237
)
234-
235238
REMOTE_FUNCTION_DEPENDENCIES = _simple_path(
236239
SAGEMAKER, PYTHON_SDK, MODULES, REMOTE_FUNCTION, DEPENDENCIES
237240
)
@@ -274,9 +277,6 @@ def _simple_path(*args: str):
274277
REMOTE_FUNCTION_ENABLE_INTER_CONTAINER_TRAFFIC_ENCRYPTION = _simple_path(
275278
SAGEMAKER, PYTHON_SDK, MODULES, REMOTE_FUNCTION, ENABLE_INTER_CONTAINER_TRAFFIC_ENCRYPTION
276279
)
277-
278-
# Paths for reference elsewhere in the SDK.
279-
# Names include the schema version since the paths could change with other schema versions
280280
MONITORING_SCHEDULE_INTER_CONTAINER_ENCRYPTION_PATH = _simple_path(
281281
SAGEMAKER,
282282
MONITORING_SCHEDULE,
@@ -298,6 +298,13 @@ def _simple_path(*args: str):
298298
TRAINING_JOB_INTER_CONTAINER_ENCRYPTION_PATH = _simple_path(
299299
SAGEMAKER, TRAINING_JOB, ENABLE_INTER_CONTAINER_TRAFFIC_ENCRYPTION
300300
)
301+
SESSION_DEFAULT_S3_BUCKET_PATH = _simple_path(
302+
SAGEMAKER, PYTHON_SDK, MODULES, SESSION, DEFAULT_S3_BUCKET
303+
)
304+
SESSION_DEFAULT_S3_OBJECT_KEY_PREFIX_PATH = _simple_path(
305+
SAGEMAKER, PYTHON_SDK, MODULES, SESSION, DEFAULT_S3_OBJECT_KEY_PREFIX
306+
)
307+
301308

302309
SAGEMAKER_PYTHON_SDK_CONFIG_SCHEMA = {
303310
"$schema": "https://json-schema.org/draft/2020-12/schema",
@@ -447,6 +454,15 @@ def _simple_path(*args: str):
447454
"s3Uri": {TYPE: "string", "pattern": "^(https|s3)://([^/]+)/?(.*)$", "maxLength": 1024},
448455
# Regex is taken from https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_AlgorithmSpecification.html#sagemaker-Type-AlgorithmSpecification-ContainerEntrypoint
449456
"preExecutionCommand": {TYPE: "string", "pattern": r".*"},
457+
# Regex based on https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_PipelineDefinitionS3Location.html
458+
# except with an additional ^ and $ for the beginning and the end to closer align to
459+
# https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html
460+
"s3Bucket": {
461+
TYPE: "string",
462+
"pattern": r"^[a-z0-9][\.\-a-z0-9]{1,61}[a-z0-9]$",
463+
"minLength": 3,
464+
"maxLength": 63,
465+
},
450466
},
451467
PROPERTIES: {
452468
SCHEMA_VERSION: {
@@ -477,6 +493,29 @@ def _simple_path(*args: str):
477493
TYPE: OBJECT,
478494
ADDITIONAL_PROPERTIES: False,
479495
PROPERTIES: {
496+
SESSION: {
497+
TYPE: OBJECT,
498+
ADDITIONAL_PROPERTIES: False,
499+
PROPERTIES: {
500+
DEFAULT_S3_BUCKET: {
501+
"description": "sets `default_bucket` of Session",
502+
"$ref": "#/definitions/s3Bucket",
503+
},
504+
DEFAULT_S3_OBJECT_KEY_PREFIX: {
505+
"description": (
506+
"sets `default_bucket_prefix` of Session"
507+
),
508+
TYPE: "string",
509+
# S3 guidelines:
510+
# https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
511+
# Note that the PythonSDK at the time of writing
512+
# tends to collapse multiple "/" in a row to one "/"
513+
# (even though S3 allows multiple "/" in a row)
514+
"minLength": 1,
515+
"maxLength": 1024,
516+
},
517+
},
518+
},
480519
REMOTE_FUNCTION: {
481520
TYPE: OBJECT,
482521
ADDITIONAL_PROPERTIES: False,
@@ -504,9 +543,9 @@ def _simple_path(*args: str):
504543
VOLUME_KMS_KEY_ID: {"$ref": "#/definitions/kmsKeyId"},
505544
VPC_CONFIG: {"$ref": "#/definitions/vpcConfig"},
506545
},
507-
}
546+
},
508547
},
509-
}
548+
},
510549
},
511550
},
512551
# Feature Group

src/sagemaker/djl_inference/model.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from sagemaker.deserializers import JSONDeserializer, BaseDeserializer
2828
from sagemaker.djl_inference import defaults
2929
from sagemaker.model import FrameworkModel
30+
from sagemaker.s3_utils import s3_path_join
3031
from sagemaker.serializers import JSONSerializer, BaseSerializer
3132
from sagemaker.session import Session
3233
from sagemaker.utils import _tmpdir, _create_or_update_code_dir
@@ -502,12 +503,16 @@ def partition(
502503
self.key_prefix, self.name, self.image_uri
503504
)
504505
if s3_output_uri is None:
505-
bucket = self.bucket or self.sagemaker_session.default_bucket()
506-
s3_output_uri = f"s3://{bucket}/{deploy_key_prefix}"
506+
bucket, deploy_key_prefix = s3.determine_bucket_and_prefix(
507+
bucket=self.bucket,
508+
key_prefix=deploy_key_prefix,
509+
sagemaker_session=self.sagemaker_session,
510+
)
511+
s3_output_uri = s3_path_join("s3://", bucket, deploy_key_prefix)
507512
else:
508-
s3_output_uri = f"{s3_output_uri}/{deploy_key_prefix}"
513+
s3_output_uri = s3_path_join(s3_output_uri, deploy_key_prefix)
509514

510-
self.save_mp_checkpoint_path = f"{s3_output_uri}/aot-partitioned-checkpoints"
515+
self.save_mp_checkpoint_path = s3_path_join(s3_output_uri, "aot-partitioned-checkpoints")
511516

512517
container_def = self._upload_model_to_s3(upload_as_tar=False)
513518
estimator = _create_estimator(
@@ -673,7 +678,11 @@ def _upload_model_to_s3(self, upload_as_tar: bool = True):
673678
deploy_key_prefix = fw_utils.model_code_key_prefix(
674679
self.key_prefix, self.name, self.image_uri
675680
)
676-
bucket = self.bucket or self.sagemaker_session.default_bucket()
681+
bucket, deploy_key_prefix = s3.determine_bucket_and_prefix(
682+
bucket=self.bucket,
683+
key_prefix=deploy_key_prefix,
684+
sagemaker_session=self.sagemaker_session,
685+
)
677686
if upload_as_tar:
678687
uploaded_code = fw_utils.tar_and_upload_dir(
679688
self.sagemaker_session.boto_session,
@@ -686,10 +695,9 @@ def _upload_model_to_s3(self, upload_as_tar: bool = True):
686695
)
687696
model_data_url = uploaded_code.s3_prefix
688697
else:
689-
key_prefix = f"{deploy_key_prefix}/aot-model"
690698
model_data_url = S3Uploader.upload(
691699
tmp_code_dir,
692-
"s3://%s/%s" % (bucket, key_prefix),
700+
s3_path_join("s3://", bucket, deploy_key_prefix, "aot-model"),
693701
self.model_kms_key,
694702
self.sagemaker_session,
695703
)

src/sagemaker/estimator.py

Lines changed: 42 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from six.moves.urllib.parse import urlparse
2828

2929
import sagemaker
30-
from sagemaker import git_utils, image_uris, vpc_utils
30+
from sagemaker import git_utils, image_uris, vpc_utils, s3
3131
from sagemaker.analytics import TrainingJobAnalytics
3232
from sagemaker.config import (
3333
TRAINING_JOB_VOLUME_KMS_KEY_ID_PATH,
@@ -696,6 +696,9 @@ def __init__(
696696
enable_network_isolation=self._enable_network_isolation,
697697
)
698698

699+
# Internal flag
700+
self._is_output_path_set_from_default_bucket_and_prefix = False
701+
699702
@abstractmethod
700703
def training_image_uri(self):
701704
"""Return the Docker image to use for training.
@@ -796,7 +799,13 @@ def _prepare_for_training(self, job_name=None):
796799
if self.sagemaker_session.local_mode and local_code:
797800
self.output_path = ""
798801
else:
799-
self.output_path = "s3://{}/".format(self.sagemaker_session.default_bucket())
802+
self.output_path = s3.s3_path_join(
803+
"s3://",
804+
self.sagemaker_session.default_bucket(),
805+
self.sagemaker_session.default_bucket_prefix,
806+
with_end_slash=True,
807+
)
808+
self._is_output_path_set_from_default_bucket_and_prefix = True
800809

801810
if self.git_config:
802811
updated_paths = git_utils.git_clone_repo(
@@ -871,7 +880,8 @@ def _stage_user_code_in_s3(self) -> UploadedCode:
871880
if is_pipeline_variable(self.output_path):
872881
if self.code_location is None:
873882
code_bucket = self.sagemaker_session.default_bucket()
874-
code_s3_prefix = self._assign_s3_prefix()
883+
key_prefix = self.sagemaker_session.default_bucket_prefix
884+
code_s3_prefix = self._assign_s3_prefix(key_prefix)
875885
kms_key = None
876886
else:
877887
code_bucket, key_prefix = parse_s3_url(self.code_location)
@@ -884,16 +894,30 @@ def _stage_user_code_in_s3(self) -> UploadedCode:
884894
if local_mode:
885895
if self.code_location is None:
886896
code_bucket = self.sagemaker_session.default_bucket()
887-
code_s3_prefix = self._assign_s3_prefix()
897+
key_prefix = self.sagemaker_session.default_bucket_prefix
898+
code_s3_prefix = self._assign_s3_prefix(key_prefix)
888899
kms_key = None
889900
else:
890901
code_bucket, key_prefix = parse_s3_url(self.code_location)
891902
code_s3_prefix = self._assign_s3_prefix(key_prefix)
892903
kms_key = None
893904
else:
894905
if self.code_location is None:
895-
code_bucket, _ = parse_s3_url(self.output_path)
896-
code_s3_prefix = self._assign_s3_prefix()
906+
code_bucket, possible_key_prefix = parse_s3_url(self.output_path)
907+
908+
if self._is_output_path_set_from_default_bucket_and_prefix:
909+
# Only include possible_key_prefix if the output_path was created from the
910+
# Session's default bucket and prefix. In that scenario, possible_key_prefix
911+
# will either be "" or Session.default_bucket_prefix.
912+
# Note: We cannot do `if (code_bucket == session.default_bucket() and
913+
# key_prefix == session.default_bucket_prefix)` instead because the user
914+
# could have passed in equivalent values themselves to output_path. And
915+
# including the prefix in that case could result in a potentially backwards
916+
# incompatible behavior change for the end user.
917+
code_s3_prefix = self._assign_s3_prefix(possible_key_prefix)
918+
else:
919+
code_s3_prefix = self._assign_s3_prefix()
920+
897921
kms_key = self.output_kms_key
898922
else:
899923
code_bucket, key_prefix = parse_s3_url(self.code_location)
@@ -929,18 +953,13 @@ def _assign_s3_prefix(self, key_prefix=""):
929953
"""
930954
from sagemaker.workflow.utilities import _pipeline_config
931955

932-
code_s3_prefix = "/".join(filter(None, [key_prefix, self._current_job_name, "source"]))
956+
code_s3_prefix = s3.s3_path_join(key_prefix, self._current_job_name, "source")
933957
if _pipeline_config and _pipeline_config.code_hash:
934-
code_s3_prefix = "/".join(
935-
filter(
936-
None,
937-
[
938-
key_prefix,
939-
_pipeline_config.pipeline_name,
940-
"code",
941-
_pipeline_config.code_hash,
942-
],
943-
)
958+
code_s3_prefix = s3.s3_path_join(
959+
key_prefix,
960+
_pipeline_config.pipeline_name,
961+
"code",
962+
_pipeline_config.code_hash,
944963
)
945964
return code_s3_prefix
946965

@@ -1084,8 +1103,12 @@ def _set_source_s3_uri(self, rule):
10841103
if "source_s3_uri" in (rule.rule_parameters or {}):
10851104
parse_result = urlparse(rule.rule_parameters["source_s3_uri"])
10861105
if parse_result.scheme != "s3":
1087-
desired_s3_uri = os.path.join(
1088-
"s3://", self.sagemaker_session.default_bucket(), rule.name, str(uuid.uuid4())
1106+
desired_s3_uri = s3.s3_path_join(
1107+
"s3://",
1108+
self.sagemaker_session.default_bucket(),
1109+
self.sagemaker_session.default_bucket_prefix,
1110+
rule.name,
1111+
str(uuid.uuid4()),
10891112
)
10901113
s3_uri = S3Uploader.upload(
10911114
local_path=rule.rule_parameters["source_s3_uri"],

src/sagemaker/experiments/_helper.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import botocore
2121

22+
from sagemaker import s3
2223
from sagemaker.experiments._utils import is_already_exist_error
2324

2425
logger = logging.getLogger(__name__)
@@ -75,8 +76,17 @@ def upload_artifact(self, file_path):
7576
raise ValueError(
7677
"{} does not exist or is not a file. Please supply a file path.".format(file_path)
7778
)
78-
if not self.artifact_bucket:
79-
self.artifact_bucket = self.sagemaker_session.default_bucket()
79+
80+
# If self.artifact_bucket is falsy, it will be set to sagemaker_session.default_bucket.
81+
# In that case, and if sagemaker_session.default_bucket_prefix exists, self.artifact_prefix
82+
# needs to be updated too (because not updating self.artifact_prefix would result in
83+
# different behavior the 1st time this method is called vs the 2nd).
84+
self.artifact_bucket, self.artifact_prefix = s3.determine_bucket_and_prefix(
85+
bucket=self.artifact_bucket,
86+
key_prefix=self.artifact_prefix,
87+
sagemaker_session=self.sagemaker_session,
88+
)
89+
8090
artifact_name = os.path.basename(file_path)
8191
artifact_s3_key = "{}/{}/{}".format(
8292
self.artifact_prefix, self.trial_component_name, artifact_name
@@ -96,8 +106,17 @@ def upload_object_artifact(self, artifact_name, artifact_object, file_extension=
96106
Returns:
97107
str: The s3 URI of the uploaded file and the version of the file.
98108
"""
99-
if not self.artifact_bucket:
100-
self.artifact_bucket = self.sagemaker_session.default_bucket()
109+
110+
# If self.artifact_bucket is falsy, it will be set to sagemaker_session.default_bucket.
111+
# In that case, and if sagemaker_session.default_bucket_prefix exists, self.artifact_prefix
112+
# needs to be updated too (because not updating self.artifact_prefix would result in
113+
# different behavior the 1st time this method is called vs the 2nd).
114+
self.artifact_bucket, self.artifact_prefix = s3.determine_bucket_and_prefix(
115+
bucket=self.artifact_bucket,
116+
key_prefix=self.artifact_prefix,
117+
sagemaker_session=self.sagemaker_session,
118+
)
119+
101120
if file_extension:
102121
artifact_name = (
103122
artifact_name + ("" if file_extension.startswith(".") else ".") + file_extension

0 commit comments

Comments
 (0)