Skip to content

Commit 81ae9da

Browse files
authored
fix: use pathlib.PurePosixPath for S3 URLs and Unix paths (#1763)
This is the Python 3.4+ fix that partially addressed by: - e86da62 - edd10aa - 06a00d4 - d2f3984 - 1463743
1 parent f335d0f commit 81ae9da

File tree

10 files changed

+138
-91
lines changed

10 files changed

+138
-91
lines changed

src/sagemaker/local/utils.py

+4-18
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
from distutils.dir_util import copy_tree
2020
from six.moves.urllib.parse import urlparse
2121

22+
from sagemaker import s3
23+
2224

2325
def copy_directory_structure(destination_directory, relative_path):
2426
"""Create all the intermediate directories required for relative_path to
@@ -62,8 +64,8 @@ def move_to_destination(source, destination, job_name, sagemaker_session):
6264
final_uri = destination
6365
elif parsed_uri.scheme == "s3":
6466
bucket = parsed_uri.netloc
65-
path = _create_s3_prefix(parsed_uri.path, job_name)
66-
final_uri = "s3://%s/%s" % (bucket, path)
67+
path = s3.s3_path_join(parsed_uri.path, job_name)
68+
final_uri = s3.s3_path_join("s3://", bucket, path)
6769
sagemaker_session.upload_data(source, bucket, path)
6870
else:
6971
raise ValueError("Invalid destination URI, must be s3:// or file://, got: %s" % destination)
@@ -72,22 +74,6 @@ def move_to_destination(source, destination, job_name, sagemaker_session):
7274
return final_uri
7375

7476

75-
def _create_s3_prefix(path, job_name):
76-
"""Constructs a path out of the given path and job name to be
77-
used as an S3 prefix.
78-
79-
Args:
80-
path (str): the original path. If the path is only ``"/"``,
81-
then it is ignored.
82-
job_name (str): the job name to be appended to the path.
83-
84-
Returns:
85-
str: an S3 prefix of the form ``"path/job_name"``
86-
"""
87-
path = path.strip("/")
88-
return job_name if path == "" else "/".join((path, job_name))
89-
90-
9177
def recursive_copy(source, destination):
9278
"""A wrapper around distutils.dir_util.copy_tree but won't throw any
9379
exception when the source directory does not exist.

src/sagemaker/model_monitor/data_capture_config.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@
1616
"""
1717
from __future__ import print_function, absolute_import
1818

19-
import os
20-
19+
from sagemaker import s3
2120
from sagemaker.session import Session
2221

2322
_MODEL_MONITOR_S3_PATH = "model-monitor"
@@ -67,7 +66,7 @@ def __init__(
6766
self.destination_s3_uri = destination_s3_uri
6867
if self.destination_s3_uri is None:
6968
sagemaker_session = sagemaker_session or Session()
70-
self.destination_s3_uri = os.path.join(
69+
self.destination_s3_uri = s3.s3_path_join(
7170
"s3://",
7271
sagemaker_session.default_bucket(),
7372
_MODEL_MONITOR_S3_PATH,

src/sagemaker/model_monitor/model_monitoring.py

+50-35
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,19 @@
1919
import copy
2020
import json
2121
import os
22+
import pathlib
2223
import logging
2324
import uuid
2425

2526
from six import string_types
2627
from six.moves.urllib.parse import urlparse
2728
from botocore.exceptions import ClientError
2829

29-
from sagemaker import image_uris
30+
from sagemaker import image_uris, s3
3031
from sagemaker.exceptions import UnexpectedStatusException
3132
from sagemaker.model_monitor.monitoring_files import Constraints, ConstraintViolations, Statistics
3233
from sagemaker.network import NetworkConfig
3334
from sagemaker.processing import Processor, ProcessingInput, ProcessingJob, ProcessingOutput
34-
from sagemaker.s3 import S3Uploader
3535
from sagemaker.session import Session
3636
from sagemaker.utils import name_from_base, retries
3737

@@ -809,8 +809,10 @@ def _normalize_endpoint_input(self, endpoint_input):
809809
if isinstance(endpoint_input, string_types):
810810
endpoint_input = EndpointInput(
811811
endpoint_name=endpoint_input,
812-
destination=os.path.join(
813-
_CONTAINER_BASE_PATH, _CONTAINER_INPUT_PATH, _CONTAINER_ENDPOINT_INPUT_PATH
812+
destination=str(
813+
pathlib.PurePosixPath(
814+
_CONTAINER_BASE_PATH, _CONTAINER_INPUT_PATH, _CONTAINER_ENDPOINT_INPUT_PATH
815+
)
814816
),
815817
)
816818

@@ -842,13 +844,13 @@ def _normalize_baseline_inputs(self, baseline_inputs=None):
842844
# and save the S3 uri in the ProcessingInput source.
843845
parse_result = urlparse(file_input.source)
844846
if parse_result.scheme != "s3":
845-
s3_uri = os.path.join(
847+
s3_uri = s3.s3_path_join(
846848
"s3://",
847849
self.sagemaker_session.default_bucket(),
848850
self.latest_baselining_job_name,
849851
file_input.input_name,
850852
)
851-
S3Uploader.upload(
853+
s3.S3Uploader.upload(
852854
local_path=file_input.source,
853855
desired_s3_uri=s3_uri,
854856
sagemaker_session=self.sagemaker_session,
@@ -869,7 +871,7 @@ def _normalize_processing_output(self, output=None):
869871
"""
870872
# If the output is a string, turn it into a ProcessingOutput object.
871873
if isinstance(output, string_types):
872-
s3_uri = os.path.join(
874+
s3_uri = s3.s3_path_join(
873875
"s3://",
874876
self.sagemaker_session.default_bucket(),
875877
self.latest_baselining_job_name,
@@ -893,7 +895,7 @@ def _normalize_monitoring_output(self, output=None):
893895
"""
894896
# If the output is a string, turn it into a ProcessingOutput object.
895897
if output.destination is None:
896-
output.destination = os.path.join(
898+
output.destination = s3.s3_path_join(
897899
"s3://",
898900
self.sagemaker_session.default_bucket(),
899901
self.monitoring_schedule_name,
@@ -914,7 +916,7 @@ def _s3_uri_from_local_path(self, path):
914916
"""
915917
parse_result = urlparse(path)
916918
if parse_result.scheme != "s3":
917-
s3_uri = os.path.join(
919+
s3_uri = s3.s3_path_join(
918920
"s3://",
919921
self.sagemaker_session.default_bucket(),
920922
_MODEL_MONITOR_S3_PATH,
@@ -923,10 +925,10 @@ def _s3_uri_from_local_path(self, path):
923925
_INPUT_S3_PATH,
924926
str(uuid.uuid4()),
925927
)
926-
S3Uploader.upload(
928+
s3.S3Uploader.upload(
927929
local_path=path, desired_s3_uri=s3_uri, sagemaker_session=self.sagemaker_session
928930
)
929-
path = os.path.join(s3_uri, os.path.basename(path))
931+
path = s3.s3_path_join(s3_uri, os.path.basename(path))
930932
return path
931933

932934
def _wait_for_schedule_changes_to_apply(self):
@@ -1074,8 +1076,10 @@ def suggest_baseline(
10741076

10751077
normalized_baseline_dataset_input = self._upload_and_convert_to_processing_input(
10761078
source=baseline_dataset,
1077-
destination=os.path.join(
1078-
_CONTAINER_BASE_PATH, _CONTAINER_INPUT_PATH, _BASELINE_DATASET_INPUT_NAME
1079+
destination=str(
1080+
pathlib.PurePosixPath(
1081+
_CONTAINER_BASE_PATH, _CONTAINER_INPUT_PATH, _BASELINE_DATASET_INPUT_NAME
1082+
)
10791083
),
10801084
name=_BASELINE_DATASET_INPUT_NAME,
10811085
)
@@ -1085,34 +1089,44 @@ def suggest_baseline(
10851089

10861090
normalized_record_preprocessor_script_input = self._upload_and_convert_to_processing_input(
10871091
source=record_preprocessor_script,
1088-
destination=os.path.join(
1089-
_CONTAINER_BASE_PATH, _CONTAINER_INPUT_PATH, _RECORD_PREPROCESSOR_SCRIPT_INPUT_NAME
1092+
destination=str(
1093+
pathlib.PurePosixPath(
1094+
_CONTAINER_BASE_PATH,
1095+
_CONTAINER_INPUT_PATH,
1096+
_RECORD_PREPROCESSOR_SCRIPT_INPUT_NAME,
1097+
)
10901098
),
10911099
name=_RECORD_PREPROCESSOR_SCRIPT_INPUT_NAME,
10921100
)
10931101

10941102
record_preprocessor_script_container_path = None
10951103
if normalized_record_preprocessor_script_input is not None:
1096-
record_preprocessor_script_container_path = os.path.join(
1097-
normalized_record_preprocessor_script_input.destination,
1098-
os.path.basename(record_preprocessor_script),
1104+
record_preprocessor_script_container_path = str(
1105+
pathlib.PurePosixPath(
1106+
normalized_record_preprocessor_script_input.destination,
1107+
os.path.basename(record_preprocessor_script),
1108+
)
10991109
)
11001110

11011111
normalized_post_processor_script_input = self._upload_and_convert_to_processing_input(
11021112
source=post_analytics_processor_script,
1103-
destination=os.path.join(
1104-
_CONTAINER_BASE_PATH,
1105-
_CONTAINER_INPUT_PATH,
1106-
_POST_ANALYTICS_PROCESSOR_SCRIPT_INPUT_NAME,
1113+
destination=str(
1114+
pathlib.PurePosixPath(
1115+
_CONTAINER_BASE_PATH,
1116+
_CONTAINER_INPUT_PATH,
1117+
_POST_ANALYTICS_PROCESSOR_SCRIPT_INPUT_NAME,
1118+
)
11071119
),
11081120
name=_POST_ANALYTICS_PROCESSOR_SCRIPT_INPUT_NAME,
11091121
)
11101122

11111123
post_processor_script_container_path = None
11121124
if normalized_post_processor_script_input is not None:
1113-
post_processor_script_container_path = os.path.join(
1114-
normalized_post_processor_script_input.destination,
1115-
os.path.basename(post_analytics_processor_script),
1125+
post_processor_script_container_path = str(
1126+
pathlib.PurePosixPath(
1127+
normalized_post_processor_script_input.destination,
1128+
os.path.basename(post_analytics_processor_script),
1129+
)
11161130
)
11171131

11181132
normalized_baseline_output = self._normalize_baseline_output(output_s3_uri=output_s3_uri)
@@ -1631,7 +1645,7 @@ def _normalize_baseline_output(self, output_s3_uri=None):
16311645
sagemaker.processing.ProcessingOutput: The normalized ProcessingOutput object.
16321646
16331647
"""
1634-
s3_uri = output_s3_uri or os.path.join(
1648+
s3_uri = output_s3_uri or s3.s3_path_join(
16351649
"s3://",
16361650
self.sagemaker_session.default_bucket(),
16371651
_MODEL_MONITOR_S3_PATH,
@@ -1640,7 +1654,7 @@ def _normalize_baseline_output(self, output_s3_uri=None):
16401654
_RESULTS_S3_PATH,
16411655
)
16421656
return ProcessingOutput(
1643-
source=os.path.join(_CONTAINER_BASE_PATH, _CONTAINER_OUTPUT_PATH),
1657+
source=str(pathlib.PurePosixPath(_CONTAINER_BASE_PATH, _CONTAINER_OUTPUT_PATH)),
16441658
destination=s3_uri,
16451659
output_name=_DEFAULT_OUTPUT_NAME,
16461660
)
@@ -1655,7 +1669,7 @@ def _normalize_monitoring_output(self, output_s3_uri=None):
16551669
sagemaker.model_monitor.MonitoringOutput: The normalized MonitoringOutput object.
16561670
16571671
"""
1658-
s3_uri = output_s3_uri or os.path.join(
1672+
s3_uri = output_s3_uri or s3.s3_path_join(
16591673
"s3://",
16601674
self.sagemaker_session.default_bucket(),
16611675
_MODEL_MONITOR_S3_PATH,
@@ -1664,7 +1678,8 @@ def _normalize_monitoring_output(self, output_s3_uri=None):
16641678
_RESULTS_S3_PATH,
16651679
)
16661680
output = MonitoringOutput(
1667-
source=os.path.join(_CONTAINER_BASE_PATH, _CONTAINER_OUTPUT_PATH), destination=s3_uri
1681+
source=str(pathlib.PurePosixPath(_CONTAINER_BASE_PATH, _CONTAINER_OUTPUT_PATH)),
1682+
destination=s3_uri,
16681683
)
16691684

16701685
return output
@@ -1741,7 +1756,7 @@ def _upload_and_convert_to_processing_input(self, source, destination, name):
17411756
parse_result = urlparse(url=source)
17421757

17431758
if parse_result.scheme != "s3":
1744-
s3_uri = os.path.join(
1759+
s3_uri = s3.s3_path_join(
17451760
"s3://",
17461761
self.sagemaker_session.default_bucket(),
17471762
_MODEL_MONITOR_S3_PATH,
@@ -1750,7 +1765,7 @@ def _upload_and_convert_to_processing_input(self, source, destination, name):
17501765
_INPUT_S3_PATH,
17511766
name,
17521767
)
1753-
S3Uploader.upload(
1768+
s3.S3Uploader.upload(
17541769
local_path=source, desired_s3_uri=s3_uri, sagemaker_session=self.sagemaker_session
17551770
)
17561771
source = s3_uri
@@ -1839,7 +1854,7 @@ def baseline_statistics(self, file_name=STATISTICS_JSON_DEFAULT_FILE_NAME, kms_k
18391854
try:
18401855
baselining_job_output_s3_path = self.outputs[0].destination
18411856
return Statistics.from_s3_uri(
1842-
statistics_file_s3_uri=os.path.join(baselining_job_output_s3_path, file_name),
1857+
statistics_file_s3_uri=s3.s3_path_join(baselining_job_output_s3_path, file_name),
18431858
kms_key=kms_key,
18441859
sagemaker_session=self.sagemaker_session,
18451860
)
@@ -1877,7 +1892,7 @@ def suggested_constraints(self, file_name=CONSTRAINTS_JSON_DEFAULT_FILE_NAME, km
18771892
try:
18781893
baselining_job_output_s3_path = self.outputs[0].destination
18791894
return Constraints.from_s3_uri(
1880-
constraints_file_s3_uri=os.path.join(baselining_job_output_s3_path, file_name),
1895+
constraints_file_s3_uri=s3.s3_path_join(baselining_job_output_s3_path, file_name),
18811896
kms_key=kms_key,
18821897
sagemaker_session=self.sagemaker_session,
18831898
)
@@ -1993,7 +2008,7 @@ def statistics(self, file_name=STATISTICS_JSON_DEFAULT_FILE_NAME, kms_key=None):
19932008
try:
19942009
baselining_job_output_s3_path = self.outputs[0].destination
19952010
return Statistics.from_s3_uri(
1996-
statistics_file_s3_uri=os.path.join(baselining_job_output_s3_path, file_name),
2011+
statistics_file_s3_uri=s3.s3_path_join(baselining_job_output_s3_path, file_name),
19972012
kms_key=kms_key,
19982013
sagemaker_session=self.sagemaker_session,
19992014
)
@@ -2033,7 +2048,7 @@ def constraint_violations(
20332048
try:
20342049
baselining_job_output_s3_path = self.outputs[0].destination
20352050
return ConstraintViolations.from_s3_uri(
2036-
constraint_violations_file_s3_uri=os.path.join(
2051+
constraint_violations_file_s3_uri=s3.s3_path_join(
20372052
baselining_job_output_s3_path, file_name
20382053
),
20392054
kms_key=kms_key,

src/sagemaker/model_monitor/monitoring_files.py

+11-12
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,8 @@
2222

2323
from botocore.exceptions import ClientError
2424

25+
from sagemaker import s3
2526
from sagemaker.session import Session
26-
from sagemaker.s3 import S3Downloader
27-
from sagemaker.s3 import S3Uploader
2827

2928
NO_SUCH_KEY_CODE = "NoSuchKey"
3029

@@ -68,7 +67,7 @@ def save(self, new_save_location_s3_uri=None):
6867
if new_save_location_s3_uri is not None:
6968
self.file_s3_uri = new_save_location_s3_uri
7069

71-
return S3Uploader.upload_string_as_file_body(
70+
return s3.S3Uploader.upload_string_as_file_body(
7271
body=json.dumps(self.body_dict),
7372
desired_s3_uri=self.file_s3_uri,
7473
kms_key=self.kms_key,
@@ -119,7 +118,7 @@ def from_s3_uri(cls, statistics_file_s3_uri, kms_key=None, sagemaker_session=Non
119118
"""
120119
try:
121120
body_dict = json.loads(
122-
S3Downloader.read_file(
121+
s3.S3Downloader.read_file(
123122
s3_uri=statistics_file_s3_uri, sagemaker_session=sagemaker_session
124123
)
125124
)
@@ -158,10 +157,10 @@ def from_string(
158157
"""
159158
sagemaker_session = sagemaker_session or Session()
160159
file_name = file_name or "statistics.json"
161-
desired_s3_uri = os.path.join(
160+
desired_s3_uri = s3.s3_path_join(
162161
"s3://", sagemaker_session.default_bucket(), "monitoring", str(uuid.uuid4()), file_name
163162
)
164-
s3_uri = S3Uploader.upload_string_as_file_body(
163+
s3_uri = s3.S3Uploader.upload_string_as_file_body(
165164
body=statistics_file_string,
166165
desired_s3_uri=desired_s3_uri,
167166
kms_key=kms_key,
@@ -245,7 +244,7 @@ def from_s3_uri(cls, constraints_file_s3_uri, kms_key=None, sagemaker_session=No
245244
"""
246245
try:
247246
body_dict = json.loads(
248-
S3Downloader.read_file(
247+
s3.S3Downloader.read_file(
249248
s3_uri=constraints_file_s3_uri, sagemaker_session=sagemaker_session
250249
)
251250
)
@@ -287,10 +286,10 @@ def from_string(
287286
"""
288287
sagemaker_session = sagemaker_session or Session()
289288
file_name = file_name or "constraints.json"
290-
desired_s3_uri = os.path.join(
289+
desired_s3_uri = s3.s3_path_join(
291290
"s3://", sagemaker_session.default_bucket(), "monitoring", str(uuid.uuid4()), file_name
292291
)
293-
s3_uri = S3Uploader.upload_string_as_file_body(
292+
s3_uri = s3.S3Uploader.upload_string_as_file_body(
294293
body=constraints_file_string,
295294
desired_s3_uri=desired_s3_uri,
296295
kms_key=kms_key,
@@ -399,7 +398,7 @@ def from_s3_uri(cls, constraint_violations_file_s3_uri, kms_key=None, sagemaker_
399398
"""
400399
try:
401400
body_dict = json.loads(
402-
S3Downloader.read_file(
401+
s3.S3Downloader.read_file(
403402
s3_uri=constraint_violations_file_s3_uri, sagemaker_session=sagemaker_session
404403
)
405404
)
@@ -442,10 +441,10 @@ def from_string(
442441
"""
443442
sagemaker_session = sagemaker_session or Session()
444443
file_name = file_name or "constraint_violations.json"
445-
desired_s3_uri = os.path.join(
444+
desired_s3_uri = s3.s3_path_join(
446445
"s3://", sagemaker_session.default_bucket(), "monitoring", str(uuid.uuid4()), file_name
447446
)
448-
s3_uri = S3Uploader.upload_string_as_file_body(
447+
s3_uri = s3.S3Uploader.upload_string_as_file_body(
449448
body=constraint_violations_file_string,
450449
desired_s3_uri=desired_s3_uri,
451450
kms_key=kms_key,

0 commit comments

Comments
 (0)