19
19
import copy
20
20
import json
21
21
import os
22
+ import pathlib
22
23
import logging
23
24
import uuid
24
25
25
26
from six import string_types
26
27
from six .moves .urllib .parse import urlparse
27
28
from botocore .exceptions import ClientError
28
29
29
- from sagemaker import image_uris
30
+ from sagemaker import image_uris , s3
30
31
from sagemaker .exceptions import UnexpectedStatusException
31
32
from sagemaker .model_monitor .monitoring_files import Constraints , ConstraintViolations , Statistics
32
33
from sagemaker .network import NetworkConfig
33
34
from sagemaker .processing import Processor , ProcessingInput , ProcessingJob , ProcessingOutput
34
- from sagemaker .s3 import S3Uploader
35
35
from sagemaker .session import Session
36
36
from sagemaker .utils import name_from_base , retries
37
37
@@ -809,8 +809,10 @@ def _normalize_endpoint_input(self, endpoint_input):
809
809
if isinstance (endpoint_input , string_types ):
810
810
endpoint_input = EndpointInput (
811
811
endpoint_name = endpoint_input ,
812
- destination = os .path .join (
813
- _CONTAINER_BASE_PATH , _CONTAINER_INPUT_PATH , _CONTAINER_ENDPOINT_INPUT_PATH
812
+ destination = str (
813
+ pathlib .PurePosixPath (
814
+ _CONTAINER_BASE_PATH , _CONTAINER_INPUT_PATH , _CONTAINER_ENDPOINT_INPUT_PATH
815
+ )
814
816
),
815
817
)
816
818
@@ -842,13 +844,13 @@ def _normalize_baseline_inputs(self, baseline_inputs=None):
842
844
# and save the S3 uri in the ProcessingInput source.
843
845
parse_result = urlparse (file_input .source )
844
846
if parse_result .scheme != "s3" :
845
- s3_uri = os . path . join (
847
+ s3_uri = s3 . s3_path_join (
846
848
"s3://" ,
847
849
self .sagemaker_session .default_bucket (),
848
850
self .latest_baselining_job_name ,
849
851
file_input .input_name ,
850
852
)
851
- S3Uploader .upload (
853
+ s3 . S3Uploader .upload (
852
854
local_path = file_input .source ,
853
855
desired_s3_uri = s3_uri ,
854
856
sagemaker_session = self .sagemaker_session ,
@@ -869,7 +871,7 @@ def _normalize_processing_output(self, output=None):
869
871
"""
870
872
# If the output is a string, turn it into a ProcessingOutput object.
871
873
if isinstance (output , string_types ):
872
- s3_uri = os . path . join (
874
+ s3_uri = s3 . s3_path_join (
873
875
"s3://" ,
874
876
self .sagemaker_session .default_bucket (),
875
877
self .latest_baselining_job_name ,
@@ -893,7 +895,7 @@ def _normalize_monitoring_output(self, output=None):
893
895
"""
894
896
# If the output is a string, turn it into a ProcessingOutput object.
895
897
if output .destination is None :
896
- output .destination = os . path . join (
898
+ output .destination = s3 . s3_path_join (
897
899
"s3://" ,
898
900
self .sagemaker_session .default_bucket (),
899
901
self .monitoring_schedule_name ,
@@ -914,7 +916,7 @@ def _s3_uri_from_local_path(self, path):
914
916
"""
915
917
parse_result = urlparse (path )
916
918
if parse_result .scheme != "s3" :
917
- s3_uri = os . path . join (
919
+ s3_uri = s3 . s3_path_join (
918
920
"s3://" ,
919
921
self .sagemaker_session .default_bucket (),
920
922
_MODEL_MONITOR_S3_PATH ,
@@ -923,10 +925,10 @@ def _s3_uri_from_local_path(self, path):
923
925
_INPUT_S3_PATH ,
924
926
str (uuid .uuid4 ()),
925
927
)
926
- S3Uploader .upload (
928
+ s3 . S3Uploader .upload (
927
929
local_path = path , desired_s3_uri = s3_uri , sagemaker_session = self .sagemaker_session
928
930
)
929
- path = os . path . join (s3_uri , os .path .basename (path ))
931
+ path = s3 . s3_path_join (s3_uri , os .path .basename (path ))
930
932
return path
931
933
932
934
def _wait_for_schedule_changes_to_apply (self ):
@@ -1074,8 +1076,10 @@ def suggest_baseline(
1074
1076
1075
1077
normalized_baseline_dataset_input = self ._upload_and_convert_to_processing_input (
1076
1078
source = baseline_dataset ,
1077
- destination = os .path .join (
1078
- _CONTAINER_BASE_PATH , _CONTAINER_INPUT_PATH , _BASELINE_DATASET_INPUT_NAME
1079
+ destination = str (
1080
+ pathlib .PurePosixPath (
1081
+ _CONTAINER_BASE_PATH , _CONTAINER_INPUT_PATH , _BASELINE_DATASET_INPUT_NAME
1082
+ )
1079
1083
),
1080
1084
name = _BASELINE_DATASET_INPUT_NAME ,
1081
1085
)
@@ -1085,34 +1089,44 @@ def suggest_baseline(
1085
1089
1086
1090
normalized_record_preprocessor_script_input = self ._upload_and_convert_to_processing_input (
1087
1091
source = record_preprocessor_script ,
1088
- destination = os .path .join (
1089
- _CONTAINER_BASE_PATH , _CONTAINER_INPUT_PATH , _RECORD_PREPROCESSOR_SCRIPT_INPUT_NAME
1092
+ destination = str (
1093
+ pathlib .PurePosixPath (
1094
+ _CONTAINER_BASE_PATH ,
1095
+ _CONTAINER_INPUT_PATH ,
1096
+ _RECORD_PREPROCESSOR_SCRIPT_INPUT_NAME ,
1097
+ )
1090
1098
),
1091
1099
name = _RECORD_PREPROCESSOR_SCRIPT_INPUT_NAME ,
1092
1100
)
1093
1101
1094
1102
record_preprocessor_script_container_path = None
1095
1103
if normalized_record_preprocessor_script_input is not None :
1096
- record_preprocessor_script_container_path = os .path .join (
1097
- normalized_record_preprocessor_script_input .destination ,
1098
- os .path .basename (record_preprocessor_script ),
1104
+ record_preprocessor_script_container_path = str (
1105
+ pathlib .PurePosixPath (
1106
+ normalized_record_preprocessor_script_input .destination ,
1107
+ os .path .basename (record_preprocessor_script ),
1108
+ )
1099
1109
)
1100
1110
1101
1111
normalized_post_processor_script_input = self ._upload_and_convert_to_processing_input (
1102
1112
source = post_analytics_processor_script ,
1103
- destination = os .path .join (
1104
- _CONTAINER_BASE_PATH ,
1105
- _CONTAINER_INPUT_PATH ,
1106
- _POST_ANALYTICS_PROCESSOR_SCRIPT_INPUT_NAME ,
1113
+ destination = str (
1114
+ pathlib .PurePosixPath (
1115
+ _CONTAINER_BASE_PATH ,
1116
+ _CONTAINER_INPUT_PATH ,
1117
+ _POST_ANALYTICS_PROCESSOR_SCRIPT_INPUT_NAME ,
1118
+ )
1107
1119
),
1108
1120
name = _POST_ANALYTICS_PROCESSOR_SCRIPT_INPUT_NAME ,
1109
1121
)
1110
1122
1111
1123
post_processor_script_container_path = None
1112
1124
if normalized_post_processor_script_input is not None :
1113
- post_processor_script_container_path = os .path .join (
1114
- normalized_post_processor_script_input .destination ,
1115
- os .path .basename (post_analytics_processor_script ),
1125
+ post_processor_script_container_path = str (
1126
+ pathlib .PurePosixPath (
1127
+ normalized_post_processor_script_input .destination ,
1128
+ os .path .basename (post_analytics_processor_script ),
1129
+ )
1116
1130
)
1117
1131
1118
1132
normalized_baseline_output = self ._normalize_baseline_output (output_s3_uri = output_s3_uri )
@@ -1631,7 +1645,7 @@ def _normalize_baseline_output(self, output_s3_uri=None):
1631
1645
sagemaker.processing.ProcessingOutput: The normalized ProcessingOutput object.
1632
1646
1633
1647
"""
1634
- s3_uri = output_s3_uri or os . path . join (
1648
+ s3_uri = output_s3_uri or s3 . s3_path_join (
1635
1649
"s3://" ,
1636
1650
self .sagemaker_session .default_bucket (),
1637
1651
_MODEL_MONITOR_S3_PATH ,
@@ -1640,7 +1654,7 @@ def _normalize_baseline_output(self, output_s3_uri=None):
1640
1654
_RESULTS_S3_PATH ,
1641
1655
)
1642
1656
return ProcessingOutput (
1643
- source = os . path . join (_CONTAINER_BASE_PATH , _CONTAINER_OUTPUT_PATH ),
1657
+ source = str ( pathlib . PurePosixPath (_CONTAINER_BASE_PATH , _CONTAINER_OUTPUT_PATH ) ),
1644
1658
destination = s3_uri ,
1645
1659
output_name = _DEFAULT_OUTPUT_NAME ,
1646
1660
)
@@ -1655,7 +1669,7 @@ def _normalize_monitoring_output(self, output_s3_uri=None):
1655
1669
sagemaker.model_monitor.MonitoringOutput: The normalized MonitoringOutput object.
1656
1670
1657
1671
"""
1658
- s3_uri = output_s3_uri or os . path . join (
1672
+ s3_uri = output_s3_uri or s3 . s3_path_join (
1659
1673
"s3://" ,
1660
1674
self .sagemaker_session .default_bucket (),
1661
1675
_MODEL_MONITOR_S3_PATH ,
@@ -1664,7 +1678,8 @@ def _normalize_monitoring_output(self, output_s3_uri=None):
1664
1678
_RESULTS_S3_PATH ,
1665
1679
)
1666
1680
output = MonitoringOutput (
1667
- source = os .path .join (_CONTAINER_BASE_PATH , _CONTAINER_OUTPUT_PATH ), destination = s3_uri
1681
+ source = str (pathlib .PurePosixPath (_CONTAINER_BASE_PATH , _CONTAINER_OUTPUT_PATH )),
1682
+ destination = s3_uri ,
1668
1683
)
1669
1684
1670
1685
return output
@@ -1741,7 +1756,7 @@ def _upload_and_convert_to_processing_input(self, source, destination, name):
1741
1756
parse_result = urlparse (url = source )
1742
1757
1743
1758
if parse_result .scheme != "s3" :
1744
- s3_uri = os . path . join (
1759
+ s3_uri = s3 . s3_path_join (
1745
1760
"s3://" ,
1746
1761
self .sagemaker_session .default_bucket (),
1747
1762
_MODEL_MONITOR_S3_PATH ,
@@ -1750,7 +1765,7 @@ def _upload_and_convert_to_processing_input(self, source, destination, name):
1750
1765
_INPUT_S3_PATH ,
1751
1766
name ,
1752
1767
)
1753
- S3Uploader .upload (
1768
+ s3 . S3Uploader .upload (
1754
1769
local_path = source , desired_s3_uri = s3_uri , sagemaker_session = self .sagemaker_session
1755
1770
)
1756
1771
source = s3_uri
@@ -1839,7 +1854,7 @@ def baseline_statistics(self, file_name=STATISTICS_JSON_DEFAULT_FILE_NAME, kms_k
1839
1854
try :
1840
1855
baselining_job_output_s3_path = self .outputs [0 ].destination
1841
1856
return Statistics .from_s3_uri (
1842
- statistics_file_s3_uri = os . path . join (baselining_job_output_s3_path , file_name ),
1857
+ statistics_file_s3_uri = s3 . s3_path_join (baselining_job_output_s3_path , file_name ),
1843
1858
kms_key = kms_key ,
1844
1859
sagemaker_session = self .sagemaker_session ,
1845
1860
)
@@ -1877,7 +1892,7 @@ def suggested_constraints(self, file_name=CONSTRAINTS_JSON_DEFAULT_FILE_NAME, km
1877
1892
try :
1878
1893
baselining_job_output_s3_path = self .outputs [0 ].destination
1879
1894
return Constraints .from_s3_uri (
1880
- constraints_file_s3_uri = os . path . join (baselining_job_output_s3_path , file_name ),
1895
+ constraints_file_s3_uri = s3 . s3_path_join (baselining_job_output_s3_path , file_name ),
1881
1896
kms_key = kms_key ,
1882
1897
sagemaker_session = self .sagemaker_session ,
1883
1898
)
@@ -1993,7 +2008,7 @@ def statistics(self, file_name=STATISTICS_JSON_DEFAULT_FILE_NAME, kms_key=None):
1993
2008
try :
1994
2009
baselining_job_output_s3_path = self .outputs [0 ].destination
1995
2010
return Statistics .from_s3_uri (
1996
- statistics_file_s3_uri = os . path . join (baselining_job_output_s3_path , file_name ),
2011
+ statistics_file_s3_uri = s3 . s3_path_join (baselining_job_output_s3_path , file_name ),
1997
2012
kms_key = kms_key ,
1998
2013
sagemaker_session = self .sagemaker_session ,
1999
2014
)
@@ -2033,7 +2048,7 @@ def constraint_violations(
2033
2048
try :
2034
2049
baselining_job_output_s3_path = self .outputs [0 ].destination
2035
2050
return ConstraintViolations .from_s3_uri (
2036
- constraint_violations_file_s3_uri = os . path . join (
2051
+ constraint_violations_file_s3_uri = s3 . s3_path_join (
2037
2052
baselining_job_output_s3_path , file_name
2038
2053
),
2039
2054
kms_key = kms_key ,
0 commit comments