19
19
import copy
20
20
import json
21
21
import os
22
+ import pathlib
22
23
import logging
23
24
import uuid
24
25
25
26
from six import string_types
26
27
from six .moves .urllib .parse import urlparse
27
28
from botocore .exceptions import ClientError
28
29
30
+ from sagemaker import s3
29
31
from sagemaker .exceptions import UnexpectedStatusException
30
32
from sagemaker .model_monitor .monitoring_files import Constraints , ConstraintViolations , Statistics
31
33
from sagemaker .network import NetworkConfig
32
34
from sagemaker .processing import Processor , ProcessingInput , ProcessingJob , ProcessingOutput
33
- from sagemaker .s3 import S3Uploader
34
35
from sagemaker .session import Session
35
36
from sagemaker .utils import name_from_base , retries , get_ecr_image_uri_prefix
36
37
@@ -829,8 +830,10 @@ def _normalize_endpoint_input(self, endpoint_input):
829
830
if isinstance (endpoint_input , string_types ):
830
831
endpoint_input = EndpointInput (
831
832
endpoint_name = endpoint_input ,
832
- destination = os .path .join (
833
- _CONTAINER_BASE_PATH , _CONTAINER_INPUT_PATH , _CONTAINER_ENDPOINT_INPUT_PATH
833
+ destination = str (
834
+ pathlib .PurePosixPath (
835
+ _CONTAINER_BASE_PATH , _CONTAINER_INPUT_PATH , _CONTAINER_ENDPOINT_INPUT_PATH
836
+ )
834
837
),
835
838
)
836
839
@@ -862,13 +865,13 @@ def _normalize_baseline_inputs(self, baseline_inputs=None):
862
865
# and save the S3 uri in the ProcessingInput source.
863
866
parse_result = urlparse (file_input .source )
864
867
if parse_result .scheme != "s3" :
865
- s3_uri = os . path . join (
868
+ s3_uri = s3 . s3_path_join (
866
869
"s3://" ,
867
870
self .sagemaker_session .default_bucket (),
868
871
self .latest_baselining_job_name ,
869
872
file_input .input_name ,
870
873
)
871
- S3Uploader .upload (
874
+ s3 . S3Uploader .upload (
872
875
local_path = file_input .source ,
873
876
desired_s3_uri = s3_uri ,
874
877
sagemaker_session = self .sagemaker_session ,
@@ -889,7 +892,7 @@ def _normalize_processing_output(self, output=None):
889
892
"""
890
893
# If the output is a string, turn it into a ProcessingOutput object.
891
894
if isinstance (output , string_types ):
892
- s3_uri = os . path . join (
895
+ s3_uri = s3 . s3_path_join (
893
896
"s3://" ,
894
897
self .sagemaker_session .default_bucket (),
895
898
self .latest_baselining_job_name ,
@@ -913,7 +916,7 @@ def _normalize_monitoring_output(self, output=None):
913
916
"""
914
917
# If the output is a string, turn it into a ProcessingOutput object.
915
918
if output .destination is None :
916
- output .destination = os . path . join (
919
+ output .destination = s3 . s3_path_join (
917
920
"s3://" ,
918
921
self .sagemaker_session .default_bucket (),
919
922
self .monitoring_schedule_name ,
@@ -934,7 +937,7 @@ def _s3_uri_from_local_path(self, path):
934
937
"""
935
938
parse_result = urlparse (path )
936
939
if parse_result .scheme != "s3" :
937
- s3_uri = os . path . join (
940
+ s3_uri = s3 . s3_path_join (
938
941
"s3://" ,
939
942
self .sagemaker_session .default_bucket (),
940
943
_MODEL_MONITOR_S3_PATH ,
@@ -943,10 +946,10 @@ def _s3_uri_from_local_path(self, path):
943
946
_INPUT_S3_PATH ,
944
947
str (uuid .uuid4 ()),
945
948
)
946
- S3Uploader .upload (
949
+ s3 . S3Uploader .upload (
947
950
local_path = path , desired_s3_uri = s3_uri , sagemaker_session = self .sagemaker_session
948
951
)
949
- path = os . path . join (s3_uri , os .path .basename (path ))
952
+ path = s3 . s3_path_join (s3_uri , os .path .basename (path ))
950
953
return path
951
954
952
955
def _wait_for_schedule_changes_to_apply (self ):
@@ -1094,8 +1097,10 @@ def suggest_baseline(
1094
1097
1095
1098
normalized_baseline_dataset_input = self ._upload_and_convert_to_processing_input (
1096
1099
source = baseline_dataset ,
1097
- destination = os .path .join (
1098
- _CONTAINER_BASE_PATH , _CONTAINER_INPUT_PATH , _BASELINE_DATASET_INPUT_NAME
1100
+ destination = str (
1101
+ pathlib .PurePosixPath (
1102
+ _CONTAINER_BASE_PATH , _CONTAINER_INPUT_PATH , _BASELINE_DATASET_INPUT_NAME
1103
+ )
1099
1104
),
1100
1105
name = _BASELINE_DATASET_INPUT_NAME ,
1101
1106
)
@@ -1105,34 +1110,44 @@ def suggest_baseline(
1105
1110
1106
1111
normalized_record_preprocessor_script_input = self ._upload_and_convert_to_processing_input (
1107
1112
source = record_preprocessor_script ,
1108
- destination = os .path .join (
1109
- _CONTAINER_BASE_PATH , _CONTAINER_INPUT_PATH , _RECORD_PREPROCESSOR_SCRIPT_INPUT_NAME
1113
+ destination = str (
1114
+ pathlib .PurePosixPath (
1115
+ _CONTAINER_BASE_PATH ,
1116
+ _CONTAINER_INPUT_PATH ,
1117
+ _RECORD_PREPROCESSOR_SCRIPT_INPUT_NAME ,
1118
+ )
1110
1119
),
1111
1120
name = _RECORD_PREPROCESSOR_SCRIPT_INPUT_NAME ,
1112
1121
)
1113
1122
1114
1123
record_preprocessor_script_container_path = None
1115
1124
if normalized_record_preprocessor_script_input is not None :
1116
- record_preprocessor_script_container_path = os .path .join (
1117
- normalized_record_preprocessor_script_input .destination ,
1118
- os .path .basename (record_preprocessor_script ),
1125
+ record_preprocessor_script_container_path = str (
1126
+ pathlib .PurePosixPath (
1127
+ normalized_record_preprocessor_script_input .destination ,
1128
+ os .path .basename (record_preprocessor_script ),
1129
+ )
1119
1130
)
1120
1131
1121
1132
normalized_post_processor_script_input = self ._upload_and_convert_to_processing_input (
1122
1133
source = post_analytics_processor_script ,
1123
- destination = os .path .join (
1124
- _CONTAINER_BASE_PATH ,
1125
- _CONTAINER_INPUT_PATH ,
1126
- _POST_ANALYTICS_PROCESSOR_SCRIPT_INPUT_NAME ,
1134
+ destination = str (
1135
+ pathlib .PurePosixPath (
1136
+ _CONTAINER_BASE_PATH ,
1137
+ _CONTAINER_INPUT_PATH ,
1138
+ _POST_ANALYTICS_PROCESSOR_SCRIPT_INPUT_NAME ,
1139
+ )
1127
1140
),
1128
1141
name = _POST_ANALYTICS_PROCESSOR_SCRIPT_INPUT_NAME ,
1129
1142
)
1130
1143
1131
1144
post_processor_script_container_path = None
1132
1145
if normalized_post_processor_script_input is not None :
1133
- post_processor_script_container_path = os .path .join (
1134
- normalized_post_processor_script_input .destination ,
1135
- os .path .basename (post_analytics_processor_script ),
1146
+ post_processor_script_container_path = str (
1147
+ pathlib .PurePosixPath (
1148
+ normalized_post_processor_script_input .destination ,
1149
+ os .path .basename (post_analytics_processor_script ),
1150
+ )
1136
1151
)
1137
1152
1138
1153
normalized_baseline_output = self ._normalize_baseline_output (output_s3_uri = output_s3_uri )
@@ -1651,7 +1666,7 @@ def _normalize_baseline_output(self, output_s3_uri=None):
1651
1666
sagemaker.processing.ProcessingOutput: The normalized ProcessingOutput object.
1652
1667
1653
1668
"""
1654
- s3_uri = output_s3_uri or os . path . join (
1669
+ s3_uri = output_s3_uri or s3 . s3_path_join (
1655
1670
"s3://" ,
1656
1671
self .sagemaker_session .default_bucket (),
1657
1672
_MODEL_MONITOR_S3_PATH ,
@@ -1660,7 +1675,7 @@ def _normalize_baseline_output(self, output_s3_uri=None):
1660
1675
_RESULTS_S3_PATH ,
1661
1676
)
1662
1677
return ProcessingOutput (
1663
- source = os . path . join (_CONTAINER_BASE_PATH , _CONTAINER_OUTPUT_PATH ),
1678
+ source = str ( pathlib . PurePosixPath (_CONTAINER_BASE_PATH , _CONTAINER_OUTPUT_PATH ) ),
1664
1679
destination = s3_uri ,
1665
1680
output_name = _DEFAULT_OUTPUT_NAME ,
1666
1681
)
@@ -1675,7 +1690,7 @@ def _normalize_monitoring_output(self, output_s3_uri=None):
1675
1690
sagemaker.model_monitor.MonitoringOutput: The normalized MonitoringOutput object.
1676
1691
1677
1692
"""
1678
- s3_uri = output_s3_uri or os . path . join (
1693
+ s3_uri = output_s3_uri or s3 . s3_path_join (
1679
1694
"s3://" ,
1680
1695
self .sagemaker_session .default_bucket (),
1681
1696
_MODEL_MONITOR_S3_PATH ,
@@ -1684,7 +1699,8 @@ def _normalize_monitoring_output(self, output_s3_uri=None):
1684
1699
_RESULTS_S3_PATH ,
1685
1700
)
1686
1701
output = MonitoringOutput (
1687
- source = os .path .join (_CONTAINER_BASE_PATH , _CONTAINER_OUTPUT_PATH ), destination = s3_uri
1702
+ source = str (pathlib .PurePosixPath (_CONTAINER_BASE_PATH , _CONTAINER_OUTPUT_PATH )),
1703
+ destination = s3_uri ,
1688
1704
)
1689
1705
1690
1706
return output
@@ -1761,7 +1777,7 @@ def _upload_and_convert_to_processing_input(self, source, destination, name):
1761
1777
parse_result = urlparse (url = source )
1762
1778
1763
1779
if parse_result .scheme != "s3" :
1764
- s3_uri = os . path . join (
1780
+ s3_uri = s3 . s3_path_join (
1765
1781
"s3://" ,
1766
1782
self .sagemaker_session .default_bucket (),
1767
1783
_MODEL_MONITOR_S3_PATH ,
@@ -1770,7 +1786,7 @@ def _upload_and_convert_to_processing_input(self, source, destination, name):
1770
1786
_INPUT_S3_PATH ,
1771
1787
name ,
1772
1788
)
1773
- S3Uploader .upload (
1789
+ s3 . S3Uploader .upload (
1774
1790
local_path = source , desired_s3_uri = s3_uri , sagemaker_session = self .sagemaker_session
1775
1791
)
1776
1792
source = s3_uri
@@ -1861,7 +1877,7 @@ def baseline_statistics(self, file_name=STATISTICS_JSON_DEFAULT_FILE_NAME, kms_k
1861
1877
try :
1862
1878
baselining_job_output_s3_path = self .outputs [0 ].destination
1863
1879
return Statistics .from_s3_uri (
1864
- statistics_file_s3_uri = os . path . join (baselining_job_output_s3_path , file_name ),
1880
+ statistics_file_s3_uri = s3 . s3_path_join (baselining_job_output_s3_path , file_name ),
1865
1881
kms_key = kms_key ,
1866
1882
sagemaker_session = self .sagemaker_session ,
1867
1883
)
@@ -1899,7 +1915,7 @@ def suggested_constraints(self, file_name=CONSTRAINTS_JSON_DEFAULT_FILE_NAME, km
1899
1915
try :
1900
1916
baselining_job_output_s3_path = self .outputs [0 ].destination
1901
1917
return Constraints .from_s3_uri (
1902
- constraints_file_s3_uri = os . path . join (baselining_job_output_s3_path , file_name ),
1918
+ constraints_file_s3_uri = s3 . s3_path_join (baselining_job_output_s3_path , file_name ),
1903
1919
kms_key = kms_key ,
1904
1920
sagemaker_session = self .sagemaker_session ,
1905
1921
)
@@ -2015,7 +2031,7 @@ def statistics(self, file_name=STATISTICS_JSON_DEFAULT_FILE_NAME, kms_key=None):
2015
2031
try :
2016
2032
baselining_job_output_s3_path = self .outputs [0 ].destination
2017
2033
return Statistics .from_s3_uri (
2018
- statistics_file_s3_uri = os . path . join (baselining_job_output_s3_path , file_name ),
2034
+ statistics_file_s3_uri = s3 . s3_path_join (baselining_job_output_s3_path , file_name ),
2019
2035
kms_key = kms_key ,
2020
2036
sagemaker_session = self .sagemaker_session ,
2021
2037
)
@@ -2055,7 +2071,7 @@ def constraint_violations(
2055
2071
try :
2056
2072
baselining_job_output_s3_path = self .outputs [0 ].destination
2057
2073
return ConstraintViolations .from_s3_uri (
2058
- constraint_violations_file_s3_uri = os . path . join (
2074
+ constraint_violations_file_s3_uri = s3 . s3_path_join (
2059
2075
baselining_job_output_s3_path , file_name
2060
2076
),
2061
2077
kms_key = kms_key ,
0 commit comments