Skip to content

Commit cef26ba

Browse files
committed
fix: use pathlib.PurePosixPath for S3 URLs and Unix paths
This is the Python 3.4+ fix that was partially addressed by: - e86da62 - edd10aa - 06a00d4 - d2f3984 - 1463743
1 parent 95671e0 commit cef26ba

File tree

10 files changed

+147
-79
lines changed

10 files changed

+147
-79
lines changed

src/sagemaker/local/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from __future__ import absolute_import
1515

1616
import os
17+
import pathlib
1718
import shutil
1819

1920
from distutils.dir_util import copy_tree
@@ -84,8 +85,7 @@ def _create_s3_prefix(path, job_name):
8485
Returns:
8586
str: an S3 prefix of the form ``"path/job_name"``
8687
"""
87-
path = path.strip("/")
88-
return job_name if path == "" else "/".join((path, job_name))
88+
return job_name if path == "" else str(pathlib.PurePosixPath(path, job_name))
8989

9090

9191
def recursive_copy(source, destination):

src/sagemaker/model_monitor/data_capture_config.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@
1616
"""
1717
from __future__ import print_function, absolute_import
1818

19-
import os
20-
19+
from sagemaker import s3
2120
from sagemaker.session import Session
2221

2322
_MODEL_MONITOR_S3_PATH = "model-monitor"
@@ -67,7 +66,7 @@ def __init__(
6766
self.destination_s3_uri = destination_s3_uri
6867
if self.destination_s3_uri is None:
6968
sagemaker_session = sagemaker_session or Session()
70-
self.destination_s3_uri = os.path.join(
69+
self.destination_s3_uri = s3.s3_path_join(
7170
"s3://",
7271
sagemaker_session.default_bucket(),
7372
_MODEL_MONITOR_S3_PATH,

src/sagemaker/model_monitor/model_monitoring.py

Lines changed: 50 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,19 @@
1919
import copy
2020
import json
2121
import os
22+
import pathlib
2223
import logging
2324
import uuid
2425

2526
from six import string_types
2627
from six.moves.urllib.parse import urlparse
2728
from botocore.exceptions import ClientError
2829

30+
from sagemaker import s3
2931
from sagemaker.exceptions import UnexpectedStatusException
3032
from sagemaker.model_monitor.monitoring_files import Constraints, ConstraintViolations, Statistics
3133
from sagemaker.network import NetworkConfig
3234
from sagemaker.processing import Processor, ProcessingInput, ProcessingJob, ProcessingOutput
33-
from sagemaker.s3 import S3Uploader
3435
from sagemaker.session import Session
3536
from sagemaker.utils import name_from_base, retries, get_ecr_image_uri_prefix
3637

@@ -829,8 +830,10 @@ def _normalize_endpoint_input(self, endpoint_input):
829830
if isinstance(endpoint_input, string_types):
830831
endpoint_input = EndpointInput(
831832
endpoint_name=endpoint_input,
832-
destination=os.path.join(
833-
_CONTAINER_BASE_PATH, _CONTAINER_INPUT_PATH, _CONTAINER_ENDPOINT_INPUT_PATH
833+
destination=str(
834+
pathlib.PurePosixPath(
835+
_CONTAINER_BASE_PATH, _CONTAINER_INPUT_PATH, _CONTAINER_ENDPOINT_INPUT_PATH
836+
)
834837
),
835838
)
836839

@@ -862,13 +865,13 @@ def _normalize_baseline_inputs(self, baseline_inputs=None):
862865
# and save the S3 uri in the ProcessingInput source.
863866
parse_result = urlparse(file_input.source)
864867
if parse_result.scheme != "s3":
865-
s3_uri = os.path.join(
868+
s3_uri = s3.s3_path_join(
866869
"s3://",
867870
self.sagemaker_session.default_bucket(),
868871
self.latest_baselining_job_name,
869872
file_input.input_name,
870873
)
871-
S3Uploader.upload(
874+
s3.S3Uploader.upload(
872875
local_path=file_input.source,
873876
desired_s3_uri=s3_uri,
874877
sagemaker_session=self.sagemaker_session,
@@ -889,7 +892,7 @@ def _normalize_processing_output(self, output=None):
889892
"""
890893
# If the output is a string, turn it into a ProcessingOutput object.
891894
if isinstance(output, string_types):
892-
s3_uri = os.path.join(
895+
s3_uri = s3.s3_path_join(
893896
"s3://",
894897
self.sagemaker_session.default_bucket(),
895898
self.latest_baselining_job_name,
@@ -913,7 +916,7 @@ def _normalize_monitoring_output(self, output=None):
913916
"""
914917
# If the output is a string, turn it into a ProcessingOutput object.
915918
if output.destination is None:
916-
output.destination = os.path.join(
919+
output.destination = s3.s3_path_join(
917920
"s3://",
918921
self.sagemaker_session.default_bucket(),
919922
self.monitoring_schedule_name,
@@ -934,7 +937,7 @@ def _s3_uri_from_local_path(self, path):
934937
"""
935938
parse_result = urlparse(path)
936939
if parse_result.scheme != "s3":
937-
s3_uri = os.path.join(
940+
s3_uri = s3.s3_path_join(
938941
"s3://",
939942
self.sagemaker_session.default_bucket(),
940943
_MODEL_MONITOR_S3_PATH,
@@ -943,10 +946,10 @@ def _s3_uri_from_local_path(self, path):
943946
_INPUT_S3_PATH,
944947
str(uuid.uuid4()),
945948
)
946-
S3Uploader.upload(
949+
s3.S3Uploader.upload(
947950
local_path=path, desired_s3_uri=s3_uri, sagemaker_session=self.sagemaker_session
948951
)
949-
path = os.path.join(s3_uri, os.path.basename(path))
952+
path = s3.s3_path_join(s3_uri, os.path.basename(path))
950953
return path
951954

952955
def _wait_for_schedule_changes_to_apply(self):
@@ -1094,8 +1097,10 @@ def suggest_baseline(
10941097

10951098
normalized_baseline_dataset_input = self._upload_and_convert_to_processing_input(
10961099
source=baseline_dataset,
1097-
destination=os.path.join(
1098-
_CONTAINER_BASE_PATH, _CONTAINER_INPUT_PATH, _BASELINE_DATASET_INPUT_NAME
1100+
destination=str(
1101+
pathlib.PurePosixPath(
1102+
_CONTAINER_BASE_PATH, _CONTAINER_INPUT_PATH, _BASELINE_DATASET_INPUT_NAME
1103+
)
10991104
),
11001105
name=_BASELINE_DATASET_INPUT_NAME,
11011106
)
@@ -1105,34 +1110,44 @@ def suggest_baseline(
11051110

11061111
normalized_record_preprocessor_script_input = self._upload_and_convert_to_processing_input(
11071112
source=record_preprocessor_script,
1108-
destination=os.path.join(
1109-
_CONTAINER_BASE_PATH, _CONTAINER_INPUT_PATH, _RECORD_PREPROCESSOR_SCRIPT_INPUT_NAME
1113+
destination=str(
1114+
pathlib.PurePosixPath(
1115+
_CONTAINER_BASE_PATH,
1116+
_CONTAINER_INPUT_PATH,
1117+
_RECORD_PREPROCESSOR_SCRIPT_INPUT_NAME,
1118+
)
11101119
),
11111120
name=_RECORD_PREPROCESSOR_SCRIPT_INPUT_NAME,
11121121
)
11131122

11141123
record_preprocessor_script_container_path = None
11151124
if normalized_record_preprocessor_script_input is not None:
1116-
record_preprocessor_script_container_path = os.path.join(
1117-
normalized_record_preprocessor_script_input.destination,
1118-
os.path.basename(record_preprocessor_script),
1125+
record_preprocessor_script_container_path = str(
1126+
pathlib.PurePosixPath(
1127+
normalized_record_preprocessor_script_input.destination,
1128+
os.path.basename(record_preprocessor_script),
1129+
)
11191130
)
11201131

11211132
normalized_post_processor_script_input = self._upload_and_convert_to_processing_input(
11221133
source=post_analytics_processor_script,
1123-
destination=os.path.join(
1124-
_CONTAINER_BASE_PATH,
1125-
_CONTAINER_INPUT_PATH,
1126-
_POST_ANALYTICS_PROCESSOR_SCRIPT_INPUT_NAME,
1134+
destination=str(
1135+
pathlib.PurePosixPath(
1136+
_CONTAINER_BASE_PATH,
1137+
_CONTAINER_INPUT_PATH,
1138+
_POST_ANALYTICS_PROCESSOR_SCRIPT_INPUT_NAME,
1139+
)
11271140
),
11281141
name=_POST_ANALYTICS_PROCESSOR_SCRIPT_INPUT_NAME,
11291142
)
11301143

11311144
post_processor_script_container_path = None
11321145
if normalized_post_processor_script_input is not None:
1133-
post_processor_script_container_path = os.path.join(
1134-
normalized_post_processor_script_input.destination,
1135-
os.path.basename(post_analytics_processor_script),
1146+
post_processor_script_container_path = str(
1147+
pathlib.PurePosixPath(
1148+
normalized_post_processor_script_input.destination,
1149+
os.path.basename(post_analytics_processor_script),
1150+
)
11361151
)
11371152

11381153
normalized_baseline_output = self._normalize_baseline_output(output_s3_uri=output_s3_uri)
@@ -1651,7 +1666,7 @@ def _normalize_baseline_output(self, output_s3_uri=None):
16511666
sagemaker.processing.ProcessingOutput: The normalized ProcessingOutput object.
16521667
16531668
"""
1654-
s3_uri = output_s3_uri or os.path.join(
1669+
s3_uri = output_s3_uri or s3.s3_path_join(
16551670
"s3://",
16561671
self.sagemaker_session.default_bucket(),
16571672
_MODEL_MONITOR_S3_PATH,
@@ -1660,7 +1675,7 @@ def _normalize_baseline_output(self, output_s3_uri=None):
16601675
_RESULTS_S3_PATH,
16611676
)
16621677
return ProcessingOutput(
1663-
source=os.path.join(_CONTAINER_BASE_PATH, _CONTAINER_OUTPUT_PATH),
1678+
source=str(pathlib.PurePosixPath(_CONTAINER_BASE_PATH, _CONTAINER_OUTPUT_PATH)),
16641679
destination=s3_uri,
16651680
output_name=_DEFAULT_OUTPUT_NAME,
16661681
)
@@ -1675,7 +1690,7 @@ def _normalize_monitoring_output(self, output_s3_uri=None):
16751690
sagemaker.model_monitor.MonitoringOutput: The normalized MonitoringOutput object.
16761691
16771692
"""
1678-
s3_uri = output_s3_uri or os.path.join(
1693+
s3_uri = output_s3_uri or s3.s3_path_join(
16791694
"s3://",
16801695
self.sagemaker_session.default_bucket(),
16811696
_MODEL_MONITOR_S3_PATH,
@@ -1684,7 +1699,8 @@ def _normalize_monitoring_output(self, output_s3_uri=None):
16841699
_RESULTS_S3_PATH,
16851700
)
16861701
output = MonitoringOutput(
1687-
source=os.path.join(_CONTAINER_BASE_PATH, _CONTAINER_OUTPUT_PATH), destination=s3_uri
1702+
source=str(pathlib.PurePosixPath(_CONTAINER_BASE_PATH, _CONTAINER_OUTPUT_PATH)),
1703+
destination=s3_uri,
16881704
)
16891705

16901706
return output
@@ -1761,7 +1777,7 @@ def _upload_and_convert_to_processing_input(self, source, destination, name):
17611777
parse_result = urlparse(url=source)
17621778

17631779
if parse_result.scheme != "s3":
1764-
s3_uri = os.path.join(
1780+
s3_uri = s3.s3_path_join(
17651781
"s3://",
17661782
self.sagemaker_session.default_bucket(),
17671783
_MODEL_MONITOR_S3_PATH,
@@ -1770,7 +1786,7 @@ def _upload_and_convert_to_processing_input(self, source, destination, name):
17701786
_INPUT_S3_PATH,
17711787
name,
17721788
)
1773-
S3Uploader.upload(
1789+
s3.S3Uploader.upload(
17741790
local_path=source, desired_s3_uri=s3_uri, sagemaker_session=self.sagemaker_session
17751791
)
17761792
source = s3_uri
@@ -1861,7 +1877,7 @@ def baseline_statistics(self, file_name=STATISTICS_JSON_DEFAULT_FILE_NAME, kms_k
18611877
try:
18621878
baselining_job_output_s3_path = self.outputs[0].destination
18631879
return Statistics.from_s3_uri(
1864-
statistics_file_s3_uri=os.path.join(baselining_job_output_s3_path, file_name),
1880+
statistics_file_s3_uri=s3.s3_path_join(baselining_job_output_s3_path, file_name),
18651881
kms_key=kms_key,
18661882
sagemaker_session=self.sagemaker_session,
18671883
)
@@ -1899,7 +1915,7 @@ def suggested_constraints(self, file_name=CONSTRAINTS_JSON_DEFAULT_FILE_NAME, km
18991915
try:
19001916
baselining_job_output_s3_path = self.outputs[0].destination
19011917
return Constraints.from_s3_uri(
1902-
constraints_file_s3_uri=os.path.join(baselining_job_output_s3_path, file_name),
1918+
constraints_file_s3_uri=s3.s3_path_join(baselining_job_output_s3_path, file_name),
19031919
kms_key=kms_key,
19041920
sagemaker_session=self.sagemaker_session,
19051921
)
@@ -2015,7 +2031,7 @@ def statistics(self, file_name=STATISTICS_JSON_DEFAULT_FILE_NAME, kms_key=None):
20152031
try:
20162032
baselining_job_output_s3_path = self.outputs[0].destination
20172033
return Statistics.from_s3_uri(
2018-
statistics_file_s3_uri=os.path.join(baselining_job_output_s3_path, file_name),
2034+
statistics_file_s3_uri=s3.s3_path_join(baselining_job_output_s3_path, file_name),
20192035
kms_key=kms_key,
20202036
sagemaker_session=self.sagemaker_session,
20212037
)
@@ -2055,7 +2071,7 @@ def constraint_violations(
20552071
try:
20562072
baselining_job_output_s3_path = self.outputs[0].destination
20572073
return ConstraintViolations.from_s3_uri(
2058-
constraint_violations_file_s3_uri=os.path.join(
2074+
constraint_violations_file_s3_uri=s3.s3_path_join(
20592075
baselining_job_output_s3_path, file_name
20602076
),
20612077
kms_key=kms_key,

src/sagemaker/model_monitor/monitoring_files.py

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,8 @@
2222

2323
from botocore.exceptions import ClientError
2424

25+
from sagemaker import s3
2526
from sagemaker.session import Session
26-
from sagemaker.s3 import S3Downloader
27-
from sagemaker.s3 import S3Uploader
2827

2928
NO_SUCH_KEY_CODE = "NoSuchKey"
3029

@@ -68,7 +67,7 @@ def save(self, new_save_location_s3_uri=None):
6867
if new_save_location_s3_uri is not None:
6968
self.file_s3_uri = new_save_location_s3_uri
7069

71-
return S3Uploader.upload_string_as_file_body(
70+
return s3.S3Uploader.upload_string_as_file_body(
7271
body=json.dumps(self.body_dict),
7372
desired_s3_uri=self.file_s3_uri,
7473
kms_key=self.kms_key,
@@ -119,7 +118,7 @@ def from_s3_uri(cls, statistics_file_s3_uri, kms_key=None, sagemaker_session=Non
119118
"""
120119
try:
121120
body_dict = json.loads(
122-
S3Downloader.read_file(
121+
s3.S3Downloader.read_file(
123122
s3_uri=statistics_file_s3_uri, sagemaker_session=sagemaker_session
124123
)
125124
)
@@ -158,10 +157,10 @@ def from_string(
158157
"""
159158
sagemaker_session = sagemaker_session or Session()
160159
file_name = file_name or "statistics.json"
161-
desired_s3_uri = os.path.join(
160+
desired_s3_uri = s3.s3_path_join(
162161
"s3://", sagemaker_session.default_bucket(), "monitoring", str(uuid.uuid4()), file_name
163162
)
164-
s3_uri = S3Uploader.upload_string_as_file_body(
163+
s3_uri = s3.S3Uploader.upload_string_as_file_body(
165164
body=statistics_file_string,
166165
desired_s3_uri=desired_s3_uri,
167166
kms_key=kms_key,
@@ -245,7 +244,7 @@ def from_s3_uri(cls, constraints_file_s3_uri, kms_key=None, sagemaker_session=No
245244
"""
246245
try:
247246
body_dict = json.loads(
248-
S3Downloader.read_file(
247+
s3.S3Downloader.read_file(
249248
s3_uri=constraints_file_s3_uri, sagemaker_session=sagemaker_session
250249
)
251250
)
@@ -287,10 +286,10 @@ def from_string(
287286
"""
288287
sagemaker_session = sagemaker_session or Session()
289288
file_name = file_name or "constraints.json"
290-
desired_s3_uri = os.path.join(
289+
desired_s3_uri = s3.s3_path_join(
291290
"s3://", sagemaker_session.default_bucket(), "monitoring", str(uuid.uuid4()), file_name
292291
)
293-
s3_uri = S3Uploader.upload_string_as_file_body(
292+
s3_uri = s3.S3Uploader.upload_string_as_file_body(
294293
body=constraints_file_string,
295294
desired_s3_uri=desired_s3_uri,
296295
kms_key=kms_key,
@@ -399,7 +398,7 @@ def from_s3_uri(cls, constraint_violations_file_s3_uri, kms_key=None, sagemaker_
399398
"""
400399
try:
401400
body_dict = json.loads(
402-
S3Downloader.read_file(
401+
s3.S3Downloader.read_file(
403402
s3_uri=constraint_violations_file_s3_uri, sagemaker_session=sagemaker_session
404403
)
405404
)
@@ -442,10 +441,10 @@ def from_string(
442441
"""
443442
sagemaker_session = sagemaker_session or Session()
444443
file_name = file_name or "constraint_violations.json"
445-
desired_s3_uri = os.path.join(
444+
desired_s3_uri = s3.s3_path_join(
446445
"s3://", sagemaker_session.default_bucket(), "monitoring", str(uuid.uuid4()), file_name
447446
)
448-
s3_uri = S3Uploader.upload_string_as_file_body(
447+
s3_uri = s3.S3Uploader.upload_string_as_file_body(
449448
body=constraint_violations_file_string,
450449
desired_s3_uri=desired_s3_uri,
451450
kms_key=kms_key,

0 commit comments

Comments
 (0)