Skip to content

Commit d979d1b

Browse files
Ruban Hussainknikure
authored andcommitted
change: SDK Defaults - switch from config printing to logging, avoid logging ARNs by default, and respect logging setup by the user
1 parent 32475b9 commit d979d1b

File tree

4 files changed

+232
-117
lines changed

4 files changed

+232
-117
lines changed

src/sagemaker/config/config.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
from __future__ import absolute_import
2020

2121
import pathlib
22-
import logging
2322
import os
2423
from typing import List
2524
import boto3
@@ -29,8 +28,9 @@
2928
from botocore.utils import merge_dicts
3029
from six.moves.urllib.parse import urlparse
3130
from sagemaker.config.config_schema import SAGEMAKER_PYTHON_SDK_CONFIG_SCHEMA
31+
from sagemaker.config.config_utils import get_sagemaker_config_logger
3232

33-
logger = logging.getLogger("sagemaker")
33+
logger = get_sagemaker_config_logger()
3434

3535
_APP_NAME = "sagemaker"
3636
# The default config file location of the Administrator provided config file. This path can be
@@ -122,6 +122,9 @@ def load_sagemaker_config(additional_config_paths: List[str] = None, s3_resource
122122
if config_from_file:
123123
validate_sagemaker_config(config_from_file)
124124
merge_dicts(merged_config, config_from_file)
125+
logger.info("Fetched defaults config from location: %s", file_path)
126+
else:
127+
logger.debug("Fetched defaults config from location: %s, but it was empty", file_path)
125128
return merged_config
126129

127130

@@ -148,7 +151,7 @@ def _load_config_from_file(file_path: str) -> dict:
148151
f"Unable to load the config file from the location: {file_path}"
149152
f"Provide a valid file path"
150153
)
151-
logger.debug("Fetching config file from the path: %s", file_path)
154+
logger.debug("Fetching defaults config from location: %s", file_path)
152155
return yaml.safe_load(open(inferred_file_path, "r"))
153156

154157

@@ -164,7 +167,7 @@ def _load_config_from_s3(s3_uri, s3_resource_for_config) -> dict:
164167
)
165168
s3_resource_for_config = boto_session.resource("s3", region_name=boto_region_name)
166169

167-
logger.debug("Fetching config file from the S3 URI: %s", s3_uri)
170+
logger.debug("Fetching defaults config from location: %s", s3_uri)
168171
inferred_s3_uri = _get_inferred_s3_uri(s3_uri, s3_resource_for_config)
169172
parsed_url = urlparse(inferred_s3_uri)
170173
bucket, key_prefix = parsed_url.netloc, parsed_url.path.lstrip("/")

src/sagemaker/config/config_utils.py

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"). You
4+
# may not use this file except in compliance with the License. A copy of
5+
# the License is located at
6+
#
7+
# http://aws.amazon.com/apache2.0/
8+
#
9+
# or in the "license" file accompanying this file. This file is
10+
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific
12+
# language governing permissions and limitations under the License.
13+
"""This file contains util functions for the sagemaker Defaults Config.
14+
15+
These utils may be used inside or outside the config module.
16+
"""
17+
from __future__ import absolute_import
18+
19+
import logging
20+
import sys
21+
22+
23+
def get_sagemaker_config_logger():
24+
"""Return a logger with the name 'sagemaker.config'
25+
26+
If the logger to be returned has no level or handlers set, this will get level and handler
27+
attributes. (So if the SDK user has setup loggers in a certain way, that setup will not be
28+
changed by this function.) It is safe to make repeat calls to this function.
29+
"""
30+
sagemaker_config_logger = logging.getLogger("sagemaker.config")
31+
sagemaker_logger = logging.getLogger("sagemaker")
32+
33+
if sagemaker_config_logger.level == logging.NOTSET:
34+
sagemaker_config_logger.setLevel(logging.INFO)
35+
36+
# check sagemaker_logger here as well, so that if handlers were set for the parent logger
37+
# already, we dont change behavior for the child logger
38+
if len(sagemaker_config_logger.handlers) == 0 and len(sagemaker_logger.handlers) == 0:
39+
# use sys.stdout so logs dont show up with a red background in a notebook
40+
handler = logging.StreamHandler(sys.stdout)
41+
42+
formatter = logging.Formatter("%(name)s %(levelname)-4s - %(message)s")
43+
handler.setFormatter(formatter)
44+
sagemaker_config_logger.addHandler(handler)
45+
46+
# if a handler is being added, we dont want the root handler to also process the same events
47+
sagemaker_config_logger.propagate = False
48+
49+
return sagemaker_config_logger
50+
51+
52+
def _log_sagemaker_config_single_substitution(source_value, config_value, config_key_path: str):
53+
"""Informs the SDK user whether a config value was present and automatically substituted
54+
55+
Args:
56+
source_value: The value that will be used if it exists. Usually, this is user-provided
57+
input to a Class or to a session.py method, or None if no input was provided.
58+
config_value: The value fetched from sagemaker_config. If it exists, this is the value that
59+
will be used if direct_input is None.
60+
config_key_path: A string denoting the path of keys that point to the config value in the
61+
sagemaker_config.
62+
63+
Returns:
64+
None. Logs information to the "sagemaker.config" logger.
65+
"""
66+
logger = get_sagemaker_config_logger()
67+
68+
if config_value is not None:
69+
70+
if source_value is None:
71+
# Sagemaker Config value is going to be used. By default the user should know about
72+
# this scenario because the behavior they expect could change because of a new config
73+
# value being injected in.
74+
# However, it may not be safe to log ARNs to stdout by default. We can include more
75+
# diagnostic info if the user enabled DEBUG logs though.
76+
if logger.isEnabledFor(logging.DEBUG):
77+
logger.debug(
78+
"Applied value\n config key = %s\n config value that will be used = %s",
79+
config_key_path,
80+
config_value,
81+
)
82+
else:
83+
logger.info(
84+
"Applied value from config key = %s",
85+
config_key_path,
86+
)
87+
88+
# The cases below here are logged as just debug statements because this info can be useful
89+
# when debugging the config, but should not affect current behavior with/without the config.
90+
91+
elif source_value is not None and config_value == source_value:
92+
# Sagemaker Config had a value defined that is NOT going to be used here.
93+
# Either (1) the config value was already fetched and applied earlier, or
94+
# (2) the user happened to pass in the same value.
95+
logger.debug(
96+
(
97+
"Skipped value\n"
98+
" config key = %s\n"
99+
" config value = %s\n"
100+
" source value that will be used = %s"
101+
),
102+
config_key_path,
103+
config_value,
104+
source_value,
105+
)
106+
elif source_value is not None and config_value != source_value:
107+
# Sagemaker Config had a value defined that is NOT going to be used
108+
# and the config value has not already been applied earlier (we know because the values
109+
# are different).
110+
logger.debug(
111+
(
112+
"Skipped value\n"
113+
" config key = %s\n"
114+
" config value = %s\n"
115+
" source value that will be used = %s",
116+
),
117+
config_key_path,
118+
config_value,
119+
source_value,
120+
)
121+
else:
122+
# nothing was specified in the config and nothing is being automatically applied
123+
logger.debug("Skipped value because no value defined\n config key = %s", config_key_path)
124+
125+
126+
def _log_sagemaker_config_merge(
127+
source_value=None,
128+
config_value=None,
129+
merged_source_and_config_value=None,
130+
config_key_path: str = None,
131+
):
132+
"""Informs the SDK user whether a config value was present and automatically substituted
133+
134+
Args:
135+
source_value: The dict or object that would be used if no default values existed. Usually,
136+
this is user-provided input to a Class or to a session.py method, or None if no input
137+
was provided.
138+
config_value: The dict or object fetched from sagemaker_config. If it exists, this is the
139+
value that will be used if source_value is None.
140+
merged_source_and_config_value: The value that results from the merging of source_value and
141+
original_config_value. This will be the value used.
142+
config_key_path: A string denoting the path of keys that point to the config value in the
143+
sagemaker_config.
144+
145+
Returns:
146+
None. Logs information to the "sagemaker.config" logger.
147+
"""
148+
logger = get_sagemaker_config_logger()
149+
150+
if config_value:
151+
152+
if source_value != merged_source_and_config_value:
153+
# Sagemaker Config value(s) were used and affected the final object/dictionary. By
154+
# default the user should know about this scenario because the behavior they expect
155+
# could change because of new config values being injected in.
156+
# However, it may not be safe to log ARNs to stdout by default. We can include more
157+
# diagnostic info if the user enabled DEBUG logs though.
158+
if logger.isEnabledFor(logging.DEBUG):
159+
logger.debug(
160+
(
161+
"Applied value(s)\n"
162+
" config key = %s\n"
163+
" config value = %s\n"
164+
" source value = %s\n"
165+
" combined value that will be used = %s"
166+
),
167+
config_key_path,
168+
config_value,
169+
source_value,
170+
merged_source_and_config_value,
171+
)
172+
else:
173+
logger.info(
174+
"Applied value(s) from config key = %s",
175+
config_key_path,
176+
)
177+
178+
# The cases below here are logged as just debug statements because this info can be useful
179+
# when debugging the config, but should not affect current behavior with/without the config.
180+
181+
else:
182+
# Sagemaker Config had a value defined that is NOT going to be used here.
183+
# Either (1) the config value was already fetched and applied earlier, or
184+
# (2) the user happened to pass in the same values.
185+
logger.debug(
186+
(
187+
"Skipped value(s)\n"
188+
" config key = %s\n"
189+
" config value = %s\n"
190+
" source value that will be used = %s"
191+
),
192+
config_key_path,
193+
config_value,
194+
merged_source_and_config_value,
195+
)
196+
197+
else:
198+
# nothing was specified in the config and nothing is being automatically applied
199+
logger.debug("Skipped value because no value defined\n config key = %s", config_key_path)

src/sagemaker/session.py

Lines changed: 6 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@
9595
SESSION_DEFAULT_S3_BUCKET_PATH,
9696
SESSION_DEFAULT_S3_OBJECT_KEY_PREFIX_PATH,
9797
)
98+
from sagemaker.config.config_utils import _log_sagemaker_config_merge
9899
from sagemaker.deprecations import deprecated_class
99100
from sagemaker.inputs import ShuffleConfig, TrainingInput, BatchDataCaptureConfig
100101
from sagemaker.user_agent import prepend_user_agent
@@ -606,47 +607,6 @@ def _create_s3_bucket_if_it_does_not_exist(self, bucket_name, region):
606607
else:
607608
raise
608609

609-
def _print_message_on_sagemaker_config_usage(
610-
self, direct_input, config_value, config_path: str
611-
):
612-
"""Informs the SDK user whether a config value was present and automatically substituted
613-
614-
Args:
615-
direct_input: the value that would be used if no sagemaker_config or default values
616-
existed. Usually this will be user-provided input to a Class or to a
617-
session.py method, or None if no input was provided.
618-
config_value: the value fetched from sagemaker_config. This is usually the value that
619-
will be used if direct_input is None.
620-
config_path: a string denoting the path of keys that point to the config value in the
621-
sagemaker_config
622-
623-
Returns:
624-
No output (just prints information)
625-
"""
626-
627-
if config_value is not None:
628-
629-
if direct_input is not None and config_value != direct_input:
630-
# Sagemaker Config had a value defined that is NOT going to be used
631-
# and the config value has not already been applied earlier
632-
print(
633-
"[Sagemaker Config - skipped value]\n",
634-
"config key = {}\n".format(config_path),
635-
"config value = {}\n".format(config_value),
636-
"specified value that will be used = {}\n".format(direct_input),
637-
)
638-
639-
elif direct_input is None:
640-
# Sagemaker Config value is going to be used
641-
print(
642-
"[Sagemaker Config - applied value]\n",
643-
"config key = {}\n".format(config_path),
644-
"config value that will be used = {}\n".format(config_value),
645-
)
646-
647-
# There is no print statement needed if nothing was specified in the config and nothing is
648-
# being automatically applied
649-
650610
def _append_sagemaker_config_tags(self, tags: list, config_path_to_tags: str):
651611
"""Appends tags specified in the sagemaker_config to the given list of tags.
652612
@@ -677,12 +637,11 @@ def _append_sagemaker_config_tags(self, tags: list, config_path_to_tags: str):
677637
# user-provided tags.
678638
all_tags.append(config_tag)
679639

680-
print(
681-
"[Sagemaker Config - applied value]\n",
682-
"config key = {}\n".format(config_path_to_tags),
683-
"config value = {}\n".format(config_tags),
684-
"source value = {}\n".format(tags),
685-
"combined value that will be used = {}\n".format(all_tags),
640+
_log_sagemaker_config_merge(
641+
source_value=tags,
642+
config_value=config_tags,
643+
merged_source_and_config_value=all_tags,
644+
config_key_path=config_path_to_tags,
686645
)
687646

688647
return all_tags

0 commit comments

Comments
 (0)