Skip to content

change: SDK Defaults - switch from config printing to logging #3872

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions src/sagemaker/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from __future__ import absolute_import

import pathlib
import logging
import os
from typing import List
import boto3
Expand All @@ -29,8 +28,9 @@
from botocore.utils import merge_dicts
from six.moves.urllib.parse import urlparse
from sagemaker.config.config_schema import SAGEMAKER_PYTHON_SDK_CONFIG_SCHEMA
from sagemaker.config.config_utils import get_sagemaker_config_logger

logger = logging.getLogger("sagemaker")
logger = get_sagemaker_config_logger()

_APP_NAME = "sagemaker"
# The default config file location of the Administrator provided config file. This path can be
Expand Down Expand Up @@ -122,6 +122,9 @@ def load_sagemaker_config(additional_config_paths: List[str] = None, s3_resource
if config_from_file:
validate_sagemaker_config(config_from_file)
merge_dicts(merged_config, config_from_file)
logger.info("Fetched defaults config from location: %s", file_path)
else:
logger.debug("Fetched defaults config from location: %s, but it was empty", file_path)
return merged_config


Expand All @@ -148,7 +151,7 @@ def _load_config_from_file(file_path: str) -> dict:
f"Unable to load the config file from the location: {file_path}"
f"Provide a valid file path"
)
logger.debug("Fetching config file from the path: %s", file_path)
logger.debug("Fetching defaults config from location: %s", file_path)
return yaml.safe_load(open(inferred_file_path, "r"))


Expand All @@ -164,7 +167,7 @@ def _load_config_from_s3(s3_uri, s3_resource_for_config) -> dict:
)
s3_resource_for_config = boto_session.resource("s3", region_name=boto_region_name)

logger.debug("Fetching config file from the S3 URI: %s", s3_uri)
logger.debug("Fetching defaults config from location: %s", s3_uri)
inferred_s3_uri = _get_inferred_s3_uri(s3_uri, s3_resource_for_config)
parsed_url = urlparse(inferred_s3_uri)
bucket, key_prefix = parsed_url.netloc, parsed_url.path.lstrip("/")
Expand Down
199 changes: 199 additions & 0 deletions src/sagemaker/config/config_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""This file contains util functions for the sagemaker Defaults Config.

These utils may be used inside or outside the config module.
"""
from __future__ import absolute_import

import logging
import sys


def get_sagemaker_config_logger():
"""Return a logger with the name 'sagemaker.config'

If the logger to be returned has no level or handlers set, this will get level and handler
attributes. (So if the SDK user has setup loggers in a certain way, that setup will not be
changed by this function.) It is safe to make repeat calls to this function.
"""
sagemaker_config_logger = logging.getLogger("sagemaker.config")
sagemaker_logger = logging.getLogger("sagemaker")

if sagemaker_config_logger.level == logging.NOTSET:
sagemaker_config_logger.setLevel(logging.INFO)

# check sagemaker_logger here as well, so that if handlers were set for the parent logger
# already, we dont change behavior for the child logger
if len(sagemaker_config_logger.handlers) == 0 and len(sagemaker_logger.handlers) == 0:
# use sys.stdout so logs dont show up with a red background in a notebook
handler = logging.StreamHandler(sys.stdout)

formatter = logging.Formatter("%(name)s %(levelname)-4s - %(message)s")
handler.setFormatter(formatter)
sagemaker_config_logger.addHandler(handler)

# if a handler is being added, we dont want the root handler to also process the same events
sagemaker_config_logger.propagate = False

return sagemaker_config_logger


def _log_sagemaker_config_single_substitution(source_value, config_value, config_key_path: str):
"""Informs the SDK user whether a config value was present and automatically substituted

Args:
source_value: The value that will be used if it exists. Usually, this is user-provided
input to a Class or to a session.py method, or None if no input was provided.
config_value: The value fetched from sagemaker_config. If it exists, this is the value that
will be used if direct_input is None.
config_key_path: A string denoting the path of keys that point to the config value in the
sagemaker_config.

Returns:
None. Logs information to the "sagemaker.config" logger.
"""
logger = get_sagemaker_config_logger()

if config_value is not None:

if source_value is None:
# Sagemaker Config value is going to be used. By default the user should know about
# this scenario because the behavior they expect could change because of a new config
# value being injected in.
# However, it may not be safe to log ARNs to stdout by default. We can include more
# diagnostic info if the user enabled DEBUG logs though.
if logger.isEnabledFor(logging.DEBUG):
logger.debug(
"Applied value\n config key = %s\n config value that will be used = %s",
config_key_path,
config_value,
)
else:
logger.info(
"Applied value from config key = %s",
config_key_path,
)

# The cases below here are logged as just debug statements because this info can be useful
# when debugging the config, but should not affect current behavior with/without the config.

elif source_value is not None and config_value == source_value:
# Sagemaker Config had a value defined that is NOT going to be used here.
# Either (1) the config value was already fetched and applied earlier, or
# (2) the user happened to pass in the same value.
logger.debug(
(
"Skipped value\n"
" config key = %s\n"
" config value = %s\n"
" source value that will be used = %s"
),
config_key_path,
config_value,
source_value,
)
elif source_value is not None and config_value != source_value:
# Sagemaker Config had a value defined that is NOT going to be used
# and the config value has not already been applied earlier (we know because the values
# are different).
logger.debug(
(
"Skipped value\n"
" config key = %s\n"
" config value = %s\n"
" source value that will be used = %s",
),
config_key_path,
config_value,
source_value,
)
else:
# nothing was specified in the config and nothing is being automatically applied
logger.debug("Skipped value because no value defined\n config key = %s", config_key_path)


def _log_sagemaker_config_merge(
source_value=None,
config_value=None,
merged_source_and_config_value=None,
config_key_path: str = None,
):
"""Informs the SDK user whether a config value was present and automatically substituted

Args:
source_value: The dict or object that would be used if no default values existed. Usually,
this is user-provided input to a Class or to a session.py method, or None if no input
was provided.
config_value: The dict or object fetched from sagemaker_config. If it exists, this is the
value that will be used if source_value is None.
merged_source_and_config_value: The value that results from the merging of source_value and
original_config_value. This will be the value used.
config_key_path: A string denoting the path of keys that point to the config value in the
sagemaker_config.

Returns:
None. Logs information to the "sagemaker.config" logger.
"""
logger = get_sagemaker_config_logger()

if config_value:

if source_value != merged_source_and_config_value:
# Sagemaker Config value(s) were used and affected the final object/dictionary. By
# default the user should know about this scenario because the behavior they expect
# could change because of new config values being injected in.
# However, it may not be safe to log ARNs to stdout by default. We can include more
# diagnostic info if the user enabled DEBUG logs though.
if logger.isEnabledFor(logging.DEBUG):
logger.debug(
(
"Applied value(s)\n"
" config key = %s\n"
" config value = %s\n"
" source value = %s\n"
" combined value that will be used = %s"
),
config_key_path,
config_value,
source_value,
merged_source_and_config_value,
)
else:
logger.info(
"Applied value(s) from config key = %s",
config_key_path,
)

# The cases below here are logged as just debug statements because this info can be useful
# when debugging the config, but should not affect current behavior with/without the config.

else:
# Sagemaker Config had a value defined that is NOT going to be used here.
# Either (1) the config value was already fetched and applied earlier, or
# (2) the user happened to pass in the same values.
logger.debug(
(
"Skipped value(s)\n"
" config key = %s\n"
" config value = %s\n"
" source value that will be used = %s"
),
config_key_path,
config_value,
merged_source_and_config_value,
)

else:
# nothing was specified in the config and nothing is being automatically applied
logger.debug("Skipped value because no value defined\n config key = %s", config_key_path)
53 changes: 6 additions & 47 deletions src/sagemaker/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@
SESSION_DEFAULT_S3_BUCKET_PATH,
SESSION_DEFAULT_S3_OBJECT_KEY_PREFIX_PATH,
)
from sagemaker.config.config_utils import _log_sagemaker_config_merge
from sagemaker.deprecations import deprecated_class
from sagemaker.inputs import ShuffleConfig, TrainingInput, BatchDataCaptureConfig
from sagemaker.user_agent import prepend_user_agent
Expand Down Expand Up @@ -606,47 +607,6 @@ def _create_s3_bucket_if_it_does_not_exist(self, bucket_name, region):
else:
raise

def _print_message_on_sagemaker_config_usage(
self, direct_input, config_value, config_path: str
):
"""Informs the SDK user whether a config value was present and automatically substituted

Args:
direct_input: the value that would be used if no sagemaker_config or default values
existed. Usually this will be user-provided input to a Class or to a
session.py method, or None if no input was provided.
config_value: the value fetched from sagemaker_config. This is usually the value that
will be used if direct_input is None.
config_path: a string denoting the path of keys that point to the config value in the
sagemaker_config

Returns:
No output (just prints information)
"""

if config_value is not None:

if direct_input is not None and config_value != direct_input:
# Sagemaker Config had a value defined that is NOT going to be used
# and the config value has not already been applied earlier
print(
"[Sagemaker Config - skipped value]\n",
"config key = {}\n".format(config_path),
"config value = {}\n".format(config_value),
"specified value that will be used = {}\n".format(direct_input),
)

elif direct_input is None:
# Sagemaker Config value is going to be used
print(
"[Sagemaker Config - applied value]\n",
"config key = {}\n".format(config_path),
"config value that will be used = {}\n".format(config_value),
)

# There is no print statement needed if nothing was specified in the config and nothing is
# being automatically applied

def _append_sagemaker_config_tags(self, tags: list, config_path_to_tags: str):
"""Appends tags specified in the sagemaker_config to the given list of tags.

Expand Down Expand Up @@ -677,12 +637,11 @@ def _append_sagemaker_config_tags(self, tags: list, config_path_to_tags: str):
# user-provided tags.
all_tags.append(config_tag)

print(
"[Sagemaker Config - applied value]\n",
"config key = {}\n".format(config_path_to_tags),
"config value = {}\n".format(config_tags),
"source value = {}\n".format(tags),
"combined value that will be used = {}\n".format(all_tags),
_log_sagemaker_config_merge(
source_value=tags,
config_value=config_tags,
merged_source_and_config_value=all_tags,
config_key_path=config_path_to_tags,
)

return all_tags
Expand Down
Loading