Skip to content

Commit ed043dc

Browse files
pyrookapadamstx
authored andcommitted
feat: redact secrets in debug logging (#209)
Signed-off-by: Norbert Biczo <[email protected]>
1 parent 67b126c commit ed043dc

File tree

7 files changed

+243
-50
lines changed

7 files changed

+243
-50
lines changed

ibm_cloud_sdk_core/base_service.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import logging
2020
import json as json_import
2121
from http.cookiejar import CookieJar
22-
from http.client import HTTPConnection
22+
from http import client
2323
from os.path import basename
2424
from typing import Dict, List, Optional, Tuple, Union
2525
from urllib3.util.retry import Retry
@@ -43,7 +43,10 @@
4343
GzipStream,
4444
)
4545
from .private_helpers import _build_user_agent
46-
from .logger import get_logger
46+
from .logger import (
47+
get_logger,
48+
LoggingFilter,
49+
)
4750

4851
logger = get_logger()
4952

@@ -89,7 +92,7 @@ def __init__(
8992
self,
9093
*,
9194
service_url: str = None,
92-
authenticator: Authenticator = None,
95+
authenticator: Optional[Authenticator] = None,
9396
disable_ssl_verification: bool = False,
9497
enable_gzip_compression: bool = False,
9598
) -> None:
@@ -113,7 +116,10 @@ def __init__(
113116
self.http_client.mount('https://', self.http_adapter)
114117
# If debug logging is requested, then trigger HTTP message logging as well.
115118
if logger.isEnabledFor(logging.DEBUG):
116-
HTTPConnection.debuglevel = 1
119+
client.HTTPConnection.debuglevel = 1
120+
# Replace the `print` function in the HTTPClient module to
121+
# use the debug logger instead of the bare Python print.
122+
client.print = lambda *args: logger.debug(LoggingFilter.filter_message(" ".join(args)))
117123

118124
def enable_retries(self, max_retries: int = 4, retry_interval: float = 30.0) -> None:
119125
"""Enable automatic retries on the underlying http client used by the BaseService instance.

ibm_cloud_sdk_core/logger.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,71 @@
1515
# limitations under the License.
1616

1717
import logging
18+
import re
19+
1820

1921
# This is the name of the primary logger used by the library.
2022
LOGGER_NAME = 'ibm-cloud-sdk-core'
23+
# Keywords that are redacted.
24+
REDACTED_KEYWORDS = [
25+
"apikey",
26+
"api_key",
27+
"passcode",
28+
"password",
29+
"token",
30+
"aadClientId",
31+
"aadClientSecret",
32+
"auth",
33+
"auth_provider_x509_cert_url",
34+
"auth_uri",
35+
"client_email",
36+
"client_id",
37+
"client_x509_cert_url",
38+
"key",
39+
"project_id",
40+
"secret",
41+
"subscriptionId",
42+
"tenantId",
43+
"thumbprint",
44+
"token_uri",
45+
]
46+
47+
48+
class LoggingFilter:
49+
"""Functions used to filter messages before they are logged."""
50+
51+
redacted_tokens = "|".join(REDACTED_KEYWORDS)
52+
auth_header_pattern = re.compile(r"(?m)(Authorization|X-Auth\S*): ((.*?)(\r\n.*)|(.*))")
53+
property_settings_pattern = re.compile(r"(?i)(" + redacted_tokens + r")=[^&]*(&|$)")
54+
json_field_pattern = re.compile(r'(?i)"([^"]*(' + redacted_tokens + r')[^"_]*)":\s*"[^\,]*"')
55+
56+
@classmethod
57+
def redact_secrets(cls, text: str) -> str:
58+
"""Replaces values of potential secret keywords with a placeholder value.
59+
Args:
60+
text (str): the string to check and process
61+
62+
Returns:
63+
str: the safe, redacted string with all secrets masked out
64+
"""
65+
66+
placeholder = "[redacted]"
67+
redacted = cls.auth_header_pattern.sub(r"\1: " + placeholder + r"\4", text)
68+
redacted = cls.property_settings_pattern.sub(r"\1=" + placeholder + r"\2", redacted)
69+
redacted = cls.json_field_pattern.sub(r'"\1":"' + placeholder + r'"', redacted)
70+
return redacted
71+
72+
@classmethod
73+
def filter_message(cls, s: str) -> str:
74+
"""Filters 's' prior to logging it as a debug message"""
75+
# Redact secrets
76+
s = LoggingFilter.redact_secrets(s)
77+
78+
# Replace CRLF characters with an actual newline to make the message more readable.
79+
s = s.replace('\\r\\n', '\n')
80+
return s
2181

2282

23-
def get_logger():
83+
def get_logger() -> logging.Logger:
2484
"""Returns the primary logger object instance used by the library."""
2585
return logging.getLogger(LOGGER_NAME)

test/test_base_service.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -851,7 +851,6 @@ def test_reserved_keys(caplog):
851851
assert response.get_result().request.url == 'https://gateway.watsonplatform.net/test/api'
852852
assert response.get_result().request.method == 'GET'
853853
assert response.get_result().request.hooks == {'response': []}
854-
print('caplog.record_tuples:', caplog.record_tuples)
855854
assert caplog.record_tuples[0][2] == '"method" has been removed from the request'
856855
assert caplog.record_tuples[1][2] == '"url" has been removed from the request'
857856
assert caplog.record_tuples[2][2] == '"cookies" has been removed from the request'

test/test_http_adapter.py

Lines changed: 5 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,16 @@
11
# pylint: disable=missing-docstring
22
import logging
33
import os
4-
import threading
5-
import warnings
6-
from http.server import HTTPServer, SimpleHTTPRequestHandler
7-
from ssl import SSLContext, PROTOCOL_TLSv1_1, PROTOCOL_TLSv1_2
8-
from typing import Callable
4+
from ssl import PROTOCOL_TLSv1_1, PROTOCOL_TLSv1_2
95

106
import pytest
11-
import urllib3
127
from requests import exceptions
138

149
from ibm_cloud_sdk_core.base_service import BaseService
1510
from ibm_cloud_sdk_core.authenticators import NoAuthAuthenticator
1611
from .utils.logger_utils import setup_test_logger
12+
from .utils.http_utils import local_server
13+
1714

1815
setup_test_logger(logging.ERROR)
1916

@@ -33,40 +30,7 @@
3330
key = os.path.join(os.path.dirname(__file__), '../resources/test_ssl.key')
3431

3532

36-
def _local_server(tls_version: int, port: int) -> Callable:
37-
def decorator(test_function: Callable) -> Callable:
38-
def inner():
39-
# Disable warnings caused by the self-signed certificate.
40-
urllib3.disable_warnings()
41-
42-
# Build the SSL context for the server.
43-
ssl_context = SSLContext(tls_version)
44-
ssl_context.load_cert_chain(certfile=cert, keyfile=key)
45-
46-
# Create and start the server on a separate thread.
47-
server = HTTPServer(('localhost', port), SimpleHTTPRequestHandler)
48-
server.socket = ssl_context.wrap_socket(server.socket, server_side=True)
49-
t = threading.Thread(target=server.serve_forever)
50-
t.start()
51-
52-
# We run everything in a big try-except-finally block to make sure we always
53-
# shutdown the HTTP server gracefully.
54-
try:
55-
test_function()
56-
except Exception: # pylint: disable=try-except-raise
57-
raise
58-
finally:
59-
server.shutdown()
60-
t.join()
61-
# Re-enable warnings.
62-
warnings.resetwarnings()
63-
64-
return inner
65-
66-
return decorator
67-
68-
69-
@_local_server(PROTOCOL_TLSv1_1, 3333)
33+
@local_server(3333, PROTOCOL_TLSv1_1, cert, key)
7034
def test_tls_v1_1():
7135
service = BaseService(service_url='https://localhost:3333', authenticator=NoAuthAuthenticator())
7236
prepped = service.prepare_request('GET', url='/')
@@ -78,7 +42,7 @@ def test_tls_v1_1():
7842
assert exception.type is exceptions.SSLError or exception.type is exceptions.ConnectionError
7943

8044

81-
@_local_server(PROTOCOL_TLSv1_2, 3334)
45+
@local_server(3334, PROTOCOL_TLSv1_2, cert, key)
8246
def test_tls_v1_2():
8347
service = BaseService(service_url='https://localhost:3334', authenticator=NoAuthAuthenticator())
8448

test/test_logger.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# coding: utf-8
2+
3+
# Copyright 2024 IBM All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
# pylint: disable=missing-docstring
18+
19+
import logging
20+
21+
from ibm_cloud_sdk_core import base_service
22+
from ibm_cloud_sdk_core.authenticators import NoAuthAuthenticator
23+
from ibm_cloud_sdk_core.logger import LoggingFilter
24+
from .utils.http_utils import local_server
25+
26+
27+
def test_redact_secrets():
28+
redact_secrets = LoggingFilter.redact_secrets
29+
30+
assert "secret" not in redact_secrets("Authorization: Bearer secret")
31+
assert "secret" not in redact_secrets("Authorization: Basic secret")
32+
assert "secret" not in redact_secrets("X-Authorization: secret")
33+
assert "secret" not in redact_secrets("ApIKey=secret")
34+
assert "secret" not in redact_secrets("ApI_Key=secret")
35+
assert "secret" not in redact_secrets("passCode=secret")
36+
assert "secret" not in redact_secrets("PASSword=secret")
37+
assert "secret" not in redact_secrets("toKen=secret")
38+
assert "secret" not in redact_secrets("client_id=secret")
39+
assert "secret" not in redact_secrets("client_x509_cert_url=secret")
40+
assert "secret" not in redact_secrets("client_id=secret")
41+
assert "secret" not in redact_secrets("key=secret")
42+
assert "secret" not in redact_secrets("project_id=secret")
43+
assert "DaSecret" not in redact_secrets("secret=DaSecret")
44+
assert "secret" not in redact_secrets("subscriptionId=secret")
45+
assert "secret" not in redact_secrets("tenantId=secret")
46+
assert "secret" not in redact_secrets("thumbprint=secret")
47+
assert "secret" not in redact_secrets("token_uri=secret")
48+
assert "secret" not in redact_secrets('xxx "apIKEy": "secret",xxx')
49+
assert "secret" not in redact_secrets('xxx "apI_KEy": "secret",xxx')
50+
assert "secret" not in redact_secrets('xxx "pAsSCoDe": "secret",xxx')
51+
assert "secret" not in redact_secrets('xxx "passWORD": "secret",xxx')
52+
assert "secret" not in redact_secrets('xxx {"token": "secret"},xxx')
53+
assert "secret" not in redact_secrets('xxx "aadClientId": "secret",xxx')
54+
assert "secret" not in redact_secrets('xxx "aadClientSecret": "secret",xxx')
55+
assert "secret" not in redact_secrets('xxx "auth": "secret",xxx')
56+
assert "secret" not in redact_secrets('xxx "auth_provider_x509_cert_url": "secret",xxx')
57+
assert "secret" not in redact_secrets('xxx "auth_uri": "secret",xxx')
58+
assert "secret" not in redact_secrets('xxx "client_email": "secret",xxx')
59+
# Now use a real-world example, to validate the correct behavior.
60+
assert (
61+
redact_secrets(
62+
'GET / HTTP/1.1\r\nHost: localhost:3335\r\n'
63+
+ 'User-Agent: ibm-python-sdk-core-3.20.6 os.name=Darwin os.version=23.6.0 python.version=3.11.10\r\n'
64+
+ 'Accept-Encoding: gzip, deflate\r\nAccept: */*\r\nAuthorization: token-foo-bar-123\r\n'
65+
+ 'Connection: keep-alive\r\n\r\n'
66+
)
67+
== 'GET / HTTP/1.1\r\nHost: localhost:3335\r\n'
68+
+ 'User-Agent: ibm-python-sdk-core-3.20.6 os.name=Darwin os.version=23.6.0 python.version=3.11.10\r\n'
69+
+ 'Accept-Encoding: gzip, deflate\r\nAccept: */*\r\nAuthorization: [redacted]\r\nConnection: keep-alive\r\n\r\n'
70+
)
71+
72+
73+
# Simulate a real-world scenario.
74+
@local_server(3335)
75+
def test_redact_secrets_log(caplog):
76+
# Since we use a real BaseService here, we need to set the logging level
77+
# to DEBUG in its module, to simulate the real behavior.
78+
original_logging_level = base_service.logger.level
79+
base_service.logger.setLevel(logging.DEBUG)
80+
81+
try:
82+
service = base_service.BaseService(service_url="http://localhost:3335", authenticator=NoAuthAuthenticator())
83+
prepped = service.prepare_request('GET', url='/', headers={'Authorization': 'token-foo-bar-123'})
84+
res = service.send(prepped)
85+
except Exception as ex:
86+
raise ex
87+
finally:
88+
# And now we restore the logger's level to the original value.
89+
base_service.logger.setLevel(original_logging_level)
90+
91+
assert res is not None
92+
# Make sure the request has been logged and the token is redacted.
93+
assert "Authorization" in caplog.text
94+
assert "token" not in caplog.text

test/utils/http_utils.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# coding: utf-8
2+
3+
# Copyright 2024 IBM All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
import functools
18+
import threading
19+
import warnings
20+
from typing import Callable, Optional
21+
from http.server import HTTPServer, SimpleHTTPRequestHandler
22+
from ssl import SSLContext
23+
24+
import urllib3
25+
26+
27+
def local_server(
28+
port: int, tls_version: Optional[int] = None, cert: Optional[str] = None, key: Optional[str] = None
29+
) -> Callable:
30+
"""local_server helps setting up and running an HTTP(S) server for testing purposes."""
31+
32+
def decorator(test_function: Callable) -> Callable:
33+
@functools.wraps(test_function)
34+
def inner(*args, **kwargs):
35+
is_https = tls_version and cert and key
36+
# Disable warnings caused by the self-signed certificate.
37+
urllib3.disable_warnings()
38+
39+
if is_https:
40+
# Build the SSL context for the server.
41+
ssl_context = SSLContext(tls_version)
42+
ssl_context.load_cert_chain(certfile=cert, keyfile=key)
43+
44+
# Create and start the server on a separate thread.
45+
server = HTTPServer(('localhost', port), SimpleHTTPRequestHandler)
46+
if is_https:
47+
server.socket = ssl_context.wrap_socket(server.socket, server_side=True)
48+
49+
t = threading.Thread(target=server.serve_forever)
50+
t.start()
51+
52+
# We run everything in a big try-except-finally block to make sure we always
53+
# shutdown the HTTP server gracefully.
54+
try:
55+
test_function(*args, **kwargs)
56+
except Exception: # pylint: disable=try-except-raise
57+
raise
58+
finally:
59+
server.shutdown()
60+
t.join()
61+
# Re-enable warnings.
62+
warnings.resetwarnings()
63+
64+
return inner
65+
66+
return decorator

test/utils/logger_utils.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,11 @@
1515
# limitations under the License.
1616

1717
import logging
18-
from http.client import HTTPConnection
19-
from ibm_cloud_sdk_core.logger import get_logger
18+
from http import client
19+
from ibm_cloud_sdk_core.logger import (
20+
get_logger,
21+
LoggingFilter,
22+
)
2023

2124

2225
def setup_test_logger(level: int):
@@ -27,4 +30,5 @@ def setup_test_logger(level: int):
2730

2831
# If debug logging is requested, then trigger HTTP message logging as well.
2932
if logger.isEnabledFor(logging.DEBUG):
30-
HTTPConnection.debuglevel = 1
33+
client.HTTPConnection.debuglevel = 1
34+
client.print = lambda *args: logger.debug(LoggingFilter.filter_message(" ".join(args)))

0 commit comments

Comments
 (0)