Skip to content

Improved PII stripping. #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 63 additions & 3 deletions sentry_sdk/integrations/pymongo.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from __future__ import absolute_import
import copy

from sentry_sdk import Hub
from sentry_sdk.hub import _should_send_default_pii
Expand All @@ -23,6 +24,66 @@
)


SAFE_COMMAND_ATTRIBUTES = [
"insert",
"ordered",
"find",
"limit",
"singleBatch",
"aggregate",
"createIndexes",
"indexes",
"delete",
"findAndModify",
"renameCollection",
"to",
"drop",
]


def _strip_pii(command):
# type: (Dict[str, Any]) -> Dict[str, Any]
for idx, key in enumerate(command):
is_safe_field = key in SAFE_COMMAND_ATTRIBUTES
if is_safe_field:
# Skip if safe key
continue

update_db_command = key == "update" and "findAndModify" not in command
if update_db_command:
# Also skip "update" db command because it is save.
# There is also an "update" key in the "findAndModify" command, which is NOT safe!
continue

# Special stripping for documents
is_document = key == "documents"
if is_document:
for doc in command[key]:
for doc_key in doc:
doc[doc_key] = "%s"
continue

# Special stripping for dict style fields
is_dict_field = key in ["filter", "query", "update"]
if is_dict_field:
for item_key in command[key]:
command[key][item_key] = "%s"
continue

# For pipeline fields strip the `$match` dict
is_pipeline_field = key == "pipeline"
if is_pipeline_field:
for pipeline in command[key]:
for match_key in pipeline["$match"] if "$match" in pipeline else []:
pipeline["$match"][match_key] = "%s"
continue

# Default stripping
command[key] = "%s"

return command


class CommandTracer(monitoring.CommandListener):
def __init__(self):
# type: () -> None
Expand All @@ -38,7 +99,7 @@ def started(self, event):
if hub.get_integration(PyMongoIntegration) is None:
return
with capture_internal_exceptions():
command = dict(event.command)
command = dict(copy.deepcopy(event.command))

command.pop("$db", None)
command.pop("$clusterTime", None)
Expand Down Expand Up @@ -70,8 +131,7 @@ def started(self, event):
pass

if not _should_send_default_pii():
for key in command:
command[key] = "%s"
command = _strip_pii(command)

query = "{} {}".format(event.command_name, command)
span = hub.start_span(op=op, description=query)
Expand Down
298 changes: 297 additions & 1 deletion tests/integrations/pymongo/test_pymongo.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from sentry_sdk import capture_message, start_transaction
from sentry_sdk.integrations.pymongo import PyMongoIntegration
from sentry_sdk.integrations.pymongo import PyMongoIntegration, _strip_pii

from mockupdb import MockupDB, OpQuery
from pymongo import MongoClient
Expand Down Expand Up @@ -123,3 +123,299 @@ def test_breadcrumbs(sentry_init, capture_events, mongo_server, with_pii):
"net.peer.name": mongo_server.host,
"net.peer.port": str(mongo_server.port),
}


# Table-driven test for `_strip_pii`: each case pairs a raw MongoDB command
# ("command") with the exact dict expected after stripping
# ("command_stripped").
# NOTE(review): every address literal below reads "[email protected]", which
# looks like an e-mail obfuscation placeholder rather than a real fixture
# value -- confirm against the upstream test file.
@pytest.mark.parametrize(
    "testcase",
    [
        # insert: document keys are kept, every document value becomes "%s".
        {
            "command": {
                "insert": "my_collection",
                "ordered": True,
                "documents": [
                    {
                        "username": "anton2",
                        "email": "[email protected]",
                        "password": "c4e86722fb56d946f7ddeecdae47e1c4458bf98a0a3ee5d5113111adf7bf0175",
                        "_id": "635bc7403cb4f8a736f61cf2",
                    }
                ],
            },
            "command_stripped": {
                "insert": "my_collection",
                "ordered": True,
                "documents": [
                    {"username": "%s", "email": "%s", "password": "%s", "_id": "%s"}
                ],
            },
        },
        {
            "command": {
                "insert": "my_collection",
                "ordered": True,
                "documents": [
                    {
                        "username": "indiana4",
                        "email": "[email protected]",
                        "password": "63e86722fb56d946f7ddeecdae47e1c4458bf98a0a3ee5d5113111adf7bf016b",
                        "_id": "635bc7403cb4f8a736f61cf3",
                    }
                ],
            },
            "command_stripped": {
                "insert": "my_collection",
                "ordered": True,
                "documents": [
                    {"username": "%s", "email": "%s", "password": "%s", "_id": "%s"}
                ],
            },
        },
        # find with an empty filter: expected output equals the input.
        {
            "command": {
                "find": "my_collection",
                "filter": {},
                "limit": 1,
                "singleBatch": True,
            },
            "command_stripped": {
                "find": "my_collection",
                "filter": {},
                "limit": 1,
                "singleBatch": True,
            },
        },
        # find with a filter: filter keys are kept, filter values are masked.
        {
            "command": {
                "find": "my_collection",
                "filter": {"username": "notthere"},
                "limit": 1,
                "singleBatch": True,
            },
            "command_stripped": {
                "find": "my_collection",
                "filter": {"username": "%s"},
                "limit": 1,
                "singleBatch": True,
            },
        },
        # insert with several documents: each document is masked separately.
        {
            "command": {
                "insert": "my_collection",
                "ordered": True,
                "documents": [
                    {
                        "username": "userx1",
                        "email": "[email protected]",
                        "password": "ccc86722fb56d946f7ddeecdae47e1c4458bf98a0a3ee5d5113111adf7bf0175",
                        "_id": "635bc7403cb4f8a736f61cf4",
                    },
                    {
                        "username": "userx2",
                        "email": "[email protected]",
                        "password": "xxx86722fb56d946f7ddeecdae47e1c4458bf98a0a3ee5d5113111adf7bf0175",
                        "_id": "635bc7403cb4f8a736f61cf5",
                    },
                ],
            },
            "command_stripped": {
                "insert": "my_collection",
                "ordered": True,
                "documents": [
                    {"username": "%s", "email": "%s", "password": "%s", "_id": "%s"},
                    {"username": "%s", "email": "%s", "password": "%s", "_id": "%s"},
                ],
            },
        },
        {
            "command": {
                "find": "my_collection",
                "filter": {"email": "[email protected]"},
            },
            "command_stripped": {"find": "my_collection", "filter": {"email": "%s"}},
        },
        # aggregate: only values inside `$match` stages are masked, the
        # `$group` stage is expected unchanged and `cursor` is masked wholesale.
        {
            "command": {
                "aggregate": "my_collection",
                "pipeline": [{"$match": {}}, {"$group": {"_id": 1, "n": {"$sum": 1}}}],
                "cursor": {},
            },
            "command_stripped": {
                "aggregate": "my_collection",
                "pipeline": [{"$match": {}}, {"$group": {"_id": 1, "n": {"$sum": 1}}}],
                "cursor": "%s",
            },
        },
        {
            "command": {
                "aggregate": "my_collection",
                "pipeline": [
                    {"$match": {"email": "[email protected]"}},
                    {"$group": {"_id": 1, "n": {"$sum": 1}}},
                ],
                "cursor": {},
            },
            "command_stripped": {
                "aggregate": "my_collection",
                "pipeline": [
                    {"$match": {"email": "%s"}},
                    {"$group": {"_id": 1, "n": {"$sum": 1}}},
                ],
                "cursor": "%s",
            },
        },
        # createIndexes: expected output equals the input.
        {
            "command": {
                "createIndexes": "my_collection",
                "indexes": [{"name": "username_1", "key": [("username", 1)]}],
            },
            "command_stripped": {
                "createIndexes": "my_collection",
                "indexes": [{"name": "username_1", "key": [("username", 1)]}],
            },
        },
        # update command: the collection name under "update" is kept, the
        # whole "updates" value is replaced by "%s".
        {
            "command": {
                "update": "my_collection",
                "ordered": True,
                "updates": [
                    ("q", {"email": "[email protected]"}),
                    (
                        "u",
                        {
                            "email": "[email protected]",
                            "extra_field": "extra_content",
                            "new": "bla",
                        },
                    ),
                    ("multi", False),
                    ("upsert", False),
                ],
            },
            "command_stripped": {
                "update": "my_collection",
                "ordered": True,
                "updates": "%s",
            },
        },
        {
            "command": {
                "update": "my_collection",
                "ordered": True,
                "updates": [
                    ("q", {"email": "[email protected]"}),
                    ("u", {"$rename": {"new": "new_field"}}),
                    ("multi", False),
                    ("upsert", False),
                ],
            },
            "command_stripped": {
                "update": "my_collection",
                "ordered": True,
                "updates": "%s",
            },
        },
        {
            "command": {
                "update": "my_collection",
                "ordered": True,
                "updates": [
                    ("q", {"email": "[email protected]"}),
                    ("u", {"$rename": {"password": "pwd"}}),
                    ("multi", True),
                    ("upsert", False),
                ],
            },
            "command_stripped": {
                "update": "my_collection",
                "ordered": True,
                "updates": "%s",
            },
        },
        # delete command: the whole "deletes" value is replaced by "%s".
        {
            "command": {
                "delete": "my_collection",
                "ordered": True,
                "deletes": [("q", {"username": "userx2"}), ("limit", 1)],
            },
            "command_stripped": {
                "delete": "my_collection",
                "ordered": True,
                "deletes": "%s",
            },
        },
        {
            "command": {
                "delete": "my_collection",
                "ordered": True,
                "deletes": [("q", {"email": "[email protected]"}), ("limit", 0)],
            },
            "command_stripped": {
                "delete": "my_collection",
                "ordered": True,
                "deletes": "%s",
            },
        },
        # findAndModify: "query" and "update" keep their first-level keys with
        # masked values; "new", "remove" and "upsert" are masked wholesale.
        {
            "command": {
                "findAndModify": "my_collection",
                "query": {"email": "[email protected]"},
                "new": False,
                "remove": True,
            },
            "command_stripped": {
                "findAndModify": "my_collection",
                "query": {"email": "%s"},
                "new": "%s",
                "remove": "%s",
            },
        },
        {
            "command": {
                "findAndModify": "my_collection",
                "query": {"email": "[email protected]"},
                "new": False,
                "update": {"email": "[email protected]", "extra_field": "xxx"},
                "upsert": False,
            },
            "command_stripped": {
                "findAndModify": "my_collection",
                "query": {"email": "%s"},
                "new": "%s",
                "update": {"email": "%s", "extra_field": "%s"},
                "upsert": "%s",
            },
        },
        {
            "command": {
                "findAndModify": "my_collection",
                "query": {"email": "[email protected]"},
                "new": False,
                "update": {"$rename": {"extra_field": "extra_field2"}},
                "upsert": False,
            },
            "command_stripped": {
                "findAndModify": "my_collection",
                "query": {"email": "%s"},
                "new": "%s",
                "update": {"$rename": "%s"},
                "upsert": "%s",
            },
        },
        # renameCollection and drop: expected output equals the input.
        {
            "command": {
                "renameCollection": "test.my_collection",
                "to": "test.new_collection",
            },
            "command_stripped": {
                "renameCollection": "test.my_collection",
                "to": "test.new_collection",
            },
        },
        {
            "command": {"drop": "new_collection"},
            "command_stripped": {"drop": "new_collection"},
        },
    ],
)
def test_strip_pii(testcase):
    """Check that `_strip_pii` turns each raw command into its expected stripped form."""
    assert _strip_pii(testcase["command"]) == testcase["command_stripped"]