Skip to content

PYTHON-2820 Test serialization of BSON with embedded null bytes in strings #723

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions test/bson_corpus/document.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@
{
"description": "Invalid subdocument: bad string length in field",
"bson": "1C00000003666F6F001200000002626172000500000062617A000000"
},
{
"description": "Null byte in sub-document key",
"bson": "150000000378000D00000010610000010000000000"
}
]
}
4 changes: 2 additions & 2 deletions test/bson_corpus/regex.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,11 @@
],
"decodeErrors": [
{
"description": "embedded null in pattern",
"description": "Null byte in pattern string",
"bson": "0F0000000B610061006300696D0000"
},
{
"description": "embedded null in flags",
"description": "Null byte in flags string",
"bson": "100000000B61006162630069006D0000"
}
]
Expand Down
21 changes: 20 additions & 1 deletion test/bson_corpus/top.json
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@
{
"description": "Document truncated mid-key",
"bson": "1200000002666F"
},
{
"description": "Null byte in document key",
"bson": "0D000000107800000100000000"
}
],
"parseErrors": [
Expand Down Expand Up @@ -241,7 +245,22 @@
{
"description": "Bad DBpointer (extra field)",
"string": "{\"a\": {\"$dbPointer\": {\"a\": {\"$numberInt\": \"1\"}, \"$id\": {\"$oid\": \"56e1fc72e0c917e9c4714161\"}, \"c\": {\"$numberInt\": \"2\"}, \"$ref\": \"b\"}}}"
},
{
"description" : "Null byte in document key",
"string" : "{\"a\\u0000\": 1 }"
},
{
"description" : "Null byte in sub-document key",
"string" : "{\"a\" : {\"b\\u0000\": 1 }}"
},
{
"description": "Null byte in $regularExpression pattern",
"string": "{\"a\" : {\"$regularExpression\" : { \"pattern\": \"b\\u0000\", \"options\" : \"i\"}}}"
},
{
"description": "Null byte in $regularExpression options",
"string": "{\"a\" : {\"$regularExpression\" : { \"pattern\": \"b\", \"options\" : \"i\\u0000\"}}}"
}

]
}
12 changes: 9 additions & 3 deletions test/test_bson_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from bson.codec_options import CodecOptions
from bson.decimal128 import Decimal128
from bson.dbref import DBRef
from bson.errors import InvalidBSON, InvalidId
from bson.errors import InvalidBSON, InvalidDocument, InvalidId
from bson.json_util import JSONMode
from bson.son import SON

Expand All @@ -51,6 +51,8 @@
# This variant of $numberLong may have been generated by an old version
# of mongoexport.
'Bad $numberLong (number, not string)',
# Our Regex type ignores unrecognized flag characters, including nulls.
'Null byte in $regularExpression options',
Copy link
Member Author

@ShaneHarvey ShaneHarvey Sep 9, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Our Regex type ignores extra characters in "flags":

>>> Regex('', '1234567qwertyasdfvbn\x00')
Regex('', re.DOTALL)

Another example:

>>> json_util.loads('{"a" : {"$regularExpression" : { "pattern": "b", "options" : "ijkhasgdhb\\u0000"}}}')
{'a': Regex('b', re.IGNORECASE|re.DOTALL)}
>>> json_util.dumps(_)
'{"a": {"$regularExpression": {"pattern": "b", "options": "is"}}}'

])

_DEPRECATED_BSON_TYPES = {
Expand Down Expand Up @@ -198,10 +200,14 @@ def run_test(self):
decode_extjson(parse_error_case['string'])
else:
try:
decode_extjson(parse_error_case['string'])
doc = decode_extjson(parse_error_case['string'])
# Parsing may succeed; null bytes are validated when encoding to BSON.
if 'Null' in description:
to_bson(doc)
raise AssertionError('exception not raised for test '
'case: ' + description)
except (ValueError, KeyError, TypeError, InvalidId):
except (ValueError, KeyError, TypeError, InvalidId,
InvalidDocument):
pass
elif bson_type == '0x05':
try:
Expand Down