Skip to content

PYTHON-2680 Breaking changes to DBRef BSON+JSON decoding #722

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Sep 10, 2021
Merged
5 changes: 4 additions & 1 deletion bson/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,10 @@ def _get_object(data, view, position, obj_end, opts, dummy):
obj = _elements_to_dict(data, view, position + 4, end, opts)

position += obj_size
if "$ref" in obj:
# If DBRef validation fails, return a normal doc.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the breaking DBRef BSON decoding change.

if (isinstance(obj.get('$ref'), str) and
"$id" in obj and
isinstance(obj.get('$db'), (str, type(None)))):
return (DBRef(obj.pop("$ref"), obj.pop("$id", None),
obj.pop("$db", None), obj), position)
return obj, position
Expand Down
119 changes: 69 additions & 50 deletions bson/_cbsonmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1506,6 +1506,71 @@ static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) {
return result;
}

/*
* Hook for optional decoding BSON documents to DBRef.
*/
static PyObject *_dbref_hook(PyObject* self, PyObject* value) {
struct module_state *state = GETSTATE(self);
PyObject* dbref = NULL;
PyObject* dbref_type = NULL;
PyObject* ref = NULL;
PyObject* id = NULL;
PyObject* database = NULL;
PyObject* ret = NULL;
int db_present = 0;

/* Decoding for DBRefs */
if (PyMapping_HasKeyString(value, "$ref") && PyMapping_HasKeyString(value, "$id")) { /* DBRef */
ref = PyMapping_GetItemString(value, "$ref");
/* PyMapping_GetItemString returns NULL to indicate error. */
if (!ref) {
goto invalid;
}
id = PyMapping_GetItemString(value, "$id");
/* PyMapping_GetItemString returns NULL to indicate error. */
if (!id) {
goto invalid;
}

if (PyMapping_HasKeyString(value, "$db")) {
database = PyMapping_GetItemString(value, "$db");
if (!database) {
goto invalid;
}
db_present = 1;
} else {
database = Py_None;
Py_INCREF(database);
}

// check types
if (!(PyUnicode_Check(ref) && (database == Py_None || PyUnicode_Check(database)))) {
ret = value;
goto invalid;
}

PyMapping_DelItemString(value, "$ref");
PyMapping_DelItemString(value, "$id");
if (db_present) {
PyMapping_DelItemString(value, "$db");
}

if ((dbref_type = _get_object(state->DBRef, "bson.dbref", "DBRef"))) {
dbref = PyObject_CallFunctionObjArgs(dbref_type, ref, id, database, value, NULL);
Py_DECREF(value);
ret = dbref;
}
} else {
ret = value;
}
invalid:
Py_XDECREF(dbref_type);
Py_XDECREF(ref);
Py_XDECREF(id);
Py_XDECREF(database);
return ret;
}

static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
unsigned* position, unsigned char type,
unsigned max, const codec_options_t* options) {
Expand Down Expand Up @@ -1552,7 +1617,6 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
}
case 3:
{
PyObject* collection;
uint32_t size;

if (max < 4) {
Expand Down Expand Up @@ -1585,55 +1649,10 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
goto invalid;
}

/* Decoding for DBRefs */
if (PyMapping_HasKeyString(value, "$ref")) { /* DBRef */
PyObject* dbref = NULL;
PyObject* dbref_type;
PyObject* id;
PyObject* database;

collection = PyMapping_GetItemString(value, "$ref");
/* PyMapping_GetItemString returns NULL to indicate error. */
if (!collection) {
goto invalid;
}
PyMapping_DelItemString(value, "$ref");

if (PyMapping_HasKeyString(value, "$id")) {
id = PyMapping_GetItemString(value, "$id");
if (!id) {
Py_DECREF(collection);
goto invalid;
}
PyMapping_DelItemString(value, "$id");
} else {
id = Py_None;
Py_INCREF(id);
}

if (PyMapping_HasKeyString(value, "$db")) {
database = PyMapping_GetItemString(value, "$db");
if (!database) {
Py_DECREF(collection);
Py_DECREF(id);
goto invalid;
}
PyMapping_DelItemString(value, "$db");
} else {
database = Py_None;
Py_INCREF(database);
}

if ((dbref_type = _get_object(state->DBRef, "bson.dbref", "DBRef"))) {
dbref = PyObject_CallFunctionObjArgs(dbref_type, collection, id, database, value, NULL);
Py_DECREF(dbref_type);
}
Py_DECREF(value);
value = dbref;

Py_DECREF(id);
Py_DECREF(collection);
Py_DECREF(database);
/* Hook for DBRefs */
value = _dbref_hook(self, value);
if (!value) {
goto invalid;
}

*position += size;
Expand Down
18 changes: 9 additions & 9 deletions bson/json_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,6 @@
"x": re.X,
}

# Dollar-prefixed keys which may appear in DBRefs.
_DBREF_KEYS = frozenset(['$id', '$ref', '$db'])


class DatetimeRepresentation:
LEGACY = 0
Expand Down Expand Up @@ -463,7 +460,9 @@ def object_pairs_hook(pairs, json_options=DEFAULT_JSON_OPTIONS):
def object_hook(dct, json_options=DEFAULT_JSON_OPTIONS):
if "$oid" in dct:
return _parse_canonical_oid(dct)
if "$ref" in dct:
if (isinstance(dct.get('$ref'), str) and
"$id" in dct and
isinstance(dct.get('$db'), (str, type(None)))):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the breaking DBRef JSON decoding change.

return _parse_canonical_dbref(dct)
if "$date" in dct:
return _parse_canonical_datetime(dct, json_options)
Expand Down Expand Up @@ -666,15 +665,16 @@ def _parse_canonical_regex(doc):
if len(regex) != 2:
raise TypeError('Bad $regularExpression must include only "pattern"'
'and "options" components: %s' % (doc,))
return Regex(regex['pattern'], regex['options'])

# Validate options, defer to Regex for pattern validation.
options = regex['options']
if not isinstance(options, str):
raise TypeError("pattern must be a string, not %s" % type(options))
return Regex(regex['pattern'], options)


def _parse_canonical_dbref(doc):
"""Decode a JSON DBRef to bson.dbref.DBRef."""
for key in doc:
if key.startswith('$') and key not in _DBREF_KEYS:
# Other keys start with $, so dct cannot be parsed as a DBRef.
return doc
return DBRef(doc.pop('$ref'), doc.pop('$id'),
database=doc.pop('$db', None), **doc)

Expand Down
9 changes: 9 additions & 0 deletions doc/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,14 @@ Breaking Changes in 4.0
- Changed the default JSON encoding representation from legacy to relaxed.
The json_mode parameter for :const:`bson.json_util.dumps` now defaults to
:const:`~bson.json_util.RELAXED_JSON_OPTIONS`.
- Changed the BSON and JSON decoding behavior of :class:`~bson.dbref.DBRef`
to match the behavior outlined in the `DBRef specification`_ version 1.0.
Specifically, PyMongo now only decodes a subdocument into a
:class:`~bson.dbref.DBRef` if and only if, it contains both ``$ref`` and
``$id`` fields and the ``$ref``, ``$id``, and ``$db`` fields are of the
correct type. Otherwise the document is returned as normal. Previously, any
subdocument containing a ``$ref`` field would be decoded as a
:class:`~bson.dbref.DBRef`.
- The "tls" install extra is no longer necessary or supported and will be
ignored by pip.
- The ``hint`` option is now required when using ``min`` or ``max`` queries
Expand All @@ -123,6 +131,7 @@ See the `PyMongo 4.0 release notes in JIRA`_ for the list of resolved issues
in this release.

.. _PyMongo 4.0 release notes in JIRA: https://jira.mongodb.org/secure/ReleaseNote.jspa?projectId=10004&version=18463
.. _DBRef specification: https://github.com/mongodb/specifications/blob/5a8c8d7/source/dbref.rst

Changes in Version 3.12.0
-------------------------
Expand Down
13 changes: 13 additions & 0 deletions doc/migrate-to-pymongo4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -752,3 +752,16 @@ Name is a required argument for pymongo.driver_info.DriverInfo

``name`` is now a required argument for the :class:`pymongo.driver_info.DriverInfo` class.

DBRef BSON/JSON decoding behavior
.................................

Changed the BSON and JSON decoding behavior of :class:`~bson.dbref.DBRef`
to match the behavior outlined in the `DBRef specification`_ version 1.0.
Specifically, PyMongo now only decodes a subdocument into a
:class:`~bson.dbref.DBRef` if and only if, it contains both ``$ref`` and
``$id`` fields and the ``$ref``, ``$id``, and ``$db`` fields are of the
correct type. Otherwise the document is returned as normal. Previously, any
subdocument containing a ``$ref`` field would be decoded as a
:class:`~bson.dbref.DBRef`.

.. _DBRef specification: https://github.com/mongodb/specifications/blob/5a8c8d7/source/dbref.rst
19 changes: 18 additions & 1 deletion test/bson_corpus/binary.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@
"canonical_bson": "1D000000057800100000000573FFD26444B34C6990E8E7D1DFC035D400",
"canonical_extjson": "{\"x\" : { \"$binary\" : {\"base64\" : \"c//SZESzTGmQ6OfR38A11A==\", \"subType\" : \"05\"}}}"
},
{
"description": "subtype 0x07",
"canonical_bson": "1D000000057800100000000773FFD26444B34C6990E8E7D1DFC035D400",
"canonical_extjson": "{\"x\" : { \"$binary\" : {\"base64\" : \"c//SZESzTGmQ6OfR38A11A==\", \"subType\" : \"07\"}}}"
},
{
"description": "subtype 0x80",
"canonical_bson": "0F0000000578000200000080FFFF00",
Expand Down Expand Up @@ -94,8 +99,20 @@
"string": "{\"x\" : { \"$uuid\" : { \"data\" : \"73ffd264-44b3-4c69-90e8-e7d1dfc035d4\"}}}"
},
{
"description": "$uuid invalid value",
"description": "$uuid invalid value--too short",
"string": "{\"x\" : { \"$uuid\" : \"73ffd264-44b3-90e8-e7d1dfc035d4\"}}"
},
{
"description": "$uuid invalid value--too long",
"string": "{\"x\" : { \"$uuid\" : \"73ffd264-44b3-4c69-90e8-e7d1dfc035d4-789e4\"}}"
},
{
"description": "$uuid invalid value--misplaced hyphens",
"string": "{\"x\" : { \"$uuid\" : \"73ff-d26444b-34c6-990e8e-7d1dfc035d4\"}}"
},
{
"description": "$uuid invalid value--too many hyphens",
"string": "{\"x\" : { \"$uuid\" : \"----d264-44b3-4--9-90e8-e7d1dfc0----\"}}"
}
]
}
26 changes: 13 additions & 13 deletions test/bson_corpus/code.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,48 +20,48 @@
},
{
"description": "two-byte UTF-8 (\u00e9)",
"canonical_bson": "190000000261000D000000C3A9C3A9C3A9C3A9C3A9C3A90000",
"canonical_extjson": "{\"a\" : \"\\u00e9\\u00e9\\u00e9\\u00e9\\u00e9\\u00e9\"}"
"canonical_bson": "190000000D61000D000000C3A9C3A9C3A9C3A9C3A9C3A90000",
"canonical_extjson": "{\"a\" : {\"$code\" : \"\\u00e9\\u00e9\\u00e9\\u00e9\\u00e9\\u00e9\"}}"
},
{
"description": "three-byte UTF-8 (\u2606)",
"canonical_bson": "190000000261000D000000E29886E29886E29886E298860000",
"canonical_extjson": "{\"a\" : \"\\u2606\\u2606\\u2606\\u2606\"}"
"canonical_bson": "190000000D61000D000000E29886E29886E29886E298860000",
"canonical_extjson": "{\"a\" : {\"$code\" : \"\\u2606\\u2606\\u2606\\u2606\"}}"
},
{
"description": "Embedded nulls",
"canonical_bson": "190000000261000D0000006162006261620062616261620000",
"canonical_extjson": "{\"a\" : \"ab\\u0000bab\\u0000babab\"}"
"canonical_bson": "190000000D61000D0000006162006261620062616261620000",
"canonical_extjson": "{\"a\" : {\"$code\" : \"ab\\u0000bab\\u0000babab\"}}"
}
],
"decodeErrors": [
{
"description": "bad code string length: 0 (but no 0x00 either)",
"bson": "0C0000000261000000000000"
"bson": "0C0000000D61000000000000"
},
{
"description": "bad code string length: -1",
"bson": "0C000000026100FFFFFFFF00"
"bson": "0C0000000D6100FFFFFFFF00"
},
{
"description": "bad code string length: eats terminator",
"bson": "10000000026100050000006200620000"
"bson": "100000000D6100050000006200620000"
},
{
"description": "bad code string length: longer than rest of document",
"bson": "120000000200FFFFFF00666F6F6261720000"
"bson": "120000000D00FFFFFF00666F6F6261720000"
},
{
"description": "code string is not null-terminated",
"bson": "1000000002610004000000616263FF00"
"bson": "100000000D610004000000616263FF00"
},
{
"description": "empty code string, but extra null",
"bson": "0E00000002610001000000000000"
"bson": "0E0000000D610001000000000000"
},
{
"description": "invalid UTF-8",
"bson": "0E00000002610002000000E90000"
"bson": "0E0000000D610002000000E90000"
}
]
}
22 changes: 21 additions & 1 deletion test/bson_corpus/dbref.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"description": "DBRef",
"description": "Document type (DBRef sub-documents)",
"bson_type": "0x03",
"valid": [
{
Expand All @@ -26,6 +26,26 @@
"description": "Document with key names similar to those of a DBRef",
"canonical_bson": "3e0000000224726566000c0000006e6f742d612d646272656600072469640058921b3e6e32ab156a22b59e022462616e616e6100050000007065656c0000",
"canonical_extjson": "{\"$ref\": \"not-a-dbref\", \"$id\": {\"$oid\": \"58921b3e6e32ab156a22b59e\"}, \"$banana\": \"peel\"}"
},
{
"description": "DBRef with additional dollar-prefixed and dotted fields",
"canonical_bson": "48000000036462726566003c0000000224726566000b000000636f6c6c656374696f6e00072469640058921b3e6e32ab156a22b59e10612e62000100000010246300010000000000",
"canonical_extjson": "{\"dbref\": {\"$ref\": \"collection\", \"$id\": {\"$oid\": \"58921b3e6e32ab156a22b59e\"}, \"a.b\": {\"$numberInt\": \"1\"}, \"$c\": {\"$numberInt\": \"1\"}}}"
},
{
"description": "Sub-document resembles DBRef but $id is missing",
"canonical_bson": "26000000036462726566001a0000000224726566000b000000636f6c6c656374696f6e000000",
"canonical_extjson": "{\"dbref\": {\"$ref\": \"collection\"}}"
},
{
"description": "Sub-document resembles DBRef but $ref is not a string",
"canonical_bson": "2c000000036462726566002000000010247265660001000000072469640058921b3e6e32ab156a22b59e0000",
"canonical_extjson": "{\"dbref\": {\"$ref\": {\"$numberInt\": \"1\"}, \"$id\": {\"$oid\": \"58921b3e6e32ab156a22b59e\"}}}"
},
{
"description": "Sub-document resembles DBRef but $db is not a string",
"canonical_bson": "4000000003646272656600340000000224726566000b000000636f6c6c656374696f6e00072469640058921b3e6e32ab156a22b59e1024646200010000000000",
"canonical_extjson": "{\"dbref\": {\"$ref\": \"collection\", \"$id\": {\"$oid\": \"58921b3e6e32ab156a22b59e\"}, \"$db\": {\"$numberInt\": \"1\"}}}"
}
]
}
4 changes: 2 additions & 2 deletions test/bson_corpus/double.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,13 @@
{
"description": "1.2345678921232E+18",
"canonical_bson": "100000000164002a1bf5f41022b14300",
"canonical_extjson": "{\"d\" : {\"$numberDouble\": \"1.2345678921232e+18\"}}",
"canonical_extjson": "{\"d\" : {\"$numberDouble\": \"1.2345678921232E+18\"}}",
"relaxed_extjson": "{\"d\" : 1.2345678921232E+18}"
},
{
"description": "-1.2345678921232E+18",
"canonical_bson": "100000000164002a1bf5f41022b1c300",
"canonical_extjson": "{\"d\" : {\"$numberDouble\": \"-1.2345678921232e+18\"}}",
"canonical_extjson": "{\"d\" : {\"$numberDouble\": \"-1.2345678921232E+18\"}}",
"relaxed_extjson": "{\"d\" : -1.2345678921232E+18}"
},
{
Expand Down
Loading