Skip to content

Commit afa3997

Browse files
authored
PYTHON-2680 Breaking changes to DBRef BSON+JSON decoding (#722)
Implement DBRef spec version 1.0 tests.
1 parent 90d4c6f commit afa3997

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+333
-140
lines changed

bson/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,10 @@ def _get_object(data, view, position, obj_end, opts, dummy):
202202
obj = _elements_to_dict(data, view, position + 4, end, opts)
203203

204204
position += obj_size
205-
if "$ref" in obj:
205+
# If DBRef validation fails, return a normal doc.
206+
if (isinstance(obj.get('$ref'), str) and
207+
"$id" in obj and
208+
isinstance(obj.get('$db'), (str, type(None)))):
206209
return (DBRef(obj.pop("$ref"), obj.pop("$id", None),
207210
obj.pop("$db", None), obj), position)
208211
return obj, position

bson/_cbsonmodule.c

Lines changed: 69 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1506,6 +1506,71 @@ static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) {
15061506
return result;
15071507
}
15081508

1509+
/*
1510+
* Hook for optional decoding BSON documents to DBRef.
1511+
*/
1512+
static PyObject *_dbref_hook(PyObject* self, PyObject* value) {
1513+
struct module_state *state = GETSTATE(self);
1514+
PyObject* dbref = NULL;
1515+
PyObject* dbref_type = NULL;
1516+
PyObject* ref = NULL;
1517+
PyObject* id = NULL;
1518+
PyObject* database = NULL;
1519+
PyObject* ret = NULL;
1520+
int db_present = 0;
1521+
1522+
/* Decoding for DBRefs */
1523+
if (PyMapping_HasKeyString(value, "$ref") && PyMapping_HasKeyString(value, "$id")) { /* DBRef */
1524+
ref = PyMapping_GetItemString(value, "$ref");
1525+
/* PyMapping_GetItemString returns NULL to indicate error. */
1526+
if (!ref) {
1527+
goto invalid;
1528+
}
1529+
id = PyMapping_GetItemString(value, "$id");
1530+
/* PyMapping_GetItemString returns NULL to indicate error. */
1531+
if (!id) {
1532+
goto invalid;
1533+
}
1534+
1535+
if (PyMapping_HasKeyString(value, "$db")) {
1536+
database = PyMapping_GetItemString(value, "$db");
1537+
if (!database) {
1538+
goto invalid;
1539+
}
1540+
db_present = 1;
1541+
} else {
1542+
database = Py_None;
1543+
Py_INCREF(database);
1544+
}
1545+
1546+
// check types
1547+
if (!(PyUnicode_Check(ref) && (database == Py_None || PyUnicode_Check(database)))) {
1548+
ret = value;
1549+
goto invalid;
1550+
}
1551+
1552+
PyMapping_DelItemString(value, "$ref");
1553+
PyMapping_DelItemString(value, "$id");
1554+
if (db_present) {
1555+
PyMapping_DelItemString(value, "$db");
1556+
}
1557+
1558+
if ((dbref_type = _get_object(state->DBRef, "bson.dbref", "DBRef"))) {
1559+
dbref = PyObject_CallFunctionObjArgs(dbref_type, ref, id, database, value, NULL);
1560+
Py_DECREF(value);
1561+
ret = dbref;
1562+
}
1563+
} else {
1564+
ret = value;
1565+
}
1566+
invalid:
1567+
Py_XDECREF(dbref_type);
1568+
Py_XDECREF(ref);
1569+
Py_XDECREF(id);
1570+
Py_XDECREF(database);
1571+
return ret;
1572+
}
1573+
15091574
static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
15101575
unsigned* position, unsigned char type,
15111576
unsigned max, const codec_options_t* options) {
@@ -1552,7 +1617,6 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
15521617
}
15531618
case 3:
15541619
{
1555-
PyObject* collection;
15561620
uint32_t size;
15571621

15581622
if (max < 4) {
@@ -1585,55 +1649,10 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
15851649
goto invalid;
15861650
}
15871651

1588-
/* Decoding for DBRefs */
1589-
if (PyMapping_HasKeyString(value, "$ref")) { /* DBRef */
1590-
PyObject* dbref = NULL;
1591-
PyObject* dbref_type;
1592-
PyObject* id;
1593-
PyObject* database;
1594-
1595-
collection = PyMapping_GetItemString(value, "$ref");
1596-
/* PyMapping_GetItemString returns NULL to indicate error. */
1597-
if (!collection) {
1598-
goto invalid;
1599-
}
1600-
PyMapping_DelItemString(value, "$ref");
1601-
1602-
if (PyMapping_HasKeyString(value, "$id")) {
1603-
id = PyMapping_GetItemString(value, "$id");
1604-
if (!id) {
1605-
Py_DECREF(collection);
1606-
goto invalid;
1607-
}
1608-
PyMapping_DelItemString(value, "$id");
1609-
} else {
1610-
id = Py_None;
1611-
Py_INCREF(id);
1612-
}
1613-
1614-
if (PyMapping_HasKeyString(value, "$db")) {
1615-
database = PyMapping_GetItemString(value, "$db");
1616-
if (!database) {
1617-
Py_DECREF(collection);
1618-
Py_DECREF(id);
1619-
goto invalid;
1620-
}
1621-
PyMapping_DelItemString(value, "$db");
1622-
} else {
1623-
database = Py_None;
1624-
Py_INCREF(database);
1625-
}
1626-
1627-
if ((dbref_type = _get_object(state->DBRef, "bson.dbref", "DBRef"))) {
1628-
dbref = PyObject_CallFunctionObjArgs(dbref_type, collection, id, database, value, NULL);
1629-
Py_DECREF(dbref_type);
1630-
}
1631-
Py_DECREF(value);
1632-
value = dbref;
1633-
1634-
Py_DECREF(id);
1635-
Py_DECREF(collection);
1636-
Py_DECREF(database);
1652+
/* Hook for DBRefs */
1653+
value = _dbref_hook(self, value);
1654+
if (!value) {
1655+
goto invalid;
16371656
}
16381657

16391658
*position += size;

bson/json_util.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -121,9 +121,6 @@
121121
"x": re.X,
122122
}
123123

124-
# Dollar-prefixed keys which may appear in DBRefs.
125-
_DBREF_KEYS = frozenset(['$id', '$ref', '$db'])
126-
127124

128125
class DatetimeRepresentation:
129126
LEGACY = 0
@@ -463,7 +460,9 @@ def object_pairs_hook(pairs, json_options=DEFAULT_JSON_OPTIONS):
463460
def object_hook(dct, json_options=DEFAULT_JSON_OPTIONS):
464461
if "$oid" in dct:
465462
return _parse_canonical_oid(dct)
466-
if "$ref" in dct:
463+
if (isinstance(dct.get('$ref'), str) and
464+
"$id" in dct and
465+
isinstance(dct.get('$db'), (str, type(None)))):
467466
return _parse_canonical_dbref(dct)
468467
if "$date" in dct:
469468
return _parse_canonical_datetime(dct, json_options)
@@ -675,10 +674,6 @@ def _parse_canonical_regex(doc):
675674

676675
def _parse_canonical_dbref(doc):
677676
"""Decode a JSON DBRef to bson.dbref.DBRef."""
678-
for key in doc:
679-
if key.startswith('$') and key not in _DBREF_KEYS:
680-
# Other keys start with $, so dct cannot be parsed as a DBRef.
681-
return doc
682677
return DBRef(doc.pop('$ref'), doc.pop('$id'),
683678
database=doc.pop('$db', None), **doc)
684679

doc/changelog.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,14 @@ Breaking Changes in 4.0
104104
- Changed the default JSON encoding representation from legacy to relaxed.
105105
The json_mode parameter for :const:`bson.json_util.dumps` now defaults to
106106
:const:`~bson.json_util.RELAXED_JSON_OPTIONS`.
107+
- Changed the BSON and JSON decoding behavior of :class:`~bson.dbref.DBRef`
108+
to match the behavior outlined in the `DBRef specification`_ version 1.0.
109+
Specifically, PyMongo now only decodes a subdocument into a
110+
:class:`~bson.dbref.DBRef` if and only if, it contains both ``$ref`` and
111+
``$id`` fields and the ``$ref``, ``$id``, and ``$db`` fields are of the
112+
correct type. Otherwise the document is returned as normal. Previously, any
113+
subdocument containing a ``$ref`` field would be decoded as a
114+
:class:`~bson.dbref.DBRef`.
107115
- The "tls" install extra is no longer necessary or supported and will be
108116
ignored by pip.
109117
- The ``hint`` option is now required when using ``min`` or ``max`` queries
@@ -123,6 +131,7 @@ See the `PyMongo 4.0 release notes in JIRA`_ for the list of resolved issues
123131
in this release.
124132

125133
.. _PyMongo 4.0 release notes in JIRA: https://jira.mongodb.org/secure/ReleaseNote.jspa?projectId=10004&version=18463
134+
.. _DBRef specification: https://github.com/mongodb/specifications/blob/5a8c8d7/source/dbref.rst
126135

127136
Changes in Version 3.12.0
128137
-------------------------

doc/migrate-to-pymongo4.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -752,3 +752,16 @@ Name is a required argument for pymongo.driver_info.DriverInfo
752752

753753
``name`` is now a required argument for the :class:`pymongo.driver_info.DriverInfo` class.
754754

755+
DBRef BSON/JSON decoding behavior
756+
.................................
757+
758+
Changed the BSON and JSON decoding behavior of :class:`~bson.dbref.DBRef`
759+
to match the behavior outlined in the `DBRef specification`_ version 1.0.
760+
Specifically, PyMongo now only decodes a subdocument into a
761+
:class:`~bson.dbref.DBRef` if and only if, it contains both ``$ref`` and
762+
``$id`` fields and the ``$ref``, ``$id``, and ``$db`` fields are of the
763+
correct type. Otherwise the document is returned as normal. Previously, any
764+
subdocument containing a ``$ref`` field would be decoded as a
765+
:class:`~bson.dbref.DBRef`.
766+
767+
.. _DBRef specification: https://github.com/mongodb/specifications/blob/5a8c8d7/source/dbref.rst

test/bson_corpus/binary.json

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,11 @@
5050
"canonical_bson": "1D000000057800100000000573FFD26444B34C6990E8E7D1DFC035D400",
5151
"canonical_extjson": "{\"x\" : { \"$binary\" : {\"base64\" : \"c//SZESzTGmQ6OfR38A11A==\", \"subType\" : \"05\"}}}"
5252
},
53+
{
54+
"description": "subtype 0x07",
55+
"canonical_bson": "1D000000057800100000000773FFD26444B34C6990E8E7D1DFC035D400",
56+
"canonical_extjson": "{\"x\" : { \"$binary\" : {\"base64\" : \"c//SZESzTGmQ6OfR38A11A==\", \"subType\" : \"07\"}}}"
57+
},
5358
{
5459
"description": "subtype 0x80",
5560
"canonical_bson": "0F0000000578000200000080FFFF00",
@@ -94,8 +99,20 @@
9499
"string": "{\"x\" : { \"$uuid\" : { \"data\" : \"73ffd264-44b3-4c69-90e8-e7d1dfc035d4\"}}}"
95100
},
96101
{
97-
"description": "$uuid invalid value",
102+
"description": "$uuid invalid value--too short",
98103
"string": "{\"x\" : { \"$uuid\" : \"73ffd264-44b3-90e8-e7d1dfc035d4\"}}"
104+
},
105+
{
106+
"description": "$uuid invalid value--too long",
107+
"string": "{\"x\" : { \"$uuid\" : \"73ffd264-44b3-4c69-90e8-e7d1dfc035d4-789e4\"}}"
108+
},
109+
{
110+
"description": "$uuid invalid value--misplaced hyphens",
111+
"string": "{\"x\" : { \"$uuid\" : \"73ff-d26444b-34c6-990e8e-7d1dfc035d4\"}}"
112+
},
113+
{
114+
"description": "$uuid invalid value--too many hyphens",
115+
"string": "{\"x\" : { \"$uuid\" : \"----d264-44b3-4--9-90e8-e7d1dfc0----\"}}"
99116
}
100117
]
101118
}

test/bson_corpus/code.json

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,48 +20,48 @@
2020
},
2121
{
2222
"description": "two-byte UTF-8 (\u00e9)",
23-
"canonical_bson": "190000000261000D000000C3A9C3A9C3A9C3A9C3A9C3A90000",
24-
"canonical_extjson": "{\"a\" : \"\\u00e9\\u00e9\\u00e9\\u00e9\\u00e9\\u00e9\"}"
23+
"canonical_bson": "190000000D61000D000000C3A9C3A9C3A9C3A9C3A9C3A90000",
24+
"canonical_extjson": "{\"a\" : {\"$code\" : \"\\u00e9\\u00e9\\u00e9\\u00e9\\u00e9\\u00e9\"}}"
2525
},
2626
{
2727
"description": "three-byte UTF-8 (\u2606)",
28-
"canonical_bson": "190000000261000D000000E29886E29886E29886E298860000",
29-
"canonical_extjson": "{\"a\" : \"\\u2606\\u2606\\u2606\\u2606\"}"
28+
"canonical_bson": "190000000D61000D000000E29886E29886E29886E298860000",
29+
"canonical_extjson": "{\"a\" : {\"$code\" : \"\\u2606\\u2606\\u2606\\u2606\"}}"
3030
},
3131
{
3232
"description": "Embedded nulls",
33-
"canonical_bson": "190000000261000D0000006162006261620062616261620000",
34-
"canonical_extjson": "{\"a\" : \"ab\\u0000bab\\u0000babab\"}"
33+
"canonical_bson": "190000000D61000D0000006162006261620062616261620000",
34+
"canonical_extjson": "{\"a\" : {\"$code\" : \"ab\\u0000bab\\u0000babab\"}}"
3535
}
3636
],
3737
"decodeErrors": [
3838
{
3939
"description": "bad code string length: 0 (but no 0x00 either)",
40-
"bson": "0C0000000261000000000000"
40+
"bson": "0C0000000D61000000000000"
4141
},
4242
{
4343
"description": "bad code string length: -1",
44-
"bson": "0C000000026100FFFFFFFF00"
44+
"bson": "0C0000000D6100FFFFFFFF00"
4545
},
4646
{
4747
"description": "bad code string length: eats terminator",
48-
"bson": "10000000026100050000006200620000"
48+
"bson": "100000000D6100050000006200620000"
4949
},
5050
{
5151
"description": "bad code string length: longer than rest of document",
52-
"bson": "120000000200FFFFFF00666F6F6261720000"
52+
"bson": "120000000D00FFFFFF00666F6F6261720000"
5353
},
5454
{
5555
"description": "code string is not null-terminated",
56-
"bson": "1000000002610004000000616263FF00"
56+
"bson": "100000000D610004000000616263FF00"
5757
},
5858
{
5959
"description": "empty code string, but extra null",
60-
"bson": "0E00000002610001000000000000"
60+
"bson": "0E0000000D610001000000000000"
6161
},
6262
{
6363
"description": "invalid UTF-8",
64-
"bson": "0E00000002610002000000E90000"
64+
"bson": "0E0000000D610002000000E90000"
6565
}
6666
]
6767
}

test/bson_corpus/dbref.json

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"description": "DBRef",
2+
"description": "Document type (DBRef sub-documents)",
33
"bson_type": "0x03",
44
"valid": [
55
{
@@ -26,6 +26,26 @@
2626
"description": "Document with key names similar to those of a DBRef",
2727
"canonical_bson": "3e0000000224726566000c0000006e6f742d612d646272656600072469640058921b3e6e32ab156a22b59e022462616e616e6100050000007065656c0000",
2828
"canonical_extjson": "{\"$ref\": \"not-a-dbref\", \"$id\": {\"$oid\": \"58921b3e6e32ab156a22b59e\"}, \"$banana\": \"peel\"}"
29+
},
30+
{
31+
"description": "DBRef with additional dollar-prefixed and dotted fields",
32+
"canonical_bson": "48000000036462726566003c0000000224726566000b000000636f6c6c656374696f6e00072469640058921b3e6e32ab156a22b59e10612e62000100000010246300010000000000",
33+
"canonical_extjson": "{\"dbref\": {\"$ref\": \"collection\", \"$id\": {\"$oid\": \"58921b3e6e32ab156a22b59e\"}, \"a.b\": {\"$numberInt\": \"1\"}, \"$c\": {\"$numberInt\": \"1\"}}}"
34+
},
35+
{
36+
"description": "Sub-document resembles DBRef but $id is missing",
37+
"canonical_bson": "26000000036462726566001a0000000224726566000b000000636f6c6c656374696f6e000000",
38+
"canonical_extjson": "{\"dbref\": {\"$ref\": \"collection\"}}"
39+
},
40+
{
41+
"description": "Sub-document resembles DBRef but $ref is not a string",
42+
"canonical_bson": "2c000000036462726566002000000010247265660001000000072469640058921b3e6e32ab156a22b59e0000",
43+
"canonical_extjson": "{\"dbref\": {\"$ref\": {\"$numberInt\": \"1\"}, \"$id\": {\"$oid\": \"58921b3e6e32ab156a22b59e\"}}}"
44+
},
45+
{
46+
"description": "Sub-document resembles DBRef but $db is not a string",
47+
"canonical_bson": "4000000003646272656600340000000224726566000b000000636f6c6c656374696f6e00072469640058921b3e6e32ab156a22b59e1024646200010000000000",
48+
"canonical_extjson": "{\"dbref\": {\"$ref\": \"collection\", \"$id\": {\"$oid\": \"58921b3e6e32ab156a22b59e\"}, \"$db\": {\"$numberInt\": \"1\"}}}"
2949
}
3050
]
3151
}

test/bson_corpus/double.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,13 @@
3030
{
3131
"description": "1.2345678921232E+18",
3232
"canonical_bson": "100000000164002a1bf5f41022b14300",
33-
"canonical_extjson": "{\"d\" : {\"$numberDouble\": \"1.2345678921232e+18\"}}",
33+
"canonical_extjson": "{\"d\" : {\"$numberDouble\": \"1.2345678921232E+18\"}}",
3434
"relaxed_extjson": "{\"d\" : 1.2345678921232E+18}"
3535
},
3636
{
3737
"description": "-1.2345678921232E+18",
3838
"canonical_bson": "100000000164002a1bf5f41022b1c300",
39-
"canonical_extjson": "{\"d\" : {\"$numberDouble\": \"-1.2345678921232e+18\"}}",
39+
"canonical_extjson": "{\"d\" : {\"$numberDouble\": \"-1.2345678921232E+18\"}}",
4040
"relaxed_extjson": "{\"d\" : -1.2345678921232E+18}"
4141
},
4242
{

0 commit comments

Comments
 (0)