Skip to content

Commit e71e98e

Browse files
fix cursor validation
1 parent 7f9d1fd commit e71e98e

File tree

5 files changed

+130
-87
lines changed

5 files changed

+130
-87
lines changed

.evergreen/insert_malformed_utf8.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# test script for inserting invalid data into a db
2+
3+
from pymongo import MongoClient
4+
from bson import BSON
5+
from bson.raw_bson import RawBSONDocument
6+
import os
7+
8+
# Connect to MongoDB
9+
client = MongoClient(os.environ["MONGODB_URI"])
10+
db = client['test']
11+
collection = db['invalidutf8']
12+
13+
# template document
14+
document = {
15+
'field': 'asdf'
16+
}
17+
18+
# first, insert once with invalid value
19+
raw_bson = BSON.encode(document)
20+
raw_bson = raw_bson.replace(b'asdf', b'a\x80df')
21+
collection.drop()
22+
collection.insert_one(RawBSONDocument(raw_bson))
23+
# then again with invalid key
24+
raw_bson = raw_bson.replace(b'field', b'fe\x80\xc2\xa9')
25+
collection.insert_one(RawBSONDocument(raw_bson))
26+
# then insert another doc that serves as a control mechanism
27+
collection.insert_one({ 'control': 'normal\\string\\with\\backslashes', 'nested': { 'a': 'b' }, 'array': [1,2,3] })
28+
29+
print("Inserted document with an invalid UTF-8 string")

.evergreen/run-invalid-utf8-tests.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
#! /bin/bash

src/cmap/wire_protocol/on_demand/document.ts

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import {
1010
getInt32LE,
1111
ObjectId,
1212
parseToElementsToArray,
13+
pluckBSONSerializeOptions,
1314
Timestamp,
1415
toUTF8
1516
} from '../../../bson';
@@ -330,11 +331,23 @@ export class OnDemandDocument {
330331
* @param options - BSON deserialization options
331332
*/
332333
public toObject(options?: BSONSerializeOptions): Record<string, any> {
333-
return BSON.deserialize(this.bson, {
334-
...options,
334+
const exactBSONOptions = {
335+
...pluckBSONSerializeOptions(options ?? {}),
336+
validation: this.parseBsonSerializationOptions(options),
335337
index: this.offset,
336338
allowObjectSmallerThanBufferSize: true
337-
});
339+
};
340+
return BSON.deserialize(this.bson, exactBSONOptions);
341+
}
342+
343+
private parseBsonSerializationOptions(options?: { enableUtf8Validation?: boolean }): {
344+
utf8: { writeErrors: false } | false;
345+
} {
346+
const enableUtf8Validation = options?.enableUtf8Validation;
347+
if (enableUtf8Validation === false) {
348+
return { utf8: false };
349+
}
350+
return { utf8: { writeErrors: false } };
338351
}
339352

340353
/** Returns this document's bytes only */

src/cmap/wire_protocol/responses.ts

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -166,24 +166,6 @@ export class MongoDBResponse extends OnDemandDocument {
166166
}
167167
return this.clusterTime ?? null;
168168
}
169-
170-
public override toObject(options?: BSONSerializeOptions): Record<string, any> {
171-
const exactBSONOptions = {
172-
...pluckBSONSerializeOptions(options ?? {}),
173-
validation: this.parseBsonSerializationOptions(options)
174-
};
175-
return super.toObject(exactBSONOptions);
176-
}
177-
178-
private parseBsonSerializationOptions(options?: { enableUtf8Validation?: boolean }): {
179-
utf8: { writeErrors: false } | false;
180-
} {
181-
const enableUtf8Validation = options?.enableUtf8Validation;
182-
if (enableUtf8Validation === false) {
183-
return { utf8: false };
184-
}
185-
return { utf8: { writeErrors: false } };
186-
}
187169
}
188170

189171
/** @internal */

test/integration/node-specific/bson-options/utf8_validation.test.ts

Lines changed: 84 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import * as sinon from 'sinon';
44
import {
55
BSON,
66
BSONError,
7-
Collection,
7+
type Collection,
88
type MongoClient,
99
MongoDBResponse,
1010
MongoError,
@@ -164,8 +164,8 @@ describe('utf8 validation with cursors', function () {
164164
beforeEach(async function () {
165165
client = this.configuration.newClient();
166166
await client.connect();
167-
const db = client.db("test");
168-
collection = db.collection("invalidutf8");
167+
const db = client.db('test');
168+
collection = db.collection('invalidutf8');
169169
});
170170

171171
afterEach(async function () {
@@ -174,7 +174,7 @@ describe('utf8 validation with cursors', function () {
174174

175175
context('when utf-8 validation is explicitly disabled', function () {
176176
it('documents can be read using a for-await loop without errors', async function () {
177-
for await (const doc of collection.find({}, { enableUtf8Validation: false }));
177+
for await (const _doc of collection.find({}, { enableUtf8Validation: false }));
178178
});
179179
it('documents can be read using next() without errors', async function () {
180180
const cursor = collection.find({}, { enableUtf8Validation: false });
@@ -200,10 +200,10 @@ describe('utf8 validation with cursors', function () {
200200
while (await cursor.hasNext()) {
201201
await cursor.tryNext();
202202
}
203-
})
204-
})
203+
});
204+
});
205205

206-
async function expectReject(fn: () => Promise<void>, options?: { regex?: RegExp, errorClass }) {
206+
async function expectReject(fn: () => Promise<void>, options?: { regex?: RegExp; errorClass }) {
207207
const regex = options?.regex ?? /.*/;
208208
const errorClass = options?.errorClass ?? MongoError;
209209
try {
@@ -217,95 +217,113 @@ describe('utf8 validation with cursors', function () {
217217

218218
context('when utf-8 validation is explicitly enabled', function () {
219219
it('a for-await loop throw a BSON error', async function () {
220-
await expectReject(async () => {
221-
for await (const doc of collection.find({}, { enableUtf8Validation: true }));
222-
}, { errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ })
220+
await expectReject(
221+
async () => {
222+
for await (const _doc of collection.find({}, { enableUtf8Validation: true }));
223+
},
224+
{ errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
225+
);
223226
});
224227
it('next() throws a BSON error', async function () {
225-
await expectReject(async () => {
226-
const cursor = collection.find({}, { enableUtf8Validation: true });
227-
228-
while (await cursor.hasNext()) {
229-
await cursor.next();
230-
}
231-
}, { errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
228+
await expectReject(
229+
async () => {
230+
const cursor = collection.find({}, { enableUtf8Validation: true });
231+
232+
while (await cursor.hasNext()) {
233+
await cursor.next();
234+
}
235+
},
236+
{ errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
232237
);
233238
});
234239

235240
it('toArray() throws a BSON error', async function () {
236-
await expectReject(async () => {
237-
const cursor = collection.find({}, { enableUtf8Validation: true });
238-
await cursor.toArray();
239-
}, { errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
241+
await expectReject(
242+
async () => {
243+
const cursor = collection.find({}, { enableUtf8Validation: true });
244+
await cursor.toArray();
245+
},
246+
{ errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
240247
);
241-
242248
});
243249

244250
it('.stream() throws a BSONError', async function () {
245-
await expectReject(async () => {
246-
const cursor = collection.find({}, { enableUtf8Validation: true });
247-
await cursor.stream().toArray();
248-
}, { errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
251+
await expectReject(
252+
async () => {
253+
const cursor = collection.find({}, { enableUtf8Validation: true });
254+
await cursor.stream().toArray();
255+
},
256+
{ errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
249257
);
250258
});
251259

252260
it('tryNext() throws a BSONError', async function () {
253-
await expectReject(async () => {
254-
const cursor = collection.find({}, { enableUtf8Validation: true });
255-
256-
while (await cursor.hasNext()) {
257-
await cursor.tryNext();
258-
}
259-
}, { errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
261+
await expectReject(
262+
async () => {
263+
const cursor = collection.find({}, { enableUtf8Validation: true });
264+
265+
while (await cursor.hasNext()) {
266+
await cursor.tryNext();
267+
}
268+
},
269+
{ errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
260270
);
261-
262-
})
263-
})
271+
});
272+
});
264273

265274
context('utf-8 validation defaults to enabled', function () {
266275
it('a for-await loop throw a BSON error', async function () {
267-
await expectReject(async () => {
268-
for await (const doc of collection.find({}));
269-
}, { errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ })
276+
await expectReject(
277+
async () => {
278+
for await (const _doc of collection.find({}));
279+
},
280+
{ errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
281+
);
270282
});
271283
it('next() throws a BSON error', async function () {
272-
await expectReject(async () => {
273-
const cursor = collection.find({});
274-
275-
while (await cursor.hasNext()) {
276-
await cursor.next();
277-
}
278-
}, { errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
284+
await expectReject(
285+
async () => {
286+
const cursor = collection.find({});
287+
288+
while (await cursor.hasNext()) {
289+
await cursor.next();
290+
}
291+
},
292+
{ errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
279293
);
280294
});
281295

282296
it('toArray() throws a BSON error', async function () {
283-
await expectReject(async () => {
284-
const cursor = collection.find({});
285-
await cursor.toArray();
286-
}, { errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
297+
await expectReject(
298+
async () => {
299+
const cursor = collection.find({});
300+
await cursor.toArray();
301+
},
302+
{ errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
287303
);
288-
289304
});
290305

291306
it('.stream() throws a BSONError', async function () {
292-
await expectReject(async () => {
293-
const cursor = collection.find({});
294-
await cursor.stream().toArray();
295-
}, { errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
307+
await expectReject(
308+
async () => {
309+
const cursor = collection.find({});
310+
await cursor.stream().toArray();
311+
},
312+
{ errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
296313
);
297314
});
298315

299316
it('tryNext() throws a BSONError', async function () {
300-
await expectReject(async () => {
301-
const cursor = collection.find({});
302-
303-
while (await cursor.hasNext()) {
304-
await cursor.tryNext();
305-
}
306-
}, { errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
317+
await expectReject(
318+
async () => {
319+
const cursor = collection.find({}); // no options on purpose: this context asserts the *default* (validation enabled)
320+
321+
while (await cursor.hasNext()) {
322+
await cursor.tryNext();
323+
}
324+
},
325+
{ errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
307326
);
308-
309-
})
310-
})
311-
})
327+
});
328+
});
329+
});

0 commit comments

Comments
 (0)