Skip to content

Commit 6c9751b

Browse files
author
Divjot Arora
authored
GODRIVER-1532 Remove check when decoding invalid UTF-8 strings (#333)
1 parent 2c54694 commit 6c9751b

File tree

2 files changed

+118
-98
lines changed

2 files changed

+118
-98
lines changed

bson/bson_corpus_spec_test.go

Lines changed: 118 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"github.com/tidwall/pretty"
2525
"go.mongodb.org/mongo-driver/bson/bsoncodec"
2626
"go.mongodb.org/mongo-driver/bson/bsonrw"
27+
"go.mongodb.org/mongo-driver/bson/primitive"
2728
)
2829

2930
type testCase struct {
@@ -233,118 +234,146 @@ func runTest(t *testing.T, file string) {
233234
var test testCase
234235
require.NoError(t, json.Unmarshal(content, &test))
235236

236-
for _, v := range test.Valid {
237-
// get canonical BSON
238-
cB, err := hex.DecodeString(v.CanonicalBson)
239-
expectNoError(t, err, fmt.Sprintf("%s: reading canonical BSON", v.Description))
240-
241-
// get canonical extended JSON
242-
cEJ := unescapeUnicode(string(pretty.Ugly([]byte(v.CanonicalExtJSON))), test.BsonType)
243-
if test.BsonType == "0x01" {
244-
cEJ = normalizeCanonicalDouble(t, *test.TestKey, cEJ)
245-
}
237+
t.Run("valid", func(t *testing.T) {
238+
for _, v := range test.Valid {
239+
t.Run(v.Description, func(t *testing.T) {
240+
// get canonical BSON
241+
cB, err := hex.DecodeString(v.CanonicalBson)
242+
expectNoError(t, err, fmt.Sprintf("%s: reading canonical BSON", v.Description))
243+
244+
// get canonical extended JSON
245+
cEJ := unescapeUnicode(string(pretty.Ugly([]byte(v.CanonicalExtJSON))), test.BsonType)
246+
if test.BsonType == "0x01" {
247+
cEJ = normalizeCanonicalDouble(t, *test.TestKey, cEJ)
248+
}
246249

247-
/*** canonical BSON round-trip tests ***/
248-
doc := bsonToNative(t, cB, "canonical", v.Description)
250+
/*** canonical BSON round-trip tests ***/
251+
doc := bsonToNative(t, cB, "canonical", v.Description)
249252

250-
// native_to_bson(bson_to_native(cB)) = cB
251-
nativeToBSON(t, cB, doc, v.Description, "canonical", "bson_to_native(cB)")
253+
// native_to_bson(bson_to_native(cB)) = cB
254+
nativeToBSON(t, cB, doc, v.Description, "canonical", "bson_to_native(cB)")
252255

253-
// native_to_canonical_extended_json(bson_to_native(cB)) = cEJ
254-
nativeToJSON(t, cEJ, doc, v.Description, "canonical", "cEJ", "bson_to_native(cB)")
256+
// native_to_canonical_extended_json(bson_to_native(cB)) = cEJ
257+
nativeToJSON(t, cEJ, doc, v.Description, "canonical", "cEJ", "bson_to_native(cB)")
255258

256-
// native_to_relaxed_extended_json(bson_to_native(cB)) = rEJ (if rEJ exists)
257-
if v.RelaxedExtJSON != nil {
258-
rEJ := unescapeUnicode(string(pretty.Ugly([]byte(*v.RelaxedExtJSON))), test.BsonType)
259-
if test.BsonType == "0x01" {
260-
rEJ = normalizeRelaxedDouble(t, *test.TestKey, rEJ)
261-
}
259+
// native_to_relaxed_extended_json(bson_to_native(cB)) = rEJ (if rEJ exists)
260+
if v.RelaxedExtJSON != nil {
261+
rEJ := unescapeUnicode(string(pretty.Ugly([]byte(*v.RelaxedExtJSON))), test.BsonType)
262+
if test.BsonType == "0x01" {
263+
rEJ = normalizeRelaxedDouble(t, *test.TestKey, rEJ)
264+
}
262265

263-
nativeToJSON(t, rEJ, doc, v.Description, "relaxed", "rEJ", "bson_to_native(cB)")
266+
nativeToJSON(t, rEJ, doc, v.Description, "relaxed", "rEJ", "bson_to_native(cB)")
264267

265-
/*** relaxed extended JSON round-trip tests (if exists) ***/
266-
doc = jsonToNative(t, rEJ, "relaxed", v.Description)
268+
/*** relaxed extended JSON round-trip tests (if exists) ***/
269+
doc = jsonToNative(t, rEJ, "relaxed", v.Description)
267270

268-
// native_to_relaxed_extended_json(json_to_native(rEJ)) = rEJ
269-
nativeToJSON(t, rEJ, doc, v.Description, "relaxed", "eJR", "json_to_native(rEJ)")
270-
}
271+
// native_to_relaxed_extended_json(json_to_native(rEJ)) = rEJ
272+
nativeToJSON(t, rEJ, doc, v.Description, "relaxed", "eJR", "json_to_native(rEJ)")
273+
}
271274

272-
/*** canonical extended JSON round-trip tests ***/
273-
doc = jsonToNative(t, cEJ, "canonical", v.Description)
275+
/*** canonical extended JSON round-trip tests ***/
276+
doc = jsonToNative(t, cEJ, "canonical", v.Description)
274277

275-
// native_to_canonical_extended_json(json_to_native(cEJ)) = cEJ
276-
nativeToJSON(t, cEJ, doc, v.Description, "canonical", "cEJ", "json_to_native(cEJ)")
278+
// native_to_canonical_extended_json(json_to_native(cEJ)) = cEJ
279+
nativeToJSON(t, cEJ, doc, v.Description, "canonical", "cEJ", "json_to_native(cEJ)")
277280

278-
// native_to_bson(json_to_native(cEJ)) = cb (unless lossy)
279-
if v.Lossy == nil || !*v.Lossy {
280-
nativeToBSON(t, cB, doc, v.Description, "canonical", "json_to_native(cEJ)")
281-
}
281+
// native_to_bson(json_to_native(cEJ)) = cb (unless lossy)
282+
if v.Lossy == nil || !*v.Lossy {
283+
nativeToBSON(t, cB, doc, v.Description, "canonical", "json_to_native(cEJ)")
284+
}
282285

283-
/*** degenerate BSON round-trip tests (if exists) ***/
284-
if v.DegenerateBSON != nil {
285-
dB, err := hex.DecodeString(*v.DegenerateBSON)
286-
expectNoError(t, err, fmt.Sprintf("%s: reading degenerate BSON", v.Description))
286+
/*** degenerate BSON round-trip tests (if exists) ***/
287+
if v.DegenerateBSON != nil {
288+
dB, err := hex.DecodeString(*v.DegenerateBSON)
289+
expectNoError(t, err, fmt.Sprintf("%s: reading degenerate BSON", v.Description))
287290

288-
doc = bsonToNative(t, dB, "degenerate", v.Description)
291+
doc = bsonToNative(t, dB, "degenerate", v.Description)
289292

290-
// native_to_bson(bson_to_native(dB)) = cB
291-
nativeToBSON(t, cB, doc, v.Description, "degenerate", "bson_to_native(dB)")
292-
}
293+
// native_to_bson(bson_to_native(dB)) = cB
294+
nativeToBSON(t, cB, doc, v.Description, "degenerate", "bson_to_native(dB)")
295+
}
293296

294-
/*** degenerate JSON round-trip tests (if exists) ***/
295-
if v.DegenerateExtJSON != nil {
296-
dEJ := unescapeUnicode(string(pretty.Ugly([]byte(*v.DegenerateExtJSON))), test.BsonType)
297-
if test.BsonType == "0x01" {
298-
dEJ = normalizeCanonicalDouble(t, *test.TestKey, dEJ)
299-
}
297+
/*** degenerate JSON round-trip tests (if exists) ***/
298+
if v.DegenerateExtJSON != nil {
299+
dEJ := unescapeUnicode(string(pretty.Ugly([]byte(*v.DegenerateExtJSON))), test.BsonType)
300+
if test.BsonType == "0x01" {
301+
dEJ = normalizeCanonicalDouble(t, *test.TestKey, dEJ)
302+
}
300303

301-
doc = jsonToNative(t, dEJ, "degenerate canonical", v.Description)
304+
doc = jsonToNative(t, dEJ, "degenerate canonical", v.Description)
302305

303-
// native_to_canonical_extended_json(json_to_native(dEJ)) = cEJ
304-
nativeToJSON(t, cEJ, doc, v.Description, "degenerate canonical", "cEJ", "json_to_native(dEJ)")
306+
// native_to_canonical_extended_json(json_to_native(dEJ)) = cEJ
307+
nativeToJSON(t, cEJ, doc, v.Description, "degenerate canonical", "cEJ", "json_to_native(dEJ)")
305308

306-
// native_to_bson(json_to_native(dEJ)) = cB (unless lossy)
307-
if v.Lossy == nil || !*v.Lossy {
308-
nativeToBSON(t, cB, doc, v.Description, "canonical", "json_to_native(dEJ)")
309-
}
309+
// native_to_bson(json_to_native(dEJ)) = cB (unless lossy)
310+
if v.Lossy == nil || !*v.Lossy {
311+
nativeToBSON(t, cB, doc, v.Description, "canonical", "json_to_native(dEJ)")
312+
}
313+
}
314+
})
310315
}
311-
}
312-
313-
for _, d := range test.DecodeErrors {
314-
b, err := hex.DecodeString(d.Bson)
315-
expectNoError(t, err, d.Description)
316-
317-
var doc D
318-
err = Unmarshal(b, &doc)
319-
expectError(t, err, fmt.Sprintf("%s: expected decode error", d.Description))
320-
}
316+
})
317+
318+
t.Run("decode error", func(t *testing.T) {
319+
for _, d := range test.DecodeErrors {
320+
t.Run(d.Description, func(t *testing.T) {
321+
b, err := hex.DecodeString(d.Bson)
322+
expectNoError(t, err, d.Description)
323+
324+
var doc D
325+
err = Unmarshal(b, &doc)
326+
327+
// The driver unmarshals invalid UTF-8 strings without error. Loop over the unmarshalled elements
328+
// and assert that there was no error if any of the string or DBPointer values contain invalid UTF-8
329+
// characters.
330+
for _, elem := range doc {
331+
str, ok := elem.Value.(string)
332+
invalidString := ok && !utf8.ValidString(str)
333+
dbPtr, ok := elem.Value.(primitive.DBPointer)
334+
invalidDBPtr := ok && !utf8.ValidString(dbPtr.DB)
335+
336+
if invalidString || invalidDBPtr {
337+
expectNoError(t, err, d.Description)
338+
return
339+
}
340+
}
321341

322-
for _, p := range test.ParseErrors {
323-
// skip DBRef tests
324-
if strings.Contains(p.Description, "Bad DBRef") {
325-
continue
342+
expectError(t, err, fmt.Sprintf("%s: expected decode error", d.Description))
343+
})
326344
}
345+
})
346+
347+
t.Run("parse error", func(t *testing.T) {
348+
for _, p := range test.ParseErrors {
349+
t.Run(p.Description, func(t *testing.T) {
350+
// skip DBRef tests
351+
if strings.Contains(p.Description, "Bad DBRef") {
352+
t.Skip("skipping DBRef test")
353+
}
327354

328-
s := unescapeUnicode(p.String, test.BsonType)
329-
if test.BsonType == "0x13" {
330-
s = fmt.Sprintf(`{"$numberDecimal": "%s"}`, s)
331-
}
355+
s := unescapeUnicode(p.String, test.BsonType)
356+
if test.BsonType == "0x13" {
357+
s = fmt.Sprintf(`{"$numberDecimal": "%s"}`, s)
358+
}
332359

333-
switch test.BsonType {
334-
case "0x00":
335-
var doc D
336-
err := UnmarshalExtJSON([]byte(s), true, &doc)
337-
expectError(t, err, fmt.Sprintf("%s: expected parse error", p.Description))
338-
case "0x13":
339-
ejvr, err := bsonrw.NewExtJSONValueReader(strings.NewReader(s), true)
340-
expectNoError(t, err, fmt.Sprintf("error creating value reader: %s", err))
341-
_, err = ejvr.ReadDecimal128()
342-
expectError(t, err, fmt.Sprintf("%s: expected parse error", p.Description))
343-
default:
344-
t.Errorf("Update test to check for parse errors for type %s", test.BsonType)
345-
t.Fail()
360+
switch test.BsonType {
361+
case "0x00":
362+
var doc D
363+
err := UnmarshalExtJSON([]byte(s), true, &doc)
364+
expectError(t, err, fmt.Sprintf("%s: expected parse error", p.Description))
365+
case "0x13":
366+
ejvr, err := bsonrw.NewExtJSONValueReader(strings.NewReader(s), true)
367+
expectNoError(t, err, fmt.Sprintf("error creating value reader: %s", err))
368+
_, err = ejvr.ReadDecimal128()
369+
expectError(t, err, fmt.Sprintf("%s: expected parse error", p.Description))
370+
default:
371+
t.Errorf("Update test to check for parse errors for type %s", test.BsonType)
372+
t.Fail()
373+
}
374+
})
346375
}
347-
}
376+
})
348377
})
349378
}
350379

bson/bsonrw/value_reader.go

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ import (
1414
"io"
1515
"math"
1616
"sync"
17-
"unicode"
1817

1918
"go.mongodb.org/mongo-driver/bson/bsontype"
2019
"go.mongodb.org/mongo-driver/bson/primitive"
@@ -821,14 +820,6 @@ func (vr *valueReader) readString() (string, error) {
821820

822821
start := vr.offset
823822
vr.offset += int64(length)
824-
825-
if length == 2 {
826-
asciiByte := vr.d[start]
827-
if asciiByte > unicode.MaxASCII {
828-
return "", fmt.Errorf("invalid ascii byte")
829-
}
830-
}
831-
832823
return string(vr.d[start : start+int64(length)-1]), nil
833824
}
834825

0 commit comments

Comments
 (0)