Skip to content

Commit f6e86bb

Browse files
authored
test(NODE-6534): add spec test runner for Binary vector (#729)
1 parent f99fdfd commit f6e86bb

File tree

5 files changed

+489
-0
lines changed

5 files changed

+489
-0
lines changed
Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
import * as fs from 'fs';
2+
import * as path from 'path';
3+
import { BSON, BSONError, Binary } from '../register-bson';
4+
import { expect } from 'chai';
5+
6+
const { toHex, fromHex } = BSON.onDemand.ByteUtils;
7+
8+
// Host byte-order probe: store -1 as a 64-bit float and look at its raw bytes.
const FLOAT = new Float64Array([-1]);
const FLOAT_BYTES = new Uint8Array(FLOAT.buffer, 0, Float64Array.BYTES_PER_ELEMENT);

// -1 encoded as a double:
//   little endian -> [0, 0, 0, 0, 0, 0, 240, 191]
//   big endian    -> [191, 240, 0, 0, 0, 0, 0, 0]
// so the last byte is zero only on a big endian host.
const isBigEndian = FLOAT_BYTES[7] === 0;
15+
16+
/** The `dtype_hex` values used by the spec fixtures: int8, float32 and packed_bit. */
type VectorHexType = '0x03' | '0x27' | '0x10';

/** One test case as it appears in a bson-binary-vector spec JSON file. */
type VectorTest = {
  description: string;
  /** Numeric elements; the float32 fixtures spell infinities as the strings "inf" / "-inf". */
  vector: (number | string)[];
  /** When false, encoding the vector is expected to raise an error. */
  valid: boolean;
  dtype_hex: VectorHexType;
  /** Human readable name of the dtype carried by the fixtures (e.g. "FLOAT32", "INT8"). */
  dtype_alias?: string;
  padding?: number;
  /** Hex string of the expected document; the invalid fixtures omit it. */
  canonical_bson?: string;
};

/** A whole spec file: a description, the document key to use, and its test cases. */
type VectorSuite = { description: string; test_key: string; tests: VectorTest[] };
26+
27+
/**
 * Checks the two header bytes (dtype, padding) of a subtype 9 Binary.
 * Binaries with any other subtype are ignored.
 *
 * @param vector - the Binary to inspect
 * @throws BSONError when the padding byte is not allowed for the dtype byte
 */
function validateVector(vector: Binary): void {
  const INT8 = 0x03;
  const FLOAT32 = 0x27;
  const PACKED_BIT = 0x10;

  if (vector.sub_type !== 9) return;

  // A missing header byte is treated as zero.
  const dtype = vector.buffer[0] ?? 0;
  const padding = vector.buffer[1] ?? 0;

  if (dtype !== PACKED_BIT) {
    const isWholeByteType = dtype === INT8 || dtype === FLOAT32;
    if (isWholeByteType && padding !== 0) {
      throw new BSONError('Invalid Vector: padding must be zero for int8 and float32 vectors');
    }
    return;
  }

  // position === 2 means only the two header bytes are present, i.e. no vector data.
  const hasNoData = vector.position === 2;
  if (hasNoData && padding !== 0) {
    throw new BSONError(
      'Invalid Vector: padding must be zero for packed bit vectors that are empty'
    );
  }

  if (padding > 7) {
    throw new BSONError(
      `Invalid Vector: padding must be a value between 0 and 7. found: ${padding}`
    );
  }
}
56+
57+
/**
 * Converts a float32 fixture element into a number.
 * Numbers pass through untouched; the strings "inf" and "-inf" become the infinities.
 *
 * @param f - a fixture element
 * @returns the numeric value
 * @throws Error for any other string
 */
function fixFloats(f: string | number): number {
  switch (f) {
    case 'inf':
      return Infinity;
    case '-inf':
      return -Infinity;
    default:
      if (typeof f === 'number') return f;
      throw new Error(`test format error: unknown float value: ${f}`);
  }
}
69+
70+
/**
 * Validates an int8 fixture element before it is stored in an Int8Array.
 *
 * Int8Array silently truncates fractions and turns NaN into 0, so anything that is not
 * an integer in [-128, 127] is rejected with an `unsupported_error` instead of being
 * coerced into a different vector than the fixture describes.
 *
 * @param f - a fixture element
 * @returns the same number, unchanged
 * @throws Error prefixed with `test format error` when `f` is not a number
 * @throws Error prefixed with `unsupported_error` when `f` is not an integer or is out of range
 */
function fixInt8s(f: number | string): number {
  if (typeof f !== 'number') throw new Error('test format error: unexpected test data');

  if (!Number.isSafeInteger(f)) {
    // Covers fractional values, NaN and the infinities.
    throw new Error(`unsupported_error: int8 must be an integer: ${f}`);
  }

  if (f < -128 || f > 127) {
    // Javascript Int8Array only supports values from -128 to 127
    throw new Error(`unsupported_error: int8 out of range: ${f}`);
  }
  return f;
}
79+
80+
/**
 * Validates a packed_bit fixture element: it must be an integer that fits in one unsigned byte.
 *
 * @param f - a fixture element
 * @returns the same number, unchanged
 * @throws Error prefixed with `test format error` when `f` is not a number
 * @throws Error prefixed with `unsupported_error` when `f` is not an integer in [0, 255]
 */
function fixBits(f: number | string): number {
  if (typeof f !== 'number') {
    throw new Error('test format error: unexpected test data');
  }

  // Javascript Uint8Array only supports values from 0 to 255
  const fitsInByte = Number.isSafeInteger(f) && f >= 0 && f <= 255;
  if (fitsInByte) {
    return f;
  }
  throw new Error(`unsupported_error: bit out of range: ${f}`);
}
89+
90+
/**
 * Builds a subtype 9 Binary from fixture data: a two byte header (dtype, padding)
 * followed by the encoded elements.
 *
 * @param vector - fixture elements, validated per dtype before encoding
 * @param dtype_hex - which element encoding to use
 * @param padding - value for the padding header byte, zero when omitted
 * @returns the assembled Binary
 * @throws Error when an element is rejected by its validator or the dtype is unknown
 */
function make(vector: (number | string)[], dtype_hex: VectorHexType, padding?: number): Binary {
  const HEADER_LENGTH = 2;
  let payload: Uint8Array;

  if (dtype_hex === '0x03' /* int8 */ || dtype_hex === '0x10' /* packed_bit */) {
    const elements = vector.map(dtype_hex === '0x03' ? fixInt8s : fixBits);
    payload = new Uint8Array(new Int8Array(elements).buffer);
  } else if (dtype_hex === '0x27' /* float32 */) {
    payload = new Uint8Array(new Float32Array(vector.map(fixFloats)).buffer);
    if (isBigEndian) {
      // Flip every 4 byte element in place so the bytes end up in little endian order.
      for (let start = 0; start < payload.length; start += 4) {
        payload.subarray(start, start + 4).reverse();
      }
    }
  } else {
    throw new Error(`Unknown dtype_hex: ${dtype_hex}`);
  }

  const bytes = new Uint8Array(payload.byteLength + HEADER_LENGTH);
  bytes.set(payload, HEADER_LENGTH);

  const binary = new Binary(bytes, 9);
  binary.buffer[0] = +dtype_hex;
  binary.buffer[1] = padding ?? 0;
  return binary;
}
127+
128+
/**
 * Expected outcome of every invalid spec test, keyed by the test's description.
 * A string is the message the thrown BSONError must match; `false` marks a test whose
 * input the fixture helpers reject with an `unsupported_error`, so it is skipped.
 *
 * Each description is registered once. The map is left untyped on purpose so its
 * values keep flowing into the assertions below exactly as before.
 */
const invalidTestExpectedError = new Map()
  .set('FLOAT32 with padding', 'Invalid Vector: padding must be zero for int8 and float32 vectors')
  .set('INT8 with padding', 'Invalid Vector: padding must be zero for int8 and float32 vectors')
  .set(
    'Padding specified with no vector data PACKED_BIT',
    'Invalid Vector: padding must be zero for packed bit vectors that are empty'
  )
  .set(
    'Exceeding maximum padding PACKED_BIT',
    'Invalid Vector: padding must be a value between 0 and 7'
  )
  .set('Negative padding PACKED_BIT', 'Invalid Vector: padding must be a value between 0 and 7')
  // skipped
  .set('Overflow Vector PACKED_BIT', false)
  .set('Underflow Vector PACKED_BIT', false)
  .set('Overflow Vector INT8', false)
  .set('Underflow Vector INT8', false)
  .set('INT8 with float inputs', false)
  // the spec lists this test twice; one entry covers both, and it is skipped as well.
  .set('Vector with float values PACKED_BIT', false);
153+
154+
/**
 * Runs every bson-binary-vector spec file found next to this test:
 * valid cases are encoded and decoded against their canonical_bson,
 * invalid cases must raise a BSONError (or are skipped when the fixture
 * helpers cannot represent the input).
 */
describe('BSON Binary Vector spec tests', () => {
  const specDirectory = path.join(__dirname, 'specs/bson-binary-vector');
  const tests: Record<string, VectorSuite> = Object.create(null);

  // Only the JSON fixtures are spec files; anything else in the directory is ignored.
  for (const file of fs.readdirSync(specDirectory).filter(name => name.endsWith('.json'))) {
    tests[path.basename(file, '.json')] = JSON.parse(
      fs.readFileSync(path.join(specDirectory, file), 'utf8')
    );
  }

  /** Lower-cased canonical_bson of a valid test; a missing value is a fixture error. */
  const canonicalHex = (test: VectorTest): string => {
    if (test.canonical_bson == null) {
      throw new Error(`test format error: "${test.description}" has no canonical_bson`);
    }
    return test.canonical_bson.toLowerCase();
  };

  // Both output formats must reject an invalid vector in the same way.
  const encoders: [string, (bin: Binary) => unknown][] = [
    ['bson', bin => BSON.serialize({ bin })],
    ['extended json', bin => BSON.EJSON.stringify({ bin })]
  ];

  for (const [suiteName, suite] of Object.entries(tests)) {
    describe(suiteName, function () {
      const valid = suite.tests.filter(t => t.valid);
      const invalid = suite.tests.filter(t => !t.valid);

      describe('valid', function () {
        /**
         * 1. encode a document from the numeric values, dtype, and padding, along with the "test_key", and assert this matches the canonical_bson string.
         * 2. decode the canonical_bson into its binary form, and then assert that the numeric values, dtype, and padding all match those provided in the JSON.
         *
         * > Note: For floating point number types, exact numerical matches may not be possible.
         * > Drivers that natively support the floating-point type being tested (e.g., when testing float32 vector values in a driver that natively supports float32),
         * > MUST assert that the input float array is the same after encoding and decoding.
         */
        for (const test of valid) {
          it(`encode ${test.description}`, function () {
            const bin = make(test.vector, test.dtype_hex, test.padding);

            const buffer = BSON.serialize({ [suite.test_key]: bin });
            expect(toHex(buffer)).to.equal(canonicalHex(test));
          });

          it(`decode ${test.description}`, function () {
            const doc = BSON.deserialize(fromHex(canonicalHex(test)));
            const decoded = doc[suite.test_key];

            expect(decoded.sub_type).to.equal(0x09);
            expect(decoded.buffer[0]).to.equal(+test.dtype_hex);
            // padding is optional in the fixtures; make() encodes a missing one as zero.
            expect(decoded.buffer[1]).to.equal(test.padding ?? 0);
          });
        }
      });

      describe('invalid', function () {
        /**
         * To prove correct in an invalid case (valid:false),
         * one MUST raise an exception when attempting to encode
         * a document from the numeric values, dtype, and padding.
         */
        for (const test of invalid) {
          const expectedErrorMessage = invalidTestExpectedError.get(test.description);

          for (const [format, encode] of encoders) {
            it(`${format}: ${test.description}`, function () {
              let thrownError: unknown;
              try {
                const bin = make(test.vector, test.dtype_hex, test.padding);
                encode(bin);
                // TODO(NODE-6537): The following validation MUST be a part of serialize and stringify
                validateVector(bin);
              } catch (error) {
                thrownError = error;
              }

              const message = thrownError instanceof Error ? thrownError.message : undefined;

              if (message?.startsWith('unsupported_error')) {
                expect(
                  expectedErrorMessage,
                  'We expect a certain error message but got an unsupported error'
                ).to.be.false;
                this.skip();
              }

              expect(thrownError).to.be.instanceOf(BSONError);
              // An unregistered description must fail loudly rather than match any message.
              expect(
                expectedErrorMessage,
                `no expected error message registered for "${test.description}"`
              ).to.be.a('string');
              expect(message).to.include(expectedErrorMessage);
            });
          }
        }
      });
    });
  }
});
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
{
2+
"description": "Tests of Binary subtype 9, Vectors, with dtype FLOAT32",
3+
"test_key": "vector",
4+
"tests": [
5+
{
6+
"description": "Simple Vector FLOAT32",
7+
"valid": true,
8+
"vector": [127.0, 7.0],
9+
"dtype_hex": "0x27",
10+
"dtype_alias": "FLOAT32",
11+
"padding": 0,
12+
"canonical_bson": "1C00000005766563746F72000A0000000927000000FE420000E04000"
13+
},
14+
{
15+
"description": "Vector with decimals and negative value FLOAT32",
16+
"valid": true,
17+
"vector": [127.7, -7.7],
18+
"dtype_hex": "0x27",
19+
"dtype_alias": "FLOAT32",
20+
"padding": 0,
21+
"canonical_bson": "1C00000005766563746F72000A0000000927006666FF426666F6C000"
22+
},
23+
{
24+
"description": "Empty Vector FLOAT32",
25+
"valid": true,
26+
"vector": [],
27+
"dtype_hex": "0x27",
28+
"dtype_alias": "FLOAT32",
29+
"padding": 0,
30+
"canonical_bson": "1400000005766563746F72000200000009270000"
31+
},
32+
{
33+
"description": "Infinity Vector FLOAT32",
34+
"valid": true,
35+
"vector": ["-inf", 0.0, "inf"],
36+
"dtype_hex": "0x27",
37+
"dtype_alias": "FLOAT32",
38+
"padding": 0,
39+
"canonical_bson": "2000000005766563746F72000E000000092700000080FF000000000000807F00"
40+
},
41+
{
42+
"description": "FLOAT32 with padding",
43+
"valid": false,
44+
"vector": [127.0, 7.0],
45+
"dtype_hex": "0x27",
46+
"dtype_alias": "FLOAT32",
47+
"padding": 3
48+
}
49+
]
50+
}
51+
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
{
2+
"description": "Tests of Binary subtype 9, Vectors, with dtype INT8",
3+
"test_key": "vector",
4+
"tests": [
5+
{
6+
"description": "Simple Vector INT8",
7+
"valid": true,
8+
"vector": [127, 7],
9+
"dtype_hex": "0x03",
10+
"dtype_alias": "INT8",
11+
"padding": 0,
12+
"canonical_bson": "1600000005766563746F7200040000000903007F0700"
13+
},
14+
{
15+
"description": "Empty Vector INT8",
16+
"valid": true,
17+
"vector": [],
18+
"dtype_hex": "0x03",
19+
"dtype_alias": "INT8",
20+
"padding": 0,
21+
"canonical_bson": "1400000005766563746F72000200000009030000"
22+
},
23+
{
24+
"description": "Overflow Vector INT8",
25+
"valid": false,
26+
"vector": [128],
27+
"dtype_hex": "0x03",
28+
"dtype_alias": "INT8",
29+
"padding": 0
30+
},
31+
{
32+
"description": "Underflow Vector INT8",
33+
"valid": false,
34+
"vector": [-129],
35+
"dtype_hex": "0x03",
36+
"dtype_alias": "INT8",
37+
"padding": 0
38+
},
39+
{
40+
"description": "INT8 with padding",
41+
"valid": false,
42+
"vector": [127, 7],
43+
"dtype_hex": "0x03",
44+
"dtype_alias": "INT8",
45+
"padding": 3
46+
},
47+
{
48+
"description": "INT8 with float inputs",
49+
"valid": false,
50+
"vector": [127.77, 7.77],
51+
"dtype_hex": "0x03",
52+
"dtype_alias": "INT8",
53+
"padding": 0
54+
}
55+
]
56+
}
57+

0 commit comments

Comments
 (0)