Skip to content

Commit d3fe6e0

Browse files
committed
test(NODE-6534): add spec test runner for Binary vector
1 parent 887849d commit d3fe6e0

File tree

5 files changed

+430
-0
lines changed

5 files changed

+430
-0
lines changed
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
import * as fs from 'fs';
2+
import * as path from 'path';
3+
import { BSON, BSONError, Binary } from '../register-bson';
4+
import { expect } from 'chai';
5+
6+
// Hex <-> byte helpers taken from the BSON on-demand ByteUtils; used below to compare
// serialized documents against the `canonical_bson` hex strings in the spec fixtures.
const toHex = BSON.onDemand.ByteUtils.toHex;
const fromHex = BSON.onDemand.ByteUtils.fromHex;
7+
8+
// Host endianness probe: store -1 as a float64 and inspect its raw bytes.
//   Little endian layout: [0, 0, 0, 0, 0, 0, 240, 191]
//   Big endian layout:    [191, 240, 0, 0, 0, 0, 0, 0]
const FLOAT = new Float64Array([-1]);
// Byte-level view over the same 8 bytes backing FLOAT.
const FLOAT_BYTES = new Uint8Array(FLOAT.buffer, 0, FLOAT.byteLength);

// Only the big endian layout ends with a zero byte.
const isBigEndian = FLOAT_BYTES[FLOAT_BYTES.length - 1] === 0;
15+
16+
/** Element type names used by the `dtype_alias` field of the spec fixtures. */
type DTypeAlias = 'INT8' | 'FLOAT32' | 'PACKED_BIT';

/** One test case read from a spec fixture file. */
interface VectorTest {
  /** Human readable name; used as the mocha test title. */
  description: string;
  /** Vector elements; fixtures may spell infinities as the strings 'inf' / '-inf'. */
  vector: (number | string)[];
  /** Whether encoding this case is expected to succeed. */
  valid: boolean;
  /** dtype byte written as a hex string, e.g. "0x27". */
  dtype_hex: string;
  dtype_alias: DTypeAlias;
  /** Value of the padding byte that follows the dtype byte. */
  padding: number;
  /** Hex of the expected BSON document; absent on the invalid cases in the fixtures. */
  canonical_bson?: string;
}

/** Shape of one parsed fixture file. */
interface VectorSuite {
  description: string;
  /** Document key under which the Binary value is stored. */
  test_key: string;
  tests: VectorTest[];
}
27+
28+
/**
 * Converts one FLOAT32 fixture element into a JS number.
 *
 * Numbers pass through unchanged; the strings 'inf' and '-inf' become
 * Infinity and -Infinity respectively.
 *
 * @param f - element taken from a fixture's `vector` array
 * @returns the numeric value of the element
 * @throws Error when given any other string
 */
function fixFloats(f: string | number): number {
  switch (f) {
    case 'inf':
      return Infinity;
    case '-inf':
      return -Infinity;
    default:
      if (typeof f === 'number') {
        return f;
      }
      throw new Error(`unknown float value: ${f}`);
  }
}
40+
41+
/**
 * Validates one INT8 fixture element.
 *
 * The value must be an integer in [-128, 127]. The integer check matters because
 * the caller stores the result in an Int8Array, which would otherwise silently
 * truncate a fractional value such as 7.77 (or turn NaN into 0) instead of
 * reporting the invalid input.
 *
 * @param f - element taken from a fixture's `vector` array
 * @returns `f` unchanged when it is a valid int8
 * @throws Error when the element is not a number (malformed fixture)
 * @throws BSONError when the number is not an integer or is out of int8 range
 */
function fixInt8s(f: number | string): number {
  if (typeof f !== 'number') throw new Error('unexpected test data');

  // TODO(NODE-6537): this must be a part of the final "make a binary from" API.
  if (!Number.isInteger(f)) {
    throw new BSONError(`int8 must be an integer: ${f}`);
  }
  if (f < -128 || f > 127) {
    throw new BSONError(`int8 out of range: ${f}`);
  }
  return f;
}
50+
51+
/**
 * Validates one PACKED_BIT fixture element, which must be an integer in [0, 255].
 *
 * @param f - element taken from a fixture's `vector` array
 * @returns `f` unchanged when it is valid
 * @throws Error when the element is not a number (malformed fixture)
 * @throws BSONError when the number is not a safe integer or is outside [0, 255]
 */
function fixBits(f: number | string): number {
  if (typeof f !== 'number') throw new Error('unexpected test data');

  const fitsInByte = Number.isSafeInteger(f) && f >= 0 && f <= 255;
  if (fitsInByte) {
    return f;
  }
  // TODO(NODE-6537): this must be a part of the final "make a binary from" API.
  throw new BSONError(`bit out of range: ${f}`);
}
60+
61+
/** TODO(NODE-6537): Replace the following with final "make a binary from" API */
62+
/**
 * Builds a subtype 9 Binary from fixture values.
 *
 * The produced bytes are: the dtype byte, the padding byte, then the elements.
 * INT8 and PACKED_BIT elements take one byte each; FLOAT32 elements take four
 * bytes each and are always written in little endian order.
 *
 * @param vector - raw fixture elements, validated per dtype before encoding
 * @param dtype_hex - dtype byte as a numeric string such as "0x27"
 * @param dtype_alias - selects the element encoding
 * @param padding - value stored in the second byte
 * @returns a Binary with sub_type 9 wrapping the encoded bytes
 * @throws whatever the element validators throw, or Error for an unknown alias
 */
function VECTOR_TO_BINARY(
  vector: (number | string)[],
  dtype_hex: string,
  dtype_alias: DTypeAlias,
  padding: number
): Binary {
  // Allocates the output and fills in the two header bytes.
  const allocate = (payloadLength: number): Uint8Array => {
    const out = new Uint8Array(payloadLength + 2);
    out[0] = +dtype_hex;
    out[1] = padding;
    return out;
  };

  if (dtype_alias === 'INT8' || dtype_alias === 'PACKED_BIT') {
    const validate = dtype_alias === 'INT8' ? fixInt8s : fixBits;
    const elements = new Int8Array(vector.map(validate));
    const encoded = allocate(elements.byteLength);
    encoded.set(new Uint8Array(elements.buffer), 2);
    return new Binary(encoded, 9);
  }

  if (dtype_alias === 'FLOAT32') {
    const floats = new Float32Array(vector.map(fixFloats));
    const rawBytes = new Uint8Array(floats.buffer);
    const encoded = allocate(floats.byteLength);
    if (!isBigEndian) {
      // Host byte order already matches the wire format.
      encoded.set(rawBytes, 2);
    } else {
      // Flip each 4-byte element in place so it is copied out as little endian.
      for (let start = 0; start < rawBytes.length; start += 4) {
        encoded.set(rawBytes.subarray(start, start + 4).reverse(), start + 2);
      }
    }
    return new Binary(encoded, 9);
  }

  throw new Error(`Unknown dtype_alias: ${dtype_alias}`);
}
100+
101+
// Runs every fixture file found in specs/bson-binary-vector as its own mocha suite.
describe('BSON Binary Vector spec tests', () => {
  const specDirectory = path.join(__dirname, 'specs/bson-binary-vector');
  const suitesByName: Record<string, VectorSuite> = Object.create(null);

  // Each suite is keyed by its file name up to the first '.'.
  for (const fileName of fs.readdirSync(specDirectory)) {
    const contents = fs.readFileSync(path.join(specDirectory, fileName), 'utf8');
    suitesByName[fileName.split('.')[0]] = JSON.parse(contents);
  }

  Object.entries(suitesByName).forEach(([suiteName, suite]) => {
    describe(suiteName, function () {
      const validCases = suite.tests.filter(testCase => testCase.valid);
      const invalidCases = suite.tests.filter(testCase => !testCase.valid);

      describe('valid', function () {
        /**
         * Spec procedure for valid cases:
         * 1. encode a document from the numeric values, dtype, and padding, along with the
         *    "test_key", and assert this matches the canonical_bson string.
         * 2. decode the canonical_bson into its binary form, and then assert that the numeric
         *    values, dtype, and padding all match those provided in the JSON.
         *
         * > Note: For floating point number types, exact numerical matches may not be possible.
         * > Drivers that natively support the floating-point type being tested (e.g., when
         * > testing float32 vector values in a driver that natively supports float32),
         * > MUST assert that the input float array is the same after encoding and decoding.
         */
        for (const testCase of validCases) {
          it(`encode ${testCase.description}`, function () {
            const binary = VECTOR_TO_BINARY(
              testCase.vector,
              testCase.dtype_hex,
              testCase.dtype_alias,
              testCase.padding
            );
            const serialized = BSON.serialize({ [suite.test_key]: binary });

            expect(toHex(serialized)).to.equal(testCase.canonical_bson!.toLowerCase());
          });

          it(`decode ${testCase.description}`, function () {
            const bytes = fromHex(testCase.canonical_bson!.toLowerCase());
            const decoded = BSON.deserialize(bytes);

            // Only the subtype and the two header bytes are checked here.
            expect(decoded[suite.test_key].sub_type).to.equal(0x09);
            expect(decoded[suite.test_key].buffer[0]).to.equal(+testCase.dtype_hex);
            expect(decoded[suite.test_key].buffer[1]).to.equal(testCase.padding);
          });
        }
      });

      describe('invalid', function () {
        /**
         * To prove correct in an invalid case (valid:false),
         * one MUST raise an exception when attempting to encode
         * a document from the numeric values, dtype, and padding.
         */
        for (const testCase of invalidCases) {
          it(testCase.description, function () {
            const encode = (): void => {
              // Errors are thrown when creating the binary because of invalid values in the vector.
              const binary = VECTOR_TO_BINARY(
                testCase.vector,
                testCase.dtype_hex,
                testCase.dtype_alias,
                testCase.padding
              );

              // vector assertions TODO(NODE-6537): Replace the following with final "make a binary from" API.
              if (binary.sub_type !== 0x09) return;

              // dtype byte values
              const FLOAT32_DTYPE = 0x27;
              const INT8_DTYPE = 0x03;
              const PACKED_BIT_DTYPE = 0x10;

              const byteLength = binary.position;
              const bytes = binary.buffer;
              const dtypeByte = bytes[0] ?? 0;
              const paddingByte = bytes[1] ?? 0;
              const isPackedBit = dtypeByte === PACKED_BIT_DTYPE;

              if ((dtypeByte === FLOAT32_DTYPE || dtypeByte === INT8_DTYPE) && paddingByte !== 0) {
                throw new BSONError('padding must be zero for int8 and float32 vectors');
              }

              // A length of 2 means only the dtype and padding bytes are present.
              if (isPackedBit && paddingByte !== 0 && byteLength === 2) {
                throw new BSONError('padding must be zero for packed bit vectors that are empty');
              }

              if (isPackedBit && paddingByte > 7) {
                throw new BSONError(`padding must be a value between 0 and 7. found: ${bytes[1]}`);
              }
            };

            expect(encode).to.throw(BSONError);
          });
        }
      });
    });
  });
});
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
{
2+
"description": "Tests of Binary subtype 9, Vectors, with dtype FLOAT32",
3+
"test_key": "vector",
4+
"tests": [
5+
{
6+
"description": "Simple Vector FLOAT32",
7+
"valid": true,
8+
"vector": [127.0, 7.0],
9+
"dtype_hex": "0x27",
10+
"dtype_alias": "FLOAT32",
11+
"padding": 0,
12+
"canonical_bson": "1C00000005766563746F72000A0000000927000000FE420000E04000"
13+
},
14+
{
15+
"description": "Vector with decimals and negative value FLOAT32",
16+
"valid": true,
17+
"vector": [127.7, -7.7],
18+
"dtype_hex": "0x27",
19+
"dtype_alias": "FLOAT32",
20+
"padding": 0,
21+
"canonical_bson": "1C00000005766563746F72000A0000000927006666FF426666F6C000"
22+
},
23+
{
24+
"description": "Empty Vector FLOAT32",
25+
"valid": true,
26+
"vector": [],
27+
"dtype_hex": "0x27",
28+
"dtype_alias": "FLOAT32",
29+
"padding": 0,
30+
"canonical_bson": "1400000005766563746F72000200000009270000"
31+
},
32+
{
33+
"description": "Infinity Vector FLOAT32",
34+
"valid": true,
35+
"vector": ["-inf", 0.0, "inf"],
36+
"dtype_hex": "0x27",
37+
"dtype_alias": "FLOAT32",
38+
"padding": 0,
39+
"canonical_bson": "2000000005766563746F72000E000000092700000080FF000000000000807F00"
40+
},
41+
{
42+
"description": "FLOAT32 with padding",
43+
"valid": false,
44+
"vector": [127.0, 7.0],
45+
"dtype_hex": "0x27",
46+
"dtype_alias": "FLOAT32",
47+
"padding": 3
48+
}
49+
]
50+
}
51+
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
{
2+
"description": "Tests of Binary subtype 9, Vectors, with dtype INT8",
3+
"test_key": "vector",
4+
"tests": [
5+
{
6+
"description": "Simple Vector INT8",
7+
"valid": true,
8+
"vector": [127, 7],
9+
"dtype_hex": "0x03",
10+
"dtype_alias": "INT8",
11+
"padding": 0,
12+
"canonical_bson": "1600000005766563746F7200040000000903007F0700"
13+
},
14+
{
15+
"description": "Empty Vector INT8",
16+
"valid": true,
17+
"vector": [],
18+
"dtype_hex": "0x03",
19+
"dtype_alias": "INT8",
20+
"padding": 0,
21+
"canonical_bson": "1400000005766563746F72000200000009030000"
22+
},
23+
{
24+
"description": "Overflow Vector INT8",
25+
"valid": false,
26+
"vector": [128],
27+
"dtype_hex": "0x03",
28+
"dtype_alias": "INT8",
29+
"padding": 0
30+
},
31+
{
32+
"description": "Underflow Vector INT8",
33+
"valid": false,
34+
"vector": [-129],
35+
"dtype_hex": "0x03",
36+
"dtype_alias": "INT8",
37+
"padding": 0
38+
},
39+
{
40+
"description": "INT8 with padding",
41+
"valid": false,
42+
"vector": [127, 7],
43+
"dtype_hex": "0x03",
44+
"dtype_alias": "INT8",
45+
"padding": 3
46+
},
47+
{
48+
"description": "INT8 with float inputs",
49+
"valid": false,
50+
"vector": [127.77, 7.77],
51+
"dtype_hex": "0x03",
52+
"dtype_alias": "INT8",
53+
"padding": 0
54+
}
55+
]
56+
}
57+

0 commit comments

Comments
 (0)