Skip to content

Commit a5ed30d

Browse files
committed
test(NODE-6534): add spec test runner for Binary vector
1 parent f99fdfd commit a5ed30d

File tree

5 files changed

+459
-0
lines changed

5 files changed

+459
-0
lines changed
Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
import * as fs from 'fs';
2+
import * as path from 'path';
3+
import { BSON, BSONError, Binary } from '../register-bson';
4+
import { expect } from 'chai';
5+
6+
// Hex helpers: toHex renders serialized bytes for comparison with the spec's canonical_bson strings,
// fromHex turns those strings back into bytes for deserialization.
const { toHex, fromHex } = BSON.onDemand.ByteUtils;
7+
8+
// Detect the host byte order by inspecting the in-memory layout of the float64 value -1:
//   little endian: [0, 0, 0, 0, 0, 0, 240, 191]
//   big endian:    [191, 240, 0, 0, 0, 0, 0, 0]
const FLOAT = new Float64Array(1);
const FLOAT_BYTES = new Uint8Array(FLOAT.buffer, 0, 8);

FLOAT[0] = -1;

// The last byte is zero only when the sign/exponent bytes were stored first (big endian).
const isBigEndian = FLOAT_BYTES[7] === 0;
15+
16+
/** Names of the vector element types that appear in the spec files. */
type DTypeAlias = 'INT8' | 'FLOAT32' | 'PACKED_BIT';

/** A single case from a Binary vector spec file. */
type VectorTest = {
  /** Human readable name, used in the mocha test title. */
  description: string;
  /** Element values; floats may be spelled as the strings "inf" / "-inf". */
  vector: (number | string)[];
  /** Whether encoding the case is expected to succeed. */
  valid: boolean;
  /** Element type byte written as a hex string, e.g. "0x27". */
  dtype_hex: string;
  dtype_alias: DTypeAlias;
  /** Value of the padding byte that follows the dtype byte. */
  padding: number;
  /** Hex encoded BSON document; the invalid cases in the spec files omit it. */
  canonical_bson?: string;
};

/** The contents of one spec file: a document key plus the cases that use it. */
type VectorSuite = { description: string; test_key: string; tests: VectorTest[] };
27+
28+
/**
 * Checks the padding byte of a Binary vector (sub_type 9) against its element type.
 * Binary values with any other sub_type are ignored.
 *
 * @param vector - the Binary to inspect; byte 0 is read as the dtype and byte 1 as the padding
 * @throws BSONError when
 *  - an int8 or float32 vector has non-zero padding
 *  - a packed bit vector with no data bytes has non-zero padding
 *  - a packed bit vector has padding greater than 7
 */
function validateVector(vector: Binary): void {
  const VECTOR_SUBTYPE = 9;
  const VECTOR_TYPE = Object.freeze({
    Int8: 0x03,
    Float32: 0x27,
    PackedBit: 0x10
  } as const);

  if (vector.sub_type !== VECTOR_SUBTYPE) return;

  const size = vector.position;
  // An empty Binary has no header bytes at all; treat both as zero
  const d_type = vector.buffer[0] ?? 0;
  const padding = vector.buffer[1] ?? 0;

  if ((d_type === VECTOR_TYPE.Float32 || d_type === VECTOR_TYPE.Int8) && padding !== 0) {
    throw new BSONError('Invalid Vector: padding must be zero for int8 and float32 vectors');
  }

  // size === 2 means only the dtype and padding bytes are present
  if (d_type === VECTOR_TYPE.PackedBit && padding !== 0 && size === 2) {
    throw new BSONError(
      'Invalid Vector: padding must be zero for packed bit vectors that are empty'
    );
  }

  if (d_type === VECTOR_TYPE.PackedBit && padding > 7) {
    throw new BSONError(
      `Invalid Vector: padding must be a value between 0 and 7. found: ${padding}`
    );
  }
}
57+
58+
/**
 * Converts a float entry from a spec file into a JavaScript number.
 * Numbers pass through unchanged; the strings "inf" and "-inf" become the infinities.
 *
 * @throws Error for any other string
 */
function fixFloats(f: string | number): number {
  if (typeof f === 'number') {
    return f;
  }

  switch (f) {
    case 'inf':
      return Infinity;
    case '-inf':
      return -Infinity;
    default:
      throw new Error(`test format error: unknown float value: ${f}`);
  }
}
70+
71+
/**
 * Validates an int8 entry from a spec file before it is stored in an Int8Array.
 *
 * @returns the value unchanged when it is an integer between -128 and 127
 * @throws Error prefixed with "test format error" when the entry is not a number
 * @throws Error prefixed with "unsupported_error" when the number cannot be represented
 */
function fixInt8s(f: number | string): number {
  if (typeof f !== 'number') throw new Error('test format error: unexpected test data');

  // Javascript Int8Array only supports integers from -128 to 127.
  // Fractional values and NaN would be silently truncated rather than rejected, so refuse them here.
  if (!Number.isInteger(f) || f < -128 || f > 127) {
    throw new Error(`unsupported_error: int8 out of range: ${f}`);
  }
  return f;
}
80+
81+
/**
 * Validates a packed bit entry (one byte of bits) from a spec file.
 *
 * @returns the value unchanged when it is an integer between 0 and 255
 * @throws Error prefixed with "test format error" when the entry is not a number
 * @throws Error prefixed with "unsupported_error" when the number does not fit in a byte
 */
function fixBits(f: number | string): number {
  if (typeof f !== 'number') throw new Error('test format error: unexpected test data');

  // Javascript Uint8Array only supports values from 0 to 255
  if (!Number.isSafeInteger(f) || f < 0 || f > 255) {
    throw new Error(`unsupported_error: bit out of range: ${f}`);
  }
  return f;
}
90+
91+
/**
 * Builds a Binary of sub_type 9 from the fields of a spec test case.
 *
 * The resulting bytes are: the dtype byte, the padding byte, then the element data.
 * FLOAT32 elements are always laid out little endian, whatever the host byte order.
 *
 * @param vector - element values as they appear in the spec file
 * @param dtype_hex - dtype byte as a hex string, e.g. "0x27"
 * @param dtype_alias - selects how `vector` is converted to bytes
 * @param padding - value stored in the padding byte
 * @throws Error when an element is rejected by fixInt8s / fixBits / fixFloats,
 *   or when `dtype_alias` is not one of the known aliases
 */
function make(
  vector: (number | string)[],
  dtype_hex: string,
  dtype_alias: DTypeAlias,
  padding: number
): Binary {
  let payload: Uint8Array;

  switch (dtype_alias) {
    case 'INT8': {
      const values = new Int8Array(vector.map(fixInt8s));
      payload = new Uint8Array(values.buffer);
      break;
    }

    case 'PACKED_BIT': {
      payload = Uint8Array.from(vector.map(fixBits));
      break;
    }

    case 'FLOAT32': {
      const floats = new Float32Array(vector.map(fixFloats));
      payload = new Uint8Array(floats.buffer);
      if (isBigEndian) {
        // Flip every 4 byte element in place so the output is little endian
        for (let offset = 0; offset < payload.length; offset += 4) {
          payload.subarray(offset, offset + 4).reverse();
        }
      }
      break;
    }

    default:
      throw new Error(`Unknown dtype_alias: ${dtype_alias}`);
  }

  // Two header bytes (dtype, padding) precede the element data
  const buffer = new Uint8Array(payload.byteLength + 2);
  buffer[0] = +dtype_hex;
  buffer[1] = padding;
  buffer.set(payload, 2);

  return new Binary(buffer, 9);
}
137+
138+
/**
 * Runs every spec file found in specs/bson-binary-vector as a mocha suite.
 * Each file becomes a `describe` named after the file, split into valid and invalid cases.
 */
describe('BSON Binary Vector spec tests', () => {
  const specDirectory = path.join(__dirname, 'specs/bson-binary-vector');
  const tests: Record<string, VectorSuite> = Object.create(null);

  for (const file of fs.readdirSync(specDirectory)) {
    // Only JSON files are suites; anything else in the directory would make JSON.parse throw
    if (path.extname(file) !== '.json') continue;

    tests[path.basename(file, '.json')] = JSON.parse(
      fs.readFileSync(path.join(specDirectory, file), 'utf8')
    );
  }

  /** Returns the lower cased canonical_bson of a valid case, failing loudly if the case lacks one. */
  const canonicalHex = (test: VectorTest): string => {
    if (test.canonical_bson == null) {
      throw new Error(`test format error: missing canonical_bson: ${test.description}`);
    }
    return test.canonical_bson.toLowerCase();
  };

  /**
   * Creates the body of an invalid-case test: build the vector, run `encode` on it,
   * then expect a BSONError. Errors prefixed with "unsupported_error" skip the test instead.
   */
  const invalidCase = (test: VectorTest, encode: (bin: Binary) => unknown) =>
    function (this: { skip(): void }) {
      let thrownError: unknown;
      try {
        const bin = make(test.vector, test.dtype_hex, test.dtype_alias, test.padding);
        encode(bin);
        // TODO(NODE-6537): The following validation MUST be a part of serialize and stringify
        validateVector(bin);
      } catch (error) {
        thrownError = error;
      }

      if (thrownError instanceof Error && thrownError.message.startsWith('unsupported_error')) {
        this.skip();
      }
      expect(thrownError).to.be.instanceOf(BSONError);
    };

  for (const [suiteName, suite] of Object.entries(tests)) {
    describe(suiteName, function () {
      const valid = suite.tests.filter(t => t.valid);
      const invalid = suite.tests.filter(t => !t.valid);

      describe('valid', function () {
        /**
         * 1. encode a document from the numeric values, dtype, and padding, along with the "test_key", and assert this matches the canonical_bson string.
         * 2. decode the canonical_bson into its binary form, and then assert that the numeric values, dtype, and padding all match those provided in the JSON.
         *
         * > Note: For floating point number types, exact numerical matches may not be possible.
         * > Drivers that natively support the floating-point type being tested (e.g., when testing float32 vector values in a driver that natively supports float32),
         * > MUST assert that the input float array is the same after encoding and decoding.
         */
        for (const test of valid) {
          it(`encode ${test.description}`, function () {
            const bin = make(test.vector, test.dtype_hex, test.dtype_alias, test.padding);

            const buffer = BSON.serialize({ [suite.test_key]: bin });
            expect(toHex(buffer)).to.equal(canonicalHex(test));
          });

          it(`decode ${test.description}`, function () {
            const doc = BSON.deserialize(fromHex(canonicalHex(test)));
            const decoded = doc[suite.test_key];

            expect(decoded.sub_type).to.equal(0x09);
            expect(decoded.buffer[0]).to.equal(+test.dtype_hex);
            expect(decoded.buffer[1]).to.equal(test.padding);

            // The element bytes must round trip too, not just the two header bytes
            const expected = make(test.vector, test.dtype_hex, test.dtype_alias, test.padding);
            expect(toHex(decoded.buffer.subarray(0, decoded.position))).to.equal(
              toHex(expected.buffer.subarray(0, expected.position))
            );
          });
        }
      });

      describe('invalid', function () {
        /**
         * To prove correct in an invalid case (valid:false),
         * one MUST raise an exception when attempting to encode
         * a document from the numeric values, dtype, and padding.
         */
        for (const test of invalid) {
          it(
            `bson: ${test.description}`,
            invalidCase(test, bin => BSON.serialize({ bin }))
          );

          it(
            `extended json: ${test.description}`,
            invalidCase(test, bin => BSON.EJSON.stringify({ bin }))
          );
        }
      });
    });
  }
});
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
{
2+
"description": "Tests of Binary subtype 9, Vectors, with dtype FLOAT32",
3+
"test_key": "vector",
4+
"tests": [
5+
{
6+
"description": "Simple Vector FLOAT32",
7+
"valid": true,
8+
"vector": [127.0, 7.0],
9+
"dtype_hex": "0x27",
10+
"dtype_alias": "FLOAT32",
11+
"padding": 0,
12+
"canonical_bson": "1C00000005766563746F72000A0000000927000000FE420000E04000"
13+
},
14+
{
15+
"description": "Vector with decimals and negative value FLOAT32",
16+
"valid": true,
17+
"vector": [127.7, -7.7],
18+
"dtype_hex": "0x27",
19+
"dtype_alias": "FLOAT32",
20+
"padding": 0,
21+
"canonical_bson": "1C00000005766563746F72000A0000000927006666FF426666F6C000"
22+
},
23+
{
24+
"description": "Empty Vector FLOAT32",
25+
"valid": true,
26+
"vector": [],
27+
"dtype_hex": "0x27",
28+
"dtype_alias": "FLOAT32",
29+
"padding": 0,
30+
"canonical_bson": "1400000005766563746F72000200000009270000"
31+
},
32+
{
33+
"description": "Infinity Vector FLOAT32",
34+
"valid": true,
35+
"vector": ["-inf", 0.0, "inf"],
36+
"dtype_hex": "0x27",
37+
"dtype_alias": "FLOAT32",
38+
"padding": 0,
39+
"canonical_bson": "2000000005766563746F72000E000000092700000080FF000000000000807F00"
40+
},
41+
{
42+
"description": "FLOAT32 with padding",
43+
"valid": false,
44+
"vector": [127.0, 7.0],
45+
"dtype_hex": "0x27",
46+
"dtype_alias": "FLOAT32",
47+
"padding": 3
48+
}
49+
]
50+
}
51+
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
{
2+
"description": "Tests of Binary subtype 9, Vectors, with dtype INT8",
3+
"test_key": "vector",
4+
"tests": [
5+
{
6+
"description": "Simple Vector INT8",
7+
"valid": true,
8+
"vector": [127, 7],
9+
"dtype_hex": "0x03",
10+
"dtype_alias": "INT8",
11+
"padding": 0,
12+
"canonical_bson": "1600000005766563746F7200040000000903007F0700"
13+
},
14+
{
15+
"description": "Empty Vector INT8",
16+
"valid": true,
17+
"vector": [],
18+
"dtype_hex": "0x03",
19+
"dtype_alias": "INT8",
20+
"padding": 0,
21+
"canonical_bson": "1400000005766563746F72000200000009030000"
22+
},
23+
{
24+
"description": "Overflow Vector INT8",
25+
"valid": false,
26+
"vector": [128],
27+
"dtype_hex": "0x03",
28+
"dtype_alias": "INT8",
29+
"padding": 0
30+
},
31+
{
32+
"description": "Underflow Vector INT8",
33+
"valid": false,
34+
"vector": [-129],
35+
"dtype_hex": "0x03",
36+
"dtype_alias": "INT8",
37+
"padding": 0
38+
},
39+
{
40+
"description": "INT8 with padding",
41+
"valid": false,
42+
"vector": [127, 7],
43+
"dtype_hex": "0x03",
44+
"dtype_alias": "INT8",
45+
"padding": 3
46+
},
47+
{
48+
"description": "INT8 with float inputs",
49+
"valid": false,
50+
"vector": [127.77, 7.77],
51+
"dtype_hex": "0x03",
52+
"dtype_alias": "INT8",
53+
"padding": 0
54+
}
55+
]
56+
}
57+

0 commit comments

Comments
 (0)