|
| 1 | +import * as fs from 'fs'; |
| 2 | +import * as path from 'path'; |
| 3 | +import { BSON, BSONError, Binary } from '../register-bson'; |
| 4 | +import { expect } from 'chai'; |
| 5 | + |
// Hex <-> bytes helpers taken from the library's own ByteUtils; used below to compare
// serialized documents against the hex strings stored in the spec files.
const { fromHex, toHex } = BSON.onDemand.ByteUtils;
| 7 | + |
// Endianness probe: store -1 as a 64-bit float and look at its raw bytes.
const FLOAT = new Float64Array([-1]);
const FLOAT_BYTES = new Uint8Array(FLOAT.buffer);

// Byte layout of -1:
// Little endian [0, 0, 0, 0, 0, 0, 240, 191]
// Big endian [191, 240, 0, 0, 0, 0, 0, 0]
// so the last byte is zero only when the host stores the most significant byte first.
const isBigEndian = FLOAT_BYTES[7] === 0;
| 15 | + |
/** Element type names used by the spec files to label a vector. */
type DTypeAlias = 'INT8' | 'FLOAT32' | 'PACKED_BIT';

/** One test case from a spec file. */
interface VectorTest {
  /** Human readable name; used as the mocha test title. */
  description: string;
  /** Elements of the vector; strings are used for values such as 'inf' and '-inf'. */
  vector: (number | string)[];
  /** Whether the case is expected to encode successfully (true) or to throw (false). */
  valid: boolean;
  /** Numeric dtype as a string (coerced with unary plus); written to the first byte of the binary. */
  dtype_hex: string;
  /** Name of the element type; selects how `vector` is converted to bytes. */
  dtype_alias: DTypeAlias;
  /** Value written to the second byte of the binary. */
  padding: number;
  /** Hex string of the expected serialized document; read by the valid cases. */
  canonical_bson?: string;
}

/** The contents of one spec file. */
interface VectorSuite {
  description: string;
  /** Document key under which the binary value is stored. */
  test_key: string;
  tests: VectorTest[];
}
| 27 | + |
/**
 * Converts one entry of a FLOAT32 test vector into a JavaScript number.
 *
 * @param f - a number, or one of the strings 'inf' / '-inf'
 * @returns `f` itself when it is a number, otherwise the matching infinity
 * @throws Error when `f` is any other string
 */
function fixFloats(f: string | number): number {
  switch (f) {
    case 'inf':
      return Infinity;
    case '-inf':
      return -Infinity;
    default:
      if (typeof f === 'number') return f;
      throw new Error(`unknown float value: ${f}`);
  }
}
| 40 | + |
/**
 * Validates one entry of an INT8 test vector.
 *
 * @param f - the raw value from the spec file
 * @returns `f` unchanged when it is an integer in [-128, 127]
 * @throws Error when `f` is not a number (malformed test data)
 * @throws BSONError when `f` is out of range or is not an integer
 */
function fixInt8s(f: number | string): number {
  if (typeof f !== 'number') throw new Error('unexpected test data');

  // TODO(NODE-6537): this must be a part of the final "make a binary from" API.
  if (f < -128 || f > 127) {
    throw new BSONError(`int8 out of range: ${f}`);
  }
  // Int8Array would silently truncate fractions and turn NaN into 0, so reject them
  // here, the same way fixBits insists on an integer.
  if (!Number.isInteger(f)) {
    throw new BSONError(`int8 must be an integer: ${f}`);
  }
  return f;
}
| 50 | + |
/**
 * Validates one entry of a PACKED_BIT test vector.
 *
 * @param f - the raw value from the spec file
 * @returns `f` unchanged when it is an integer in [0, 255]
 * @throws Error when `f` is not a number (malformed test data)
 * @throws BSONError when `f` is not an integer or lies outside [0, 255]
 */
function fixBits(f: number | string): number {
  if (typeof f === 'number') {
    const isByte = Number.isSafeInteger(f) && f >= 0 && f <= 255;
    if (isByte) return f;

    // TODO(NODE-6537): this must be a part of the final "make a binary from" API.
    throw new BSONError(`bit out of range: ${f}`);
  }

  throw new Error('unexpected test data');
}
| 60 | + |
/**
 * TODO(NODE-6537): Replace the following with final "make a binary from" API
 *
 * Builds a subtype 9 Binary from a spec test vector. The resulting bytes are:
 * byte 0 = `dtype_hex` coerced to a number, byte 1 = `padding`, followed by the elements.
 *
 * @param vector - raw elements from the spec file
 * @param dtype_hex - numeric dtype given as a string
 * @param dtype_alias - selects element validation and encoding
 * @param padding - value stored in the second byte
 * @throws Error when `dtype_alias` is not recognised; the per-element helpers may also throw
 */
function VECTOR_TO_BINARY(
  vector: (number | string)[],
  dtype_hex: string,
  dtype_alias: DTypeAlias,
  padding: number
): Binary {
  let payload: Uint8Array;

  if (dtype_alias === 'INT8' || dtype_alias === 'PACKED_BIT') {
    const validate = dtype_alias === 'INT8' ? fixInt8s : fixBits;
    // Values above 127 wrap when stored as int8, but the underlying byte is unchanged.
    const elements = new Int8Array(vector.map(validate));
    payload = new Uint8Array(elements.buffer);
  } else if (dtype_alias === 'FLOAT32') {
    const elements = new Float32Array(vector.map(fixFloats));
    payload = new Uint8Array(elements.buffer);
    if (isBigEndian) {
      // Floats are written least significant byte first, so flip each 4-byte group in place.
      for (let offset = 0; offset < payload.length; offset += 4) {
        payload.subarray(offset, offset + 4).reverse();
      }
    }
  } else {
    throw new Error(`Unknown dtype_alias: ${dtype_alias}`);
  }

  const bytes = new Uint8Array(payload.byteLength + 2);
  bytes[0] = +dtype_hex;
  bytes[1] = padding;
  bytes.set(payload, 2);
  return new Binary(bytes, 9);
}
| 100 | + |
/**
 * Runs every suite found in specs/bson-binary-vector.
 * Valid cases are encoded and compared with canonical_bson, then decoded and compared with
 * the JSON inputs. Invalid cases must throw a BSONError while being encoded.
 */
describe('BSON Binary Vector spec tests', () => {
  const specDirectory = path.join(__dirname, 'specs/bson-binary-vector');
  const tests: Record<string, VectorSuite> = Object.create(null);

  for (const file of fs.readdirSync(specDirectory)) {
    // Only JSON files are suites; anything else in the directory would make JSON.parse throw.
    if (path.extname(file) !== '.json') continue;
    tests[file.split('.')[0]] = JSON.parse(fs.readFileSync(path.join(specDirectory, file), 'utf8'));
  }

  /** Returns the lower-cased canonical_bson of a test, failing clearly when it is absent. */
  function canonicalHex(test: VectorTest): string {
    if (typeof test.canonical_bson !== 'string') {
      throw new Error(`test is missing canonical_bson: ${test.description}`);
    }
    return test.canonical_bson.toLowerCase();
  }

  /**
   * Reads the elements that follow the two header bytes (dtype, padding).
   * Floats are read as little endian regardless of the host byte order.
   */
  function decodeVector(bytes: Uint8Array, dtype_alias: DTypeAlias): number[] {
    const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
    const values: number[] = [];
    switch (dtype_alias) {
      case 'INT8':
        for (let i = 2; i < bytes.byteLength; i++) values.push(view.getInt8(i));
        return values;
      case 'PACKED_BIT':
        for (let i = 2; i < bytes.byteLength; i++) values.push(view.getUint8(i));
        return values;
      case 'FLOAT32':
        for (let i = 2; i + 4 <= bytes.byteLength; i += 4) values.push(view.getFloat32(i, true));
        return values;
      default:
        throw new Error(`Unknown dtype_alias: ${dtype_alias}`);
    }
  }

  for (const [suiteName, suite] of Object.entries(tests)) {
    describe(suiteName, function () {
      const valid = suite.tests.filter(t => t.valid);
      const invalid = suite.tests.filter(t => !t.valid);

      describe('valid', function () {
        /**
         * 1. encode a document from the numeric values, dtype, and padding, along with the "test_key", and assert this matches the canonical_bson string.
         * 2. decode the canonical_bson into its binary form, and then assert that the numeric values, dtype, and padding all match those provided in the JSON.
         *
         * > Note: For floating point number types, exact numerical matches may not be possible.
         * > Drivers that natively support the floating-point type being tested (e.g., when testing float32 vector values in a driver that natively supports float32),
         * > MUST assert that the input float array is the same after encoding and decoding.
         */
        for (const test of valid) {
          it(`encode ${test.description}`, function () {
            const bin = VECTOR_TO_BINARY(
              test.vector,
              test.dtype_hex,
              test.dtype_alias,
              test.padding
            );

            const buffer = BSON.serialize({ [suite.test_key]: bin });
            expect(toHex(buffer)).to.equal(canonicalHex(test));
          });

          it(`decode ${test.description}`, function () {
            const canonical_bson = fromHex(canonicalHex(test));
            const doc = BSON.deserialize(canonical_bson);
            const binary: Binary = doc[suite.test_key];

            expect(binary.sub_type).to.equal(0x09);
            expect(binary.buffer[0]).to.equal(+test.dtype_hex);
            expect(binary.buffer[1]).to.equal(test.padding);

            // The JSON values are doubles; round them through float32 so the comparison is
            // made in the element type that was actually stored.
            const expected =
              test.dtype_alias === 'FLOAT32'
                ? Array.from(new Float32Array(test.vector.map(fixFloats)))
                : test.vector;
            const decoded = decodeVector(
              binary.buffer.subarray(0, binary.position),
              test.dtype_alias
            );
            expect(decoded).to.deep.equal(expected);
          });
        }
      });

      describe('invalid', function () {
        /**
         * To prove correct in an invalid case (valid:false),
         * one MUST raise an exception when attempting to encode
         * a document from the numeric values, dtype, and padding.
         */
        for (const test of invalid) {
          it(test.description, function () {
            expect(() => {
              // Errors are thrown when creating the binary because of invalid values in the vector.
              const binary = VECTOR_TO_BINARY(
                test.vector,
                test.dtype_hex,
                test.dtype_alias,
                test.padding
              );
              // vector assertions TODO(NODE-6537): Replace the following with final "make a binary from" API.
              if (binary.sub_type === 0x09) {
                const dtype = { float32: 0x27, int8: 0x03, bit: 0x10 } as const;

                const size = binary.position;
                const data = binary.buffer;
                const d_type = data[0] ?? 0;
                const padding = data[1] ?? 0;

                if ((d_type === dtype.float32 || d_type === dtype.int8) && padding !== 0) {
                  throw new BSONError('padding must be zero for int8 and float32 vectors');
                }

                // A size of 2 means only the dtype and padding bytes are present.
                if (d_type === dtype.bit && padding !== 0 && size === 2) {
                  throw new BSONError('padding must be zero for packed bit vectors that are empty');
                }

                if (d_type === dtype.bit && padding > 7) {
                  throw new BSONError(`padding must be a value between 0 and 7. found: ${data[1]}`);
                }
              }
            }).to.throw(BSONError);
          });
        }
      });
    });
  }
});
0 commit comments