|
| 1 | +import * as fs from 'fs'; |
| 2 | +import * as path from 'path'; |
| 3 | +import { BSON, BSONError, Binary } from '../register-bson'; |
| 4 | +import { expect } from 'chai'; |
| 5 | + |
// Hex helpers from the on-demand ByteUtils, used to compare serialized bytes with the spec's hex strings.
const toHex = BSON.onDemand.ByteUtils.toHex;
const fromHex = BSON.onDemand.ByteUtils.fromHex;
| 7 | + |
// Host byte-order probe: store -1 as a float64 and look at its raw bytes.
//   little endian layout: [0, 0, 0, 0, 0, 0, 240, 191]
//   big endian layout:    [191, 240, 0, 0, 0, 0, 0, 0]
const FLOAT = Float64Array.of(-1);
const FLOAT_BYTES = new Uint8Array(FLOAT.buffer);

// Only the big endian layout ends with a zero byte.
const isBigEndian = FLOAT_BYTES[7] === 0;
| 15 | + |
/** The `dtype_hex` strings used by the spec files: '0x03' (int8), '0x27' (float32), '0x10' (packed bit). */
type VectorHexType = '0x03' | '0x27' | '0x10';

/** A single test case inside a spec suite. */
interface VectorTest {
  description: string;
  /** Raw vector elements; string entries such as 'inf' and '-inf' stand in for non-finite floats. */
  vector: Array<number | string>;
  /** Valid cases are round-tripped, invalid cases are expected to fail encoding. */
  valid: boolean;
  dtype_hex: VectorHexType;
  padding?: number;
  /** Hex string of the expected BSON document. */
  canonical_bson?: string;
}

/** The contents of one spec JSON file. */
interface VectorSuite {
  description: string;
  /** Document key under which the vector is stored. */
  test_key: string;
  tests: VectorTest[];
}
| 26 | + |
/**
 * Checks the padding byte of a vector Binary (sub_type 9) against its dtype byte.
 *
 * Binaries with any other sub_type are ignored. Byte 0 of the buffer is read as the
 * dtype and byte 1 as the padding; a missing byte is treated as 0.
 *
 * @param vector - the Binary to inspect
 * @throws BSONError when int8/float32 vectors carry padding, when a packed bit vector
 * with no data (position of 2) carries padding, or when packed bit padding exceeds 7
 */
function validateVector(vector: Binary): void {
  const INT8 = 0x03;
  const FLOAT32 = 0x27;
  const PACKED_BIT = 0x10;

  if (vector.sub_type !== 9) return;

  const byteCount = vector.position;
  const dataType = vector.buffer[0] ?? 0;
  const paddingBits = vector.buffer[1] ?? 0;

  switch (dataType) {
    case INT8:
    case FLOAT32:
      if (paddingBits !== 0) {
        throw new BSONError('Invalid Vector: padding must be zero for int8 and float32 vectors');
      }
      return;

    case PACKED_BIT:
      // A position of 2 means only the dtype and padding bytes are present.
      if (paddingBits !== 0 && byteCount === 2) {
        throw new BSONError(
          'Invalid Vector: padding must be zero for packed bit vectors that are empty'
        );
      }
      if (paddingBits > 7) {
        throw new BSONError(
          `Invalid Vector: padding must be a value between 0 and 7. found: ${paddingBits}`
        );
      }
      return;

    default:
      // Unknown dtypes are not validated here.
      return;
  }
}
| 56 | + |
/**
 * Converts one float32 spec element to a number.
 *
 * @param f - a number, or one of the strings 'inf' / '-inf'
 * @returns the number itself, or the matching infinity for the string forms
 * @throws Error for any other string
 */
function fixFloats(f: string | number): number {
  switch (f) {
    case 'inf':
      return Infinity;
    case '-inf':
      return -Infinity;
    default:
      if (typeof f === 'number') return f;
      throw new Error(`test format error: unknown float value: ${f}`);
  }
}
| 69 | + |
/**
 * Validates one int8 spec element before it is stored in an Int8Array.
 *
 * @param f - the raw element from the spec file
 * @returns `f` unchanged when it is an integer in the range -128 to 127
 * @throws Error prefixed with 'test format error' when `f` is not a number
 * @throws Error prefixed with 'unsupported_error' when `f` is fractional, NaN, or out of range
 */
function fixInt8s(f: number | string): number {
  if (typeof f !== 'number') throw new Error('test format error: unexpected test data');

  // Javascript Int8Array only supports integers from -128 to 127. It would silently truncate
  // a fraction and store NaN as 0, so those inputs are rejected here as well.
  if (!Number.isInteger(f) || f < -128 || f > 127) {
    throw new Error(`unsupported_error: int8 out of range: ${f}`);
  }
  return f;
}
| 79 | + |
/**
 * Validates one packed bit spec element, i.e. a single byte value.
 *
 * @param f - the raw element from the spec file
 * @returns `f` unchanged when it is an integer in the range 0 to 255
 * @throws Error prefixed with 'test format error' when `f` is not a number
 * @throws Error prefixed with 'unsupported_error' when `f` is not an integer in that range
 */
function fixBits(f: number | string): number {
  if (typeof f !== 'number') throw new Error('test format error: unexpected test data');

  // Javascript Uint8Array only supports values from 0 to 255
  const fitsInByte = Number.isSafeInteger(f) && f >= 0 && f <= 255;
  if (!fitsInByte) {
    throw new Error(`unsupported_error: bit out of range: ${f}`);
  }
  return f;
}
| 89 | + |
/**
 * Builds a vector Binary (sub_type 9) from the raw values of a spec test.
 *
 * The buffer layout is: byte 0 = dtype, byte 1 = padding, remaining bytes = the
 * encoded elements. float32 elements are written in little endian byte order.
 *
 * @param vector - raw elements from the spec file
 * @param dtype_hex - which element encoding to use
 * @param padding - value for the padding byte, 0 when omitted
 * @returns the assembled Binary
 * @throws Error when an element is rejected by its fix* helper or the dtype is unknown
 */
function make(vector: (number | string)[], dtype_hex: VectorHexType, padding?: number): Binary {
  const HEADER_LENGTH = 2;
  let payload: Uint8Array;

  if (dtype_hex === '0x03' /* int8 */ || dtype_hex === '0x10' /* packed_bit */) {
    const sanitize = dtype_hex === '0x03' ? fixInt8s : fixBits;
    const elements = new Int8Array(vector.map(sanitize));
    // Reinterpret the same bytes as unsigned so they can be copied verbatim.
    payload = new Uint8Array(elements.buffer);
  } else if (dtype_hex === '0x27' /* float32 */) {
    const elements = new Float32Array(vector.map(fixFloats));
    payload = new Uint8Array(elements.buffer);
    if (isBigEndian) {
      // Flip each 4 byte element in place so the payload ends up little endian.
      for (let offset = 0; offset < payload.length; offset += 4) {
        payload.subarray(offset, offset + 4).reverse();
      }
    }
  } else {
    throw new Error(`Unknown dtype_hex: ${dtype_hex}`);
  }

  const bytes = new Uint8Array(payload.byteLength + HEADER_LENGTH);
  bytes.set(payload, HEADER_LENGTH);

  const binary = new Binary(bytes, 9);
  // Unary plus parses the '0x..' string into its numeric value.
  binary.buffer[0] = +dtype_hex;
  binary.buffer[1] = padding ?? 0;

  return binary;
}
| 127 | + |
/**
 * Expected outcome of every invalid spec test, keyed by the test description.
 *
 * - a string is used as the pattern the thrown error message must match
 * - `false` marks a test that is expected to be skipped because building the
 *   vector raises an 'unsupported_error'
 *
 * Each description is listed once: setting the same key again with the same value
 * would leave the Map unchanged.
 */
const invalidTestExpectedError = new Map()
  .set('FLOAT32 with padding', 'Invalid Vector: padding must be zero for int8 and float32 vectors')
  .set('INT8 with padding', 'Invalid Vector: padding must be zero for int8 and float32 vectors')
  .set(
    'Padding specified with no vector data PACKED_BIT',
    'Invalid Vector: padding must be zero for packed bit vectors that are empty'
  )
  .set(
    'Exceeding maximum padding PACKED_BIT',
    'Invalid Vector: padding must be a value between 0 and 7'
  )
  .set('Negative padding PACKED_BIT', 'Invalid Vector: padding must be a value between 0 and 7')
  // skipped
  .set('Overflow Vector PACKED_BIT', false)
  .set('Underflow Vector PACKED_BIT', false)
  .set('Overflow Vector INT8', false)
  .set('Underflow Vector INT8', false)
  .set('INT8 with float inputs', false)
  // this description is a duplicate test in the spec data; a single entry covers every occurrence
  .set('Vector with float values PACKED_BIT', false);
| 153 | + |
/**
 * Runs the BSON Binary Vector spec suites found in `specs/bson-binary-vector`.
 *
 * Every suite file contributes a `valid` group (encode and decode checks against
 * canonical_bson) and an `invalid` group (encoding must raise a BSONError).
 */
describe('BSON Binary Vector spec tests', () => {
  const specDirectory = path.join(__dirname, 'specs/bson-binary-vector');
  const tests: Record<string, VectorSuite> = Object.create(null);

  for (const file of fs.readdirSync(specDirectory)) {
    // Suites are JSON documents. Any other directory entry is skipped so that JSON.parse
    // is never handed non-JSON content while the suites are being defined.
    if (path.extname(file) !== '.json') continue;

    tests[path.basename(file, '.json')] = JSON.parse(
      fs.readFileSync(path.join(specDirectory, file), 'utf8')
    );
  }

  /** Returns the lower-cased canonical_bson hex of a test, failing with a clear message when it is absent. */
  const canonicalHex = (test: VectorTest): string => {
    if (test.canonical_bson == null) {
      throw new Error(`test format error: missing canonical_bson: ${test.description}`);
    }
    return test.canonical_bson.toLowerCase();
  };

  // Invalid vectors are checked against both encoders; the label becomes the test title prefix.
  const encoders = [
    ['bson', (bin: Binary) => BSON.serialize({ bin })],
    ['extended json', (bin: Binary) => BSON.EJSON.stringify({ bin })]
  ] as const;

  for (const [suiteName, suite] of Object.entries(tests)) {
    describe(suiteName, function () {
      const valid = suite.tests.filter(t => t.valid);
      const invalid = suite.tests.filter(t => !t.valid);

      describe('valid', function () {
        /**
         * 1. encode a document from the numeric values, dtype, and padding, along with the "test_key", and assert this matches the canonical_bson string.
         * 2. decode the canonical_bson into its binary form, and then assert that the numeric values, dtype, and padding all match those provided in the JSON.
         *
         * > Note: For floating point number types, exact numerical matches may not be possible.
         * > Drivers that natively support the floating-point type being tested (e.g., when testing float32 vector values in a driver that natively supports float32),
         * > MUST assert that the input float array is the same after encoding and decoding.
         */
        for (const test of valid) {
          it(`encode ${test.description}`, function () {
            const bin = make(test.vector, test.dtype_hex, test.padding);

            const buffer = BSON.serialize({ [suite.test_key]: bin });
            expect(toHex(buffer)).to.equal(canonicalHex(test));
          });

          it(`decode ${test.description}`, function () {
            const doc = BSON.deserialize(fromHex(canonicalHex(test)));
            const decoded = doc[suite.test_key];

            expect(decoded.sub_type).to.equal(0x09);
            expect(decoded.buffer[0]).to.equal(+test.dtype_hex);
            // padding is optional in the test data; make() writes 0 when it is omitted,
            // so the decoded byte has to be compared against the same default.
            expect(decoded.buffer[1]).to.equal(test.padding ?? 0);
          });
        }
      });

      describe('invalid', function () {
        /**
         * To prove correct in an invalid case (valid:false),
         * one MUST raise an exception when attempting to encode
         * a document from the numeric values, dtype, and padding.
         */
        for (const test of invalid) {
          const expectedErrorMessage = invalidTestExpectedError.get(test.description);

          for (const [format, encode] of encoders) {
            it(`${format}: ${test.description}`, function () {
              let thrownError: Error | undefined;
              try {
                const bin = make(test.vector, test.dtype_hex, test.padding);
                encode(bin);
                // TODO(NODE-6537): The following validation MUST be a part of serialize and stringify
                validateVector(bin);
              } catch (error) {
                // Anything that is not an Error is unexpected here; let it fail the test as-is.
                if (!(error instanceof Error)) throw error;
                thrownError = error;
              }

              // make() signals inputs it cannot represent with an 'unsupported_error';
              // such tests must be registered as skipped (false) and are then skipped.
              if (thrownError?.message.startsWith('unsupported_error')) {
                expect(
                  expectedErrorMessage,
                  'We expect a certain error message but got an unsupported error'
                ).to.be.false;
                this.skip();
              }

              expect(thrownError).to.be.instanceOf(BSONError);
              expect(thrownError?.message).to.match(new RegExp(expectedErrorMessage));
            });
          }
        }
      });
    });
  }
});
0 commit comments