Skip to content

Commit adbf3e5

Browse files
committed
feat(NODE-6537): add support for binary vectors
1 parent d3fe6e0 commit adbf3e5

File tree

3 files changed

+354
-1
lines changed

3 files changed

+354
-1
lines changed

src/binary.ts

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { BSONError } from './error';
44
import { BSON_BINARY_SUBTYPE_UUID_NEW } from './constants';
55
import { ByteUtils } from './utils/byte_utils';
66
import { BSONValue } from './bson_value';
7+
import { NumberUtils } from './utils/number_utils';
78

89
/** @public */
910
export type BinarySequence = Uint8Array | number[];
@@ -22,6 +23,15 @@ export interface BinaryExtended {
2223
};
2324
}
2425

26+
/**
 * Creates a copy of the Uint8Array bytes in the range [start, end).
 *
 * Delegates to `Uint8Array.prototype.slice`, so the returned array never shares
 * memory with `bytes`, and out-of-range indexes are clamped the same way `slice` clamps them.
 *
 * @param bytes - the source bytes
 * @param start - index of the first byte to copy (inclusive)
 * @param end - index at which to stop copying (exclusive)
 * @returns a new array holding the copied bytes
 */
const copy = (bytes: Uint8Array, start: number, end: number): Uint8Array =>
  // Invoke the intrinsic slice explicitly rather than `bytes.slice(...)` so that
  // subclasses overriding `slice` cannot hand back a view instead of a copy.
  Uint8Array.prototype.slice.call(bytes, start, end);
34+
2535
/**
2636
* A class representation of the BSON Binary type.
2737
* @public
@@ -58,9 +68,18 @@ export class Binary extends BSONValue {
5868
static readonly SUBTYPE_COLUMN = 7;
5969
/** Sensitive BSON type */
6070
static readonly SUBTYPE_SENSITIVE = 8;
71+
/** Vector BSON type */
72+
static readonly SUBTYPE_VECTOR = 9;
6173
/** User BSON type */
6274
static readonly SUBTYPE_USER_DEFINED = 128;
6375

76+
/**
 * d_type of a Binary Vector (subtype: 9).
 *
 * This is the first byte of a Vector payload and identifies the element encoding.
 * The values are fixed by the BSON Binary Vector specification and must match
 * other drivers byte-for-byte:
 * - `Int8` (0x03): signed 8-bit integers
 * - `Float32` (0x27): 32-bit floats
 * - `PackedBit` (0x10): 1-bit elements packed eight to a byte
 */
static readonly VECTOR_TYPE = Object.freeze({
  Int8: 0x03,
  Float32: 0x27,
  PackedBit: 0x10
} as const);
82+
6483
buffer!: Uint8Array;
6584
sub_type!: number;
6685
position!: number;
@@ -272,6 +291,158 @@ export class Binary extends BSONValue {
272291
const subTypeArg = inspect(this.sub_type, options);
273292
return `Binary.createFromBase64(${base64Arg}, ${subTypeArg})`;
274293
}
294+
295+
/**
 * Returns the elements of an Int8 Vector as a newly allocated Int8Array.
 *
 * The first two bytes of the Binary (d_type and padding) are skipped; the bytes
 * from index 2 up to `position` are copied, so the result does not alias this
 * Binary's buffer.
 *
 * @returns a copy of the vector elements
 * @throws BSONError if sub_type is not Vector, or the d_type byte is not Int8
 */
public toInt8Array(): Int8Array {
  const isVector = this.sub_type === Binary.SUBTYPE_VECTOR;
  if (!isVector) {
    throw new BSONError('Binary sub_type is not Vector');
  }

  // An empty buffer has no d_type byte; treat it as 0 so it is rejected below.
  const dType = this.buffer[0] ?? 0;
  if (dType !== Binary.VECTOR_TYPE.Int8) {
    throw new BSONError('Binary d_type field is not Int8');
  }

  const payload = copy(this.buffer, 2, this.position);
  return new Int8Array(payload.buffer);
}
310+
311+
/**
 * If this Binary represents a Float32 Vector,
 * returns a copy of the bytes in a new Float32Array.
 *
 * The first two bytes (d_type and padding) are skipped. The stored elements are
 * little-endian; on a big-endian host each 4-byte element of the copy is
 * reversed so the resulting Float32Array reads the intended values.
 *
 * @returns a copy of the vector elements
 * @throws BSONError if sub_type is not Vector, or the d_type byte is not Float32
 */
public toFloat32Array(): Float32Array {
  if (this.sub_type !== Binary.SUBTYPE_VECTOR) {
    throw new BSONError('Binary sub_type is not Vector');
  }

  // An empty buffer has no d_type byte; treat it as 0 so it is rejected below.
  if ((this.buffer[0] ?? 0) !== Binary.VECTOR_TYPE.Float32) {
    throw new BSONError('Binary d_type field is not Float32');
  }

  const bytes = copy(this.buffer, 2, this.position);
  if (NumberUtils.isBigEndian) {
    // Reverse every complete 4-byte group in place (b0 b1 b2 b3 -> b3 b2 b1 b0).
    // A single XOR per pair does not swap; do an explicit exchange.
    for (let i = 0; i + 3 < bytes.length; i += 4) {
      const b0 = bytes[i];
      const b1 = bytes[i + 1];
      bytes[i] = bytes[i + 3];
      bytes[i + 1] = bytes[i + 2];
      bytes[i + 2] = b1;
      bytes[i + 3] = b0;
    }
  }
  return new Float32Array(bytes.buffer);
}
333+
334+
/**
 * Returns a copy of the packed bytes of a packed bit Vector.
 *
 * The two header bytes (d_type and padding) are not included, and the bits are
 * left packed eight to a byte.
 *
 * Use `toBits` to get the unpacked bits.
 *
 * @returns a copy of the bytes from index 2 up to `position`
 * @throws BSONError if sub_type is not Vector, or the d_type byte is not packed bit
 */
public toPackedBits(): Uint8Array {
  const isVector = this.sub_type === Binary.SUBTYPE_VECTOR;
  if (!isVector) {
    throw new BSONError('Binary sub_type is not Vector');
  }

  // An empty buffer has no d_type byte; treat it as 0 so it is rejected below.
  const dType = this.buffer[0] ?? 0;
  if (dType !== Binary.VECTOR_TYPE.PackedBit) {
    throw new BSONError('Binary d_type field is not packed bit');
  }

  const packed = copy(this.buffer, 2, this.position);
  return packed;
}
351+
352+
/**
 * If this Binary represents a Packed bit Vector,
 * returns a copy of the bits unpacked into a new Int8Array.
 *
 * Each element of the result is 0 or 1. Bits are read most-significant first
 * within each byte, and the trailing `padding` bits (second header byte) of the
 * last byte are omitted.
 *
 * @returns one array element per bit, in order
 * @throws BSONError if sub_type is not Vector, or the d_type byte is not packed bit
 */
public toBits(): Int8Array {
  if (this.sub_type !== Binary.SUBTYPE_VECTOR) {
    throw new BSONError('Binary sub_type is not Vector');
  }

  // An empty buffer has no d_type byte; treat it as 0 so it is rejected below.
  if ((this.buffer[0] ?? 0) !== Binary.VECTOR_TYPE.PackedBit) {
    throw new BSONError('Binary d_type field is not packed bit');
  }

  // Two header bytes (d_type, padding) precede the packed data.
  const byteCount = this.length() - 2;
  const bitCount = byteCount * 8 - this.buffer[1];
  const bits = new Int8Array(bitCount);

  for (let bitOffset = 0; bitOffset < bits.length; bitOffset++) {
    // Bit N lives in data byte floor(N / 8), counting from the most significant bit.
    const byte = this.buffer[Math.floor(bitOffset / 8) + 2];
    const shift = 7 - (bitOffset % 8);
    bits[bitOffset] = (byte >> shift) & 1;
  }
  return bits;
}
379+
380+
/**
 * Builds a Binary (subtype Vector) holding an Int8 Vector.
 *
 * The result is a two-byte header (d_type = Int8, padding = 0) followed by a
 * copy of the array's bytes.
 *
 * @param array - the Int8 elements to copy into the new Binary
 * @returns a new Binary with sub_type Vector
 */
public static fromInt8Array(array: Int8Array): Binary {
  const payloadLength = array.byteLength;
  const vectorBytes = ByteUtils.allocate(payloadLength + 2);

  // Header: element type, then padding (always 0 here).
  vectorBytes[0] = Binary.VECTOR_TYPE.Int8;
  vectorBytes[1] = 0;

  // Reinterpret the signed elements as raw bytes, honoring the view's offset.
  vectorBytes.set(new Uint8Array(array.buffer, array.byteOffset, payloadLength), 2);

  return new this(vectorBytes, this.SUBTYPE_VECTOR);
}
392+
393+
/**
 * Constructs a Binary representing a Float32 Vector.
 *
 * The result is a two-byte header (d_type = Float32, padding = 0) followed by a
 * copy of the array's elements in little-endian byte order. The input array is
 * never modified.
 *
 * @param array - the Float32 elements to copy into the new Binary
 * @returns a new Binary with sub_type Vector
 */
public static fromFloat32Array(array: Float32Array): Binary {
  const buffer = ByteUtils.allocate(array.byteLength + 2);
  buffer[0] = Binary.VECTOR_TYPE.Float32;
  buffer[1] = 0;

  // Copy the raw element bytes after the header.
  const floatBytes = new Uint8Array(array.buffer, array.byteOffset, array.byteLength);
  buffer.set(floatBytes, 2);

  if (NumberUtils.isBigEndian) {
    // Reverse each 4-byte element inside the new buffer (not the caller's array)
    // so the stored bytes are little-endian.
    for (let i = 2; i + 3 < buffer.length; i += 4) {
      const b0 = buffer[i];
      const b1 = buffer[i + 1];
      buffer[i] = buffer[i + 3];
      buffer[i + 1] = buffer[i + 2];
      buffer[i + 2] = b1;
      buffer[i + 3] = b0;
    }
  }
  return new this(buffer, this.SUBTYPE_VECTOR);
}
407+
408+
/**
 * Builds a Binary (subtype Vector) holding a packed bit Vector from bytes that
 * are already packed.
 *
 * The result is a two-byte header (d_type = packed bit, then `padding`)
 * followed by a copy of `array`. `padding` is stored as given, without validation.
 *
 * Use `fromBits` to pack an array of 1s and 0s.
 *
 * @param array - the packed bytes to copy into the new Binary
 * @param padding - value written to the padding header byte; defaults to 0
 * @returns a new Binary with sub_type Vector
 */
public static fromPackedBits(array: Uint8Array, padding = 0): Binary {
  const vectorBytes = ByteUtils.allocate(array.byteLength + 2);

  // Payload goes after the two header bytes.
  vectorBytes.set(array, 2);

  // Header: element type, then padding.
  vectorBytes[0] = Binary.VECTOR_TYPE.PackedBit;
  vectorBytes[1] = padding;

  return new this(vectorBytes, this.SUBTYPE_VECTOR);
}
420+
421+
/**
 * Builds a Binary (subtype Vector) holding a packed bit Vector from unpacked bits.
 *
 * Every truthy element is stored as a 1 bit and every falsy element as a 0 bit.
 * Bits fill each byte starting from the most significant bit; the padding header
 * byte records how many low-order bits of the final byte are unused.
 *
 * @param bits - The array of 1s and 0s to pack into the Binary instance
 * @returns a new Binary with sub_type Vector
 */
public static fromBits(bits: ArrayLike<number>): Binary {
  const bitCount = bits.length;

  // Two header bytes, then one byte per started group of eight bits.
  const packed = new Uint8Array(Math.ceil(bitCount / 8) + 2);
  packed[0] = Binary.VECTOR_TYPE.PackedBit;

  // Unused bits in the last byte: 0 when bitCount is a multiple of 8.
  packed[1] = (8 - (bitCount % 8)) % 8;

  for (let index = 0; index < bitCount; index++) {
    // Bytes start zeroed, so only set bits need writing.
    if (!bits[index]) continue;

    const target = Math.floor(index / 8) + 2;
    packed[target] |= 1 << (7 - (index % 8));
  }

  return new this(packed, Binary.SUBTYPE_VECTOR);
}
275446
}
276447

277448
/** @public */

src/utils/number_utils.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ const isBigEndian = FLOAT_BYTES[7] === 0;
1313
* A collection of functions that get or set various numeric types and bit widths from a Uint8Array.
1414
*/
1515
export type NumberUtils = {
16+
/** Is true if the current system is big endian. */
17+
isBigEndian: boolean;
1618
/**
1719
* Parses a signed int32 at offset. Throws a `RangeError` if value is negative.
1820
*/
@@ -35,6 +37,8 @@ export type NumberUtils = {
3537
* @public
3638
*/
3739
export const NumberUtils: NumberUtils = {
40+
isBigEndian,
41+
3842
getNonnegativeInt32LE(source: Uint8Array, offset: number): number {
3943
if (source[offset + 3] > 127) {
4044
throw new RangeError(`Size cannot be negative at offset: ${offset}`);

0 commit comments

Comments
 (0)