Skip to content

Commit 0ff7122

Browse files
committed
feat(NODE-6537): add support for binary vectors
1 parent 1064783 commit 0ff7122

File tree

6 files changed

+477
-4
lines changed

6 files changed

+477
-4
lines changed

.evergreen/run-big-endian-test.sh

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,22 @@
1-
#!/usr/bin/env bash
1+
#! /usr/bin/env bash
22

3-
source $DRIVERS_TOOLS/.evergreen/init-node-and-npm-env.sh
3+
# At the time of writing, this script is not used in CI,
4+
# but can be used to locally iterate on big endian bugs.
5+
# buildx requires an output, so I put docs which should be a no-op.
46

5-
npx mocha test/s390x/big_endian.test.ts
7+
set -o errexit
8+
set -o nounset
9+
set -o pipefail
10+
set -o xtrace
11+
12+
# If you get an error you may have an outdated buildkit version
13+
# Try running this:
14+
# docker buildx rm builder && docker buildx create --name builder --bootstrap --use
15+
16+
docker buildx build \
17+
--progress=plain \
18+
--platform linux/s390x \
19+
--build-arg="NODE_ARCH=s390x" \
20+
-f ./.github/docker/Dockerfile \
21+
--output type=local,dest=./docs,platform-split=false \
22+
.

.github/docker/Dockerfile

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Build stage: copies the repository into /bson, installs dependencies and runs the test suite.
FROM node:22 AS build
2+
3+
WORKDIR /bson
4+
COPY . .
5+
6+
# Remove any node_modules that came in with the build context so the install happens inside the image.
RUN rm -rf node_modules && npm install && npm test
7+
8+
# Output stage: an empty image that only receives /bson/docs from the build stage.
FROM scratch
9+
10+
COPY --from=build /bson/docs/ /

src/binary.ts

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { BSONError } from './error';
44
import { BSON_BINARY_SUBTYPE_UUID_NEW } from './constants';
55
import { ByteUtils } from './utils/byte_utils';
66
import { BSONValue } from './bson_value';
7+
import { NumberUtils } from './utils/number_utils';
78

89
/** @public */
910
export type BinarySequence = Uint8Array | number[];
@@ -22,6 +23,15 @@ export interface BinaryExtended {
2223
};
2324
}
2425

26+
/**
 * Creates a copy of the Uint8Array bytes from `start` up to (not including) `end`.
 *
 * Always goes through `Uint8Array.prototype.slice` instead of `bytes.slice`
 * (presumably so that subclasses overriding `slice` cannot change the result - confirm).
 */
const copy = (bytes: Uint8Array, start: number, end: number): Uint8Array =>
  Uint8Array.prototype.slice.call(bytes, start, end);
34+
2535
/**
2636
* A class representation of the BSON Binary type.
2737
* @public
@@ -58,9 +68,18 @@ export class Binary extends BSONValue {
5868
static readonly SUBTYPE_COLUMN = 7;
5969
/** Sensitive BSON type */
6070
static readonly SUBTYPE_SENSITIVE = 8;
71+
/** Vector BSON type */
72+
static readonly SUBTYPE_VECTOR = 9;
6173
/** User BSON type */
6274
static readonly SUBTYPE_USER_DEFINED = 128;
6375

76+
/**
 * d_type of a Binary Vector (subtype: 9).
 * This is the value stored in the first byte (offset 0) of the vector's bytes
 * and identifies which kind of elements the vector holds.
 */
77+
static readonly VECTOR_TYPE = Object.freeze({
78+
Int8: 0x03,
79+
Float32: 0x27,
80+
PackedBit: 0x10
81+
} as const);
82+
6483
/**
6584
* The bytes of the Binary value.
6685
*
@@ -238,6 +257,11 @@ export class Binary extends BSONValue {
238257
/** @internal */
239258
toExtendedJSON(options?: EJSONOptions): BinaryExtendedLegacy | BinaryExtended {
240259
options = options || {};
260+
261+
if (this.sub_type === Binary.SUBTYPE_VECTOR) {
262+
Binary.validateVector(this);
263+
}
264+
241265
const base64String = ByteUtils.toBase64(this.buffer);
242266

243267
const subType = Number(this.sub_type).toString(16);
@@ -310,6 +334,207 @@ export class Binary extends BSONValue {
310334
const subTypeArg = inspect(this.sub_type, options);
311335
return `Binary.createFromBase64(${base64Arg}, ${subTypeArg})`;
312336
}
337+
338+
/**
 * Reads this Binary as an Int8 Vector.
 *
 * The two leading bytes (d_type and padding) are skipped and the remaining
 * bytes, up to `position`, are copied into a new Int8Array.
 *
 * @returns a new Int8Array backed by its own copy of the element bytes
 * @throws BSONError if sub_type is not Vector or the d_type byte is not Int8
 */
public toInt8Array(): Int8Array {
  const isVector = this.sub_type === Binary.SUBTYPE_VECTOR;
  if (!isVector) {
    throw new BSONError('Binary sub_type is not Vector');
  }

  const dataType = this.buffer[0];
  if (dataType !== Binary.VECTOR_TYPE.Int8) {
    throw new BSONError('Binary d_type field is not Int8');
  }

  const elementBytes = copy(this.buffer, 2, this.position);
  return new Int8Array(elementBytes.buffer);
}
353+
354+
/**
 * If this Binary represents a Float32 Vector,
 * returns a copy of the bytes in a new Float32Array.
 *
 * The two leading bytes (d_type and padding) are skipped. On a big-endian
 * system every 4-byte element of the copy is byte-reversed before it is
 * reinterpreted as float32, so the returned values match what was stored.
 *
 * @returns a new Float32Array backed by its own copy of the element bytes
 * @throws BSONError if sub_type is not Vector, the d_type byte is not Float32,
 * or the element bytes are not a whole number of 4-byte floats
 */
public toFloat32Array(): Float32Array {
  if (this.sub_type !== Binary.SUBTYPE_VECTOR) {
    throw new BSONError('Binary sub_type is not Vector');
  }

  if (this.buffer[0] !== Binary.VECTOR_TYPE.Float32) {
    throw new BSONError('Binary d_type field is not Float32');
  }

  const floatBytes = copy(this.buffer, 2, this.position);

  // A Float32Array view needs a byte length divisible by 4; report a truncated
  // vector as a BSONError instead of letting the constructor throw a RangeError.
  if (floatBytes.byteLength % Float32Array.BYTES_PER_ELEMENT !== 0) {
    throw new BSONError('Invalid Vector: Float32 vector must contain a multiple of 4 bytes');
  }

  if (NumberUtils.isBigEndian) {
    // Reverse each 4-byte group in place so the host reads the intended value.
    for (let i = 0; i < floatBytes.byteLength; i += 4) {
      const byte0 = floatBytes[i];
      const byte1 = floatBytes[i + 1];
      const byte2 = floatBytes[i + 2];
      const byte3 = floatBytes[i + 3];
      floatBytes[i] = byte3;
      floatBytes[i + 1] = byte2;
      floatBytes[i + 2] = byte1;
      floatBytes[i + 3] = byte0;
    }
  }
  return new Float32Array(floatBytes.buffer);
}
382+
383+
/**
 * Reads this Binary as a packed bit Vector and returns the packed bytes,
 * i.e. everything after the two leading bytes (d_type and padding) up to
 * `position`, as a new Uint8Array.
 *
 * Use `toBits` to get the unpacked bits.
 *
 * @returns a copy of the packed bytes
 * @throws BSONError if sub_type is not Vector or the d_type byte is not packed bit
 */
public toPackedBits(): Uint8Array {
  const isVector = this.sub_type === Binary.SUBTYPE_VECTOR;
  if (!isVector) {
    throw new BSONError('Binary sub_type is not Vector');
  }

  const dataType = this.buffer[0];
  if (dataType !== Binary.VECTOR_TYPE.PackedBit) {
    throw new BSONError('Binary d_type field is not packed bit');
  }

  const packedBytes = copy(this.buffer, 2, this.position);
  return packedBytes;
}
400+
401+
/**
 * If this Binary represents a Packed bit Vector,
 * returns a copy of the bits unpacked into a new Int8Array.
 *
 * Each element of the result is 0 or 1. Bits are taken most-significant first
 * from each packed byte, and the number of trailing bits given by the padding
 * byte (offset 1) is left out of the result.
 *
 * @returns a new Int8Array with one element per stored bit
 * @throws BSONError if sub_type is not Vector, the d_type byte is not packed bit,
 * the padding is invalid, or the vector is shorter than its 2 byte header
 */
public toBits(): Int8Array {
  if (this.sub_type !== Binary.SUBTYPE_VECTOR) {
    throw new BSONError('Binary sub_type is not Vector');
  }

  if (this.buffer[0] !== Binary.VECTOR_TYPE.PackedBit) {
    throw new BSONError('Binary d_type field is not packed bit');
  }

  // Rejects padding > 7 and non-zero padding on an empty vector, either of
  // which would otherwise produce a wrong or negative bit count below.
  Binary.validateVector(this);

  const byteCount = this.length() - 2;
  if (byteCount < 0) {
    throw new BSONError('Invalid Vector: packed bit vector must be at least 2 bytes long');
  }

  const bitCount = byteCount * 8 - this.buffer[1];
  const bits = new Int8Array(bitCount);

  for (let bitOffset = 0; bitOffset < bits.length; bitOffset++) {
    const byteOffset = (bitOffset / 8) | 0;
    // +2 skips the d_type and padding bytes
    const byte = this.buffer[byteOffset + 2];
    // bit 0 of the vector is the most significant bit of the first packed byte
    const shift = 7 - (bitOffset % 8);
    const bit = (byte >> shift) & 1;
    bits[bitOffset] = bit;
  }

  return bits;
}
428+
429+
/**
 * Constructs a Binary representing an Int8 Vector.
 *
 * The bytes of `array` are copied behind a two byte header
 * (d_type = Int8, padding = 0); the new Binary does not share memory with `array`.
 *
 * @param array - The Int8Array whose elements are copied into the Binary
 * @returns a Binary with sub_type Vector
 */
public static fromInt8Array(array: Int8Array): Binary {
  const headerSize = 2;
  const vectorBytes = ByteUtils.allocate(array.byteLength + headerSize);
  vectorBytes[0] = Binary.VECTOR_TYPE.Int8;
  vectorBytes[1] = 0;

  // View the int8 elements as raw bytes, honoring the source view's offset and length.
  const elementBytes = new Uint8Array(array.buffer, array.byteOffset, array.byteLength);
  vectorBytes.set(elementBytes, headerSize);

  return new this(vectorBytes, this.SUBTYPE_VECTOR);
}
441+
442+
/**
 * Constructs a Binary representing a Float32 Vector.
 *
 * The bytes of `array` are copied behind a two byte header
 * (d_type = Float32, padding = 0). On a big-endian system each 4-byte
 * element of the copy is then byte-reversed.
 *
 * @param array - The Float32Array whose elements are copied into the Binary
 * @returns a Binary with sub_type Vector
 */
public static fromFloat32Array(array: Float32Array): Binary {
  const payloadLength = array.byteLength;
  const vectorBytes = ByteUtils.allocate(payloadLength + 2);
  vectorBytes[0] = Binary.VECTOR_TYPE.Float32;
  vectorBytes[1] = 0;

  // View the float elements as raw bytes, honoring the source view's offset and length.
  vectorBytes.set(new Uint8Array(array.buffer, array.byteOffset, payloadLength), 2);

  if (NumberUtils.isBigEndian) {
    // Start at 2 to leave the header untouched; reverse one float (4 bytes) per step.
    for (let at = 2; at < vectorBytes.byteLength; at += 4) {
      [vectorBytes[at], vectorBytes[at + 1], vectorBytes[at + 2], vectorBytes[at + 3]] = [
        vectorBytes[at + 3],
        vectorBytes[at + 2],
        vectorBytes[at + 1],
        vectorBytes[at]
      ];
    }
  }

  return new this(vectorBytes, this.SUBTYPE_VECTOR);
}
466+
467+
/**
 * Constructs a Binary representing a packed bit Vector.
 *
 * The bytes of `array` are copied behind a two byte header
 * (d_type = PackedBit, padding).
 *
 * Use `fromBits` to pack an array of 1s and 0s.
 *
 * @param array - The already packed bytes to copy into the Binary
 * @param padding - How many trailing bits of the last byte are unused; defaults to 0
 * @returns a Binary with sub_type Vector
 * @throws BSONError if padding is not an integer between 0 and 7,
 * or if padding is non-zero while `array` is empty
 */
public static fromPackedBits(array: Uint8Array, padding = 0): Binary {
  // Check the argument itself: once written into a byte it would be silently
  // truncated (e.g. 256 becomes 0) and the mistake could no longer be detected.
  if (!Number.isInteger(padding) || padding < 0 || padding > 7) {
    throw new BSONError(
      `Invalid Vector: padding must be a value between 0 and 7. found: ${padding}`
    );
  }

  if (padding !== 0 && array.byteLength === 0) {
    throw new BSONError(
      'Invalid Vector: padding must be zero for packed bit vectors that are empty'
    );
  }

  const buffer = ByteUtils.allocate(array.byteLength + 2);
  buffer[0] = Binary.VECTOR_TYPE.PackedBit;
  buffer[1] = padding;
  buffer.set(array, 2);
  return new this(buffer, this.SUBTYPE_VECTOR);
}
479+
480+
/**
 * Constructs a Binary representing a Packed Bit Vector.
 *
 * Bits are packed most-significant first, eight per byte, behind a two byte
 * header (d_type = PackedBit, padding). The padding is the number of unused
 * bits in the last byte: 0 when `bits.length` is a multiple of 8.
 *
 * @param bits - The array of 1s and 0s to pack into the Binary instance
 * @returns a Binary with sub_type Vector
 * @throws BSONError if any element of `bits` is neither 0 nor 1
 */
public static fromBits(bits: ArrayLike<number>): Binary {
  const packedLength = Math.ceil(bits.length / 8);
  const packed = new Uint8Array(packedLength + 2);
  packed[0] = Binary.VECTOR_TYPE.PackedBit;

  const bitsInLastByte = bits.length % 8;
  packed[1] = bitsInLastByte === 0 ? 0 : 8 - bitsInLastByte;

  for (let index = 0; index < bits.length; index++) {
    const value = bits[index];

    if (value !== 0 && value !== 1) {
      throw new BSONError(
        `Invalid bit value at ${index}: must be 0 or 1, found ${bits[index]}`
      );
    }

    // The buffer starts zeroed, so only set bits need to be written.
    if (value !== 0) {
      const targetByte = Math.floor(index / 8) + 2;
      const shift = 7 - (index % 8);
      packed[targetByte] |= value << shift;
    }
  }

  return new this(packed, Binary.SUBTYPE_VECTOR);
}
510+
511+
/**
 * Validates the header, and for Float32 the payload length, of a Binary Vector.
 *
 * Does nothing when `vector.sub_type` is not Vector. Only the d_type values in
 * `VECTOR_TYPE` are checked; any other d_type passes through unvalidated.
 *
 * @param vector - The Binary to check
 * @throws BSONError if padding is non-zero for an Int8 or Float32 vector,
 * if a Float32 vector's payload is not a multiple of 4 bytes,
 * if padding is non-zero for an empty packed bit vector,
 * or if a packed bit vector's padding is greater than 7
 * @internal
 */
static validateVector(vector: Binary): void {
  if (vector.sub_type !== this.SUBTYPE_VECTOR) return;

  const size = vector.position;
  const d_type = vector.buffer[0];
  const padding = vector.buffer[1];

  if (
    (d_type === this.VECTOR_TYPE.Float32 || d_type === this.VECTOR_TYPE.Int8) &&
    padding !== 0
  ) {
    throw new BSONError('Invalid Vector: padding must be zero for int8 and float32 vectors');
  }

  // Everything after the 2 byte header must be whole 4-byte floats.
  if (d_type === this.VECTOR_TYPE.Float32 && size > 2 && (size - 2) % 4 !== 0) {
    throw new BSONError('Invalid Vector: Float32 vector must contain a multiple of 4 bytes');
  }

  if (d_type === this.VECTOR_TYPE.PackedBit && padding !== 0 && size === 2) {
    throw new BSONError(
      'Invalid Vector: padding must be zero for packed bit vectors that are empty'
    );
  }

  if (d_type === this.VECTOR_TYPE.PackedBit && padding > 7) {
    throw new BSONError(
      `Invalid Vector: padding must be a value between 0 and 7. found: ${padding}`
    );
  }
}
313538
}
314539

315540
/** @public */

src/parser/serializer.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,10 @@ function serializeBinary(buffer: Uint8Array, key: string, value: Binary, index:
495495
index += NumberUtils.setInt32LE(buffer, index, size);
496496
}
497497

498+
if (value.sub_type === Binary.SUBTYPE_VECTOR) {
499+
Binary.validateVector(value);
500+
}
501+
498502
if (size <= 16) {
499503
for (let i = 0; i < size; i++) buffer[index + i] = data[i];
500504
} else {

src/utils/number_utils.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ const isBigEndian = FLOAT_BYTES[7] === 0;
1313
* A collection of functions that get or set various numeric types and bit widths from a Uint8Array.
1414
*/
1515
export type NumberUtils = {
16+
/** Is true if the current system is big endian. */
17+
isBigEndian: boolean;
1618
/**
1719
* Parses a signed int32 at offset. Throws a `RangeError` if value is negative.
1820
*/
@@ -35,6 +37,8 @@ export type NumberUtils = {
3537
* @public
3638
*/
3739
export const NumberUtils: NumberUtils = {
40+
isBigEndian,
41+
3842
getNonnegativeInt32LE(source: Uint8Array, offset: number): number {
3943
if (source[offset + 3] > 127) {
4044
throw new RangeError(`Size cannot be negative at offset: ${offset}`);

0 commit comments

Comments
 (0)