Skip to content

Commit

Permalink
feat(NODE-6537): add support for binary vectors
Browse files Browse the repository at this point in the history
  • Loading branch information
nbbeeken committed Nov 15, 2024
1 parent c9160d3 commit af3f9cd
Show file tree
Hide file tree
Showing 6 changed files with 520 additions and 4 deletions.
23 changes: 20 additions & 3 deletions .evergreen/run-big-endian-test.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,22 @@
#!/usr/bin/env bash
#! /usr/bin/env bash

source $DRIVERS_TOOLS/.evergreen/init-node-and-npm-env.sh
# At the time of writing, this script is not used in CI,
# but it can be used to iterate locally on big-endian bugs.
# buildx requires an output, so the docs directory is exported; this should be a no-op.

npx mocha test/s390x/big_endian.test.ts
# Strict mode: exit on the first failing command, treat unset variables as
# errors, fail a pipeline if any stage fails, and echo each command as it runs.
set -o errexit
set -o nounset
set -o pipefail
set -o xtrace

# If you get an error you may have an outdated buildkit version
# Try running this:
# docker buildx rm builder && docker buildx create --name builder --bootstrap --use

# Build .github/docker/Dockerfile for linux/s390x with the repository root as
# the build context, and export the result to ./docs on the host.
# NOTE(review): NODE_ARCH is passed as a build arg, but the Dockerfile does not
# appear to declare a matching ARG — confirm whether it is still needed.
docker buildx build \
--progress=plain \
--platform linux/s390x \
--build-arg="NODE_ARCH=s390x" \
-f ./.github/docker/Dockerfile \
--output type=local,dest=./docs,platform-split=false \
.
10 changes: 10 additions & 0 deletions .github/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Build stage: copy the repository into the image, install dependencies and
# run the test suite on the image's platform.
FROM node:22 AS build

WORKDIR /bson
COPY . .

# Remove any node_modules that came in with the build context before
# installing (presumably so host-installed packages are not reused — confirm),
# then run the tests; a failing test fails the image build.
RUN rm -rf node_modules && npm install && npm test

# Output stage: an empty image that only carries the docs directory, giving
# the build something to export.
FROM scratch

COPY --from=build /bson/docs/ /
222 changes: 222 additions & 0 deletions src/binary.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { BSONError } from './error';
import { BSON_BINARY_SUBTYPE_UUID_NEW } from './constants';
import { ByteUtils } from './utils/byte_utils';
import { BSONValue } from './bson_value';
import { NumberUtils } from './utils/number_utils';

/** @public */
export type BinarySequence = Uint8Array | number[];
Expand Down Expand Up @@ -58,9 +59,18 @@ export class Binary extends BSONValue {
static readonly SUBTYPE_COLUMN = 7;
/** Sensitive BSON type */
static readonly SUBTYPE_SENSITIVE = 8;
/** Vector BSON type */
static readonly SUBTYPE_VECTOR = 9;
/** User BSON type */
static readonly SUBTYPE_USER_DEFINED = 128;

/**
 * d_type of a Binary Vector (subtype: 9).
 *
 * The d_type is the first byte of a vector's bytes; the vector accessors on this
 * class compare `buffer[0]` against these values before converting.
 */
static readonly VECTOR_TYPE = Object.freeze({
/** Elements are signed 8-bit integers (see `toInt8Array` / `fromInt8Array`). */
Int8: 0x03,
/** Elements are 32-bit floats stored in little-endian byte order (see `toFloat32Array` / `fromFloat32Array`). */
Float32: 0x27,
/** Elements are single bits packed eight per byte, most significant bit first (see `toBits` / `fromBits`). */
PackedBit: 0x10
} as const);

/**
* The bytes of the Binary value.
*
Expand Down Expand Up @@ -238,6 +248,11 @@ export class Binary extends BSONValue {
/** @internal */
toExtendedJSON(options?: EJSONOptions): BinaryExtendedLegacy | BinaryExtended {
options = options || {};

if (this.sub_type === Binary.SUBTYPE_VECTOR) {
Binary.validateVector(this);
}

const base64String = ByteUtils.toBase64(this.buffer);

const subType = Number(this.sub_type).toString(16);
Expand Down Expand Up @@ -310,6 +325,213 @@ export class Binary extends BSONValue {
const subTypeArg = inspect(this.sub_type, options);
return `Binary.createFromBase64(${base64Arg}, ${subTypeArg})`;
}

/**
 * Copies the elements of an Int8 Vector out of this Binary.
 *
 * The two leading header bytes (d_type and padding) are skipped and the remaining
 * bytes, up to `position`, are copied into a fresh ArrayBuffer, so the returned
 * array does not share memory with this Binary.
 *
 * @returns a new Int8Array holding a copy of the vector's elements
 * @throws BSONError if sub_type is not Vector or the d_type byte is not Int8
 */
public toInt8Array(): Int8Array {
  const isVector = this.sub_type === Binary.SUBTYPE_VECTOR;
  if (!isVector) {
    throw new BSONError('Binary sub_type is not Vector');
  }

  const dType = this.buffer[0];
  if (dType !== Binary.VECTOR_TYPE.Int8) {
    throw new BSONError('Binary d_type field is not Int8');
  }

  // Element bytes start after the 2-byte header and end at the written length.
  const start = this.buffer.byteOffset + 2;
  const end = this.buffer.byteOffset + this.position;
  return new Int8Array(this.buffer.buffer.slice(start, end));
}

/**
 * If this Binary represents a Float32 Vector,
 * returns a copy of the bytes in a new Float32Array.
 *
 * The stored bytes are little-endian; on a big-endian host each 4-byte element is
 * byte-swapped in the copy so the returned Float32Array holds the intended values.
 *
 * @returns a new Float32Array that does not share memory with this Binary
 * @throws BSONError if sub_type is not Vector, if the d_type byte is not Float32,
 * or if the element bytes are not a whole number of 4-byte floats
 */
public toFloat32Array(): Float32Array {
  if (this.sub_type !== Binary.SUBTYPE_VECTOR) {
    throw new BSONError('Binary sub_type is not Vector');
  }

  if (this.buffer[0] !== Binary.VECTOR_TYPE.Float32) {
    throw new BSONError('Binary d_type field is not Float32');
  }

  // Copy the element bytes that follow the 2-byte header (d_type, padding).
  const start = this.buffer.byteOffset + 2;
  const end = this.buffer.byteOffset + this.position;
  const floatBytes = new Uint8Array(this.buffer.buffer.slice(start, end));

  // Without this check the Float32Array constructor below would throw a raw
  // RangeError for a truncated payload; report it as a BSON error instead.
  if (floatBytes.byteLength % 4 !== 0) {
    throw new BSONError('Invalid Vector: Float32 vector must contain a multiple of 4 bytes');
  }

  if (NumberUtils.isBigEndian) {
    // Reverse each 4-byte group so little-endian storage reads correctly natively.
    for (let i = 0; i < floatBytes.byteLength; i += 4) {
      const byte0 = floatBytes[i];
      const byte1 = floatBytes[i + 1];
      const byte2 = floatBytes[i + 2];
      const byte3 = floatBytes[i + 3];
      floatBytes[i] = byte3;
      floatBytes[i + 1] = byte2;
      floatBytes[i + 2] = byte1;
      floatBytes[i + 3] = byte0;
    }
  }

  return new Float32Array(floatBytes.buffer);
}

/**
 * Copies the packed bytes of a packed bit Vector out of this Binary.
 *
 * The two header bytes (d_type and padding) are not included in the result;
 * the bits stay packed eight per byte. Use `toBits` to get the unpacked bits.
 *
 * @returns a new Uint8Array that does not share memory with this Binary
 * @throws BSONError if sub_type is not Vector or the d_type byte is not packed bit
 */
public toPackedBits(): Uint8Array {
  const isVector = this.sub_type === Binary.SUBTYPE_VECTOR;
  if (!isVector) {
    throw new BSONError('Binary sub_type is not Vector');
  }

  const dType = this.buffer[0];
  if (dType !== Binary.VECTOR_TYPE.PackedBit) {
    throw new BSONError('Binary d_type field is not packed bit');
  }

  // Packed bytes start after the 2-byte header and end at the written length.
  const start = this.buffer.byteOffset + 2;
  const end = this.buffer.byteOffset + this.position;
  return new Uint8Array(this.buffer.buffer.slice(start, end));
}

/**
 * If this Binary represents a Packed bit Vector,
 * returns a copy of the bits unpacked into a new Int8Array.
 *
 * Each element of the result is 0 or 1. Bits are read most significant bit
 * first within each byte, and the trailing bits counted by the padding byte
 * are left out of the result.
 *
 * @returns a new Int8Array with one element per stored bit
 * @throws BSONError if sub_type is not Vector, if the d_type byte is not packed bit,
 * or if the padding byte is invalid (greater than 7, or non-zero on an empty vector)
 */
public toBits(): Int8Array {
  if (this.sub_type !== Binary.SUBTYPE_VECTOR) {
    throw new BSONError('Binary sub_type is not Vector');
  }

  if (this.buffer[0] !== Binary.VECTOR_TYPE.PackedBit) {
    throw new BSONError('Binary d_type field is not packed bit');
  }

  // Reject a malformed padding byte up front: otherwise a non-zero padding on an
  // empty vector produces a negative length below (a raw RangeError), and a
  // padding above 7 silently drops real bits.
  Binary.validateVector(this);

  const byteCount = this.length() - 2;
  const bitCount = byteCount * 8 - this.buffer[1];
  const bits = new Int8Array(bitCount);

  for (let bitOffset = 0; bitOffset < bits.length; bitOffset++) {
    const byteOffset = (bitOffset / 8) | 0;
    // +2 skips the d_type and padding header bytes.
    const byte = this.buffer[byteOffset + 2];
    // Bit 0 of the vector is the most significant bit of its byte.
    const shift = 7 - (bitOffset % 8);
    bits[bitOffset] = (byte >> shift) & 1;
  }

  return bits;
}

/**
 * Builds a Binary (subtype Vector) holding an Int8 Vector.
 *
 * The result starts with a 2-byte header (d_type Int8, padding 0) followed by a
 * copy of the array's bytes; the input array is not retained.
 *
 * @param array - The Int8 elements to copy into the new Binary
 */
public static fromInt8Array(array: Int8Array): Binary {
  const bytes = ByteUtils.allocate(array.byteLength + 2);

  // Header: d_type then padding.
  bytes[0] = Binary.VECTOR_TYPE.Int8;
  bytes[1] = 0;

  // Reinterpret the input's memory as raw bytes and copy it in after the header.
  bytes.set(new Uint8Array(array.buffer, array.byteOffset, array.byteLength), 2);

  return new this(bytes, this.SUBTYPE_VECTOR);
}

/**
 * Builds a Binary (subtype Vector) holding a Float32 Vector.
 *
 * The result starts with a 2-byte header (d_type Float32, padding 0) followed by a
 * copy of the array's bytes. On a big-endian host each 4-byte element of the copy
 * is byte-swapped so the stored bytes are little-endian.
 *
 * @param array - The Float32 elements to copy into the new Binary
 */
public static fromFloat32Array(array: Float32Array): Binary {
  const binaryBytes = ByteUtils.allocate(array.byteLength + 2);

  // Header: d_type then padding.
  binaryBytes[0] = Binary.VECTOR_TYPE.Float32;
  binaryBytes[1] = 0;

  // Copy the input's raw bytes in after the header.
  binaryBytes.set(new Uint8Array(array.buffer, array.byteOffset, array.byteLength), 2);

  if (NumberUtils.isBigEndian) {
    // Reverse every 4-byte element in place: swap the outer pair, then the inner pair.
    for (let start = 2; start < binaryBytes.byteLength; start += 4) {
      let held = binaryBytes[start];
      binaryBytes[start] = binaryBytes[start + 3];
      binaryBytes[start + 3] = held;

      held = binaryBytes[start + 1];
      binaryBytes[start + 1] = binaryBytes[start + 2];
      binaryBytes[start + 2] = held;
    }
  }

  return new this(binaryBytes, this.SUBTYPE_VECTOR);
}

/**
 * Constructs a Binary representing a packed bit Vector.
 *
 * Use `fromBits` to pack an array of 1s and 0s.
 *
 * @param array - The already packed bytes; they are copied into the new Binary
 * @param padding - The number of bits in the final byte that are to be ignored (0 to 7)
 * @throws BSONError if padding is not an integer between 0 and 7,
 * or if padding is non-zero while `array` is empty
 */
public static fromPackedBits(array: Uint8Array, padding = 0): Binary {
  // Check the argument before storing it: writing to a Uint8Array truncates the
  // value, so e.g. 256 or 0.5 would otherwise be stored as 0 without any error.
  if (!Number.isInteger(padding) || padding < 0 || padding > 7) {
    throw new BSONError(
      `Invalid Vector: padding must be a value between 0 and 7. found: ${padding}`
    );
  }

  const buffer = ByteUtils.allocate(array.byteLength + 2);
  buffer[0] = Binary.VECTOR_TYPE.PackedBit;
  buffer[1] = padding;
  buffer.set(array, 2);

  const vector = new this(buffer, this.SUBTYPE_VECTOR);
  // Surface the remaining vector rules (such as padding on an empty vector) at
  // construction time rather than later during serialization.
  Binary.validateVector(vector);
  return vector;
}

/**
 * Packs an array of 1s and 0s into a Binary representing a Packed Bit Vector.
 *
 * Bits are written most significant bit first within each byte. The padding byte
 * of the header records how many trailing bits of the last byte are unused.
 *
 * @param bits - The array of 1s and 0s to pack into the Binary instance
 * @throws BSONError if any element is neither 0 nor 1
 */
public static fromBits(bits: ArrayLike<number>): Binary {
  const bitCount = bits.length;
  const packed = new Uint8Array(Math.ceil(bitCount / 8) + 2);

  // Header: d_type, then the count of unused bits in the final byte.
  const leftover = bitCount % 8;
  packed[0] = Binary.VECTOR_TYPE.PackedBit;
  packed[1] = leftover === 0 ? 0 : 8 - leftover;

  for (let index = 0; index < bitCount; index++) {
    const bit = bits[index];

    if (!(bit === 0 || bit === 1)) {
      throw new BSONError(
        `Invalid bit value at ${index}: must be 0 or 1, found ${bits[index]}`
      );
    }

    // The array starts zeroed, so only set bits need to be written.
    if (bit === 1) {
      const target = Math.floor(index / 8) + 2;
      packed[target] |= 1 << (7 - (index % 8));
    }
  }

  return new this(packed, Binary.SUBTYPE_VECTOR);
}

/**
 * Checks the header and size rules of a Binary Vector and throws if one is violated.
 * Binaries whose sub_type is not Vector are ignored. Only the d_types listed in
 * `VECTOR_TYPE` are checked; any other d_type passes through unvalidated.
 *
 * @param vector - The Binary to validate
 * @throws BSONError if padding is non-zero for an int8 or float32 vector, if a float32
 * vector's element bytes are not a multiple of 4, if an empty packed bit vector has
 * non-zero padding, or if a packed bit vector's padding is greater than 7
 * @internal
 */
static validateVector(vector: Binary): void {
  if (vector.sub_type !== this.SUBTYPE_VECTOR) return;

  // The first two bytes are the header: d_type, then padding.
  const size = vector.position;
  const d_type = vector.buffer[0];
  const padding = vector.buffer[1];

  if (
    (d_type === this.VECTOR_TYPE.Float32 || d_type === this.VECTOR_TYPE.Int8) &&
    padding !== 0
  ) {
    throw new BSONError('Invalid Vector: padding must be zero for int8 and float32 vectors');
  }

  // Every float32 element occupies 4 bytes after the 2-byte header; a partial
  // element means the data is truncated or corrupt.
  if (d_type === this.VECTOR_TYPE.Float32 && size > 2 && (size - 2) % 4 !== 0) {
    throw new BSONError('Invalid Vector: Float32 vector must contain a multiple of 4 bytes');
  }

  if (d_type === this.VECTOR_TYPE.PackedBit && padding !== 0 && size === 2) {
    throw new BSONError(
      'Invalid Vector: padding must be zero for packed bit vectors that are empty'
    );
  }

  if (d_type === this.VECTOR_TYPE.PackedBit && padding > 7) {
    throw new BSONError(
      `Invalid Vector: padding must be a value between 0 and 7. found: ${padding}`
    );
  }
}
}

/** @public */
Expand Down
4 changes: 4 additions & 0 deletions src/parser/serializer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,10 @@ function serializeBinary(buffer: Uint8Array, key: string, value: Binary, index:
index += NumberUtils.setInt32LE(buffer, index, size);
}

if (value.sub_type === Binary.SUBTYPE_VECTOR) {
Binary.validateVector(value);
}

if (size <= 16) {
for (let i = 0; i < size; i++) buffer[index + i] = data[i];
} else {
Expand Down
4 changes: 4 additions & 0 deletions src/utils/number_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ const isBigEndian = FLOAT_BYTES[7] === 0;
* A collection of functions that get or set various numeric types and bit widths from a Uint8Array.
*/
export type NumberUtils = {
/** Is true if the current system is big endian. */
isBigEndian: boolean;
/**
* Parses a signed int32 at offset. Throws a `RangeError` if value is negative.
*/
Expand All @@ -35,6 +37,8 @@ export type NumberUtils = {
* @public
*/
export const NumberUtils: NumberUtils = {
isBigEndian,

getNonnegativeInt32LE(source: Uint8Array, offset: number): number {
if (source[offset + 3] > 127) {
throw new RangeError(`Size cannot be negative at offset: ${offset}`);
Expand Down
Loading

0 comments on commit af3f9cd

Please sign in to comment.