From 7c08ddfdf79caa2fdfe2dc125fce6d0e253249f1 Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Thu, 14 Nov 2024 23:37:20 -0500 Subject: [PATCH] feat(NODE-6537): add support for binary vectors --- .evergreen/run-big-endian-test.sh | 23 ++- .github/docker/Dockerfile | 10 + src/binary.ts | 220 ++++++++++++++++++++++ src/parser/serializer.ts | 4 + src/utils/number_utils.ts | 4 + test/node/binary.test.ts | 215 ++++++++++++++++++++- test/node/bson_binary_vector.spec.test.ts | 135 +++++-------- 7 files changed, 523 insertions(+), 88 deletions(-) create mode 100644 .github/docker/Dockerfile diff --git a/.evergreen/run-big-endian-test.sh b/.evergreen/run-big-endian-test.sh index a1cc8240..0b67133d 100644 --- a/.evergreen/run-big-endian-test.sh +++ b/.evergreen/run-big-endian-test.sh @@ -1,5 +1,22 @@ -#!/usr/bin/env bash +#! /usr/bin/env bash -source $DRIVERS_TOOLS/.evergreen/init-node-and-npm-env.sh +# At the time of writing. This script is not used in CI. +# but can be used to locally iterate on big endian bugs. +# buildx requires an output, so I put docs which should be a no-op. -npx mocha test/s390x/big_endian.test.ts +set -o errexit +set -o nounset +set -o pipefail +set -o xtrace + +# If you get an error you may have an outdated buildkit version +# Try running this: +# docker buildx rm builder && docker buildx create --name builder --bootstrap --use + +docker buildx build \ + --progress=plain \ + --platform linux/s390x \ + --build-arg="NODE_ARCH=s390x" \ + -f ./.github/docker/Dockerfile \ + --output type=local,dest=./docs,platform-split=false \ + . diff --git a/.github/docker/Dockerfile b/.github/docker/Dockerfile new file mode 100644 index 00000000..3ceb2a50 --- /dev/null +++ b/.github/docker/Dockerfile @@ -0,0 +1,10 @@ +FROM node:22 AS build + +WORKDIR /bson +COPY . . 
+ +RUN rm -rf node_modules && npm install && npm test + +FROM scratch + +COPY --from=build /bson/docs/ / diff --git a/src/binary.ts b/src/binary.ts index d3b496c3..9e1cd68a 100644 --- a/src/binary.ts +++ b/src/binary.ts @@ -4,6 +4,7 @@ import { BSONError } from './error'; import { BSON_BINARY_SUBTYPE_UUID_NEW } from './constants'; import { ByteUtils } from './utils/byte_utils'; import { BSONValue } from './bson_value'; +import { NumberUtils } from './utils/number_utils'; /** @public */ export type BinarySequence = Uint8Array | number[]; @@ -22,6 +23,15 @@ export interface BinaryExtended { }; } +/** Creates a copy of the Uint8Array bytes. */ +const copy = + // eslint-disable-next-line @typescript-eslint/unbound-method + Uint8Array.prototype.slice.call.bind(Uint8Array.prototype.slice) as unknown as ( + bytes: Uint8Array, + start: number, + end: number + ) => Uint8Array; + /** * A class representation of the BSON Binary type. * @public @@ -58,9 +68,18 @@ export class Binary extends BSONValue { static readonly SUBTYPE_COLUMN = 7; /** Sensitive BSON type */ static readonly SUBTYPE_SENSITIVE = 8; + /** Vector BSON type */ + static readonly SUBTYPE_VECTOR = 9; /** User BSON type */ static readonly SUBTYPE_USER_DEFINED = 128; + /** d_type of a Binary Vector (subtype: 9) */ + static readonly VECTOR_TYPE = Object.freeze({ + Int8: 0x03, + Float32: 0x27, + PackedBit: 0x10 + } as const); + /** * The bytes of the Binary value. 
* @@ -238,6 +257,11 @@ export class Binary extends BSONValue { /** @internal */ toExtendedJSON(options?: EJSONOptions): BinaryExtendedLegacy | BinaryExtended { options = options || {}; + + if (this.sub_type === Binary.SUBTYPE_VECTOR) { + Binary.validateVector(this); + } + const base64String = ByteUtils.toBase64(this.buffer); const subType = Number(this.sub_type).toString(16); @@ -310,6 +334,202 @@ export class Binary extends BSONValue { const subTypeArg = inspect(this.sub_type, options); return `Binary.createFromBase64(${base64Arg}, ${subTypeArg})`; } + + /** + * If this Binary represents a Int8 Vector, + * returns a copy of the bytes in a new Int8Array. + */ + public toInt8Array(): Int8Array { + if (this.sub_type !== Binary.SUBTYPE_VECTOR) { + throw new BSONError('Binary sub_type is not Vector'); + } + + if ((this.buffer[0] ?? 0) !== Binary.VECTOR_TYPE.Int8) { + throw new BSONError('Binary d_type field is not Int8'); + } + + return new Int8Array(copy(this.buffer, 2, this.position).buffer); + } + + /** + * If this Binary represents a Float32 Vector, + * returns a copy of the bytes in a new Float32Array. + */ + public toFloat32Array(): Float32Array { + if (this.sub_type !== Binary.SUBTYPE_VECTOR) { + throw new BSONError('Binary sub_type is not Vector'); + } + + if ((this.buffer[0] ?? 0) !== Binary.VECTOR_TYPE.Float32) { + throw new BSONError('Binary d_type field is not Float32'); + } + + const floatBytes = copy(this.buffer, 2, this.position); + if (NumberUtils.isBigEndian) { + for (let i = 0; i < floatBytes.byteLength; i += 4) { + const byte0 = floatBytes[i]; + const byte1 = floatBytes[i + 1]; + const byte2 = floatBytes[i + 2]; + const byte3 = floatBytes[i + 3]; + floatBytes[i] = byte3; + floatBytes[i + 1] = byte2; + floatBytes[i + 2] = byte1; + floatBytes[i + 3] = byte0; + } + } + return new Float32Array(floatBytes.buffer); + } + + /** + * If this Binary represents packed bit Vector, + * returns a copy of the bytes that are packed bits. 
+   *
+   * Use `toBits` to get the unpacked bits.
+   */
+  public toPackedBits(): Uint8Array {
+    if (this.sub_type !== Binary.SUBTYPE_VECTOR) {
+      throw new BSONError('Binary sub_type is not Vector');
+    }
+
+    if ((this.buffer[0] ?? 0) !== Binary.VECTOR_TYPE.PackedBit) {
+      throw new BSONError('Binary d_type field is not packed bit');
+    }
+
+    return copy(this.buffer, 2, this.position);
+  }
+
+  /**
+   * If this Binary represents a Packed bit Vector,
+   * returns a copy of the bits unpacked into a new Int8Array.
+   *
+   * Each element is 0 or 1, most significant bit first; padding bits are not included.
+   */
+  public toBits(): Int8Array {
+    if (this.sub_type !== Binary.SUBTYPE_VECTOR) {
+      throw new BSONError('Binary sub_type is not Vector');
+    }
+
+    if ((this.buffer[0] ?? 0) !== Binary.VECTOR_TYPE.PackedBit) {
+      throw new BSONError('Binary d_type field is not packed bit');
+    }
+
+    const byteCount = this.length() - 2;
+    const bitCount = byteCount * 8 - this.buffer[1];
+    const bits = new Int8Array(bitCount);
+    // Walk by bit index so bit i always comes from payload byte floor(i / 8), for any byte count.
+    for (let bitOffset = 0; bitOffset < bits.length; bitOffset++) {
+      const byteOffset = Math.floor(bitOffset / 8);
+      const byte = this.buffer[byteOffset + 2];
+      const shift = 7 - (bitOffset % 8);
+      bits[bitOffset] = (byte >> shift) & 1;
+    }
+    return bits;
+  }
+
+  /**
+   * Constructs a Binary representing an Int8 Vector.
+   * @param array - The array whose bytes are copied into the new Binary
+   */
+  public static fromInt8Array(array: Int8Array): Binary {
+    const buffer = ByteUtils.allocate(array.byteLength + 2);
+    buffer[0] = Binary.VECTOR_TYPE.Int8;
+    buffer[1] = 0;
+    const intBytes = new Uint8Array(array.buffer, array.byteOffset, array.byteLength);
+    buffer.set(intBytes, 2);
+    return new this(buffer, this.SUBTYPE_VECTOR);
+  }
+
+  /** Constructs a Binary representing a Float32 Vector.
*/
+  public static fromFloat32Array(array: Float32Array): Binary {
+    const binaryBytes = ByteUtils.allocate(array.byteLength + 2);
+    binaryBytes[0] = Binary.VECTOR_TYPE.Float32;
+    binaryBytes[1] = 0;
+
+    const floatBytes = new Uint8Array(array.buffer, array.byteOffset, array.byteLength);
+    binaryBytes.set(floatBytes, 2);
+    // The stored bytes must be little endian: swap each 4-byte element on big endian hosts.
+    if (NumberUtils.isBigEndian) {
+      for (let i = 2; i < binaryBytes.byteLength; i += 4) {
+        const byte0 = binaryBytes[i];
+        const byte1 = binaryBytes[i + 1];
+        const byte2 = binaryBytes[i + 2];
+        const byte3 = binaryBytes[i + 3];
+        binaryBytes[i] = byte3;
+        binaryBytes[i + 1] = byte2;
+        binaryBytes[i + 2] = byte1;
+        binaryBytes[i + 3] = byte0;
+      }
+    }
+
+    return new this(binaryBytes, this.SUBTYPE_VECTOR);
+  }
+
+  /**
+   * Constructs a Binary representing a packed bit Vector.
+   * `padding` is stored in the header as the number of trailing bits of the last byte to ignore.
+   * Use `fromBits` to pack an array of 1s and 0s.
+   */
+  public static fromPackedBits(array: Uint8Array, padding = 0): Binary {
+    const buffer = ByteUtils.allocate(array.byteLength + 2);
+    buffer[0] = Binary.VECTOR_TYPE.PackedBit;
+    buffer[1] = padding;
+    buffer.set(array, 2);
+    return new this(buffer, this.SUBTYPE_VECTOR);
+  }
+
+  /**
+   * Constructs a Binary representing a Packed Bit Vector.
+   * @param bits - The array of 1s and 0s to pack into the Binary instance
+   */
+  public static fromBits(bits: ArrayLike<number>): Binary {
+    const byteLength = Math.ceil(bits.length / 8);
+    const bytes = new Uint8Array(byteLength + 2);
+    bytes[0] = Binary.VECTOR_TYPE.PackedBit;
+
+    const remainder = bits.length % 8;
+    bytes[1] = remainder === 0 ? 0 : 8 - remainder;
+
+    for (let bitOffset = 0; bitOffset < bits.length; bitOffset++) {
+      const byteOffset = Math.floor(bitOffset / 8);
+
+      const bit = bits[bitOffset] ?
1 : 0; + + if (bit === 0) continue; + + const shift = 7 - (bitOffset % 8); + bytes[byteOffset + 2] |= bit << shift; + } + + return new this(bytes, Binary.SUBTYPE_VECTOR); + } + + /** @internal */ + static validateVector(vector: Binary): void { + if (vector.sub_type !== this.SUBTYPE_VECTOR) return; + + const size = vector.position; + const d_type = vector.buffer[0] ?? 0; + const padding = vector.buffer[1] ?? 0; + + if ( + (d_type === this.VECTOR_TYPE.Float32 || d_type === this.VECTOR_TYPE.Int8) && + padding !== 0 + ) { + throw new BSONError('Invalid Vector: padding must be zero for int8 and float32 vectors'); + } + + if (d_type === this.VECTOR_TYPE.PackedBit && padding !== 0 && size === 2) { + throw new BSONError( + 'Invalid Vector: padding must be zero for packed bit vectors that are empty' + ); + } + + if (d_type === this.VECTOR_TYPE.PackedBit && padding > 7) { + throw new BSONError( + `Invalid Vector: padding must be a value between 0 and 7. found: ${padding}` + ); + } + } } /** @public */ diff --git a/src/parser/serializer.ts b/src/parser/serializer.ts index fbb47245..d304eded 100644 --- a/src/parser/serializer.ts +++ b/src/parser/serializer.ts @@ -495,6 +495,10 @@ function serializeBinary(buffer: Uint8Array, key: string, value: Binary, index: index += NumberUtils.setInt32LE(buffer, index, size); } + if (value.sub_type === Binary.SUBTYPE_VECTOR) { + Binary.validateVector(value); + } + if (size <= 16) { for (let i = 0; i < size; i++) buffer[index + i] = data[i]; } else { diff --git a/src/utils/number_utils.ts b/src/utils/number_utils.ts index 32f6f5cc..02f4dbeb 100644 --- a/src/utils/number_utils.ts +++ b/src/utils/number_utils.ts @@ -13,6 +13,8 @@ const isBigEndian = FLOAT_BYTES[7] === 0; * A collection of functions that get or set various numeric types and bit widths from a Uint8Array. */ export type NumberUtils = { + /** Is true if the current system is big endian. */ + isBigEndian: boolean; /** * Parses a signed int32 at offset. 
Throws a `RangeError` if value is negative. */ @@ -35,6 +37,8 @@ export type NumberUtils = { * @public */ export const NumberUtils: NumberUtils = { + isBigEndian, + getNonnegativeInt32LE(source: Uint8Array, offset: number): number { if (source[offset + 3] > 127) { throw new RangeError(`Size cannot be negative at offset: ${offset}`); diff --git a/test/node/binary.test.ts b/test/node/binary.test.ts index 1f94a619..2afd4130 100644 --- a/test/node/binary.test.ts +++ b/test/node/binary.test.ts @@ -1,6 +1,6 @@ import { expect } from 'chai'; import * as vm from 'node:vm'; -import { __isWeb__, Binary, BSON } from '../register-bson'; +import { __isWeb__, Binary, BSON, BSONError } from '../register-bson'; import * as util from 'node:util'; describe('class Binary', () => { @@ -249,4 +249,217 @@ describe('class Binary', () => { expect(roundTrippedBin.bin.toJSON()).to.equal(bin.toJSON()); }); }); + + describe('sub_type vector', () => { + describe('d_type constants', () => { + it('has Int8, Float32 and PackedBit', () => { + expect(Binary.VECTOR_TYPE).to.have.property('Int8', 0x03); + expect(Binary.VECTOR_TYPE).to.have.property('Float32', 0x27); + expect(Binary.VECTOR_TYPE).to.have.property('PackedBit', 0x10); + }); + }); + + describe('toInt8Array()', () => { + it('returns a copy of the bytes', function () { + const binary = Binary.fromInt8Array(new Int8Array([1, 2, 3])); + expect(binary.toInt8Array().buffer).to.not.equal(binary.buffer.buffer); + }); + + it('returns Int8Array when sub_type is vector and d_type is INT8', () => { + const int8Array = new Int8Array([1, 2, 3]); + const binary = Binary.fromInt8Array(int8Array); + expect(binary.toInt8Array()).to.deep.equal(int8Array); + }); + + it('throws error when sub_type is not vector', () => { + const binary = new Binary(new Uint8Array([1, 2, 3]), Binary.SUBTYPE_BYTE_ARRAY); + expect(() => binary.toInt8Array()).to.throw(BSONError, 'Binary sub_type is not Vector'); + }); + + it('throws error when d_type is not INT8', () => { + const 
binary = new Binary( + new Uint8Array([Binary.VECTOR_TYPE.Float32, 0, 1, 2, 3]), + Binary.SUBTYPE_VECTOR + ); + expect(() => binary.toInt8Array()).to.throw(BSONError, 'Binary d_type field is not Int8'); + }); + }); + + describe('toFloat32Array()', () => { + it('returns a copy of the bytes', function () { + const binary = Binary.fromFloat32Array(new Float32Array([1.1, 2.2, 3.3])); + expect(binary.toFloat32Array().buffer).to.not.equal(binary.buffer.buffer); + }); + + it('returns Float32Array when sub_type is vector and d_type is FLOAT32', () => { + const float32Array = new Float32Array([1.1, 2.2, 3.3]); + const binary = Binary.fromFloat32Array(float32Array); + expect(binary.toFloat32Array()).to.deep.equal(float32Array); + }); + + it('throws error when sub_type is not vector', () => { + const binary = new Binary(new Uint8Array([1, 2, 3]), Binary.SUBTYPE_BYTE_ARRAY); + expect(() => binary.toFloat32Array()).to.throw(BSONError, 'Binary sub_type is not Vector'); + }); + + it('throws error when d_type is not FLOAT32', () => { + const binary = new Binary( + new Uint8Array([Binary.VECTOR_TYPE.Int8, 0, 1, 2, 3]), + Binary.SUBTYPE_VECTOR + ); + expect(() => binary.toFloat32Array()).to.throw( + BSONError, + 'Binary d_type field is not Float32' + ); + }); + + it('transforms endianness correctly', () => { + // The expectation is that this test is run on LE and BE machines to + // demonstrate that on BE machines we get the same result + const float32Vector = new Uint8Array([ + ...[Binary.VECTOR_TYPE.Float32, 0], // d_type, padding + ...[0, 0, 128, 191], // -1 + ...[0, 0, 128, 191] // -1 + ]); + const binary = new Binary(float32Vector, Binary.SUBTYPE_VECTOR); + + // For reference: + // [ 0, 0, 128, 191 ] is -1 in little endian + // [ 191, 128, 0, 0 ] is -1 in big endian + // REGARDLESS of platform, BSON is ALWAYS little endian + expect(binary.toFloat32Array()).to.deep.equal(new Float32Array([-1, -1])); + }); + }); + + describe('toBits()', () => { + it('returns Int8Array of bits 
when sub_type is vector and d_type is PACKED_BIT', () => { + const bits = new Int8Array([1, 0, 1, 1, 0, 0, 1, 0]); + const binary = Binary.fromBits(bits); + expect(binary.toBits()).to.deep.equal(bits); + }); + + it('throws error when sub_type is not vector', () => { + const binary = new Binary(new Uint8Array([1, 2, 3]), Binary.SUBTYPE_BYTE_ARRAY); + expect(() => binary.toBits()).to.throw(BSONError, 'Binary sub_type is not Vector'); + }); + + it('throws error when d_type is not PACKED_BIT', () => { + const binary = new Binary( + new Uint8Array([Binary.VECTOR_TYPE.Int8, 0, 1, 2, 3]), + Binary.SUBTYPE_VECTOR + ); + expect(() => binary.toBits()).to.throw(BSONError, 'Binary d_type field is not packed bit'); + }); + }); + + describe('toPackedBits()', () => { + it('returns Uint8Array of packed bits when sub_type is vector and d_type is PACKED_BIT', () => { + const bits = new Uint8Array([127, 8]); + const binary = Binary.fromPackedBits(bits, 3); + expect(binary.toPackedBits()).to.deep.equal(bits); + expect(binary.toBits()).to.deep.equal( + new Int8Array([0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1]) + ); + }); + + it('throws error when sub_type is not vector', () => { + const binary = new Binary(new Uint8Array([1, 2, 3]), Binary.SUBTYPE_BYTE_ARRAY); + expect(() => binary.toPackedBits()).to.throw(BSONError, 'Binary sub_type is not Vector'); + }); + + it('throws error when d_type is not PACKED_BIT', () => { + const binary = new Binary( + new Uint8Array([Binary.VECTOR_TYPE.Int8, 0, 1, 2, 3]), + Binary.SUBTYPE_VECTOR + ); + expect(() => binary.toPackedBits()).to.throw( + BSONError, + 'Binary d_type field is not packed bit' + ); + }); + }); + + describe('fromInt8Array()', () => { + it('creates Binary instance from Int8Array', () => { + const int8Array = new Int8Array([1, 2, 3]); + const binary = Binary.fromInt8Array(int8Array); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.Int8); + expect(binary.toInt8Array()).to.deep.equal(int8Array); + }); + + it('creates empty Binary 
instance when Int8Array is empty', () => { + const binary = Binary.fromInt8Array(new Int8Array(0)); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.Int8); + expect(binary.buffer[1]).to.equal(0); + expect(binary.toInt8Array()).to.deep.equal(new Int8Array(0)); + }); + }); + + describe('fromFloat32Array()', () => { + it('creates Binary instance from Float32Array', () => { + const float32Array = new Float32Array([1.1, 2.2, 3.3]); + const binary = Binary.fromFloat32Array(float32Array); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.Float32); + expect(binary.toFloat32Array()).to.deep.equal(float32Array); + }); + + it('creates empty Binary instance when Float32Array is empty', () => { + const binary = Binary.fromFloat32Array(new Float32Array(0)); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.Float32); + expect(binary.buffer[1]).to.equal(0); + expect(binary.toFloat32Array()).to.deep.equal(new Float32Array(0)); + }); + + it('transforms endianness correctly', () => { + // The expectation is that this test is run on LE and BE machines to + // demonstrate that on BE machines we get the same result + const float32Array = new Float32Array([-1, -1]); + const binary = Binary.fromFloat32Array(float32Array); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.Float32); + expect(binary.buffer[1]).to.equal(0); + + // For reference: + // [ 0, 0, 128, 191 ] is -1 in little endian + // [ 191, 128, 0, 0 ] is -1 in big endian + // REGARDLESS of platform, BSON is ALWAYS little endian + expect(Array.from(binary.buffer.subarray(2))).to.deep.equal([ + ...[0, 0, 128, 191], // -1 + ...[0, 0, 128, 191] // -1 + ]); + }); + }); + + describe('fromPackedBits()', () => { + it('creates Binary instance from packed bits', () => { + const bits = new Uint8Array([127, 8]); + const binary = Binary.fromPackedBits(bits, 3); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.PackedBit); + expect(binary.buffer[1]).to.equal(3); + 
expect(binary.buffer.subarray(2)).to.deep.equal(bits);
+      });
+
+      it('creates empty Binary instance when bits are empty', () => {
+        const binary = Binary.fromPackedBits(new Uint8Array(0));
+        expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.PackedBit);
+        expect(binary.buffer[1]).to.equal(0);
+        expect(binary.toPackedBits()).to.deep.equal(new Uint8Array(0));
+      });
+    });
+
+    describe('fromBits()', () => {
+      it('creates Binary instance from bits', () => {
+        const bits = new Int8Array([1, 0, 1, 1, 0, 0, 1, 0]);
+        const binary = Binary.fromBits(bits);
+        expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.PackedBit);
+        expect(binary.toBits()).to.deep.equal(bits);
+      });
+
+      it('creates empty Binary instance when bits are empty', () => {
+        const binary = Binary.fromBits(new Int8Array(0));
+        expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.PackedBit);
+        expect(binary.buffer[1]).to.equal(0);
+        expect(binary.toBits()).to.deep.equal(new Int8Array(0));
+      });
+    });
+  });
 });
diff --git a/test/node/bson_binary_vector.spec.test.ts b/test/node/bson_binary_vector.spec.test.ts
index 67837305..b61c40cf 100644
--- a/test/node/bson_binary_vector.spec.test.ts
+++ b/test/node/bson_binary_vector.spec.test.ts
@@ -5,14 +5,6 @@ import { expect } from 'chai';
 
 const { toHex, fromHex } = BSON.onDemand.ByteUtils;
 
-const FLOAT = new Float64Array(1);
-const FLOAT_BYTES = new Uint8Array(FLOAT.buffer, 0, 8);
-
-FLOAT[0] = -1;
-// Little endian [0, 0, 0, 0, 0, 0, 240, 191]
-// Big endian [191, 240, 0, 0, 0, 0, 0, 0]
-const isBigEndian = FLOAT_BYTES[7] === 0;
-
 type DTypeAlias = 'INT8' | 'FLOAT32' | 'PACKED_BIT';
 type VectorTest = {
   description: string;
@@ -35,67 +27,54 @@ function fixFloats(f: string | number): number {
   if (f === '-inf') {
     return -Infinity;
   }
-  throw new Error(`unknown float value: ${f}`);
+  throw new Error(`test format error: unknown float value: ${f}`);
 }
 
 function fixInt8s(f: number | string): number {
-  if (typeof f !== 'number') throw new Error('unexpected test data');
+  if (typeof f !==
'number') throw new Error('test format error: unexpected test data'); if (f < -128 || f > 127) { - // TODO(NODE-6537): this must be a part of the final "make a binary from" API. - throw new BSONError(`int8 out of range: ${f}`); + // Javascript Int8Array only supports values from -128 to 127 + throw new Error(`unsupported_error: int8 out of range: ${f}`); } return f; } function fixBits(f: number | string): number { - if (typeof f !== 'number') throw new Error('unexpected test data'); + if (typeof f !== 'number') throw new Error('test format error: unexpected test data'); if (f > 255 || f < 0 || !Number.isSafeInteger(f)) { - // TODO(NODE-6537): this must be a part of the final "make a binary from" API. - throw new BSONError(`bit out of range: ${f}`); + // Javascript Uint8Array only supports values from 0 to 255 + throw new Error(`unsupported_error: bit out of range: ${f}`); } return f; } -/** TODO(NODE-6537): Replace the following with final "make a binary from" API */ -function VECTOR_TO_BINARY( +function make( vector: (number | string)[], dtype_hex: string, dtype_alias: DTypeAlias, padding: number ): Binary { + let binary: Binary; switch (dtype_alias) { case 'PACKED_BIT': - case 'INT8': { - const array = new Int8Array(vector.map(dtype_alias === 'INT8' ? 
fixInt8s : fixBits)); - const buffer = new Uint8Array(array.byteLength + 2); - buffer[0] = +dtype_hex; - buffer[1] = padding; - buffer.set(new Uint8Array(array.buffer), 2); - return new Binary(buffer, 9); - } - - case 'FLOAT32': { - const array = new Float32Array(vector.map(fixFloats)); - const buffer = new Uint8Array(array.byteLength + 2); - buffer[0] = +dtype_hex; - buffer[1] = padding; - if (isBigEndian) { - for (let i = 0; i < array.length; i++) { - const bytes = new Uint8Array(array.buffer, i * 4, 4); - bytes.reverse(); - buffer.set(bytes, i * 4 + 2); - } - } else { - buffer.set(new Uint8Array(array.buffer), 2); - } - return new Binary(buffer, 9); - } - + binary = Binary.fromPackedBits(new Uint8Array(vector.map(fixBits)), padding); + break; + case 'INT8': + binary = Binary.fromInt8Array(new Int8Array(vector.map(fixInt8s))); + break; + case 'FLOAT32': + binary = Binary.fromFloat32Array(new Float32Array(vector.map(fixFloats))); + break; default: throw new Error(`Unknown dtype_alias: ${dtype_alias}`); } + + binary.buffer[0] = +dtype_hex; + binary.buffer[1] = padding; + + return binary; } describe('BSON Binary Vector spec tests', () => { @@ -122,12 +101,7 @@ describe('BSON Binary Vector spec tests', () => { */ for (const test of valid) { it(`encode ${test.description}`, function () { - const bin = VECTOR_TO_BINARY( - test.vector, - test.dtype_hex, - test.dtype_alias, - test.padding - ); + const bin = make(test.vector, test.dtype_hex, test.dtype_alias, test.padding); const buffer = BSON.serialize({ [suite.test_key]: bin }); expect(toHex(buffer)).to.equal(test.canonical_bson!.toLowerCase()); @@ -151,41 +125,34 @@ describe('BSON Binary Vector spec tests', () => { * a document from the numeric values, dtype, and padding. */ for (const test of invalid) { - it(test.description, function () { - expect(() => { - // Errors are thrown when creating the binary because of invalid values in the vector. 
- const binary = VECTOR_TO_BINARY( - test.vector, - test.dtype_hex, - test.dtype_alias, - test.padding - ); - // vector assertions TODO(NODE-6537): Replace the following with final "make a binary from" API. - if (binary.sub_type === 0x09) { - const enum dtype { - float32 = 0x27, - int8 = 0x03, - bit = 0x10 - } - - const size = binary.position; - const data = binary.buffer; - const d_type = data[0] ?? 0; - const padding = data[1] ?? 0; - - if ((d_type === dtype.float32 || d_type === dtype.int8) && padding !== 0) { - throw new BSONError('padding must be zero for int8 and float32 vectors'); - } - - if (d_type === dtype.bit && padding !== 0 && size === 2) { - throw new BSONError('padding must be zero for packed bit vectors that are empty'); - } - - if (d_type === dtype.bit && padding > 7) { - throw new BSONError(`padding must be a value between 0 and 7. found: ${data[1]}`); - } - } - }).to.throw(BSONError); + it(`bson: ${test.description}`, function () { + let thrownError: Error | undefined; + try { + const bin = make(test.vector, test.dtype_hex, test.dtype_alias, test.padding); + BSON.serialize({ bin }); + } catch (error) { + thrownError = error; + } + + if (thrownError?.message.startsWith('unsupported_error')) { + this.skip(); + } + expect(thrownError).to.be.instanceOf(BSONError); + }); + + it(`extended json: ${test.description}`, function () { + let thrownError: Error | undefined; + try { + const bin = make(test.vector, test.dtype_hex, test.dtype_alias, test.padding); + BSON.EJSON.stringify({ bin }); + } catch (error) { + thrownError = error; + } + + if (thrownError?.message.startsWith('unsupported_error')) { + this.skip(); + } + expect(thrownError).to.be.instanceOf(BSONError); }); } });