From ff8936865e7ca12a45c5e7fd1ad77bf1c2ff5114 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 5 May 2025 12:42:06 +0200 Subject: [PATCH 1/9] Add Big Endian Support for Float32 in BinaryVectorWriter.WriteToBytes() Signed-off-by: Medha Tiwari --- .../Serialization/BinaryVectorReader.cs | 25 ++++++++---- .../Serialization/BinaryVectorWriter.cs | 38 ++++++++++++++++--- 2 files changed, 50 insertions(+), 13 deletions(-) diff --git a/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs b/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs index ef83c201091..bc89ead7ee6 100644 --- a/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs +++ b/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs @@ -14,6 +14,7 @@ */ using System; +using System.Buffers.Binary; using System.Collections.Generic; using System.Linq; using System.Runtime.InteropServices; @@ -26,7 +27,6 @@ public static BinaryVector ReadBinaryVector(ReadOnlyMemory v where TItem : struct { var (items, padding, vectorDataType) = ReadBinaryVectorAsArray(vectorData); - return CreateBinaryVector(items, padding, vectorDataType); } @@ -41,29 +41,38 @@ public static (TItem[] Items, byte Padding, BinaryVectorDataType VectorDataType) switch (vectorDataType) { case BinaryVectorDataType.Float32: - if ((vectorDataBytes.Span.Length & 3) != 0) { throw new FormatException("Data length of binary vector of type Float32 must be a multiple of 4 bytes."); } - if (BitConverter.IsLittleEndian) + if (typeof(TItem) != typeof(float)) { - var singles = MemoryMarshal.Cast(vectorDataBytes.Span); - items = (TItem[])(object)singles.ToArray(); + throw new NotSupportedException($"Expected float for Float32 vector type, but found {typeof(TItem)}."); } - else + + int count = vectorDataBytes.Length / 4; + float[] floatArray = new float[count]; + + for (int i = 0; i < count; i++) { - throw new NotSupportedException("Binary vector data is not supported on Big Endian architecture yet."); + floatArray[i] = BitConverter.IsLittleEndian + ? MemoryMarshal.Read(vectorDataBytes.Span.Slice(i * 4, 4)) + : BinaryPrimitives.ReadSingleBigEndian(vectorDataBytes.Span.Slice(i * 4, 4)); } + + items = (TItem[])(object)floatArray; break; + case BinaryVectorDataType.Int8: var itemsSpan = MemoryMarshal.Cast(vectorDataBytes.Span); - items = (TItem[])(object)itemsSpan.ToArray(); + items = itemsSpan.ToArray(); break; + case BinaryVectorDataType.PackedBit: items = (TItem[])(object)vectorDataBytes.ToArray(); break; + default: throw new NotSupportedException($"Binary vector data type {vectorDataType} is not supported."); } diff --git a/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs b/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs index 0e9d5e74f6d..beea9cabe92 100644 --- a/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs +++ b/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs @@ -14,6 +14,7 @@ */ using System; +using System.Buffers.Binary; using System.Runtime.InteropServices; namespace MongoDB.Bson.Serialization @@ -35,15 +36,42 @@ public static byte[] WriteToBytes(BinaryVector binaryVector) public static byte[] WriteToBytes(ReadOnlySpan vectorData, BinaryVectorDataType binaryVectorDataType, byte padding) where TItem : struct { - if (!BitConverter.IsLittleEndian) + if (BitConverter.IsLittleEndian) { - throw new NotSupportedException("Binary vector data is not supported on Big Endian architecture yet."); + var vectorDataBytes = MemoryMarshal.Cast(vectorData); + byte[] result = [(byte)binaryVectorDataType, padding, .. vectorDataBytes]; + return result; } - var vectorDataBytes = MemoryMarshal.Cast(vectorData); - byte[] result = [(byte)binaryVectorDataType, padding, .. vectorDataBytes]; + byte[] resultBytes; + switch (binaryVectorDataType) + { + case BinaryVectorDataType.Float32: + int length = vectorData.Length * sizeof(float); + resultBytes = new byte[2 + length]; + resultBytes[0] = (byte)binaryVectorDataType; + resultBytes[1] = padding; + var floatSpan = MemoryMarshal.Cast(vectorData); + Span floatOutput = resultBytes.AsSpan(2); + foreach (var value in floatSpan) + { + BinaryPrimitives.WriteSingleBigEndian(floatOutput, value); + floatOutput = floatOutput.Slice(4); + } + return resultBytes; + + case BinaryVectorDataType.Int8: + case BinaryVectorDataType.PackedBit: + var vectorDataBytes = MemoryMarshal.Cast(vectorData); + resultBytes = new byte[2 + vectorDataBytes.Length]; + resultBytes[0] = (byte)binaryVectorDataType; + resultBytes[1] = padding; + vectorDataBytes.CopyTo(resultBytes.AsSpan(2)); + return resultBytes; - return result; + default: + throw new NotSupportedException($"Binary vector serialization is not supported for {binaryVectorDataType} on Big Endian architecture yet."); + } } } } From 2c2cae1964050a6cf39fdd1e2d21ffb9751119f0 Mon Sep 17 00:00:00 2001 From: Medha Tiwari Date: Tue, 6 May 2025 10:50:42 +0200 Subject: [PATCH 2/9] Added comments for clarity Signed-off-by: Medha Tiwari --- .../Serialization/BinaryVectorReader.cs | 8 +++++--- .../Serialization/BinaryVectorWriter.cs | 14 ++++++++------ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs b/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs index bc89ead7ee6..66ae990e5bb 100644 --- a/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs +++ b/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs @@ -51,14 +51,15 @@ public static (TItem[] Items, byte Padding, BinaryVectorDataType VectorDataType) throw new NotSupportedException($"Expected float for Float32 vector type, but found {typeof(TItem)}."); } - int count = vectorDataBytes.Length / 4; + int count = vectorDataBytes.Length / 4; // 4 bytes per float float[] floatArray = new float[count]; for (int i = 0; i < count; i++) { + // Each float32 is 4 bytes. So to extract the i-th float, we slice 4 bytes from offset i * 4. Use little-endian or big-endian decoding based on platform. floatArray[i] = BitConverter.IsLittleEndian - ? MemoryMarshal.Read(vectorDataBytes.Span.Slice(i * 4, 4)) - : BinaryPrimitives.ReadSingleBigEndian(vectorDataBytes.Span.Slice(i * 4, 4)); + ? MemoryMarshal.Read(vectorDataBytes.Span.Slice(i * 4, 4)) // fast, unaligned read on little endian + : BinaryPrimitives.ReadSingleBigEndian(vectorDataBytes.Span.Slice(i * 4, 4)); // correctly reassemble 4 bytes as big-endian float } items = (TItem[])(object)floatArray; @@ -158,3 +159,4 @@ private static void ValidateItemTypeForBinaryVector(ReadOnlySpan vectorData, BinaryV { case BinaryVectorDataType.Float32: int length = vectorData.Length * sizeof(float); - resultBytes = new byte[2 + length]; - resultBytes[0] = (byte)binaryVectorDataType; - resultBytes[1] = padding; - var floatSpan = MemoryMarshal.Cast(vectorData); - Span floatOutput = resultBytes.AsSpan(2); + resultBytes = new byte[2 + length]; // Allocate output buffer: + resultBytes[0] = (byte)binaryVectorDataType; // - [0]: vector type + resultBytes[1] = padding; // - [1]: padding + var floatSpan = MemoryMarshal.Cast(vectorData); + Span floatOutput = resultBytes.AsSpan(2); // - [2...]: actual float data , skipping header foreach (var value in floatSpan) { + // Each float is 4 bytes - write in Big Endian format BinaryPrimitives.WriteSingleBigEndian(floatOutput, value); - floatOutput = floatOutput.Slice(4); + floatOutput = floatOutput.Slice(4); // advance to next 4-byte block } return resultBytes; @@ -75,3 +76,4 @@ public static byte[] WriteToBytes(ReadOnlySpan vectorData, BinaryV } } } + From 2c4a16a765b43a523e528b6b902eec90b1f32d53 Mon Sep 17 00:00:00 2001 From: Medha Tiwari Date: Wed, 28 May 2025 14:42:01 +0200 Subject: [PATCH 3/9] Fix BinaryVectorSerializerTests to generate little-endian test data for float32 on all platforms Signed-off-by: Medha Tiwari --- .../BinaryVectorSerializerTests.cs | 33 ++++++++++++++++--- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/tests/MongoDB.Bson.Tests/Serialization/Serializers/BinaryVectorSerializerTests.cs b/tests/MongoDB.Bson.Tests/Serialization/Serializers/BinaryVectorSerializerTests.cs index 4394c626c76..b77aad21e31 100644 --- a/tests/MongoDB.Bson.Tests/Serialization/Serializers/BinaryVectorSerializerTests.cs +++ b/tests/MongoDB.Bson.Tests/Serialization/Serializers/BinaryVectorSerializerTests.cs @@ -365,10 +365,35 @@ private BsonBinaryData SerializeToBinaryData(TCollection collection private static (T[], byte[] VectorBson) GetTestData(BinaryVectorDataType dataType, int elementsCount, byte bitsPadding) where T : struct { - var elementsSpan = new ReadOnlySpan(Enumerable.Range(0, elementsCount).Select(i => Convert.ChangeType(i, typeof(T)).As()).ToArray()); - byte[] vectorBsonData = [(byte)dataType, bitsPadding, .. MemoryMarshal.Cast(elementsSpan)]; - - return (elementsSpan.ToArray(), vectorBsonData); + var elementsSpan = new ReadOnlySpan( + Enumerable.Range(0, elementsCount) + .Select(i => Convert.ChangeType(i, typeof(T)).As()) + .ToArray()); + if (typeof(T) == typeof(float) && dataType == BinaryVectorDataType.Float32) + { + var buffer = new byte[2 + elementsCount * 4]; // 4 bytes per float + buffer[0] = (byte)dataType; + buffer[1] = bitsPadding; + for (int i = 0; i < elementsCount; i++) + { + var floatBytes = BitConverter.GetBytes((float)(object)elementsSpan[i]); + if (!BitConverter.IsLittleEndian) + { + Array.Reverse(floatBytes); + } + Buffer.BlockCopy(floatBytes, 0, buffer, 2 + i * 4, 4); + } + return (elementsSpan.ToArray(), buffer); + } + else if ((typeof(T) == typeof(byte) || typeof(T) == typeof(sbyte)) && (dataType == BinaryVectorDataType.Int8 || dataType == BinaryVectorDataType.PackedBit)) + { + byte[] vectorBsonData = [(byte)dataType, bitsPadding, .. MemoryMarshal.Cast(elementsSpan)]; + return (elementsSpan.ToArray(), vectorBsonData); + } + else + { + throw new NotSupportedException($"Type {typeof(T)} is not supported for data type {dataType}."); + } } private static (BinaryVector, byte[] VectorBson) GetTestDataBinaryVector(BinaryVectorDataType dataType, int elementsCount, byte bitsPadding) From 0ee1694922604124b2f392a3d531d5d4b1b98c5d Mon Sep 17 00:00:00 2001 From: Medha Tiwari Date: Wed, 28 May 2025 14:44:30 +0200 Subject: [PATCH 4/9] Add BinaryPrimitivesCompat methods for float32 little-endian serialization Signed-off-by: Medha Tiwari --- src/MongoDB.Bson/IO/BinaryPrimitivesCompat.cs | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/MongoDB.Bson/IO/BinaryPrimitivesCompat.cs b/src/MongoDB.Bson/IO/BinaryPrimitivesCompat.cs index 323649f6d08..82745fd126b 100644 --- a/src/MongoDB.Bson/IO/BinaryPrimitivesCompat.cs +++ b/src/MongoDB.Bson/IO/BinaryPrimitivesCompat.cs @@ -31,5 +31,35 @@ public static void WriteDoubleLittleEndian(Span destination, double value) { BinaryPrimitives.WriteInt64LittleEndian(destination, BitConverter.DoubleToInt64Bits(value)); } + public static float ReadSingleLittleEndian(ReadOnlySpan source) + { + if (source.Length < 4) + { + throw new ArgumentOutOfRangeException(nameof(source), "Source span is too small to contain a float."); + } + + int intValue = + source[0] | + (source[1] << 8) | + (source[2] << 16) | + (source[3] << 24); + + return BitConverter.Int32BitsToSingle(intValue); + } + + public static void WriteSingleLittleEndian(Span destination, float value) + { + if (destination.Length < 4) + { + throw new ArgumentOutOfRangeException(nameof(destination), "Destination span is too small to hold a float."); + } + + int intValue = BitConverter.SingleToInt32Bits(value); + destination[0] = (byte)(intValue); + destination[1] = (byte)(intValue >> 8); + destination[2] = (byte)(intValue >> 16); + destination[3] = (byte)(intValue >> 24); + } + } } From f43d935f961a68d07d095d5e5c06556a579b6c72 Mon Sep 17 00:00:00 2001 From: Medha Tiwari Date: Wed, 28 May 2025 14:50:49 +0200 Subject: [PATCH 5/9] Add float32 BinaryVector serialization/deserialization with endian handling Signed-off-by: Medha Tiwari --- .../Serialization/BinaryVectorReader.cs | 39 ++++++++----------- .../Serialization/BinaryVectorWriter.cs | 39 +++++++------------ 2 files changed, 32 insertions(+), 46 deletions(-) diff --git a/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs b/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs index 66ae990e5bb..30baa3cc444 100644 --- a/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs +++ b/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs @@ -14,10 +14,10 @@ */ using System; -using System.Buffers.Binary; using System.Collections.Generic; using System.Linq; using System.Runtime.InteropServices; +using MongoDB.Bson.IO; namespace MongoDB.Bson.Serialization { @@ -27,6 +27,7 @@ public static BinaryVector ReadBinaryVector(ReadOnlyMemory v where TItem : struct { var (items, padding, vectorDataType) = ReadBinaryVectorAsArray(vectorData); + return CreateBinaryVector(items, padding, vectorDataType); } @@ -41,39 +42,24 @@ public static (TItem[] Items, byte Padding, BinaryVectorDataType VectorDataType) switch (vectorDataType) { case BinaryVectorDataType.Float32: + if ((vectorDataBytes.Span.Length & 3) != 0) { throw new FormatException("Data length of binary vector of type Float32 must be a multiple of 4 bytes."); } - if (typeof(TItem) != typeof(float)) - { - throw new NotSupportedException($"Expected float for Float32 vector type, but found {typeof(TItem)}."); - } - - int count = vectorDataBytes.Length / 4; // 4 bytes per float - float[] floatArray = new float[count]; - - for (int i = 0; i < count; i++) - { - // Each float32 is 4 bytes. So to extract the i-th float, we slice 4 bytes from offset i * 4. Use little-endian or big-endian decoding based on platform. - floatArray[i] = BitConverter.IsLittleEndian - ? MemoryMarshal.Read(vectorDataBytes.Span.Slice(i * 4, 4)) // fast, unaligned read on little endian - : BinaryPrimitives.ReadSingleBigEndian(vectorDataBytes.Span.Slice(i * 4, 4)); // correctly reassemble 4 bytes as big-endian float - } - + var floatArray = BitConverter.IsLittleEndian // We need not to use this condition here, just doing to keep the little endian logic intact + ? MemoryMarshal.Cast(vectorDataBytes.Span).ToArray() + : ToFloatArrayBigEndian(vectorDataBytes.Span); items = (TItem[])(object)floatArray; break; - case BinaryVectorDataType.Int8: var itemsSpan = MemoryMarshal.Cast(vectorDataBytes.Span); - items = itemsSpan.ToArray(); + items = (TItem[])(object)itemsSpan.ToArray(); break; - case BinaryVectorDataType.PackedBit: items = (TItem[])(object)vectorDataBytes.ToArray(); break; - default: throw new NotSupportedException($"Binary vector data type {vectorDataType} is not supported."); } @@ -157,6 +143,15 @@ private static void ValidateItemTypeForBinaryVector span) + { + var count = span.Length / 4; + var result = new float[count]; + for (int i = 0; i < count; i++) + { + result[i] = BinaryPrimitivesCompat.ReadSingleLittleEndian(span.Slice(i * 4, 4)); + } + return result; + } } } - diff --git a/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs b/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs index 83a1d012e04..3774eb626e0 100644 --- a/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs +++ b/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs @@ -14,8 +14,8 @@ */ using System; -using System.Buffers.Binary; using System.Runtime.InteropServices; +using MongoDB.Bson.IO; namespace MongoDB.Bson.Serialization { @@ -36,44 +36,35 @@ public static byte[] WriteToBytes(BinaryVector binaryVector) public static byte[] WriteToBytes(ReadOnlySpan vectorData, BinaryVectorDataType binaryVectorDataType, byte padding) where TItem : struct { - if (BitConverter.IsLittleEndian) - { - var vectorDataBytes = MemoryMarshal.Cast(vectorData); - byte[] result = [(byte)binaryVectorDataType, padding, .. vectorDataBytes]; - return result; - } - byte[] resultBytes; + switch (binaryVectorDataType) { case BinaryVectorDataType.Float32: int length = vectorData.Length * sizeof(float); - resultBytes = new byte[2 + length]; // Allocate output buffer: - resultBytes[0] = (byte)binaryVectorDataType; // - [0]: vector type - resultBytes[1] = padding; // - [1]: padding - var floatSpan = MemoryMarshal.Cast(vectorData); - Span floatOutput = resultBytes.AsSpan(2); // - [2...]: actual float data , skipping header - foreach (var value in floatSpan) + resultBytes = new byte[2 + length]; + resultBytes[0] = (byte)binaryVectorDataType; + resultBytes[1] = padding; + + var floatSpan = MemoryMarshal.Cast(vectorData); + Span floatOutput = resultBytes.AsSpan(2); + + for (int i = 0; i < floatSpan.Length; i++) { - // Each float is 4 bytes - write in Big Endian format - BinaryPrimitives.WriteSingleBigEndian(floatOutput, value); - floatOutput = floatOutput.Slice(4); // advance to next 4-byte block + BinaryPrimitivesCompat.WriteSingleLittleEndian(floatOutput, floatSpan[i]); + floatOutput = floatOutput.Slice(4); } + return resultBytes; case BinaryVectorDataType.Int8: case BinaryVectorDataType.PackedBit: var vectorDataBytes = MemoryMarshal.Cast(vectorData); - resultBytes = new byte[2 + vectorDataBytes.Length]; - resultBytes[0] = (byte)binaryVectorDataType; - resultBytes[1] = padding; - vectorDataBytes.CopyTo(resultBytes.AsSpan(2)); - return resultBytes; + return [(byte)binaryVectorDataType, padding, .. vectorDataBytes]; default: - throw new NotSupportedException($"Binary vector serialization is not supported for {binaryVectorDataType} on Big Endian architecture yet."); + throw new NotSupportedException($"Binary vector serialization is not supported for {binaryVectorDataType}."); } } } } - From 92b7ed242e8c474ace5a65b9e91283b0d52e0d84 Mon Sep 17 00:00:00 2001 From: Medha Tiwari Date: Thu, 29 May 2025 15:10:14 +0200 Subject: [PATCH 6/9] added tests for new methods in BinaryPrimitivesCompat Signed-off-by: Medha Tiwari --- .../IO/BinaryPrimitivesCompatTests.cs | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 tests/MongoDB.Bson.Tests/IO/BinaryPrimitivesCompatTests.cs diff --git a/tests/MongoDB.Bson.Tests/IO/BinaryPrimitivesCompatTests.cs b/tests/MongoDB.Bson.Tests/IO/BinaryPrimitivesCompatTests.cs new file mode 100644 index 00000000000..f5eeaf32c58 --- /dev/null +++ b/tests/MongoDB.Bson.Tests/IO/BinaryPrimitivesCompatTests.cs @@ -0,0 +1,52 @@ +using System; +using Xunit; +using MongoDB.Bson.IO; + +namespace MongoDB.Bson.Tests.IO +{ + public class BinaryPrimitivesCompatTests + { + [Theory] + [InlineData(0f)] + [InlineData(1.0f)] + [InlineData(-1.5f)] + [InlineData(float.MaxValue)] + [InlineData(float.MinValue)] + [InlineData(float.NaN)] + [InlineData(float.PositiveInfinity)] + [InlineData(float.NegativeInfinity)] + public void WriteAndReadSingleLittleEndian_should_roundtrip_correctly(float value) + { + Span buffer = new byte[4]; + + BinaryPrimitivesCompat.WriteSingleLittleEndian(buffer, value); + float result = BinaryPrimitivesCompat.ReadSingleLittleEndian(buffer); + + if (float.IsNaN(value)) + { + Assert.True(float.IsNaN(result)); + } + else + { + Assert.Equal(value, result); + } + } + + [Fact] + public void ReadSingleLittleEndian_should_throw_on_insufficient_length() + { + var shortBuffer = new byte[3]; + Assert.Throws(() => + BinaryPrimitivesCompat.ReadSingleLittleEndian(shortBuffer)); + } + + [Fact] + public void WriteSingleLittleEndian_should_throw_on_insufficient_length() + { + var shortBuffer = new byte[3]; + Assert.Throws(() => + BinaryPrimitivesCompat.WriteSingleLittleEndian(shortBuffer, 1.23f)); + } + } +} + From 530ecdae1a94850ecad7fa0bfa08f4f022dd6aee Mon Sep 17 00:00:00 2001 From: Medha Tiwari Date: Thu, 29 May 2025 15:10:44 +0200 Subject: [PATCH 7/9] resolved all the review comments Signed-off-by: Medha Tiwari --- src/MongoDB.Bson/IO/BinaryPrimitivesCompat.cs | 49 ++++++++++++++++- .../Serialization/BinaryVectorReader.cs | 36 +++++++----- .../Serialization/BinaryVectorWriter.cs | 14 +++-- .../BinaryVectorSerializerTests.cs | 55 +++++++++---------- 4 files changed, 107 insertions(+), 47 deletions(-) diff --git a/src/MongoDB.Bson/IO/BinaryPrimitivesCompat.cs b/src/MongoDB.Bson/IO/BinaryPrimitivesCompat.cs index 82745fd126b..3846df477c8 100644 --- a/src/MongoDB.Bson/IO/BinaryPrimitivesCompat.cs +++ b/src/MongoDB.Bson/IO/BinaryPrimitivesCompat.cs @@ -15,6 +15,7 @@ using System; using System.Buffers.Binary; +using System.Runtime.InteropServices; namespace MongoDB.Bson.IO { @@ -31,35 +32,79 @@ public static void WriteDoubleLittleEndian(Span destination, double value) { BinaryPrimitives.WriteInt64LittleEndian(destination, BitConverter.DoubleToInt64Bits(value)); } + public static float ReadSingleLittleEndian(ReadOnlySpan source) { +#if NET6_0_OR_GREATER + return BinaryPrimitives.ReadSingleLittleEndian(source); +#else if (source.Length < 4) { throw new ArgumentOutOfRangeException(nameof(source), "Source span is too small to contain a float."); } + // Manually construct a 32-bit integer from 4 bytes in Little Endian order. + // BSON mandates that all multibyte values-including float32-must be encoded + // using Little Endian byte order, regardless of the system architecture. + // + // This method ensures platform-agnostic behavior by explicitly assembling + // the bytes in the correct order, rather than relying on the system's native endianness. + // + // Given a byte sequence [a, b, c, d], representing a float encoded in Little Endian, + // the expression below constructs the 32-bit integer as: + // intValue = a + (b << 8) + (c << 16) + (d << 24) + // + // This preserves the intended bit pattern when converting back to float using + // BitConverter.Int32BitsToSingle. + // + // Example: + // A float value of 1.0f is represented in IEEE-754 binary32 format as: + // [0x00, 0x00, 0x80, 0x3F] (Little Endian) + // On a Big Endian system, naive interpretation would yield an incorrect value, + // but this method assembles the int as: + // 0x00 + (0x00 << 8) + (0x80 << 16) + (0x3F << 24) = 0x3F800000, + // which correctly maps to 1.0f. + // + // This guarantees BSON-compliant serialization across all platforms. int intValue = source[0] | (source[1] << 8) | (source[2] << 16) | (source[3] << 24); - return BitConverter.Int32BitsToSingle(intValue); + // This struct emulates BitConverter.Int32BitsToSingle for platforms like net472. + return new FloatIntUnion { IntValue = intValue }.FloatValue; +#endif } public static void WriteSingleLittleEndian(Span destination, float value) { +#if NET6_0_OR_GREATER + BinaryPrimitives.WriteSingleLittleEndian(destination, value); +#else if (destination.Length < 4) { throw new ArgumentOutOfRangeException(nameof(destination), "Destination span is too small to hold a float."); } - int intValue = BitConverter.SingleToInt32Bits(value); + // This struct emulates BitConverter.SingleToInt32Bits for platforms like net472. + int intValue = new FloatIntUnion { FloatValue = value }.IntValue; + destination[0] = (byte)(intValue); destination[1] = (byte)(intValue >> 8); destination[2] = (byte)(intValue >> 16); destination[3] = (byte)(intValue >> 24); +#endif } + // This layout trick allows safely reinterpreting float as int and vice versa. + // It ensures identical memory layout for both fields, used for low-level bit conversion + // in environments like net472 which lack BitConverter.SingleToInt32Bits and its inverse. + [StructLayout(LayoutKind.Explicit)] + private struct FloatIntUnion + { + [FieldOffset(0)] public float FloatValue; + [FieldOffset(0)] public int IntValue; + } } } diff --git a/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs b/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs index 30baa3cc444..91418b883a9 100644 --- a/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs +++ b/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs @@ -48,9 +48,7 @@ public static (TItem[] Items, byte Padding, BinaryVectorDataType VectorDataType) throw new FormatException("Data length of binary vector of type Float32 must be a multiple of 4 bytes."); } - var floatArray = BitConverter.IsLittleEndian // We need not to use this condition here, just doing to keep the little endian logic intact - ? MemoryMarshal.Cast(vectorDataBytes.Span).ToArray() - : ToFloatArrayBigEndian(vectorDataBytes.Span); + var floatArray = ReadSinglesArrayLittleEndian(vectorDataBytes.Span); items = (TItem[])(object)floatArray; break; case BinaryVectorDataType.Int8: @@ -119,6 +117,28 @@ TExpectedItem[] AsTypedArrayOrThrow() return result; } } + + private static float[] ReadSinglesArrayLittleEndian(ReadOnlySpan span) + { + if ((span.Length & 3) != 0) + { + throw new FormatException("Data length of binary vector of type Float32 must be a multiple of 4 bytes."); + } + int count = span.Length / 4; + float[] result = new float[count]; + if (BitConverter.IsLittleEndian) + { + MemoryMarshal.Cast(span).CopyTo(result); + } + else + { + for (int i = 0; i < count; i++) + { + result[i] = BinaryPrimitivesCompat.ReadSingleLittleEndian(span.Slice(i * 4, 4)); + } + } + return result; + } public static void ValidateItemType(BinaryVectorDataType binaryVectorDataType) { @@ -143,15 +163,5 @@ private static void ValidateItemTypeForBinaryVector span) - { - var count = span.Length / 4; - var result = new float[count]; - for (int i = 0; i < count; i++) - { - result[i] = BinaryPrimitivesCompat.ReadSingleLittleEndian(span.Slice(i * 4, 4)); - } - return result; - } } } diff --git a/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs b/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs index 3774eb626e0..7ec961e891e 100644 --- a/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs +++ b/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs @@ -41,7 +41,7 @@ public static byte[] WriteToBytes(ReadOnlySpan vectorData, BinaryV switch (binaryVectorDataType) { case BinaryVectorDataType.Float32: - int length = vectorData.Length * sizeof(float); + var length = vectorData.Length * sizeof(float); resultBytes = new byte[2 + length]; resultBytes[0] = (byte)binaryVectorDataType; resultBytes[1] = padding; @@ -49,10 +49,16 @@ public static byte[] WriteToBytes(ReadOnlySpan vectorData, BinaryV var floatSpan = MemoryMarshal.Cast(vectorData); Span floatOutput = resultBytes.AsSpan(2); - for (int i = 0; i < floatSpan.Length; i++) + if (BitConverter.IsLittleEndian) { - BinaryPrimitivesCompat.WriteSingleLittleEndian(floatOutput, floatSpan[i]); - floatOutput = floatOutput.Slice(4); + MemoryMarshal.Cast(floatSpan).CopyTo(floatOutput); + } + else + { + for (int i = 0; i < floatSpan.Length; i++) + { + BinaryPrimitivesCompat.WriteSingleLittleEndian(floatOutput.Slice(i * 4, 4), floatSpan[i]); + } } return resultBytes; diff --git a/tests/MongoDB.Bson.Tests/Serialization/Serializers/BinaryVectorSerializerTests.cs b/tests/MongoDB.Bson.Tests/Serialization/Serializers/BinaryVectorSerializerTests.cs index b77aad21e31..99eec9324b9 100644 --- a/tests/MongoDB.Bson.Tests/Serialization/Serializers/BinaryVectorSerializerTests.cs +++ b/tests/MongoDB.Bson.Tests/Serialization/Serializers/BinaryVectorSerializerTests.cs @@ -365,35 +365,16 @@ private BsonBinaryData SerializeToBinaryData(TCollection collection private static (T[], byte[] VectorBson) GetTestData(BinaryVectorDataType dataType, int elementsCount, byte bitsPadding) where T : struct { - var elementsSpan = new ReadOnlySpan( - Enumerable.Range(0, elementsCount) - .Select(i => Convert.ChangeType(i, typeof(T)).As()) - .ToArray()); - if (typeof(T) == typeof(float) && dataType == BinaryVectorDataType.Float32) - { - var buffer = new byte[2 + elementsCount * 4]; // 4 bytes per float - buffer[0] = (byte)dataType; - buffer[1] = bitsPadding; - for (int i = 0; i < elementsCount; i++) - { - var floatBytes = BitConverter.GetBytes((float)(object)elementsSpan[i]); - if (!BitConverter.IsLittleEndian) - { - Array.Reverse(floatBytes); - } - Buffer.BlockCopy(floatBytes, 0, buffer, 2 + i * 4, 4); - } - return (elementsSpan.ToArray(), buffer); - } - else if ((typeof(T) == typeof(byte) || typeof(T) == typeof(sbyte)) && (dataType == BinaryVectorDataType.Int8 || dataType == BinaryVectorDataType.PackedBit)) - { - byte[] vectorBsonData = [(byte)dataType, bitsPadding, .. MemoryMarshal.Cast(elementsSpan)]; + var elementsSpan = new ReadOnlySpan( + Enumerable.Range(0, elementsCount) + .Select(i => Convert.ChangeType(i, typeof(T)).As()) + .ToArray()); + var elementsBytesLittleEndian = BitConverter.IsLittleEndian + ? MemoryMarshal.Cast(elementsSpan) + : ToLittleEndian(elementsSpan, dataType); + + byte[] vectorBsonData = [(byte)dataType, bitsPadding, .. elementsBytesLittleEndian]; return (elementsSpan.ToArray(), vectorBsonData); - } - else - { - throw new NotSupportedException($"Type {typeof(T)} is not supported for data type {dataType}."); - } } private static (BinaryVector, byte[] VectorBson) GetTestDataBinaryVector(BinaryVectorDataType dataType, int elementsCount, byte bitsPadding) @@ -442,5 +423,23 @@ public class BinaryVectorNoAttributeHolder public BinaryVectorFloat32 ValuesFloat { get; set; } } + + private static byte[] ToLittleEndian(ReadOnlySpan span, BinaryVectorDataType dataType) where T : struct + { + // Types that do NOT need conversion safe on BE + if (dataType == BinaryVectorDataType.Int8 || dataType == BinaryVectorDataType.PackedBit) + { + return MemoryMarshal.Cast(span).ToArray(); + } + int elementSize = Marshal.SizeOf(); + byte[] result = new byte[span.Length * elementSize]; + for (int i = 0; i < span.Length; i++) + { + byte[] bytes = BitConverter.GetBytes((dynamic)span[i]); + Array.Reverse(bytes); // Ensure LE order + Buffer.BlockCopy(bytes, 0, result, i * elementSize, elementSize); + } + return result; + } } } From 02579c183cc3a5bc818e2c2b77afcd099d88ff9f Mon Sep 17 00:00:00 2001 From: Medha Tiwari Date: Fri, 30 May 2025 11:39:05 +0200 Subject: [PATCH 8/9] resolved all the comments --- src/MongoDB.Bson/IO/BinaryPrimitivesCompat.cs | 28 +------- .../Serialization/BinaryVectorReader.cs | 16 ++--- .../Serialization/BinaryVectorWriter.cs | 15 ++-- .../IO/BinaryPrimitivesCompatTests.cs | 71 ++++++++++++++----- .../BinaryVectorSerializerTests.cs | 27 +++---- 5 files changed, 84 insertions(+), 73 deletions(-) diff --git a/src/MongoDB.Bson/IO/BinaryPrimitivesCompat.cs b/src/MongoDB.Bson/IO/BinaryPrimitivesCompat.cs index 3846df477c8..87ea9d48fb8 100644 --- a/src/MongoDB.Bson/IO/BinaryPrimitivesCompat.cs +++ b/src/MongoDB.Bson/IO/BinaryPrimitivesCompat.cs @@ -43,29 +43,8 @@ public static float ReadSingleLittleEndian(ReadOnlySpan source) throw new ArgumentOutOfRangeException(nameof(source), "Source span is too small to contain a float."); } - // Manually construct a 32-bit integer from 4 bytes in Little Endian order. - // BSON mandates that all multibyte values-including float32-must be encoded - // using Little Endian byte order, regardless of the system architecture. - // - // This method ensures platform-agnostic behavior by explicitly assembling - // the bytes in the correct order, rather than relying on the system's native endianness. - // - // Given a byte sequence [a, b, c, d], representing a float encoded in Little Endian, - // the expression below constructs the 32-bit integer as: - // intValue = a + (b << 8) + (c << 16) + (d << 24) - // - // This preserves the intended bit pattern when converting back to float using - // BitConverter.Int32BitsToSingle. - // - // Example: - // A float value of 1.0f is represented in IEEE-754 binary32 format as: - // [0x00, 0x00, 0x80, 0x3F] (Little Endian) - // On a Big Endian system, naive interpretation would yield an incorrect value, - // but this method assembles the int as: - // 0x00 + (0x00 << 8) + (0x80 << 16) + (0x3F << 24) = 0x3F800000, - // which correctly maps to 1.0f. - // - // This guarantees BSON-compliant serialization across all platforms. + // Constructs a 32-bit float from 4 Little Endian bytes in a platform-agnostic way. + // Ensures correct bit pattern regardless of system endianness. int intValue = source[0] | (source[1] << 8) | @@ -97,9 +76,6 @@ public static void WriteSingleLittleEndian(Span destination, float value) #endif } - // This layout trick allows safely reinterpreting float as int and vice versa. - // It ensures identical memory layout for both fields, used for low-level bit conversion - // in environments like net472 which lack BitConverter.SingleToInt32Bits and its inverse. [StructLayout(LayoutKind.Explicit)] private struct FloatIntUnion { diff --git a/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs b/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs index 91418b883a9..db8ba1633f8 100644 --- a/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs +++ b/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs @@ -42,12 +42,6 @@ public static (TItem[] Items, byte Padding, BinaryVectorDataType VectorDataType) switch (vectorDataType) { case BinaryVectorDataType.Float32: - - if ((vectorDataBytes.Span.Length & 3) != 0) - { - throw new FormatException("Data length of binary vector of type Float32 must be a multiple of 4 bytes."); - } - var floatArray = ReadSinglesArrayLittleEndian(vectorDataBytes.Span); items = (TItem[])(object)floatArray; break; @@ -124,15 +118,17 @@ private static float[] ReadSinglesArrayLittleEndian(ReadOnlySpan span) { throw new FormatException("Data length of binary vector of type Float32 must be a multiple of 4 bytes."); } - int count = span.Length / 4; - float[] result = new float[count]; + + float[] result; if (BitConverter.IsLittleEndian) { - MemoryMarshal.Cast(span).CopyTo(result); + result = MemoryMarshal.Cast(span).ToArray(); } else { - for (int i = 0; i < count; i++) + var count = span.Length / 4; + result = new float[count]; + for (int i = 0; i < count; i++) { result[i] = BinaryPrimitivesCompat.ReadSingleLittleEndian(span.Slice(i * 4, 4)); } diff --git a/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs b/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs index 7ec961e891e..fd73b9ec943 100644 --- a/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs +++ b/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs @@ -36,18 +36,17 @@ public static byte[] WriteToBytes(BinaryVector binaryVector) public static byte[] WriteToBytes(ReadOnlySpan vectorData, BinaryVectorDataType binaryVectorDataType, byte padding) where TItem : struct { - byte[] resultBytes; - switch (binaryVectorDataType) { case BinaryVectorDataType.Float32: - var length = vectorData.Length * sizeof(float); - resultBytes = new byte[2 + length]; - resultBytes[0] = (byte)binaryVectorDataType; - resultBytes[1] = padding; + byte[] result; + var length = vectorData.Length * 4; + result = new byte[2 + length]; + result[0] = (byte)binaryVectorDataType; + result[1] = padding; var floatSpan = MemoryMarshal.Cast(vectorData); - Span floatOutput = resultBytes.AsSpan(2); + var floatOutput = result.AsSpan(2); if (BitConverter.IsLittleEndian) { @@ -61,7 +60,7 @@ public static byte[] WriteToBytes(ReadOnlySpan vectorData, BinaryV } } - return resultBytes; + return result; case BinaryVectorDataType.Int8: case BinaryVectorDataType.PackedBit: diff --git a/tests/MongoDB.Bson.Tests/IO/BinaryPrimitivesCompatTests.cs b/tests/MongoDB.Bson.Tests/IO/BinaryPrimitivesCompatTests.cs index f5eeaf32c58..fe329a549fc 100644 --- a/tests/MongoDB.Bson.Tests/IO/BinaryPrimitivesCompatTests.cs +++ b/tests/MongoDB.Bson.Tests/IO/BinaryPrimitivesCompatTests.cs @@ -1,11 +1,65 @@ +/* Copyright 2010-present MongoDB Inc. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + using System; using Xunit; +using FluentAssertions; using MongoDB.Bson.IO; namespace MongoDB.Bson.Tests.IO { public class BinaryPrimitivesCompatTests { + [Fact] + public void ReadSingleLittleEndian_should_read_correctly() + { + var bytes = new byte[] { 0x00, 0x00, 0x80, 0x3F }; // 1.0f in little endian + var result = BinaryPrimitivesCompat.ReadSingleLittleEndian(bytes); + result.Should().Be(1.0f); + } + + [Fact] + public void ReadSingleLittleEndian_should_throw_on_insufficient_length() + { + var shortBuffer = new byte[3]; + var exception = Record.Exception(() => + BinaryPrimitivesCompat.ReadSingleLittleEndian(shortBuffer)); + + exception.Should().BeOfType(); + exception.Message.Should().Contain("length"); + } + + [Fact] + public void WriteSingleLittleEndian_should_throw_on_insufficient_length() + { + var shortBuffer = new byte[3]; + var exception = Record.Exception(() => + BinaryPrimitivesCompat.WriteSingleLittleEndian(shortBuffer, 1.23f)); + + exception.Should().BeOfType(); + exception.Message.Should().Contain("length"); + } + + [Fact] + public void WriteSingleLittleEndian_should_write_correctly() + { + Span buffer = new byte[4]; + BinaryPrimitivesCompat.WriteSingleLittleEndian(buffer, 1.0f); + buffer.ToArray().Should().Equal(0x00, 0x00, 0x80, 0x3F); // 1.0f little-endian + } + [Theory] [InlineData(0f)] [InlineData(1.0f)] @@ -31,22 +85,5 @@ public void WriteAndReadSingleLittleEndian_should_roundtrip_correctly(float valu Assert.Equal(value, result); } } - - [Fact] - public void ReadSingleLittleEndian_should_throw_on_insufficient_length() - { - var shortBuffer = new byte[3]; - Assert.Throws(() => - BinaryPrimitivesCompat.ReadSingleLittleEndian(shortBuffer)); - } - - [Fact] - public void WriteSingleLittleEndian_should_throw_on_insufficient_length() - { - var shortBuffer = new byte[3]; - Assert.Throws(() => - BinaryPrimitivesCompat.WriteSingleLittleEndian(shortBuffer, 1.23f)); - } } } - diff --git a/tests/MongoDB.Bson.Tests/Serialization/Serializers/BinaryVectorSerializerTests.cs b/tests/MongoDB.Bson.Tests/Serialization/Serializers/BinaryVectorSerializerTests.cs index 99eec9324b9..274e93de373 100644 --- a/tests/MongoDB.Bson.Tests/Serialization/Serializers/BinaryVectorSerializerTests.cs +++ b/tests/MongoDB.Bson.Tests/Serialization/Serializers/BinaryVectorSerializerTests.cs @@ -371,7 +371,7 @@ private static (T[], byte[] VectorBson) GetTestData(BinaryVectorDataType data .ToArray()); var elementsBytesLittleEndian = BitConverter.IsLittleEndian ? MemoryMarshal.Cast(elementsSpan) - : ToLittleEndian(elementsSpan, dataType); + : BigEndianToLittleEndian(elementsSpan, dataType); byte[] vectorBsonData = [(byte)dataType, bitsPadding, .. elementsBytesLittleEndian]; return (elementsSpan.ToArray(), vectorBsonData); @@ -415,31 +415,34 @@ private static IBsonSerializer CreateBinaryVectorSerializer(BinaryVectorDataT return serializer; } - public class BinaryVectorNoAttributeHolder - { - public BinaryVectorInt8 ValuesInt8 { get; set; } - - public BinaryVectorPackedBit ValuesPackedBit { get; set; } - - public BinaryVectorFloat32 ValuesFloat { get; set; } - } - - private static byte[] ToLittleEndian(ReadOnlySpan span, BinaryVectorDataType dataType) where T : struct + private static byte[] BigEndianToLittleEndian(ReadOnlySpan span, BinaryVectorDataType dataType) where T : struct { // Types that do NOT need conversion safe on BE if (dataType == BinaryVectorDataType.Int8 || dataType == BinaryVectorDataType.PackedBit) { return MemoryMarshal.Cast(span).ToArray(); } - int elementSize = Marshal.SizeOf(); + + var elementSize = Marshal.SizeOf(); byte[] result = new byte[span.Length * elementSize]; + for (int i = 0; i < span.Length; i++) { byte[] bytes = BitConverter.GetBytes((dynamic)span[i]); Array.Reverse(bytes); // Ensure LE order Buffer.BlockCopy(bytes, 0, result, i * elementSize, elementSize); } + return result; } + + public class BinaryVectorNoAttributeHolder + { + public BinaryVectorInt8 ValuesInt8 { get; set; } + + public BinaryVectorPackedBit ValuesPackedBit { get; set; } + + public BinaryVectorFloat32 ValuesFloat { get; set; } + } } } From c547a1504f0e7aee30380c79d4023b7fbd2aa94d Mon Sep 17 00:00:00 2001 From: Medha Tiwari Date: Fri, 30 May 2025 20:59:52 +0200 Subject: [PATCH 9/9] another set of changes to resolve minor issues Signed-off-by: Medha Tiwari --- src/MongoDB.Bson/Serialization/BinaryVectorReader.cs | 4 ++-- src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs | 3 +-- .../MongoDB.Bson.Tests/IO/BinaryPrimitivesCompatTests.cs | 8 ++++---- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs b/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs index db8ba1633f8..3b751f3c9d6 100644 --- a/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs +++ b/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs @@ -127,8 +127,8 @@ private static float[] ReadSinglesArrayLittleEndian(ReadOnlySpan span) else { var count = span.Length / 4; - result = new float[count]; - for (int i = 0; i < count; i++) + result = new float[count]; + for (int i = 0; i < count; i++) { result[i] = BinaryPrimitivesCompat.ReadSingleLittleEndian(span.Slice(i * 4, 4)); } diff --git a/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs b/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs index fd73b9ec943..9f1ba73b075 100644 --- a/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs +++ b/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs @@ -39,9 +39,8 @@ public static byte[] WriteToBytes(ReadOnlySpan vectorData, BinaryV switch (binaryVectorDataType) { case BinaryVectorDataType.Float32: - byte[] result; var length = vectorData.Length * 4; - result = new byte[2 + length]; + var result = new byte[2 + length]; result[0] = (byte)binaryVectorDataType; result[1] = padding; diff --git a/tests/MongoDB.Bson.Tests/IO/BinaryPrimitivesCompatTests.cs b/tests/MongoDB.Bson.Tests/IO/BinaryPrimitivesCompatTests.cs index fe329a549fc..03ff78e467c 100644 --- a/tests/MongoDB.Bson.Tests/IO/BinaryPrimitivesCompatTests.cs +++ b/tests/MongoDB.Bson.Tests/IO/BinaryPrimitivesCompatTests.cs @@ -37,8 +37,8 @@ public void ReadSingleLittleEndian_should_throw_on_insufficient_length() var exception = Record.Exception(() => BinaryPrimitivesCompat.ReadSingleLittleEndian(shortBuffer)); - exception.Should().BeOfType(); - exception.Message.Should().Contain("length"); + var e = exception.Should().BeOfType().Subject; + e.ParamName.Should().Be("length"); } [Fact] @@ -48,8 +48,8 @@ public void WriteSingleLittleEndian_should_throw_on_insufficient_length() var exception = Record.Exception(() => BinaryPrimitivesCompat.WriteSingleLittleEndian(shortBuffer, 1.23f)); - exception.Should().BeOfType(); - exception.Message.Should().Contain("length"); + var e = exception.Should().BeOfType().Subject; + e.ParamName.Should().Be("length"); } [Fact]