-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Add Big Endian Support for Float32 in BinaryVectorWriter.WriteToBytes<T>() #1682
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
ff89368
2c2cae1
2c4a16a
0ee1694
f43d935
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,5 +31,35 @@ public static void WriteDoubleLittleEndian(Span<byte> destination, double value) | |
{ | ||
BinaryPrimitives.WriteInt64LittleEndian(destination, BitConverter.DoubleToInt64Bits(value)); | ||
} | ||
public static float ReadSingleLittleEndian(ReadOnlySpan<byte> source) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you please add a fallback to |
||
{ | ||
if (source.Length < 4) | ||
{ | ||
throw new ArgumentOutOfRangeException(nameof(source), "Source span is too small to contain a float."); | ||
} | ||
|
||
int intValue = | ||
source[0] | | ||
(source[1] << 8) | | ||
(source[2] << 16) | | ||
(source[3] << 24); | ||
|
||
return BitConverter.Int32BitsToSingle(intValue); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Making sure, does this conversion to We need these methods to support both platforms. |
||
} | ||
|
||
public static void WriteSingleLittleEndian(Span<byte> destination, float value) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need unit tests tests for new methods. |
||
{ | ||
if (destination.Length < 4) | ||
{ | ||
throw new ArgumentOutOfRangeException(nameof(destination), "Destination span is too small to hold a float."); | ||
} | ||
|
||
int intValue = BitConverter.SingleToInt32Bits(value); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This fails on net472. |
||
destination[0] = (byte)(intValue); | ||
destination[1] = (byte)(intValue >> 8); | ||
destination[2] = (byte)(intValue >> 16); | ||
destination[3] = (byte)(intValue >> 24); | ||
} | ||
|
||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ | |
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Runtime.InteropServices; | ||
using MongoDB.Bson.IO; | ||
|
||
namespace MongoDB.Bson.Serialization | ||
{ | ||
|
@@ -47,15 +48,10 @@ public static (TItem[] Items, byte Padding, BinaryVectorDataType VectorDataType) | |
throw new FormatException("Data length of binary vector of type Float32 must be a multiple of 4 bytes."); | ||
} | ||
|
||
if (BitConverter.IsLittleEndian) | ||
{ | ||
var singles = MemoryMarshal.Cast<byte, float>(vectorDataBytes.Span); | ||
items = (TItem[])(object)singles.ToArray(); | ||
} | ||
else | ||
{ | ||
throw new NotSupportedException("Binary vector data is not supported on Big Endian architecture yet."); | ||
} | ||
var floatArray = BitConverter.IsLittleEndian // We need not to use this condition here, just doing to keep the little endian logic intact | ||
? MemoryMarshal.Cast<byte, float>(vectorDataBytes.Span).ToArray() | ||
: ToFloatArrayBigEndian(vectorDataBytes.Span); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Now thinking more about this, I think |
||
items = (TItem[])(object)floatArray; | ||
break; | ||
case BinaryVectorDataType.Int8: | ||
var itemsSpan = MemoryMarshal.Cast<byte, TItem>(vectorDataBytes.Span); | ||
|
@@ -147,5 +143,15 @@ private static void ValidateItemTypeForBinaryVector<TItem, TItemExpectedType, TB | |
throw new NotSupportedException($"Expected {typeof(TItemExpectedType)} for {typeof(TBinaryVectorType)}, but found {typeof(TItem)}."); | ||
} | ||
} | ||
private static float[] ToFloatArrayBigEndian(ReadOnlySpan<byte> span) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add an empty line before the method, and locate it in alphabetical order relative to other private methods. |
||
{ | ||
var count = span.Length / 4; | ||
var result = new float[count]; | ||
for (int i = 0; i < count; i++) | ||
{ | ||
result[i] = BinaryPrimitivesCompat.ReadSingleLittleEndian(span.Slice(i * 4, 4)); | ||
} | ||
return result; | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ | |
|
||
using System; | ||
using System.Runtime.InteropServices; | ||
using MongoDB.Bson.IO; | ||
|
||
namespace MongoDB.Bson.Serialization | ||
{ | ||
|
@@ -35,15 +36,35 @@ public static byte[] WriteToBytes<TItem>(BinaryVector<TItem> binaryVector) | |
public static byte[] WriteToBytes<TItem>(ReadOnlySpan<TItem> vectorData, BinaryVectorDataType binaryVectorDataType, byte padding) | ||
where TItem : struct | ||
{ | ||
if (!BitConverter.IsLittleEndian) | ||
byte[] resultBytes; | ||
|
||
switch (binaryVectorDataType) | ||
{ | ||
throw new NotSupportedException("Binary vector data is not supported on Big Endian architecture yet."); | ||
} | ||
case BinaryVectorDataType.Float32: | ||
int length = vectorData.Length * sizeof(float); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. minor: Prefer |
||
resultBytes = new byte[2 + length]; | ||
resultBytes[0] = (byte)binaryVectorDataType; | ||
resultBytes[1] = padding; | ||
|
||
var floatSpan = MemoryMarshal.Cast<TItem, float>(vectorData); | ||
Span<byte> floatOutput = resultBytes.AsSpan(2); | ||
|
||
var vectorDataBytes = MemoryMarshal.Cast<TItem, byte>(vectorData); | ||
byte[] result = [(byte)binaryVectorDataType, padding, .. vectorDataBytes]; | ||
for (int i = 0; i < floatSpan.Length; i++) | ||
{ | ||
BinaryPrimitivesCompat.WriteSingleLittleEndian(floatOutput, floatSpan[i]); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same comment as previously with Read. We don't want to introduce a loops instead of simple cast in the case of little endian. |
||
floatOutput = floatOutput.Slice(4); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please use consistent convention for slicing spans both in Read and Write. |
||
} | ||
|
||
return result; | ||
return resultBytes; | ||
|
||
case BinaryVectorDataType.Int8: | ||
case BinaryVectorDataType.PackedBit: | ||
var vectorDataBytes = MemoryMarshal.Cast<TItem, byte>(vectorData); | ||
return [(byte)binaryVectorDataType, padding, .. vectorDataBytes]; | ||
|
||
default: | ||
throw new NotSupportedException($"Binary vector serialization is not supported for {binaryVectorDataType}."); | ||
} | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -365,10 +365,35 @@ private BsonBinaryData SerializeToBinaryData<TCollection>(TCollection collection | |
private static (T[], byte[] VectorBson) GetTestData<T>(BinaryVectorDataType dataType, int elementsCount, byte bitsPadding) | ||
where T : struct | ||
{ | ||
var elementsSpan = new ReadOnlySpan<T>(Enumerable.Range(0, elementsCount).Select(i => Convert.ChangeType(i, typeof(T)).As<T>()).ToArray()); | ||
byte[] vectorBsonData = [(byte)dataType, bitsPadding, .. MemoryMarshal.Cast<T, byte>(elementsSpan)]; | ||
|
||
return (elementsSpan.ToArray(), vectorBsonData); | ||
var elementsSpan = new ReadOnlySpan<T>( | ||
Enumerable.Range(0, elementsCount) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The indentation should be:
|
||
.Select(i => Convert.ChangeType(i, typeof(T)).As<T>()) | ||
.ToArray()); | ||
if (typeof(T) == typeof(float) && dataType == BinaryVectorDataType.Float32) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would suggest the following alternative:
While |
||
{ | ||
var buffer = new byte[2 + elementsCount * 4]; // 4 bytes per float | ||
buffer[0] = (byte)dataType; | ||
buffer[1] = bitsPadding; | ||
for (int i = 0; i < elementsCount; i++) | ||
{ | ||
var floatBytes = BitConverter.GetBytes((float)(object)elementsSpan[i]); | ||
if (!BitConverter.IsLittleEndian) | ||
{ | ||
Array.Reverse(floatBytes); | ||
} | ||
Buffer.BlockCopy(floatBytes, 0, buffer, 2 + i * 4, 4); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. While we should not be relying on |
||
} | ||
return (elementsSpan.ToArray(), buffer); | ||
} | ||
else if ((typeof(T) == typeof(byte) || typeof(T) == typeof(sbyte)) && (dataType == BinaryVectorDataType.Int8 || dataType == BinaryVectorDataType.PackedBit)) | ||
{ | ||
byte[] vectorBsonData = [(byte)dataType, bitsPadding, .. MemoryMarshal.Cast<T, byte>(elementsSpan)]; | ||
return (elementsSpan.ToArray(), vectorBsonData); | ||
} | ||
else | ||
{ | ||
throw new NotSupportedException($"Type {typeof(T)} is not supported for data type {dataType}."); | ||
} | ||
} | ||
|
||
private static (BinaryVector<T>, byte[] VectorBson) GetTestDataBinaryVector<T>(BinaryVectorDataType dataType, int elementsCount, byte bitsPadding) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add an empty time before the new method, and place it in alphabetical order.