Skip to content

Optimize BSON decoding #1667

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 25 commits into from
Apr 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
9e109b4
Optimize BSON decoding
vbabanin Mar 31, 2025
c91b341
Merge branch 'refs/heads/main' into string-read-optm
vbabanin Apr 3, 2025
04965cd
Add scratch buffer read optimization.
vbabanin Apr 3, 2025
955b8c5
Remove TODO.
vbabanin Apr 3, 2025
dda00e3
Address static check warnings.
vbabanin Apr 3, 2025
579e2b1
Fix index calculation.
vbabanin Apr 3, 2025
8914813
Allocate direct buffer in tests.
vbabanin Apr 3, 2025
9422cf9
Remove redundant branching.
vbabanin Apr 4, 2025
95e890f
Split when & then comments.
vbabanin Apr 4, 2025
84e7728
Rename tests.
vbabanin Apr 4, 2025
1306fe8
Merge branch 'main' into string-read-optm
vbabanin Apr 5, 2025
a3d3f44
Rename method.
vbabanin Apr 5, 2025
0266a90
Merge remote-tracking branch 'vbabanin/string-read-optm' into string-…
vbabanin Apr 5, 2025
bdfe6d2
Fix static checks.
vbabanin Apr 8, 2025
95ab04b
Fix test.
vbabanin Apr 9, 2025
7199945
Add comments.
vbabanin Apr 12, 2025
d43b83d
Merge branch 'refs/heads/main' into string-read-optm
vbabanin Apr 17, 2025
7fc074f
Merge branch 'main' into string-read-optm
vbabanin Apr 17, 2025
5792d35
Fix ByteBufferBsonOutput buffer caching logic.
vbabanin Apr 17, 2025
0084f4f
Revert SWAR optimization.
vbabanin Apr 23, 2025
cf51555
Remove PlatformUtil.
vbabanin Apr 24, 2025
9c20c99
Fix static checks.
vbabanin Apr 24, 2025
a094261
Merge branch 'main' into string-read-optm
vbabanin Apr 24, 2025
ce754d6
Make computeCStringLength private.
vbabanin Apr 24, 2025
4718ea9
Merge branch 'main' into string-read-optm
vbabanin Apr 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bson/src/main/org/bson/ByteBuf.java
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ public interface ByteBuf {
* @return {@code true} if, and only if, this buffer is backed by an array and is not read-only
* @since 5.5
*/
boolean hasArray();
boolean isBackedByArray();

/**
* Returns the offset of the first byte within the backing byte array of
Expand Down
2 changes: 1 addition & 1 deletion bson/src/main/org/bson/ByteBufNIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ public byte[] array() {
}

@Override
public boolean hasArray() {
public boolean isBackedByArray() {
return buf.hasArray();
}

Expand Down
63 changes: 46 additions & 17 deletions bson/src/main/org/bson/io/ByteBufferBsonInput.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@
public class ByteBufferBsonInput implements BsonInput {

private static final String[] ONE_BYTE_ASCII_STRINGS = new String[Byte.MAX_VALUE + 1];
/* A dynamically sized scratch buffer that is reused across BSON String reads. Reusing it:
* 1. Reduces garbage collection by avoiding new byte array creation.
* 2. Improves cache utilization through temporal locality.
* 3. Avoids JVM allocation and zeroing cost for new memory allocations.
*/
private byte[] scratchBuffer;


static {
for (int b = 0; b < ONE_BYTE_ASCII_STRINGS.length; b++) {
Expand Down Expand Up @@ -127,15 +134,12 @@ public String readString() {

@Override
public String readCString() {
int mark = buffer.position();
skipCString();
int size = buffer.position() - mark;
buffer.position(mark);
int size = computeCStringLength(buffer.position());
return readString(size);
}

private String readString(final int size) {
if (size == 2) {
private String readString(final int bsonStringSize) {
if (bsonStringSize == 2) {
byte asciiByte = buffer.get(); // if only one byte in the string, it must be ascii.
byte nullByte = buffer.get(); // read null terminator
if (nullByte != 0) {
Expand All @@ -146,26 +150,51 @@ private String readString(final int size) {
}
return ONE_BYTE_ASCII_STRINGS[asciiByte]; // this will throw if asciiByte is negative
} else {
byte[] bytes = new byte[size - 1];
buffer.get(bytes);
byte nullByte = buffer.get();
if (nullByte != 0) {
throw new BsonSerializationException("Found a BSON string that is not null-terminated");
if (buffer.isBackedByArray()) {
int position = buffer.position();
int arrayOffset = buffer.arrayOffset();
int newPosition = position + bsonStringSize;
buffer.position(newPosition);

byte[] array = buffer.array();
if (array[arrayOffset + newPosition - 1] != 0) {
throw new BsonSerializationException("Found a BSON string that is not null-terminated");
}
return new String(array, arrayOffset + position, bsonStringSize - 1, StandardCharsets.UTF_8);
} else if (scratchBuffer == null || bsonStringSize > scratchBuffer.length) {
int scratchBufferSize = bsonStringSize + (bsonStringSize >>> 1); //1.5 times the size
scratchBuffer = new byte[scratchBufferSize];
}

buffer.get(scratchBuffer, 0, bsonStringSize);
if (scratchBuffer[bsonStringSize - 1] != 0) {
throw new BsonSerializationException("BSON string not null-terminated");
}
return new String(bytes, StandardCharsets.UTF_8);
return new String(scratchBuffer, 0, bsonStringSize - 1, StandardCharsets.UTF_8);
}
}

@Override
public void skipCString() {
ensureOpen();
boolean checkNext = true;
while (checkNext) {
if (!buffer.hasRemaining()) {
throw new BsonSerializationException("Found a BSON string that is not null-terminated");
int pos = buffer.position();
int length = computeCStringLength(pos);
buffer.position(pos + length);
}

private int computeCStringLength(final int prevPos) {
ensureOpen();
int pos = buffer.position();
int limit = buffer.limit();

while (pos < limit) {
if (buffer.get(pos++) == 0) {
return (pos - prevPos);
}
checkNext = buffer.get() != 0;
}

buffer.position(pos);
throw new BsonSerializationException("Found a BSON string that is not null-terminated");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ protected int writeCharacters(final String str, final boolean checkNullTerminati
int curBufferLimit = curBuffer.limit();
int remaining = curBufferLimit - curBufferPos;

if (curBuffer.hasArray()) {
if (curBuffer.isBackedByArray()) {
byte[] dst = curBuffer.array();
int arrayOffset = curBuffer.arrayOffset();
if (remaining >= str.length() + 1) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ public byte[] array() {
}

@Override
public boolean hasArray() {
public boolean isBackedByArray() {
return false;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,15 @@ <T extends BsonDocument> T getResponseDocument(final int messageId, final Decode
}

/**
* Returns a read-only buffer containing the response body. Care should be taken to not use the returned buffer after this instance has
* Returns a buffer containing the response body. Care should be taken to not use the returned buffer after this instance has
* been closed.
*
* @return a read-only buffer containing the response body
* NOTE: do not modify this buffer. It is writable only for performance reasons, to avoid redundant copying.
*
* @return a buffer containing the response body
*/
public ByteBuf getBodyByteBuffer() {
return bodyByteBuffer.asReadOnly();
return bodyByteBuffer;
}

public void reset() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ public byte[] array() {
}

@Override
public boolean hasArray() {
public boolean isBackedByArray() {
return proxied.hasArray();
}

Expand Down
Loading