Skip to content

Optimize BsonArray Index encoding #1673

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 28 additions & 4 deletions bson/src/main/org/bson/BsonBinaryWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.List;
import java.util.Stack;

import static java.lang.Math.max;
import static java.lang.String.format;
import static org.bson.assertions.Assertions.notNull;

Expand All @@ -37,13 +38,34 @@ public class BsonBinaryWriter extends AbstractBsonWriter {

private final BsonOutput bsonOutput;
private final Stack<Integer> maxDocumentSizeStack = new Stack<>();
private static final int ARRAY_INDEXES_CACHE_SIZE = 256;
private static final String[] ARRAY_INDEXES_CACHE = new String[ARRAY_INDEXES_CACHE_SIZE];
private static final int ARRAY_INDEXES_CACHE_SIZE = 1000;
private static final byte[] ARRAY_INDEXES_BUFFER;
Copy link
Member Author

@vbabanin vbabanin Apr 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In prior benchmarks, the reference-chasing layout (i.e., byte[][]) showed ~25% lower throughput compared to a flat byte[] layout, primarily due to fragmented sequential locality and indirect memory access.

private static final int[] ARRAY_INDEXES_OFFSETS;
private static final int[] ARRAY_INDEXES_LENGTHS;
private Mark mark;

// Builds the lookup tables used when writing array element names: the decimal text of every
// index in [0, ARRAY_INDEXES_CACHE_SIZE) is stored, each followed by a 0 byte, back to back in
// ARRAY_INDEXES_BUFFER. ARRAY_INDEXES_OFFSETS[i] is where entry i starts and
// ARRAY_INDEXES_LENGTHS[i] is its size in bytes including the trailing 0.
static {
ARRAY_INDEXES_LENGTHS = new int[ARRAY_INDEXES_CACHE_SIZE];
ARRAY_INDEXES_OFFSETS = new int[ARRAY_INDEXES_CACHE_SIZE];
// First pass: add up the bytes every entry needs so the buffer can be allocated exactly once.
int totalSize = 0;
for (int i = 0; i < ARRAY_INDEXES_CACHE_SIZE; i++) {
// NOTE(review): this writes to the String[] cache while the rest of the block fills the flat
// byte buffer; it looks like the pre-change side of the diff -- confirm before relying on it.
ARRAY_INDEXES_CACHE[i] = Integer.toString(i);
// floor(log10(i)) + 1 is the digit count of i; max(i, 1) avoids log10(0), which is negative
// infinity, so index 0 is counted as one digit. The cast truncates the sum to an int.
totalSize += (int) (Math.log10(max(i, 1))
+ 1 // number of digits
+ 1); // +1 for null terminator
}
ARRAY_INDEXES_BUFFER = new byte[totalSize];

// Fill buffer
// Second pass: append each index's characters followed by a 0 byte, and record where the entry
// sits in the buffer.
int offset = 0;
for (int i = 0; i < ARRAY_INDEXES_CACHE_SIZE; i++) {
String string = Integer.toString(i);
int length = string.length();
for (int j = 0; j < length; j++) {
// The characters are the digits '0'..'9', so narrowing char to byte loses nothing.
ARRAY_INDEXES_BUFFER[offset++] = (byte) string.charAt(j);
}
ARRAY_INDEXES_BUFFER[offset++] = 0;
// offset now points just past the terminator; step back over the entry to get its start.
ARRAY_INDEXES_OFFSETS[i] = offset - (length + 1);
ARRAY_INDEXES_LENGTHS[i] = length + 1; // +1 for null terminator
}
}

Expand Down Expand Up @@ -409,7 +431,9 @@ private void writeCurrentName() {
if (index >= ARRAY_INDEXES_CACHE_SIZE) {
bsonOutput.writeCString(Integer.toString(index));
} else {
bsonOutput.writeCString(ARRAY_INDEXES_CACHE[index]);
bsonOutput.writeBytes(ARRAY_INDEXES_BUFFER,
ARRAY_INDEXES_OFFSETS[index],
ARRAY_INDEXES_LENGTHS[index]);
}
} else {
bsonOutput.writeCString(getName());
Expand Down
8 changes: 4 additions & 4 deletions bson/src/test/unit/org/bson/BsonBinaryWriterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public void shouldThrowWhenMaxDocumentSizeIsExceeded() {
writer.writeEndDocument();
fail();
} catch (BsonMaximumSizeExceededException e) {
assertEquals("Document size of 1037 is larger than maximum of 1024.", e.getMessage());
assertEquals("Document size of 12917 is larger than maximum of 12904.", e.getMessage());
Copy link
Member

@rozza rozza Apr 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did this get missed last time? Yes it did - thanks for fixing.

}
}

Expand Down Expand Up @@ -201,16 +201,16 @@ public void testWriteArray() {
public void testWriteArrayElements() throws IOException {
ByteArrayOutputStream expectedOutput = new ByteArrayOutputStream();
expectedOutput.write(new byte[]{
88, 11, 0, 0, //document length
-52, 25, 0, 0, //document length
4, // array type
97, 49, 0, // "a1" name + null terminator
79, 11, 0, 0}); // array length
-61, 25, 0, 0}); // array length


writer.writeStartDocument();
writer.writeStartArray("a1");
int arrayIndex = 0;
while (arrayIndex < 500) {
while (arrayIndex < 1100) {
writer.writeBoolean(true);

expectedOutput.write(BsonType.BOOLEAN.getValue());
Expand Down