Skip to content

Commit ea85540

Browse files
authored
Optimize String write (#1651)
- Remove redundant bounds checking. - Add ASCII fast-path. - Add tests for UTF-8 encoding across buffer boundaries and malformed surrogate pairs. - Fix ByteBuf leak in pipe() method - two buffers were not released. JAVA-5816
1 parent 04b9f85 commit ea85540

File tree

8 files changed

+1039
-151
lines changed

8 files changed

+1039
-151
lines changed

bson/src/main/org/bson/ByteBuf.java

+20
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,26 @@ public interface ByteBuf {
184184
*/
185185
byte[] array();
186186

187+
/**
188+
* <p>States whether this buffer is backed by an accessible byte array.</p>
189+
*
190+
* <p>If this method returns {@code true} then the {@link #array()} and {@link #arrayOffset()} methods may safely be invoked.</p>
191+
*
192+
* @return {@code true} if, and only if, this buffer is backed by an array and is not read-only
193+
* @since 5.5
194+
*/
195+
boolean hasArray();
196+
197+
/**
198+
* Returns the offset of the first byte within the backing byte array of
199+
* this buffer.
200+
*
201+
* @throws java.nio.ReadOnlyBufferException If this buffer is backed by an array but is read-only
202+
* @throws UnsupportedOperationException if this buffer is not backed by an accessible array
203+
* @since 5.5
204+
*/
205+
int arrayOffset();
206+
187207
/**
188208
* Returns this buffer's limit.
189209
*

bson/src/main/org/bson/ByteBufNIO.java

+10
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,16 @@ public byte[] array() {
132132
return buf.array();
133133
}
134134

135+
@Override
136+
public boolean hasArray() {
137+
return buf.hasArray();
138+
}
139+
140+
@Override
141+
public int arrayOffset() {
142+
return buf.arrayOffset();
143+
}
144+
135145
@Override
136146
public int limit() {
137147
return buf.limit();

bson/src/main/org/bson/io/OutputBuffer.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ public void writeLong(final long value) {
197197
writeInt64(value);
198198
}
199199

200-
private int writeCharacters(final String str, final boolean checkForNullCharacters) {
200+
protected int writeCharacters(final String str, final boolean checkForNullCharacters) {
201201
int len = str.length();
202202
int total = 0;
203203

driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java

+194-10
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
package com.mongodb.internal.connection;
1818

19+
import org.bson.BsonSerializationException;
1920
import org.bson.ByteBuf;
2021
import org.bson.io.OutputBuffer;
2122

@@ -25,8 +26,10 @@
2526
import java.util.ArrayList;
2627
import java.util.List;
2728

29+
import static com.mongodb.assertions.Assertions.assertFalse;
2830
import static com.mongodb.assertions.Assertions.assertTrue;
2931
import static com.mongodb.assertions.Assertions.notNull;
32+
import static java.lang.String.format;
3033

3134
/**
3235
* <p>This class is not part of the public API and may be removed or changed at any time</p>
@@ -182,11 +185,17 @@ private ByteBuf getCurrentByteBuffer() {
182185
return currentByteBuffer;
183186
}
184187

188+
private ByteBuf getNextByteBuffer() {
189+
assertFalse(bufferList.get(curBufferIndex).hasRemaining());
190+
return getByteBufferAtIndex(++curBufferIndex);
191+
}
192+
185193
private ByteBuf getByteBufferAtIndex(final int index) {
186194
if (bufferList.size() < index + 1) {
187-
bufferList.add(bufferProvider.getBuffer(index >= (MAX_SHIFT - INITIAL_SHIFT)
188-
? MAX_BUFFER_SIZE
189-
: Math.min(INITIAL_BUFFER_SIZE << index, MAX_BUFFER_SIZE)));
195+
ByteBuf buffer = bufferProvider.getBuffer(index >= (MAX_SHIFT - INITIAL_SHIFT)
196+
? MAX_BUFFER_SIZE
197+
: Math.min(INITIAL_BUFFER_SIZE << index, MAX_BUFFER_SIZE));
198+
bufferList.add(buffer);
190199
}
191200
return bufferList.get(index);
192201
}
@@ -229,6 +238,16 @@ public List<ByteBuf> getByteBuffers() {
229238
return buffers;
230239
}
231240

241+
public List<ByteBuf> getDuplicateByteBuffers() {
242+
ensureOpen();
243+
244+
List<ByteBuf> buffers = new ArrayList<>(bufferList.size());
245+
for (final ByteBuf cur : bufferList) {
246+
buffers.add(cur.duplicate().order(ByteOrder.LITTLE_ENDIAN));
247+
}
248+
return buffers;
249+
}
250+
232251

233252
@Override
234253
public int pipe(final OutputStream out) throws IOException {
@@ -237,14 +256,18 @@ public int pipe(final OutputStream out) throws IOException {
237256
byte[] tmp = new byte[INITIAL_BUFFER_SIZE];
238257

239258
int total = 0;
240-
for (final ByteBuf cur : getByteBuffers()) {
241-
ByteBuf dup = cur.duplicate();
242-
while (dup.hasRemaining()) {
243-
int numBytesToCopy = Math.min(dup.remaining(), tmp.length);
244-
dup.get(tmp, 0, numBytesToCopy);
245-
out.write(tmp, 0, numBytesToCopy);
259+
List<ByteBuf> byteBuffers = getByteBuffers();
260+
try {
261+
for (final ByteBuf cur : byteBuffers) {
262+
while (cur.hasRemaining()) {
263+
int numBytesToCopy = Math.min(cur.remaining(), tmp.length);
264+
cur.get(tmp, 0, numBytesToCopy);
265+
out.write(tmp, 0, numBytesToCopy);
266+
}
267+
total += cur.limit();
246268
}
247-
total += dup.limit();
269+
} finally {
270+
byteBuffers.forEach(ByteBuf::release);
248271
}
249272
return total;
250273
}
@@ -370,4 +393,165 @@ private static final class BufferPositionPair {
370393
this.position = position;
371394
}
372395
}
396+
397+
protected int writeCharacters(final String str, final boolean checkNullTermination) {
398+
int stringLength = str.length();
399+
int sp = 0;
400+
int prevPos = position;
401+
402+
ByteBuf curBuffer = getCurrentByteBuffer();
403+
int curBufferPos = curBuffer.position();
404+
int curBufferLimit = curBuffer.limit();
405+
int remaining = curBufferLimit - curBufferPos;
406+
407+
if (curBuffer.hasArray()) {
408+
byte[] dst = curBuffer.array();
409+
int arrayOffset = curBuffer.arrayOffset();
410+
if (remaining >= str.length() + 1) {
411+
// Write ASCII characters directly to the array until we hit a non-ASCII character.
412+
sp = writeOnArrayAscii(str, dst, arrayOffset + curBufferPos, checkNullTermination);
413+
curBufferPos += sp;
414+
// If the whole string was written as ASCII, append the null terminator.
415+
if (sp == stringLength) {
416+
dst[arrayOffset + curBufferPos++] = 0;
417+
position += sp + 1;
418+
curBuffer.position(curBufferPos);
419+
return sp + 1;
420+
}
421+
// Otherwise, update the position to reflect the partial write.
422+
position += sp;
423+
curBuffer.position(curBufferPos);
424+
}
425+
}
426+
427+
// We get here, when the buffer is not backed by an array, or when the string contains at least one non-ASCII characters.
428+
return writeOnBuffers(str,
429+
checkNullTermination,
430+
sp,
431+
stringLength,
432+
curBufferLimit,
433+
curBufferPos,
434+
curBuffer,
435+
prevPos);
436+
}
437+
438+
private int writeOnBuffers(final String str,
439+
final boolean checkNullTermination,
440+
final int stringPointer,
441+
final int stringLength,
442+
final int bufferLimit,
443+
final int bufferPos,
444+
final ByteBuf buffer,
445+
final int prevPos) {
446+
int remaining;
447+
int sp = stringPointer;
448+
int curBufferPos = bufferPos;
449+
int curBufferLimit = bufferLimit;
450+
ByteBuf curBuffer = buffer;
451+
while (sp < stringLength) {
452+
remaining = curBufferLimit - curBufferPos;
453+
int c = str.charAt(sp);
454+
455+
if (checkNullTermination && c == 0x0) {
456+
throw new BsonSerializationException(
457+
format("BSON cstring '%s' is not valid because it contains a null character " + "at index %d", str, sp));
458+
}
459+
460+
if (c < 0x80) {
461+
if (remaining == 0) {
462+
curBuffer = getNextByteBuffer();
463+
curBufferPos = 0;
464+
curBufferLimit = curBuffer.limit();
465+
}
466+
curBuffer.put((byte) c);
467+
curBufferPos++;
468+
position++;
469+
} else if (c < 0x800) {
470+
if (remaining < 2) {
471+
// Not enough space: use write() to handle buffer boundary
472+
write((byte) (0xc0 + (c >> 6)));
473+
write((byte) (0x80 + (c & 0x3f)));
474+
475+
curBuffer = getCurrentByteBuffer();
476+
curBufferPos = curBuffer.position();
477+
curBufferLimit = curBuffer.limit();
478+
} else {
479+
curBuffer.put((byte) (0xc0 + (c >> 6)));
480+
curBuffer.put((byte) (0x80 + (c & 0x3f)));
481+
curBufferPos += 2;
482+
position += 2;
483+
}
484+
} else {
485+
// Handle multibyte characters (may involve surrogate pairs).
486+
c = Character.codePointAt(str, sp);
487+
/*
488+
Malformed surrogate pairs are encoded as-is (3 byte code unit) without substituting any code point.
489+
This known deviation from the spec and current functionality remains for backward compatibility.
490+
Ticket: JAVA-5575
491+
*/
492+
if (c < 0x10000) {
493+
if (remaining < 3) {
494+
write((byte) (0xe0 + (c >> 12)));
495+
write((byte) (0x80 + ((c >> 6) & 0x3f)));
496+
write((byte) (0x80 + (c & 0x3f)));
497+
498+
curBuffer = getCurrentByteBuffer();
499+
curBufferPos = curBuffer.position();
500+
curBufferLimit = curBuffer.limit();
501+
} else {
502+
curBuffer.put((byte) (0xe0 + (c >> 12)));
503+
curBuffer.put((byte) (0x80 + ((c >> 6) & 0x3f)));
504+
curBuffer.put((byte) (0x80 + (c & 0x3f)));
505+
curBufferPos += 3;
506+
position += 3;
507+
}
508+
} else {
509+
if (remaining < 4) {
510+
write((byte) (0xf0 + (c >> 18)));
511+
write((byte) (0x80 + ((c >> 12) & 0x3f)));
512+
write((byte) (0x80 + ((c >> 6) & 0x3f)));
513+
write((byte) (0x80 + (c & 0x3f)));
514+
515+
curBuffer = getCurrentByteBuffer();
516+
curBufferPos = curBuffer.position();
517+
curBufferLimit = curBuffer.limit();
518+
} else {
519+
curBuffer.put((byte) (0xf0 + (c >> 18)));
520+
curBuffer.put((byte) (0x80 + ((c >> 12) & 0x3f)));
521+
curBuffer.put((byte) (0x80 + ((c >> 6) & 0x3f)));
522+
curBuffer.put((byte) (0x80 + (c & 0x3f)));
523+
curBufferPos += 4;
524+
position += 4;
525+
}
526+
}
527+
}
528+
sp += Character.charCount(c);
529+
}
530+
531+
getCurrentByteBuffer().put((byte) 0);
532+
position++;
533+
return position - prevPos;
534+
}
535+
536+
private static int writeOnArrayAscii(final String str,
537+
final byte[] dst,
538+
final int arrayPosition,
539+
final boolean checkNullTermination) {
540+
int pos = arrayPosition;
541+
int sp = 0;
542+
// Fast common path: This tight loop is JIT-friendly (simple, no calls, few branches),
543+
// It might be unrolled for performance.
544+
for (; sp < str.length(); sp++, pos++) {
545+
char c = str.charAt(sp);
546+
if (checkNullTermination && c == 0) {
547+
throw new BsonSerializationException(
548+
format("BSON cstring '%s' is not valid because it contains a null character " + "at index %d", str, sp));
549+
}
550+
if (c >= 0x80) {
551+
break;
552+
}
553+
dst[pos] = (byte) c;
554+
}
555+
return sp;
556+
}
373557
}

driver-core/src/main/com/mongodb/internal/connection/CompositeByteBuf.java

+10
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,16 @@ public byte[] array() {
213213
throw new UnsupportedOperationException("Not implemented yet!");
214214
}
215215

216+
@Override
217+
public boolean hasArray() {
218+
return false;
219+
}
220+
221+
@Override
222+
public int arrayOffset() {
223+
throw new UnsupportedOperationException("Not implemented yet!");
224+
}
225+
216226
@Override
217227
public ByteBuf limit(final int newLimit) {
218228
if (newLimit < 0 || newLimit > capacity()) {

driver-core/src/main/com/mongodb/internal/connection/netty/NettyByteBuf.java

+10
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,16 @@ public byte[] array() {
124124
return proxied.array();
125125
}
126126

127+
@Override
128+
public boolean hasArray() {
129+
return proxied.hasArray();
130+
}
131+
132+
@Override
133+
public int arrayOffset() {
134+
return proxied.arrayOffset();
135+
}
136+
127137
@Override
128138
public int limit() {
129139
if (isWriting) {

driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufSpecification.groovy

+1-5
Original file line numberDiff line numberDiff line change
@@ -249,11 +249,7 @@ class ByteBufSpecification extends Specification {
249249
@Override
250250
ByteBuf getBuffer(final int size) {
251251
io.netty.buffer.ByteBuf buffer = allocator.directBuffer(size, size)
252-
try {
253-
new NettyByteBuf(buffer.retain())
254-
} finally {
255-
buffer.release();
256-
}
252+
new NettyByteBuf(buffer)
257253
}
258254
}
259255
}

0 commit comments

Comments
 (0)