Skip to content

Commit dd5fe4d

Browse files
committed
Add tests for valid surrogates.
JAVA-5816
1 parent 43f1663 commit dd5fe4d

File tree

1 file changed

+125
-24
lines changed

1 file changed

+125
-24
lines changed

driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java

+125-24
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@ final class ByteBufferBsonOutputTest {
7474
private static final List<Integer> ALL_SURROGATE_CODE_POINTS = Stream.concat(
7575
range(MIN_LOW_SURROGATE, MAX_LOW_SURROGATE).boxed(),
7676
range(MIN_HIGH_SURROGATE, MAX_HIGH_SURROGATE).boxed()).collect(toList());
77+
public static final List<Integer> ALL_UTF_16_CODE_POINTS_FORMED_BY_SURROGATE_PAIRS = rangeClosed(0x10000, MAX_CODE_POINT)
78+
.boxed()
79+
.collect(toList());
7780

7881
static Stream<BufferProvider> bufferProviders() {
7982
return Stream.of(
@@ -630,10 +633,18 @@ void shouldGrowToMaximumAllowedSizeOfByteBuffer(final boolean useBranch, final B
630633
try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(bufferProvider)) {
631634
byte[] v = new byte[0x2000000];
632635
ThreadLocalRandom.current().nextBytes(v);
633-
Consumer<ByteBufferBsonOutput> assertByteBuffers = effectiveOut -> assertEquals(
634-
asList(1 << 10, 1 << 11, 1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20,
635-
1 << 21, 1 << 22, 1 << 23, 1 << 24, 1 << 24),
636-
effectiveOut.getByteBuffers().stream().map(ByteBuf::capacity).collect(toList()));
636+
Consumer<ByteBufferBsonOutput> assertByteBuffers = effectiveOut -> {
637+
List<ByteBuf> byteBuffers = new ArrayList<>();
638+
try {
639+
byteBuffers = effectiveOut.getByteBuffers();
640+
assertEquals(
641+
asList(1 << 10, 1 << 11, 1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20,
642+
1 << 21, 1 << 22, 1 << 23, 1 << 24, 1 << 24),
643+
byteBuffers.stream().map(ByteBuf::capacity).collect(toList()));
644+
} finally {
645+
byteBuffers.forEach(ByteBuf::release);
646+
}
647+
};
637648
Consumer<ByteBufferBsonOutput> assertions = effectiveOut -> {
638649
effectiveOut.writeBytes(v);
639650
assertEquals(v.length, effectiveOut.size());
@@ -835,8 +846,8 @@ void shouldWriteInt32AbsoluteValueWithinSpanningBuffers(
835846
final List<byte[]> expectedBuffers,
836847
final BufferProvider bufferProvider) {
837848

838-
try (ByteBufferBsonOutput output =
839-
new ByteBufferBsonOutput(size -> bufferProvider.getBuffer(Integer.BYTES))) {
849+
List<ByteBuf> buffers = new ArrayList<>();
850+
try (ByteBufferBsonOutput output = new ByteBufferBsonOutput(size -> bufferProvider.getBuffer(Integer.BYTES))) {
840851

841852
//given
842853
initialData.forEach(output::writeBytes);
@@ -845,9 +856,11 @@ void shouldWriteInt32AbsoluteValueWithinSpanningBuffers(
845856
output.writeInt32(absolutePosition, intValue);
846857

847858
//then
848-
List<ByteBuf> buffers = output.getByteBuffers();
859+
buffers = output.getByteBuffers();
849860
assertEquals(expectedBuffers.size(), buffers.size(), "Number of buffers mismatch");
850861
assertBufferContents(expectedBuffers, buffers);
862+
}finally {
863+
buffers.forEach(ByteBuf::release);
851864
}
852865
}
853866

@@ -1086,10 +1099,10 @@ class Utf8StringTests {
10861099
@MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders")
10871100
void shouldWriteCStringAcrossBuffersUTF8(final BufferProvider bufferProvider) throws IOException {
10881101
for (Integer codePoint : ALL_CODE_POINTS_EXCLUDING_SURROGATES) {
1089-
String str = new String(Character.toChars(codePoint)) + "a";
1090-
byte[] expectedStringEncoding = str.getBytes(StandardCharsets.UTF_8);
1102+
String stringToEncode = new String(Character.toChars(codePoint)) + "a";
1103+
byte[] expectedStringEncoding = stringToEncode.getBytes(StandardCharsets.UTF_8);
10911104
int bufferAllocationSize = expectedStringEncoding.length + "\u0000".length();
1092-
testWriteCStringAcrossBuffers(bufferProvider, codePoint, bufferAllocationSize, str, expectedStringEncoding);
1105+
testWriteCStringAcrossBuffers(bufferProvider, codePoint, bufferAllocationSize, stringToEncode, expectedStringEncoding);
10931106
}
10941107
}
10951108

@@ -1098,11 +1111,11 @@ void shouldWriteCStringAcrossBuffersUTF8(final BufferProvider bufferProvider) th
10981111
@MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders")
10991112
void shouldWriteCStringAcrossBuffersUTF8WithBranch(final BufferProvider bufferProvider) throws IOException {
11001113
for (Integer codePoint : ALL_CODE_POINTS_EXCLUDING_SURROGATES) {
1101-
String str = new String(Character.toChars(codePoint)) + "a";
1102-
int bufferAllocationSize = str.getBytes(StandardCharsets.UTF_8).length + "\u0000".length();
1103-
byte[] expectedEncoding = str.getBytes(StandardCharsets.UTF_8);
1114+
String stringToEncode = new String(Character.toChars(codePoint)) + "a";
1115+
int bufferAllocationSize = stringToEncode.getBytes(StandardCharsets.UTF_8).length + "\u0000".length();
1116+
byte[] expectedEncoding = stringToEncode.getBytes(StandardCharsets.UTF_8);
11041117

1105-
testWriteCStringAcrossBufferWithBranch(bufferProvider, codePoint, bufferAllocationSize, str, expectedEncoding);
1118+
testWriteCStringAcrossBufferWithBranch(bufferProvider, codePoint, bufferAllocationSize, stringToEncode, expectedEncoding);
11061119
}
11071120
}
11081121

@@ -1112,11 +1125,15 @@ void shouldWriteCStringAcrossBuffersUTF8WithBranch(final BufferProvider bufferPr
11121125
void shouldWriteStringAcrossBuffersUTF8(final BufferProvider bufferProvider) throws IOException {
11131126
for (Integer codePoint : ALL_CODE_POINTS_EXCLUDING_SURROGATES) {
11141127
// given
1115-
String str = new String(Character.toChars(codePoint)) + "a";
1128+
String stringToEncode = new String(Character.toChars(codePoint)) + "a";
11161129
//4 bytes for the length prefix, bytes for encoded String, and 1 byte for the null terminator
1117-
int bufferAllocationSize = Integer.BYTES + str.getBytes(StandardCharsets.UTF_8).length + "\u0000".length();
1118-
byte[] expectedEncoding = str.getBytes(StandardCharsets.UTF_8);
1119-
testWriteStringAcrossBuffers(bufferProvider, codePoint, bufferAllocationSize, str, expectedEncoding);
1130+
int bufferAllocationSize = Integer.BYTES + stringToEncode.getBytes(StandardCharsets.UTF_8).length + "\u0000".length();
1131+
byte[] expectedEncoding = stringToEncode.getBytes(StandardCharsets.UTF_8);
1132+
testWriteStringAcrossBuffers(bufferProvider,
1133+
codePoint,
1134+
bufferAllocationSize,
1135+
stringToEncode,
1136+
expectedEncoding);
11201137
}
11211138
}
11221139

@@ -1192,6 +1209,78 @@ void shouldWriteCStringWithMalformedSurrogatesWithBranch(final BufferProvider bu
11921209
}
11931210
}
11941211

1212+
@DisplayName("should write surrogate String across buffers")
1213+
@ParameterizedTest
1214+
@MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders")
1215+
void shouldWriteStringWithSurrogatePairs(final BufferProvider bufferProvider) throws IOException {
1216+
for (Integer surrogateCodePoint : ALL_UTF_16_CODE_POINTS_FORMED_BY_SURROGATE_PAIRS) {
1217+
String stringToEncode = new String(toSurrogatePair(surrogateCodePoint));
1218+
byte[] expectedEncoding = stringToEncode.getBytes(StandardCharsets.UTF_8);
1219+
int bufferAllocationSize = expectedEncoding.length + "\u0000".length();
1220+
1221+
testWriteCStringAcrossBufferWithBranch(
1222+
bufferProvider,
1223+
surrogateCodePoint,
1224+
bufferAllocationSize,
1225+
stringToEncode,
1226+
expectedEncoding);
1227+
}
1228+
}
1229+
1230+
@DisplayName("should write surrogate String across buffers with branch")
1231+
@ParameterizedTest
1232+
@MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders")
1233+
void shouldWriteStringWithSurrogatePairsWithBranch(final BufferProvider bufferProvider) throws IOException {
1234+
for (Integer surrogateCodePoint : ALL_UTF_16_CODE_POINTS_FORMED_BY_SURROGATE_PAIRS) {
1235+
String stringToEncode = new String(toSurrogatePair(surrogateCodePoint));
1236+
byte[] expectedEncoding = stringToEncode.getBytes(StandardCharsets.UTF_8);
1237+
int bufferAllocationSize = expectedEncoding.length + "\u0000".length();
1238+
1239+
testWriteStringAcrossBuffersWithBranch(
1240+
bufferProvider,
1241+
bufferAllocationSize,
1242+
stringToEncode,
1243+
surrogateCodePoint,
1244+
expectedEncoding);
1245+
}
1246+
}
1247+
1248+
@DisplayName("should write surrogate CString across buffers")
1249+
@ParameterizedTest
1250+
@MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders")
1251+
void shouldWriteCStringWithSurrogatePairs(final BufferProvider bufferProvider) throws IOException {
1252+
for (Integer surrogateCodePoint : ALL_UTF_16_CODE_POINTS_FORMED_BY_SURROGATE_PAIRS) {
1253+
String stringToEncode = new String(toSurrogatePair(surrogateCodePoint));
1254+
byte[] expectedEncoding = stringToEncode.getBytes(StandardCharsets.UTF_8);
1255+
int bufferAllocationSize = expectedEncoding.length + "\u0000".length();
1256+
1257+
testWriteCStringAcrossBufferWithBranch(
1258+
bufferProvider,
1259+
surrogateCodePoint,
1260+
bufferAllocationSize,
1261+
stringToEncode,
1262+
expectedEncoding);
1263+
}
1264+
}
1265+
1266+
@DisplayName("should write surrogate CString across buffers with branch")
1267+
@ParameterizedTest
1268+
@MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders")
1269+
void shouldWriteCStringWithSurrogatePairsWithBranch(final BufferProvider bufferProvider) throws IOException {
1270+
for (Integer surrogateCodePoint : ALL_UTF_16_CODE_POINTS_FORMED_BY_SURROGATE_PAIRS) {
1271+
String stringToEncode = new String(toSurrogatePair(surrogateCodePoint));
1272+
byte[] expectedEncoding = stringToEncode.getBytes(StandardCharsets.UTF_8);
1273+
int bufferAllocationSize = expectedEncoding.length + "\u0000".length();
1274+
1275+
testWriteCStringAcrossBufferWithBranch(
1276+
bufferProvider,
1277+
surrogateCodePoint,
1278+
bufferAllocationSize,
1279+
stringToEncode,
1280+
expectedEncoding);
1281+
}
1282+
}
1283+
11951284
/*
11961285
Tests that malformed surrogate pairs are encoded as-is without substituting any code point.
11971286
This known bug and corresponding test remain for backward compatibility.
@@ -1207,14 +1296,14 @@ void shouldWriteStringWithMalformedSurrogates(final BufferProvider bufferProvide
12071296
(byte) (0x80 | ((surrogateCodePoint >> 6) & 0x3F)),
12081297
(byte) (0x80 | (surrogateCodePoint & 0x3F))
12091298
};
1210-
String str = new String(Character.toChars(surrogateCodePoint));
1299+
String stringToEncode = new String(Character.toChars(surrogateCodePoint));
12111300
int bufferAllocationSize = expectedEncoding.length + "\u0000".length();
12121301

1213-
testWriteCStringAcrossBufferWithBranch(
1302+
testWriteStringAcrossBuffers(
12141303
bufferProvider,
12151304
surrogateCodePoint,
12161305
bufferAllocationSize,
1217-
str,
1306+
stringToEncode,
12181307
expectedEncoding);
12191308
}
12201309
}
@@ -1281,7 +1370,7 @@ private void testWriteCStringAcrossBuffers(final BufferProvider bufferProvider,
12811370
private void testWriteStringAcrossBuffers(final BufferProvider bufferProvider,
12821371
final Integer codePoint,
12831372
final int bufferAllocationSize,
1284-
final String str,
1373+
final String stringToEncode,
12851374
final byte[] expectedEncoding) throws IOException {
12861375
for (int startingOffset = 0; startingOffset <= bufferAllocationSize; startingOffset++) {
12871376
//given
@@ -1293,7 +1382,7 @@ private void testWriteStringAcrossBuffers(final BufferProvider bufferProvider,
12931382
actualBsonOutput.write(new byte[startingOffset]);
12941383

12951384
// when
1296-
actualBsonOutput.writeString(str);
1385+
actualBsonOutput.writeString(stringToEncode);
12971386

12981387
// then
12991388
actualByteBuffers = actualBsonOutput.getDuplicateByteBuffers();
@@ -1395,7 +1484,8 @@ private void assertEncodedStringSize(final Integer codePoint,
13951484
startingOffset));
13961485
}
13971486

1398-
private void testWriteCStringAcrossBufferWithBranch(final BufferProvider bufferProvider, final Integer codePoint,
1487+
private void testWriteCStringAcrossBufferWithBranch(final BufferProvider bufferProvider,
1488+
final Integer codePoint,
13991489
final int bufferAllocationSize,
14001490
final String str, final byte[] expectedEncoding) throws IOException {
14011491
for (int startingOffset = 0; startingOffset <= bufferAllocationSize; startingOffset++) {
@@ -1488,6 +1578,17 @@ private void assertEncodedResult(final int codePoint,
14881578
codePoint,
14891579
startingOffset));
14901580
}
1581+
1582+
public char[] toSurrogatePair(int codePoint) {
1583+
if (!Character.isValidCodePoint(codePoint) || codePoint < 0x10000) {
1584+
throw new IllegalArgumentException("Invalid code point: " + codePoint);
1585+
}
1586+
char[] result = new char[2];
1587+
result[0] = Character.highSurrogate(codePoint);
1588+
result[1] = Character.lowSurrogate(codePoint);
1589+
return result;
1590+
}
1591+
14911592
}
14921593

14931594
private static byte[] getBytes(final OutputBuffer basicOutputBuffer) throws IOException {

0 commit comments

Comments
 (0)