@@ -1336,4 +1336,56 @@ - (void)testResumingAQueryShouldUseBloomFilterToAvoidFullRequery {
1336
1336
}
1337
1337
}
1338
1338
1339
/**
 * Builds a set of document IDs that exercise tricky Unicode cases (precomposed vs. decomposed
 * characters, reordered combining marks, and a character outside the BMP) and verifies the
 * assumptions this test makes about how `NSString` represents them.
 *
 * The test is skipped when running against the Firestore emulator.
 */
- (void)testBloomFilterShouldCorrectlyEncodeComplexUnicodeCharacters {
  // TODO(b/291365820): Stop skipping this test when running against the Firestore emulator once
  // the emulator is improved to include a bloom filter in the existence filter messages that it
  // sends.
  XCTSkipIf([FSTIntegrationTestCase isRunningAgainstEmulator],
            "Skip this test when running against the Firestore emulator because the emulator does "
            "not include a bloom filter when it sends existence filter messages, making it "
            "impossible for this test to verify the correctness of the bloom filter.");

  // Set this test to stop when the first failure occurs because some test assertion failures make
  // the rest of the test not applicable or will even crash.
  self.continueAfterFailure = NO;

  // Firestore does not do any Unicode normalization on the document IDs. Therefore, two document
  // IDs that are canonically-equivalent (i.e. they visually appear identical) but are represented
  // by a different sequence of Unicode code points are treated as distinct document IDs.
  //
  // NOTE: The literals below must not contain any leading or trailing whitespace; the assertions
  // that follow depend on the exact UTF-16 offsets of the characters in these strings.
  NSArray<NSString *> *testDocIds = @[
    @"DocumentToDelete",
    // The next two strings both end with "e" with an accent: the first uses the dedicated Unicode
    // code point for this character, while the second uses the standard lowercase "e" followed by
    // the accent combining character.
    @"LowercaseEWithAcuteAccent_\u00E9",
    @"LowercaseEWithAcuteAccent_\u0065\u0301",
    // The next two strings both end with an "e" with two different accents applied via the
    // following two combining characters. The combining characters are specified in a different
    // order and Firestore treats these document IDs as unique, despite the order of the combining
    // characters being irrelevant.
    @"LowercaseEWithMultipleAccents_\u0065\u0301\u0327",
    @"LowercaseEWithMultipleAccents_\u0065\u0327\u0301",
    // The next string contains a character outside the BMP (the "basic multilingual plane"); that
    // is, its code point is greater than 0xFFFF. Since NSString stores text in sequences of 16-bit
    // code units, using the UTF-16 encoding (according to
    // https://www.objc.io/issues/9-strings/unicode) it is stored as a surrogate pair, two 16-bit
    // code units U+D83D and U+DE00, to represent this character. Make sure that its presence is
    // correctly tested in the bloom filter, which uses UTF-8 encoding.
    @"Smiley_\U0001F600",
  ];

  // Verify assumptions about the equivalence of strings in `testDocIds`: each pair differs in its
  // code point sequence but is identical after canonical decomposition.
  XCTAssertEqualObjects(testDocIds[1].decomposedStringWithCanonicalMapping,
                        testDocIds[2].decomposedStringWithCanonicalMapping);
  XCTAssertEqualObjects(testDocIds[3].decomposedStringWithCanonicalMapping,
                        testDocIds[4].decomposedStringWithCanonicalMapping);

  // "Smiley_" occupies UTF-16 indices 0-6, so the surrogate pair sits at indices 7 and 8.
  XCTAssertEqual([testDocIds[5] characterAtIndex:7], 0xD83D);
  XCTAssertEqual([testDocIds[5] characterAtIndex:8], 0xDE00);
}
1390
+
1339
1391
@end
0 commit comments