From f5984759e24aa38bae35cf43beece467236d0009 Mon Sep 17 00:00:00 2001 From: milaGGL <107142260+milaGGL@users.noreply.github.com> Date: Tue, 11 Feb 2025 14:38:50 -0500 Subject: [PATCH 01/14] change the compareUtf8Strings to lazy encoding --- packages/firestore/src/util/misc.ts | 40 +++ .../test/integration/api/database.test.ts | 239 ++++++++++++++++++ 2 files changed, 279 insertions(+) diff --git a/packages/firestore/src/util/misc.ts b/packages/firestore/src/util/misc.ts index acaff77abb6..9df1e0fb62a 100644 --- a/packages/firestore/src/util/misc.ts +++ b/packages/firestore/src/util/misc.ts @@ -16,6 +16,7 @@ */ import { randomBytes } from '../platform/random_bytes'; +import { newTextEncoder } from '../platform/text_serializer'; import { debugAssert } from './assert'; @@ -74,6 +75,45 @@ export interface Equatable { isEqual(other: T): boolean; } +/** Compare strings in UTF-8 encoded byte order */ +export function compareUtf8Strings(left: string, right: string): number { + let i = 0; + while (i < left.length && i < right.length) { + const leftCodePoint = left.codePointAt(i)!; + const rightCodePoint = right.codePointAt(i)!; + + if (leftCodePoint !== rightCodePoint) { + if (leftCodePoint < 128 && rightCodePoint < 128) { + // ASCII comparison + return primitiveComparator(leftCodePoint, rightCodePoint); + } else { + // Lazy instantiate TextEncoder + const encoder = newTextEncoder(); + + // UTF-8 encoded byte comparison, substring 2 indexes to cover surrogate pairs + const leftBytes = encoder.encode(left.substring(i, i + 2)); + const rightBytes = encoder.encode(right.substring(i, i + 2)); + for ( + let j = 0; + j < Math.min(leftBytes.length, rightBytes.length); + j++ + ) { + const comparison = primitiveComparator(leftBytes[j], rightBytes[j]); + if (comparison !== 0) { + return comparison; + } + } + } + } + + // Increment by 2 for surrogate pairs, 1 otherwise + i += leftCodePoint > 0xffff ? 2 : 1; + } + + // Compare lengths if all characters are equal + return primitiveComparator(left.length, right.length); +} + export interface Iterable { forEach: (cb: (v: V) => void) => void; } diff --git a/packages/firestore/test/integration/api/database.test.ts b/packages/firestore/test/integration/api/database.test.ts index 1cda49d9229..de0bd3a4dd1 100644 --- a/packages/firestore/test/integration/api/database.test.ts +++ b/packages/firestore/test/integration/api/database.test.ts @@ -2424,4 +2424,243 @@ apiDescribe('Database', persistence => { }); }); }); + + describe('Sort unicode strings', () => { + const expectedDocs = [ + 'b', + 'a', + 'h', + 'i', + 'c', + 'f', + 'e', + 'd', + 'g', + 'k', + 'j' + ]; + it('snapshot listener sorts unicode strings the same as server', async () => { + const testDocs = { + 'a': { value: 'Łukasiewicz' }, + 'b': { value: 'Sierpiński' }, + 'c': { value: '岩澤' }, + 'd': { value: '🄟' }, + 'e': { value: 'P' }, + 'f': { value: '︒' }, + 'g': { value: '🐵' }, + 'h': { value: '你好' }, + 'i': { value: '你顥' }, + 'j': { value: '😁' }, + 'k': { value: '😀' }, + }; + + return withTestCollection(persistence, testDocs, async collectionRef => { + const orderedQuery = query(collectionRef, orderBy('value')); + + const getSnapshot = await getDocsFromServer(orderedQuery); + expect(toIds(getSnapshot)).to.deep.equal(expectedDocs); + + const storeEvent = new EventsAccumulator(); + const unsubscribe = onSnapshot(orderedQuery, storeEvent.storeEvent); + const watchSnapshot = await storeEvent.awaitEvent(); + expect(toIds(watchSnapshot)).to.deep.equal(toIds(getSnapshot)); + + unsubscribe(); + + await checkOnlineAndOfflineResultsMatch(orderedQuery, ...expectedDocs); + }); + }); + + it('snapshot listener sorts unicode strings in array the same as server', async () => { + const testDocs = { + 'a': { value: ['Łukasiewicz'] }, + 'b': { value: ['Sierpiński'] }, + 'c': { value: ['岩澤'] }, + 'd': { value: ['🄟'] }, + 'e': { value: ['P'] }, + 'f': { value: ['︒'] }, + 'g': { value: ['🐵'] }, + 'h': { value: ['你好'] }, + 'i': { value: ['你顥'] }, + 'j': { value: ['😁'] }, + 'k': { value: ['😀'] } + }; + + return withTestCollection(persistence, testDocs, async collectionRef => { + const orderedQuery = query(collectionRef, orderBy('value')); + + const getSnapshot = await getDocsFromServer(orderedQuery); + expect(toIds(getSnapshot)).to.deep.equal(expectedDocs); + + const storeEvent = new EventsAccumulator(); + const unsubscribe = onSnapshot(orderedQuery, storeEvent.storeEvent); + const watchSnapshot = await storeEvent.awaitEvent(); + expect(toIds(watchSnapshot)).to.deep.equal(toIds(getSnapshot)); + + unsubscribe(); + + await checkOnlineAndOfflineResultsMatch(orderedQuery, ...expectedDocs); + }); + }); + + it('snapshot listener sorts unicode strings in map the same as server', async () => { + const testDocs = { + 'a': { value: { foo: 'Łukasiewicz' } }, + 'b': { value: { foo: 'Sierpiński' } }, + 'c': { value: { foo: '岩澤' } }, + 'd': { value: { foo: '🄟' } }, + 'e': { value: { foo: 'P' } }, + 'f': { value: { foo: '︒' } }, + 'g': { value: { foo: '🐵' } }, + 'h': { value: { foo: '你好' } }, + 'i': { value: { foo: '你顥' } }, + 'j': { value: { foo: '😁' } }, + 'k': { value: { foo: '😀' } } + }; + + return withTestCollection(persistence, testDocs, async collectionRef => { + const orderedQuery = query(collectionRef, orderBy('value')); + + const getSnapshot = await getDocsFromServer(orderedQuery); + expect(toIds(getSnapshot)).to.deep.equal(expectedDocs); + + const storeEvent = new EventsAccumulator(); + const unsubscribe = onSnapshot(orderedQuery, storeEvent.storeEvent); + const watchSnapshot = await storeEvent.awaitEvent(); + expect(toIds(watchSnapshot)).to.deep.equal(toIds(getSnapshot)); + + unsubscribe(); + + await checkOnlineAndOfflineResultsMatch(orderedQuery, ...expectedDocs); + }); + }); + + it('snapshot listener sorts unicode strings in map key the same as server', async () => { + const testDocs = { + 'a': { value: { 'Łukasiewicz': true } }, + 'b': { value: { 'Sierpiński': true } }, + 'c': { value: { '岩澤': true } }, + 'd': { value: { '🄟': true } }, + 'e': { value: { 'P': true } }, + 'f': { value: { '︒': true } }, + 'g': { value: { '🐵': true } }, + 'h': { value: { '你好': true } }, + 'i': { value: { '你顥': true } }, + 'j': { value: { '😁': true } }, + 'k': { value: { '😀': true } } + }; + + return withTestCollection(persistence, testDocs, async collectionRef => { + const orderedQuery = query(collectionRef, orderBy('value')); + + const getSnapshot = await getDocsFromServer(orderedQuery); + expect(toIds(getSnapshot)).to.deep.equal(expectedDocs); + + const storeEvent = new EventsAccumulator(); + const unsubscribe = onSnapshot(orderedQuery, storeEvent.storeEvent); + const watchSnapshot = await storeEvent.awaitEvent(); + expect(toIds(watchSnapshot)).to.deep.equal(toIds(getSnapshot)); + + unsubscribe(); + + await checkOnlineAndOfflineResultsMatch(orderedQuery, ...expectedDocs); + }); + }); + + it('snapshot listener sorts unicode strings in document key the same as server', async () => { + const testDocs = { + 'Łukasiewicz': { value: true }, + 'Sierpiński': { value: true }, + '岩澤': { value: true }, + '🄟': { value: true }, + 'P': { value: true }, + '︒': { value: true }, + '🐵': { value: true }, + '你好': { value: true }, + '你顥': { value: true }, + '😁': { value: true }, + '😀': { value: true } + }; + + return withTestCollection(persistence, testDocs, async collectionRef => { + const orderedQuery = query(collectionRef, orderBy(documentId())); + + const getSnapshot = await getDocsFromServer(orderedQuery); + const expectedDocs = [ + 'Sierpiński', + 'Łukasiewicz', + '你好', + '你顥', + '岩澤', + '︒', + 'P', + '🄟', + '🐵', + '😀', + '😁' + ]; + expect(toIds(getSnapshot)).to.deep.equal(expectedDocs); + + const storeEvent = new EventsAccumulator(); + const unsubscribe = onSnapshot(orderedQuery, storeEvent.storeEvent); + const watchSnapshot = await storeEvent.awaitEvent(); + expect(toIds(watchSnapshot)).to.deep.equal(toIds(getSnapshot)); + + unsubscribe(); + + await checkOnlineAndOfflineResultsMatch(orderedQuery, ...expectedDocs); + }); + }); + + // eslint-disable-next-line no-restricted-properties + (persistence.storage === 'indexeddb' ? it.skip : it)( + 'snapshot listener sorts unicode strings in document key the same as server with persistence', + async () => { + const testDocs = { + 'Łukasiewicz': { value: true }, + 'Sierpiński': { value: true }, + '岩澤': { value: true }, + '🄟': { value: true }, + 'P': { value: true }, + '︒': { value: true }, + '🐵': { value: true }, + '你好': { value: true }, + '你顥': { value: true }, + '😁': { value: true }, + '😀': { value: true } + }; + + return withTestCollection( + persistence, + testDocs, + async collectionRef => { + const orderedQuery = query(collectionRef, orderBy('value')); + + const getSnapshot = await getDocsFromServer(orderedQuery); + expect(toIds(getSnapshot)).to.deep.equal([ + 'Sierpiński', + 'Łukasiewicz', + '你好', + '你顥', + '岩澤', + '︒', + 'P', + '🄟', + '🐵', + '😀', + '😁' + ]); + + const storeEvent = new EventsAccumulator(); + const unsubscribe = onSnapshot(orderedQuery, storeEvent.storeEvent); + const watchSnapshot = await storeEvent.awaitEvent(); + // TODO: IndexedDB sorts string lexicographically, and misses the document with ID '🄟','🐵' + expect(toIds(watchSnapshot)).to.deep.equal(toIds(getSnapshot)); + + unsubscribe(); + } + ); + } + ); + }); }); From c1536841a3d490a7a5f5da49876f14d4ec271b07 Mon Sep 17 00:00:00 2001 From: milaGGL <107142260+milaGGL@users.noreply.github.com> Date: Tue, 11 Feb 2025 15:10:03 -0500 Subject: [PATCH 02/14] add back the utf-8 string comparison --- .../src/local/indexeddb_remote_document_cache.ts | 4 ++++ packages/firestore/src/model/path.ts | 11 +++-------- packages/firestore/src/model/values.ts | 10 +++++++--- packages/firestore/src/util/misc.ts | 2 +- .../firestore/test/integration/api/database.test.ts | 2 +- 5 files changed, 16 insertions(+), 13 deletions(-) diff --git a/packages/firestore/src/local/indexeddb_remote_document_cache.ts b/packages/firestore/src/local/indexeddb_remote_document_cache.ts index b3d4658d53d..9b23c64fcf5 100644 --- a/packages/firestore/src/local/indexeddb_remote_document_cache.ts +++ b/packages/firestore/src/local/indexeddb_remote_document_cache.ts @@ -655,5 +655,9 @@ export function dbKeyComparator(l: DocumentKey, r: DocumentKey): number { return cmp; } + // TODO(b/329441702): Document IDs should be sorted by UTF-8 encoded byte + // order, but IndexedDB sorts strings lexicographically. Document ID + // comparison here still relies on primitive comparison to avoid mismatches + // observed in snapshot listeners with Unicode characters in documentIds return primitiveComparator(left[left.length - 1], right[right.length - 1]); } diff --git a/packages/firestore/src/model/path.ts b/packages/firestore/src/model/path.ts index 64cb0376a0e..c375b4c56d2 100644 --- a/packages/firestore/src/model/path.ts +++ b/packages/firestore/src/model/path.ts @@ -19,6 +19,7 @@ import { Integer } from '@firebase/webchannel-wrapper/bloom-blob'; import { debugAssert, fail } from '../util/assert'; import { Code, FirestoreError } from '../util/error'; +import { compareUtf8Strings, primitiveComparator } from '../util/misc'; export const DOCUMENT_KEY_NAME = '__name__'; @@ -181,7 +182,7 @@ abstract class BasePath> { return comparison; } } - return Math.sign(p1.length - p2.length); + return primitiveComparator(p1.length, p2.length); } private static compareSegments(lhs: string, rhs: string): number { @@ -201,13 +202,7 @@ abstract class BasePath> { ); } else { // both non-numeric - if (lhs < rhs) { - return -1; - } - if (lhs > rhs) { - return 1; - } - return 0; + return compareUtf8Strings(lhs, rhs); } } diff --git a/packages/firestore/src/model/values.ts b/packages/firestore/src/model/values.ts index 1977767515e..30d8688b776 100644 --- a/packages/firestore/src/model/values.ts +++ b/packages/firestore/src/model/values.ts @@ -25,7 +25,11 @@ import { Value } from '../protos/firestore_proto_api'; import { fail } from '../util/assert'; -import { arrayEquals, primitiveComparator } from '../util/misc'; +import { + arrayEquals, + compareUtf8Strings, + primitiveComparator +} from '../util/misc'; import { forEach, objectSize } from '../util/obj'; import { isNegativeZero } from '../util/types'; @@ -251,7 +255,7 @@ export function valueCompare(left: Value, right: Value): number { getLocalWriteTime(right) ); case TypeOrder.StringValue: - return primitiveComparator(left.stringValue!, right.stringValue!); + return compareUtf8Strings(left.stringValue!, right.stringValue!); case TypeOrder.BlobValue: return compareBlobs(left.bytesValue!, right.bytesValue!); case TypeOrder.RefValue: @@ -400,7 +404,7 @@ function compareMaps(left: MapValue, right: MapValue): number { rightKeys.sort(); for (let i = 0; i < leftKeys.length && i < rightKeys.length; ++i) { - const keyCompare = primitiveComparator(leftKeys[i], rightKeys[i]); + const keyCompare = compareUtf8Strings(leftKeys[i], rightKeys[i]); if (keyCompare !== 0) { return keyCompare; } diff --git a/packages/firestore/src/util/misc.ts b/packages/firestore/src/util/misc.ts index 9df1e0fb62a..9747f182d0a 100644 --- a/packages/firestore/src/util/misc.ts +++ b/packages/firestore/src/util/misc.ts @@ -107,7 +107,7 @@ export function compareUtf8Strings(left: string, right: string): number { } // Increment by 2 for surrogate pairs, 1 otherwise - i += leftCodePoint > 0xffff ? 2 : 1; + i += leftCodePoint > 0xffff ? 2 : 1; } // Compare lengths if all characters are equal diff --git a/packages/firestore/test/integration/api/database.test.ts b/packages/firestore/test/integration/api/database.test.ts index de0bd3a4dd1..8cbe99b3cd9 100644 --- a/packages/firestore/test/integration/api/database.test.ts +++ b/packages/firestore/test/integration/api/database.test.ts @@ -2451,7 +2451,7 @@ apiDescribe('Database', persistence => { 'h': { value: '你好' }, 'i': { value: '你顥' }, 'j': { value: '😁' }, - 'k': { value: '😀' }, + 'k': { value: '😀' } }; return withTestCollection(persistence, testDocs, async collectionRef => { From c628669b1ad91884c9f1010efb3a4e42e37f4671 Mon Sep 17 00:00:00 2001 From: milaGGL <107142260+milaGGL@users.noreply.github.com> Date: Wed, 19 Feb 2025 10:17:24 -0500 Subject: [PATCH 03/14] add changeset --- .changeset/large-pants-hide.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changeset/large-pants-hide.md diff --git a/.changeset/large-pants-hide.md b/.changeset/large-pants-hide.md new file mode 100644 index 00000000000..fbaf7bd6008 --- /dev/null +++ b/.changeset/large-pants-hide.md @@ -0,0 +1,6 @@ +--- +'@firebase/firestore': patch +'firebase': patch +--- + +Use lazy encoding in UTF-8 encoded byte comparison for strings. From f325c933dd26a3a2148304bf25162a0fde579d09 Mon Sep 17 00:00:00 2001 From: milaGGL <107142260+milaGGL@users.noreply.github.com> Date: Wed, 19 Feb 2025 10:34:17 -0500 Subject: [PATCH 04/14] Update misc.ts --- packages/firestore/src/util/misc.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/firestore/src/util/misc.ts b/packages/firestore/src/util/misc.ts index 9747f182d0a..27835ff0e16 100644 --- a/packages/firestore/src/util/misc.ts +++ b/packages/firestore/src/util/misc.ts @@ -103,6 +103,9 @@ export function compareUtf8Strings(left: string, right: string): number { return comparison; } } + + // Compare lengths if all bytes are equal + return primitiveComparator(leftBytes.length, rightBytes.length); } } From 9758b8e06007b5a084e2bd165b9bbcc8eba2a7d8 Mon Sep 17 00:00:00 2001 From: milaGGL <107142260+milaGGL@users.noreply.github.com> Date: Wed, 19 Feb 2025 17:50:35 -0500 Subject: [PATCH 05/14] add unit test --- packages/firestore/src/util/misc.ts | 25 +- .../firestore/test/unit/util/misc.test.ts | 250 +++++++++++++++++- 2 files changed, 263 insertions(+), 12 deletions(-) diff --git a/packages/firestore/src/util/misc.ts b/packages/firestore/src/util/misc.ts index 27835ff0e16..513556704ff 100644 --- a/packages/firestore/src/util/misc.ts +++ b/packages/firestore/src/util/misc.ts @@ -77,8 +77,7 @@ export interface Equatable { /** Compare strings in UTF-8 encoded byte order */ export function compareUtf8Strings(left: string, right: string): number { - let i = 0; - while (i < left.length && i < right.length) { + for (let i = 0; i < left.length && i < right.length; i++) { const leftCodePoint = left.codePointAt(i)!; const rightCodePoint = right.codePointAt(i)!; @@ -90,9 +89,8 @@ export function compareUtf8Strings(left: string, right: string): number { // Lazy instantiate TextEncoder const encoder = newTextEncoder(); - // UTF-8 encoded byte comparison, substring 2 indexes to cover surrogate pairs - const leftBytes = encoder.encode(left.substring(i, i + 2)); - const rightBytes = encoder.encode(right.substring(i, i + 2)); + const leftBytes = encoder.encode(getUtf8SafeSubstring(left, i)); + const rightBytes = encoder.encode(getUtf8SafeSubstring(right, i)); for ( let j = 0; j < Math.min(leftBytes.length, rightBytes.length); @@ -103,20 +101,25 @@ export function compareUtf8Strings(left: string, right: string): number { return comparison; } } - - // Compare lengths if all bytes are equal - return primitiveComparator(leftBytes.length, rightBytes.length); } } - - // Increment by 2 for surrogate pairs, 1 otherwise - i += leftCodePoint > 0xffff ? 2 : 1; } // Compare lengths if all characters are equal return primitiveComparator(left.length, right.length); } +function getUtf8SafeSubstring(str: string, index: number): string { + const firstCodePoint = str.codePointAt(index)!; + if (firstCodePoint > 0xffff) { + // It's a surrogate pair, return the whole pair + return str.substring(index, index + 2); + } else { + // It's a single code point, return it + return str.substring(index, index + 1); + } +} + export interface Iterable { forEach: (cb: (v: V) => void) => void; } diff --git a/packages/firestore/test/unit/util/misc.test.ts b/packages/firestore/test/unit/util/misc.test.ts index cc75419c026..f66332e6862 100644 --- a/packages/firestore/test/unit/util/misc.test.ts +++ b/packages/firestore/test/unit/util/misc.test.ts @@ -18,7 +18,7 @@ import { expect } from 'chai'; import { debugCast } from '../../../src/util/assert'; -import { immediateSuccessor } from '../../../src/util/misc'; +import { compareUtf8Strings, immediateSuccessor } from '../../../src/util/misc'; import { mask } from '../../util/helpers'; describe('immediateSuccessor', () => { @@ -53,3 +53,251 @@ describe('FieldMask', () => { ); }); }); + +class StringPair { + constructor(public s1: string, public s2: string) {} +} + +class StringPairGenerator { + constructor(private stringGenerator: StringGenerator) {} + + next(): StringPair { + const prefix = this.stringGenerator.next(); + const s1 = prefix + this.stringGenerator.next(); + const s2 = prefix + this.stringGenerator.next(); + return new StringPair(s1, s2); + } +} + +class StringGenerator { + private static readonly DEFAULT_SURROGATE_PAIR_PROBABILITY = 0.33; + private static readonly DEFAULT_MAX_LENGTH = 20; + + // The first Unicode code point that is in the basic multilingual plane ("BMP") and, + // therefore requires 1 UTF-16 code unit to be represented in UTF-16. + private static readonly MIN_BMP_CODE_POINT = 0x00000000; + + // The last Unicode code point that is in the basic multilingual plane ("BMP") and, + // therefore requires 1 UTF-16 code unit to be represented in UTF-16. + private static readonly MAX_BMP_CODE_POINT = 0x0000ffff; + + // The first Unicode code point that is outside of the basic multilingual plane ("BMP") and, + // therefore requires 2 UTF-16 code units, a surrogate pair, to be represented in UTF-16. + private static readonly MIN_SUPPLEMENTARY_CODE_POINT = 0x00010000; + + // The last Unicode code point that is outside of the basic multilingual plane ("BMP") and, + // therefore requires 2 UTF-16 code units, a surrogate pair, to be represented in UTF-16. + private static readonly MAX_SUPPLEMENTARY_CODE_POINT = 0x0010ffff; + + private readonly rnd: Random; + private readonly surrogatePairProbability: number; + private readonly maxLength: number; + + constructor(seed: number); + constructor(rnd: Random, surrogatePairProbability: number, maxLength: number); + constructor( + seedOrRnd: number | Random, + surrogatePairProbability?: number, + maxLength?: number + ) { + if (typeof seedOrRnd === 'number') { + this.rnd = new Random(seedOrRnd); + this.surrogatePairProbability = + StringGenerator.DEFAULT_SURROGATE_PAIR_PROBABILITY; + this.maxLength = StringGenerator.DEFAULT_MAX_LENGTH; + } else { + this.rnd = seedOrRnd; + this.surrogatePairProbability = StringGenerator.validateProbability( + 'surrogate pair', + surrogatePairProbability! + ); + this.maxLength = StringGenerator.validateLength( + 'maximum string', + maxLength! + ); + } + } + + private static validateProbability( + name: string, + probability: number + ): number { + if (!Number.isFinite(probability)) { + throw new Error( + `invalid ${name} probability: ${probability} (must be between 0.0 and 1.0, inclusive)` + ); + } else if (probability < 0.0) { + throw new Error( + `invalid ${name} probability: ${probability} (must be greater than or equal to zero)` + ); + } else if (probability > 1.0) { + throw new Error( + `invalid ${name} probability: ${probability} (must be less than or equal to 1)` + ); + } + return probability; + } + + private static validateLength(name: string, length: number): number { + if (length < 0) { + throw new Error( + `invalid ${name} length: ${length} (must be greater than or equal to zero)` + ); + } + return length; + } + + next(): string { + const length = this.rnd.nextInt(this.maxLength + 1); + const sb = new StringBuilder(); + while (sb.length() < length) { + const codePoint = this.nextCodePoint(); + sb.appendCodePoint(codePoint); + } + return sb.toString(); + } + + private isNextSurrogatePair(): boolean { + return StringGenerator.nextBoolean(this.rnd, this.surrogatePairProbability); + } + + private static nextBoolean(rnd: Random, probability: number): boolean { + if (probability === 0.0) { + return false; + } else if (probability === 1.0) { + return true; + } else { + return rnd.nextFloat() < probability; + } + } + + private nextCodePoint(): number { + if (this.isNextSurrogatePair()) { + return this.nextSurrogateCodePoint(); + } else { + return this.nextNonSurrogateCodePoint(); + } + } + + private nextSurrogateCodePoint(): number { + const highSurrogateMin = 0xd800; + const highSurrogateMax = 0xdbff; + const lowSurrogateMin = 0xdc00; + const lowSurrogateMax = 0xdfff; + + const highSurrogate = this.nextCodePointRange( + highSurrogateMin, + highSurrogateMax + ); + const lowSurrogate = this.nextCodePointRange( + lowSurrogateMin, + lowSurrogateMax + ); + + return (highSurrogate - 0xd800) * 0x400 + (lowSurrogate - 0xdc00) + 0x10000; + } + + private nextNonSurrogateCodePoint(): number { + return this.nextCodePointRange( + StringGenerator.MIN_BMP_CODE_POINT, + StringGenerator.MAX_BMP_CODE_POINT + ); + } + + private nextCodePointRange(min: number, max: number): number { + const rangeSize = max - min + 1; + const offset = this.rnd.nextInt(rangeSize); + return min + offset; + } + + // private nextCodePointRange(min: number, max: number, expectedCharCount: number): number { + // const rangeSize = max - min; + // const offset = this.rnd.nextInt(rangeSize); + // const codePoint = min + offset; + // if (String.fromCharCode(codePoint).length !== expectedCharCount) { + // throw new Error( + // `internal error vqgqnxcy97: Character.charCount(${codePoint}) returned ${ + // String.fromCharCode(codePoint).length + // }, but expected ${expectedCharCount}`, + // ); + // } + // return codePoint; + // } +} + +class Random { + private seed: number; + + constructor(seed: number) { + this.seed = seed; + } + + nextInt(max: number): number { + this.seed = (this.seed * 9301 + 49297) % 233280; + const rnd = this.seed / 233280; + return Math.floor(rnd * max); + } + + nextFloat(): number { + this.seed = (this.seed * 9301 + 49297) % 233280; + return this.seed / 233280; + } +} + +class StringBuilder { + private buffer: string[] = []; + + append(str: string): StringBuilder { + this.buffer.push(str); + return this; + } + + appendCodePoint(codePoint: number): StringBuilder { + this.buffer.push(String.fromCodePoint(codePoint)); + return this; + } + + toString(): string { + return this.buffer.join(''); + } + + length(): number { + return this.buffer.join('').length; + } +} + +describe('CompareUtf8Strings', () => { + it('testCompareUtf8Strings', () => { + const errors = []; + const seed = Math.floor(Math.random() * Number.MAX_SAFE_INTEGER); + let passCount = 0; + const stringGenerator = new StringGenerator(new Random(seed), 0.33, 20); + const stringPairGenerator = new StringPairGenerator(stringGenerator); + + for (let i = 0; i < 1000000 && errors.length < 10; i++) { + const { s1, s2 } = stringPairGenerator.next(); + + const actual = compareUtf8Strings(s1, s2); + const expected = Buffer.from(s1, 'utf8').compare(Buffer.from(s2, 'utf8')); + + if (actual === expected) { + passCount++; + } else { + errors.push( + `compareUtf8Strings(s1="${s1}", s2="${s2}") returned ${actual}, ` + + `but expected ${expected} (i=${i}, s1.length=${s1.length}, s2.length=${s2.length})` + ); + } + } + + if (errors.length > 0) { + console.error( + `${errors.length} test cases failed, ${passCount} test cases passed, seed=${seed};` + ); + errors.forEach((error, index) => + console.error(`errors[${index}]: ${error}`) + ); + throw new Error('Test failed'); + } + }).timeout(10000); +}); From a2eb6dacaaf8145c856ae56f37fbf3f3f48e7f02 Mon Sep 17 00:00:00 2001 From: milaGGL <107142260+milaGGL@users.noreply.github.com> Date: Thu, 20 Feb 2025 11:16:30 -0500 Subject: [PATCH 06/14] format --- packages/firestore/test/unit/util/misc.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/firestore/test/unit/util/misc.test.ts b/packages/firestore/test/unit/util/misc.test.ts index f66332e6862..a51df285ec0 100644 --- a/packages/firestore/test/unit/util/misc.test.ts +++ b/packages/firestore/test/unit/util/misc.test.ts @@ -55,7 +55,7 @@ describe('FieldMask', () => { }); class StringPair { - constructor(public s1: string, public s2: string) {} + constructor(readonly s1: string, readonly s2: string) {} } class StringPairGenerator { From 48a5d6a186e536f3c322578baf94fea2396e1d78 Mon Sep 17 00:00:00 2001 From: milaGGL <107142260+milaGGL@users.noreply.github.com> Date: Thu, 20 Feb 2025 12:08:43 -0500 Subject: [PATCH 07/14] extend the timeout limit for the compareUtf8Strings test --- packages/firestore/test/unit/util/misc.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/firestore/test/unit/util/misc.test.ts b/packages/firestore/test/unit/util/misc.test.ts index a51df285ec0..579cc13c2cb 100644 --- a/packages/firestore/test/unit/util/misc.test.ts +++ b/packages/firestore/test/unit/util/misc.test.ts @@ -299,5 +299,5 @@ describe('CompareUtf8Strings', () => { ); throw new Error('Test failed'); } - }).timeout(10000); + }).timeout(20000); }); From 9653bdc4345339fb41014a444377d5ccc484952c Mon Sep 17 00:00:00 2001 From: milaGGL <107142260+milaGGL@users.noreply.github.com> Date: Thu, 20 Feb 2025 12:49:13 -0500 Subject: [PATCH 08/14] rename unit test --- packages/firestore/src/util/misc.ts | 7 ++++--- packages/firestore/test/unit/util/misc.test.ts | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/packages/firestore/src/util/misc.ts b/packages/firestore/src/util/misc.ts index 513556704ff..04c4891165e 100644 --- a/packages/firestore/src/util/misc.ts +++ b/packages/firestore/src/util/misc.ts @@ -89,6 +89,7 @@ export function compareUtf8Strings(left: string, right: string): number { // Lazy instantiate TextEncoder const encoder = newTextEncoder(); + // Substring and do UTF-8 encoded byte comparison const leftBytes = encoder.encode(getUtf8SafeSubstring(left, i)); const rightBytes = encoder.encode(getUtf8SafeSubstring(right, i)); for ( @@ -96,9 +97,9 @@ export function compareUtf8Strings(left: string, right: string): number { j < Math.min(leftBytes.length, rightBytes.length); j++ ) { - const comparison = primitiveComparator(leftBytes[j], rightBytes[j]); - if (comparison !== 0) { - return comparison; + const comp = primitiveComparator(leftBytes[j], rightBytes[j]); + if (comp !== 0) { + return comp; } } } diff --git a/packages/firestore/test/unit/util/misc.test.ts b/packages/firestore/test/unit/util/misc.test.ts index 579cc13c2cb..c1a2eeb82b7 100644 --- a/packages/firestore/test/unit/util/misc.test.ts +++ b/packages/firestore/test/unit/util/misc.test.ts @@ -267,7 +267,7 @@ class StringBuilder { } describe('CompareUtf8Strings', () => { - it('testCompareUtf8Strings', () => { + it('compareUtf8Strings should return correct results', () => { const errors = []; const seed = Math.floor(Math.random() * Number.MAX_SAFE_INTEGER); let passCount = 0; From bf7adce7c0040eec7790e58a97299032c6f6ed82 Mon Sep 17 00:00:00 2001 From: milaGGL <107142260+milaGGL@users.noreply.github.com> Date: Wed, 26 Feb 2025 12:04:39 -0500 Subject: [PATCH 09/14] fix unit test to not include invalid surrogate --- packages/firestore/src/util/misc.ts | 14 ++++++- .../firestore/test/unit/util/misc.test.ts | 40 +++---------------- 2 files changed, 18 insertions(+), 36 deletions(-) diff --git a/packages/firestore/src/util/misc.ts b/packages/firestore/src/util/misc.ts index 04c4891165e..353b563d33a 100644 --- a/packages/firestore/src/util/misc.ts +++ b/packages/firestore/src/util/misc.ts @@ -77,7 +77,8 @@ export interface Equatable { /** Compare strings in UTF-8 encoded byte order */ export function compareUtf8Strings(left: string, right: string): number { - for (let i = 0; i < left.length && i < right.length; i++) { + let i = 0; + while (i < left.length && i < right.length) { const leftCodePoint = left.codePointAt(i)!; const rightCodePoint = right.codePointAt(i)!; @@ -89,7 +90,7 @@ export function compareUtf8Strings(left: string, right: string): number { // Lazy instantiate TextEncoder const encoder = newTextEncoder(); - // Substring and do UTF-8 encoded byte comparison + // UTF-8 encode the character at index i for byte comparison. const leftBytes = encoder.encode(getUtf8SafeSubstring(left, i)); const rightBytes = encoder.encode(getUtf8SafeSubstring(right, i)); for ( @@ -102,8 +103,17 @@ export function compareUtf8Strings(left: string, right: string): number { return comp; } } + // EXTREMELY RARE CASE: Code points differ, but their UTF-8 byte + // representations are identical. This can happen with malformed input + // (invalid surrogate pairs). The backend also actively prevents invalid + // surrogates as INVALID_ARGUMENT errors, so we almost never receive + // invalid strings from backend. + // Fallback to code point comparison for graceful handling. + return primitiveComparator(leftCodePoint, rightCodePoint); } } + // Increment by 2 for surrogate pairs, 1 otherwise + i += leftCodePoint > 0xffff ? 2 : 1; } // Compare lengths if all characters are equal diff --git a/packages/firestore/test/unit/util/misc.test.ts b/packages/firestore/test/unit/util/misc.test.ts index c1a2eeb82b7..10143b5a08d 100644 --- a/packages/firestore/test/unit/util/misc.test.ts +++ b/packages/firestore/test/unit/util/misc.test.ts @@ -73,22 +73,6 @@ class StringGenerator { private static readonly DEFAULT_SURROGATE_PAIR_PROBABILITY = 0.33; private static readonly DEFAULT_MAX_LENGTH = 20; - // The first Unicode code point that is in the basic multilingual plane ("BMP") and, - // therefore requires 1 UTF-16 code unit to be represented in UTF-16. - private static readonly MIN_BMP_CODE_POINT = 0x00000000; - - // The last Unicode code point that is in the basic multilingual plane ("BMP") and, - // therefore requires 1 UTF-16 code unit to be represented in UTF-16. - private static readonly MAX_BMP_CODE_POINT = 0x0000ffff; - - // The first Unicode code point that is outside of the basic multilingual plane ("BMP") and, - // therefore requires 2 UTF-16 code units, a surrogate pair, to be represented in UTF-16. - private static readonly MIN_SUPPLEMENTARY_CODE_POINT = 0x00010000; - - // The last Unicode code point that is outside of the basic multilingual plane ("BMP") and, - // therefore requires 2 UTF-16 code units, a surrogate pair, to be represented in UTF-16. - private static readonly MAX_SUPPLEMENTARY_CODE_POINT = 0x0010ffff; - private readonly rnd: Random; private readonly surrogatePairProbability: number; private readonly maxLength: number; @@ -198,10 +182,12 @@ class StringGenerator { } private nextNonSurrogateCodePoint(): number { - return this.nextCodePointRange( - StringGenerator.MIN_BMP_CODE_POINT, - StringGenerator.MAX_BMP_CODE_POINT - ); + let codePoint; + do { + codePoint = this.nextCodePointRange(0, 0xffff); // BMP range + } while (codePoint >= 0xd800 && codePoint <= 0xdfff); // Exclude surrogate range + + return codePoint; } private nextCodePointRange(min: number, max: number): number { @@ -209,20 +195,6 @@ class StringGenerator { const offset = this.rnd.nextInt(rangeSize); return min + offset; } - - // private nextCodePointRange(min: number, max: number, expectedCharCount: number): number { - // const rangeSize = max - min; - // const offset = this.rnd.nextInt(rangeSize); - // const codePoint = min + offset; - // if (String.fromCharCode(codePoint).length !== expectedCharCount) { - // throw new Error( - // `internal error vqgqnxcy97: Character.charCount(${codePoint}) returned ${ - // String.fromCharCode(codePoint).length - // }, but expected ${expectedCharCount}`, - // ); - // } - // return codePoint; - // } } class Random { From a3aea95fd7894e9b45c1bd373d6245ed27d7832c Mon Sep 17 00:00:00 2001 From: milaGGL <107142260+milaGGL@users.noreply.github.com> Date: Wed, 26 Feb 2025 14:00:01 -0500 Subject: [PATCH 10/14] port changes from java --- .../firestore/test/unit/util/misc.test.ts | 21 +++++++------------ 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/packages/firestore/test/unit/util/misc.test.ts b/packages/firestore/test/unit/util/misc.test.ts index 10143b5a08d..44c6dc403d0 100644 --- a/packages/firestore/test/unit/util/misc.test.ts +++ b/packages/firestore/test/unit/util/misc.test.ts @@ -92,40 +92,33 @@ class StringGenerator { } else { this.rnd = seedOrRnd; this.surrogatePairProbability = StringGenerator.validateProbability( - 'surrogate pair', surrogatePairProbability! ); - this.maxLength = StringGenerator.validateLength( - 'maximum string', - maxLength! - ); + this.maxLength = StringGenerator.validateLength(maxLength!); } } - private static validateProbability( - name: string, - probability: number - ): number { + private static validateProbability(probability: number): number { if (!Number.isFinite(probability)) { throw new Error( - `invalid ${name} probability: ${probability} (must be between 0.0 and 1.0, inclusive)` + `invalid surrogate pair probability: ${probability} (must be between 0.0 and 1.0, inclusive)` ); } else if (probability < 0.0) { throw new Error( - `invalid ${name} probability: ${probability} (must be greater than or equal to zero)` + `invalid surrogate pair probability: ${probability} (must be greater than or equal to zero)` ); } else if (probability > 1.0) { throw new Error( - `invalid ${name} probability: ${probability} (must be less than or equal to 1)` + `invalid surrogate pair probability: ${probability} (must be less than or equal to 1)` ); } return probability; } - private static validateLength(name: string, length: number): number { + private static validateLength(length: number): number { if (length < 0) { throw new Error( - `invalid ${name} length: ${length} (must be greater than or equal to zero)` + `invalid maximum string length: ${length} (must be greater than or equal to zero)` ); } return length; From 4b528d9d816f2c289a83632612b3e1c93f30a6f1 Mon Sep 17 00:00:00 2001 From: milaGGL <107142260+milaGGL@users.noreply.github.com> Date: Thu, 27 Feb 2025 10:54:27 -0500 Subject: [PATCH 11/14] resolve comment --- packages/firestore/src/util/misc.ts | 37 ++++++++++++++++------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/packages/firestore/src/util/misc.ts b/packages/firestore/src/util/misc.ts index 353b563d33a..42fa568835b 100644 --- a/packages/firestore/src/util/misc.ts +++ b/packages/firestore/src/util/misc.ts @@ -93,23 +93,19 @@ export function compareUtf8Strings(left: string, right: string): number { // UTF-8 encode the character at index i for byte comparison. const leftBytes = encoder.encode(getUtf8SafeSubstring(left, i)); const rightBytes = encoder.encode(getUtf8SafeSubstring(right, i)); - for ( - let j = 0; - j < Math.min(leftBytes.length, rightBytes.length); - j++ - ) { - const comp = primitiveComparator(leftBytes[j], rightBytes[j]); - if (comp !== 0) { - return comp; - } + + const comp = compareByteArrays(leftBytes, rightBytes); + if (comp !== 0) { + return comp; + } else { + // EXTREMELY RARE CASE: Code points differ, but their UTF-8 byte + // representations are identical. This can happen with malformed input + // (invalid surrogate pairs). The backend also actively prevents invalid + // surrogates as INVALID_ARGUMENT errors, so we almost never receive + // invalid strings from backend. + // Fallback to code point comparison for graceful handling. + return primitiveComparator(leftCodePoint, rightCodePoint); } - // EXTREMELY RARE CASE: Code points differ, but their UTF-8 byte - // representations are identical. This can happen with malformed input - // (invalid surrogate pairs). The backend also actively prevents invalid - // surrogates as INVALID_ARGUMENT errors, so we almost never receive - // invalid strings from backend. - // Fallback to code point comparison for graceful handling. - return primitiveComparator(leftCodePoint, rightCodePoint); } } // Increment by 2 for surrogate pairs, 1 otherwise @@ -131,6 +127,15 @@ function getUtf8SafeSubstring(str: string, index: number): string { } } +function compareByteArrays(left: Uint8Array, right: Uint8Array): number { + for (let i = 0; i < left.length && i < right.length; ++i) { + if (left[i] !== right[i]) { + return primitiveComparator(left[i], right[i]); + } + } + return primitiveComparator(left.length, right.length); +} + export interface Iterable { forEach: (cb: (v: V) => void) => void; } From e60df166632f813247769e83e34cad70fdde48c2 Mon Sep 17 00:00:00 2001 From: milaGGL <107142260+milaGGL@users.noreply.github.com> Date: Fri, 28 Feb 2025 14:01:38 -0500 Subject: [PATCH 12/14] resolve comments --- .../firestore/test/unit/util/misc.test.ts | 357 +++++++++--------- 1 file changed, 184 insertions(+), 173 deletions(-) diff --git a/packages/firestore/test/unit/util/misc.test.ts b/packages/firestore/test/unit/util/misc.test.ts index 44c6dc403d0..0829259a8e8 100644 --- a/packages/firestore/test/unit/util/misc.test.ts +++ b/packages/firestore/test/unit/util/misc.test.ts @@ -54,215 +54,226 @@ describe('FieldMask', () => { }); }); -class StringPair { - constructor(readonly s1: string, readonly s2: string) {} -} - -class StringPairGenerator { - constructor(private stringGenerator: StringGenerator) {} - - next(): StringPair { - const prefix = this.stringGenerator.next(); - const s1 = prefix + this.stringGenerator.next(); - const s2 = prefix + this.stringGenerator.next(); - return new StringPair(s1, s2); - } -} - -class StringGenerator { - private static readonly DEFAULT_SURROGATE_PAIR_PROBABILITY = 0.33; - private static readonly DEFAULT_MAX_LENGTH = 20; - - private readonly rnd: Random; - private readonly surrogatePairProbability: number; - private readonly maxLength: number; - - constructor(seed: number); - constructor(rnd: Random, surrogatePairProbability: number, maxLength: number); - constructor( - seedOrRnd: number | Random, - surrogatePairProbability?: number, - maxLength?: number - ) { - if (typeof seedOrRnd === 'number') { - this.rnd = new Random(seedOrRnd); - this.surrogatePairProbability = - StringGenerator.DEFAULT_SURROGATE_PAIR_PROBABILITY; - this.maxLength = StringGenerator.DEFAULT_MAX_LENGTH; - } else { - this.rnd = seedOrRnd; - this.surrogatePairProbability = StringGenerator.validateProbability( - surrogatePairProbability! - ); - this.maxLength = StringGenerator.validateLength(maxLength!); +describe('CompareUtf8Strings', () => { + it('compareUtf8Strings should return correct results', () => { + const errors = []; + const seed = Math.floor(Math.random() * Number.MAX_SAFE_INTEGER); + let passCount = 0; + const stringGenerator = new StringGenerator(new Random(seed), 0.33, 20); + const stringPairGenerator = new StringPairGenerator(stringGenerator); + + for (let i = 0; i < 1000000 && errors.length < 10; i++) { + const { s1, s2 } = stringPairGenerator.next(); + + const actual = compareUtf8Strings(s1, s2); + const expected = Buffer.from(s1, 'utf8').compare(Buffer.from(s2, 'utf8')); + + if (actual === expected) { + passCount++; + } else { + errors.push( + `compareUtf8Strings(s1="${s1}", s2="${s2}") returned ${actual}, ` + + `but expected ${expected} (i=${i}, s1.length=${s1.length}, s2.length=${s2.length})` + ); + } } - } - private static validateProbability(probability: number): number { - if (!Number.isFinite(probability)) { - throw new Error( - `invalid surrogate pair probability: ${probability} (must be between 0.0 and 1.0, inclusive)` - ); - } else if (probability < 0.0) { - throw new Error( - `invalid surrogate pair probability: ${probability} (must be greater than or equal to zero)` + if (errors.length > 0) { + console.error( + `${errors.length} test cases failed, ${passCount} test cases passed, seed=${seed};` ); - } else if (probability > 1.0) { - throw new Error( - `invalid surrogate pair probability: ${probability} (must be less than or equal to 1)` + errors.forEach((error, index) => + console.error(`errors[${index}]: ${error}`) ); + throw new Error('Test failed'); } - return probability; - } + }).timeout(20000); - private static validateLength(length: number): number { - if (length < 0) { - throw new Error( - `invalid maximum string length: ${length} (must be greater than or equal to zero)` - ); - } - return length; + class StringPair { + constructor(readonly s1: string, readonly s2: string) {} } - next(): string { - const length = this.rnd.nextInt(this.maxLength + 1); - const sb = new StringBuilder(); - while (sb.length() < length) { - const codePoint = this.nextCodePoint(); - sb.appendCodePoint(codePoint); + class StringPairGenerator { + constructor(private stringGenerator: StringGenerator) {} + + next(): StringPair { + const prefix = this.stringGenerator.next(); + const s1 = prefix + this.stringGenerator.next(); + const s2 = prefix + this.stringGenerator.next(); + return new StringPair(s1, s2); } - return sb.toString(); } - private isNextSurrogatePair(): boolean { - return StringGenerator.nextBoolean(this.rnd, this.surrogatePairProbability); - } + class StringGenerator { + private static readonly DEFAULT_SURROGATE_PAIR_PROBABILITY = 0.33; + private static readonly DEFAULT_MAX_LENGTH = 20; - private static nextBoolean(rnd: Random, probability: number): boolean { - if (probability === 0.0) { - return false; - } else if (probability === 1.0) { - return true; - } else { - return rnd.nextFloat() < probability; - } - } + // Pseudo-random number generator. Seed can be set for repeatable tests. + private readonly rnd: Random; + private readonly surrogatePairProbability: number; + private readonly maxLength: number; - private nextCodePoint(): number { - if (this.isNextSurrogatePair()) { - return this.nextSurrogateCodePoint(); - } else { - return this.nextNonSurrogateCodePoint(); + constructor(seed: number); + constructor( + rnd: Random, + surrogatePairProbability: number, + maxLength: number + ); + constructor( + seedOrRnd: number | Random, + surrogatePairProbability?: number, + maxLength?: number + ) { + if (typeof seedOrRnd === 'number') { + this.rnd = new Random(seedOrRnd); + this.surrogatePairProbability = + StringGenerator.DEFAULT_SURROGATE_PAIR_PROBABILITY; + this.maxLength = StringGenerator.DEFAULT_MAX_LENGTH; + } else { + this.rnd = seedOrRnd; + this.surrogatePairProbability = StringGenerator.validateProbability( + surrogatePairProbability! + ); + this.maxLength = StringGenerator.validateLength(maxLength!); + } } - } - private nextSurrogateCodePoint(): number { - const highSurrogateMin = 0xd800; - const highSurrogateMax = 0xdbff; - const lowSurrogateMin = 0xdc00; - const lowSurrogateMax = 0xdfff; + private static validateProbability(probability: number): number { + if (!Number.isFinite(probability)) { + throw new Error( + `invalid surrogate pair probability: ${probability} (must be between 0.0 and 1.0, inclusive)` + ); + } else if (probability < 0.0) { + throw new Error( + `invalid surrogate pair probability: ${probability} (must be greater than or equal to zero)` + ); + } else if (probability > 1.0) { + throw new Error( + `invalid surrogate pair probability: ${probability} (must be less than or equal to 1)` + ); + } + return probability; + } - const highSurrogate = this.nextCodePointRange( - highSurrogateMin, - highSurrogateMax - ); - const lowSurrogate = this.nextCodePointRange( - lowSurrogateMin, - lowSurrogateMax - ); + private static validateLength(length: number): number { + if (length < 0) { + throw new Error( + `invalid maximum string length: ${length} (must be greater than or equal to zero)` + ); + } + return length; + } - return (highSurrogate - 0xd800) * 0x400 + (lowSurrogate - 0xdc00) + 0x10000; - } + next(): string { + const length = this.rnd.nextInt(this.maxLength + 1); + const sb = new StringBuilder(); + while (sb.length() < length) { + const codePoint = this.nextCodePoint(); + sb.appendCodePoint(codePoint); + } + return sb.toString(); + } - private nextNonSurrogateCodePoint(): number { - let codePoint; - do { - codePoint = this.nextCodePointRange(0, 0xffff); // BMP range - } while (codePoint >= 0xd800 && codePoint <= 0xdfff); // Exclude surrogate range + private isNextSurrogatePair(): boolean { + return StringGenerator.nextBoolean( + this.rnd, + this.surrogatePairProbability + ); + } - return codePoint; - } + private static nextBoolean(rnd: Random, probability: number): boolean { + if (probability === 0.0) { + return false; + } else if (probability === 1.0) { + return true; + } else { + return rnd.nextFloat() < probability; + } + } - private nextCodePointRange(min: number, max: number): number { - const rangeSize = max - min + 1; - const offset = this.rnd.nextInt(rangeSize); - return min + offset; - } -} + private nextCodePoint(): number { + if (this.isNextSurrogatePair()) { + return this.nextSurrogateCodePoint(); + } else { + return this.nextNonSurrogateCodePoint(); + } + } -class Random { - private seed: number; + private nextSurrogateCodePoint(): number { + const highSurrogateMin = 0xd800; + const highSurrogateMax = 0xdbff; + const lowSurrogateMin = 0xdc00; + const lowSurrogateMax = 0xdfff; - constructor(seed: number) { - this.seed = seed; - } + const highSurrogate = this.nextCodePointRange( + highSurrogateMin, + highSurrogateMax + ); + const lowSurrogate = this.nextCodePointRange( + lowSurrogateMin, + lowSurrogateMax + ); - nextInt(max: number): number { - this.seed = (this.seed * 9301 + 49297) % 233280; - const rnd = this.seed / 233280; - return Math.floor(rnd * max); - } + return ( + (highSurrogate - 0xd800) * 0x400 + (lowSurrogate - 0xdc00) + 0x10000 + ); + } - nextFloat(): number { - this.seed = (this.seed * 9301 + 49297) % 233280; - return this.seed / 233280; - } -} + private nextNonSurrogateCodePoint(): number { + let codePoint; + do { + codePoint = this.nextCodePointRange(0, 0xffff); // BMP range + } while (codePoint >= 0xd800 && codePoint <= 0xdfff); // Exclude surrogate range -class StringBuilder { - private buffer: string[] = []; + return codePoint; + } - append(str: string): StringBuilder { - this.buffer.push(str); - return this; + private nextCodePointRange(min: number, max: number): number { + const rangeSize = max - min + 1; + const offset = this.rnd.nextInt(rangeSize); + return min + offset; + } } - appendCodePoint(codePoint: number): StringBuilder { - this.buffer.push(String.fromCodePoint(codePoint)); - return this; - } + class Random { + private seed: number; - toString(): string { - return this.buffer.join(''); - } + constructor(seed: number) { + this.seed = seed; + } + + nextInt(max: number): number { + // Update the seed with pseudo-randomized numbers using a Linear Congruential Generator (LCG). + this.seed = (this.seed * 9301 + 49297) % 233280; + const rnd = this.seed / 233280; + return Math.floor(rnd * max); + } - length(): number { - return this.buffer.join('').length; + nextFloat(): number { + this.seed = (this.seed * 9301 + 49297) % 233280; + return this.seed / 233280; + } } -} -describe('CompareUtf8Strings', () => { - it('compareUtf8Strings should return correct results', () => { - const errors = []; - const seed = Math.floor(Math.random() * Number.MAX_SAFE_INTEGER); - let passCount = 0; - const stringGenerator = new StringGenerator(new Random(seed), 0.33, 20); - const stringPairGenerator = new StringPairGenerator(stringGenerator); + class StringBuilder { + private buffer: string[] = []; - for (let i = 0; i < 1000000 && errors.length < 10; i++) { - const { s1, s2 } = stringPairGenerator.next(); + append(str: string): StringBuilder { + this.buffer.push(str); + return this; + } - const actual = compareUtf8Strings(s1, s2); - const expected = Buffer.from(s1, 'utf8').compare(Buffer.from(s2, 'utf8')); + appendCodePoint(codePoint: number): StringBuilder { + this.buffer.push(String.fromCodePoint(codePoint)); + return this; + } - if (actual === expected) { - passCount++; - } else { - errors.push( - `compareUtf8Strings(s1="${s1}", s2="${s2}") returned ${actual}, ` + - `but expected ${expected} (i=${i}, s1.length=${s1.length}, s2.length=${s2.length})` - ); - } + toString(): string { + return this.buffer.join(''); } - if (errors.length > 0) { - console.error( - `${errors.length} test cases failed, ${passCount} test cases passed, seed=${seed};` - ); - errors.forEach((error, index) => - console.error(`errors[${index}]: ${error}`) - ); - throw new Error('Test failed'); + length(): number { + return this.buffer.join('').length; } - }).timeout(20000); + } }); From fa684eb6641c197d4a656b93485bdd0ca3c32a7c Mon Sep 17 00:00:00 2001 From: milaGGL <107142260+milaGGL@users.noreply.github.com> Date: Thu, 6 Mar 2025 12:05:33 -0500 Subject: [PATCH 13/14] skip the unit test to pass flaky github check --- packages/firestore/test/unit/util/misc.test.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/firestore/test/unit/util/misc.test.ts b/packages/firestore/test/unit/util/misc.test.ts index 0829259a8e8..f8900b0eedb 100644 --- a/packages/firestore/test/unit/util/misc.test.ts +++ b/packages/firestore/test/unit/util/misc.test.ts @@ -54,7 +54,11 @@ describe('FieldMask', () => { }); }); -describe('CompareUtf8Strings', () => { +// TODO(b/401243560): This test is contributing to the flakiness of the github +// check "(Firestore) Node.js and Browser (Chrome) Tests". Re-enable this test +// once b/401243560 is fixed. +// eslint-disable-next-line no-restricted-properties +describe.skip('CompareUtf8Strings', () => { it('compareUtf8Strings should return correct results', () => { const errors = []; const seed = Math.floor(Math.random() * Number.MAX_SAFE_INTEGER); From 9174b3467a85ce2dce9a9a73ed83e375c932365a Mon Sep 17 00:00:00 2001 From: milaGGL <107142260+milaGGL@users.noreply.github.com> Date: Thu, 6 Mar 2025 12:33:25 -0500 Subject: [PATCH 14/14] Revert "skip the unit test to pass flaky github check" This reverts commit fa684eb6641c197d4a656b93485bdd0ca3c32a7c. --- packages/firestore/test/unit/util/misc.test.ts | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/packages/firestore/test/unit/util/misc.test.ts b/packages/firestore/test/unit/util/misc.test.ts index f8900b0eedb..0829259a8e8 100644 --- a/packages/firestore/test/unit/util/misc.test.ts +++ b/packages/firestore/test/unit/util/misc.test.ts @@ -54,11 +54,7 @@ describe('FieldMask', () => { }); }); -// TODO(b/401243560): This test is contributing to the flakiness of the github -// check "(Firestore) Node.js and Browser (Chrome) Tests". Re-enable this test -// once b/401243560 is fixed. -// eslint-disable-next-line no-restricted-properties -describe.skip('CompareUtf8Strings', () => { +describe('CompareUtf8Strings', () => { it('compareUtf8Strings should return correct results', () => { const errors = []; const seed = Math.floor(Math.random() * Number.MAX_SAFE_INTEGER);