Skip to content

Commit 170d05b

Browse files
fix: update retry policy to not retry streams that have not made progress (#1946)
fix: update retry policy to not retry streams with retryable error that have not made progress receiving documents
1 parent f83a995 commit 170d05b

File tree

5 files changed

+367
-52
lines changed

5 files changed

+367
-52
lines changed

dev/src/index.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1637,7 +1637,7 @@ export class Firestore implements firestore.Firestore {
16371637
function streamReady(): void {
16381638
if (!streamInitialized) {
16391639
streamInitialized = true;
1640-
logger('Firestore._initializeStream', requestTag, 'Releasing stream');
1640+
logger('Firestore._initializeStream', requestTag, 'Stream ready');
16411641
resolve(resultStream);
16421642
}
16431643
}

dev/src/reference.ts

Lines changed: 74 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ import {defaultConverter} from './types';
4444
import {
4545
autoId,
4646
Deferred,
47+
getTotalTimeout,
4748
isPermanentRpcError,
4849
mapToArray,
4950
requestTag,
@@ -2569,6 +2570,15 @@ export class Query<
25692570
return isPermanentRpcError(err, methodName);
25702571
}
25712572

2573+
/**
 * Reports whether the total retry timeout for the given RPC has elapsed
 * since `startTime`.
 *
 * @param methodName Name of the RPC; passed to `getTotalTimeout()` to look
 * up the total timeout.
 * @param startTime Timestamp to measure from, on the same clock as
 * `Date.now()`.
 * @returns `true` when at least the total timeout has passed since
 * `startTime`; `false` otherwise, and always `false` when the looked-up
 * total timeout is `0`.
 */
_hasRetryTimedOut(methodName: string, startTime: number): boolean {
2574+
const totalTimeout = getTotalTimeout(methodName);
2575+
// A total timeout of 0 never counts as timed out.
if (totalTimeout === 0) {
2576+
return false;
2577+
}
2578+
2579+
return Date.now() - startTime >= totalTimeout;
2580+
}
2581+
25722582
/**
25732583
* Internal streaming method that accepts an optional transaction ID.
25742584
*
@@ -2579,6 +2589,7 @@ export class Query<
25792589
*/
25802590
_stream(transactionId?: Uint8Array): NodeJS.ReadableStream {
25812591
const tag = requestTag();
2592+
const startTime = Date.now();
25822593

25832594
let lastReceivedDocument: QueryDocumentSnapshot<
25842595
AppModelType,
@@ -2638,8 +2649,9 @@ export class Query<
26382649
let streamActive: Deferred<boolean>;
26392650
do {
26402651
streamActive = new Deferred<boolean>();
2652+
const methodName = 'runQuery';
26412653
backendStream = await this._firestore.requestStream(
2642-
'runQuery',
2654+
methodName,
26432655
/* bidirectional= */ false,
26442656
request,
26452657
tag
@@ -2656,12 +2668,28 @@ export class Query<
26562668
'Query failed with retryable stream error:',
26572669
err
26582670
);
2659-
// Enqueue a "no-op" write into the stream and resume the query
2660-
// once it is processed. This allows any enqueued results to be
2661-
// consumed before resuming the query so that the query resumption
2662-
// can start at the correct document.
2671+
2672+
// Enqueue a "no-op" write into the stream and wait for it to be
2673+
// read by the downstream consumer. This ensures that all enqueued
2674+
// results in the stream are consumed, which will give us an accurate
2675+
// value for `lastReceivedDocument`.
26632676
stream.write(NOOP_MESSAGE, () => {
2664-
if (lastReceivedDocument) {
2677+
if (this._hasRetryTimedOut(methodName, startTime)) {
2678+
logger(
2679+
'Query._stream',
2680+
tag,
2681+
'Query failed with retryable stream error but the total retry timeout has exceeded.'
2682+
);
2683+
stream.destroy(err);
2684+
streamActive.resolve(/* active= */ false);
2685+
} else if (lastReceivedDocument) {
2686+
logger(
2687+
'Query._stream',
2688+
tag,
2689+
'Query failed with retryable stream error and progress was made receiving ' +
2690+
'documents, so the stream is being retried.'
2691+
);
2692+
26652693
// Restart the query but use the last document we received as
26662694
// the query cursor. Note that we do not use backoff here. The
26672695
// call to `requestStream()` will backoff should the restart
@@ -2673,8 +2701,21 @@ export class Query<
26732701
} else {
26742702
request = this.startAfter(lastReceivedDocument).toProto();
26752703
}
2704+
2705+
// Set lastReceivedDocument to null before each retry attempt to ensure the retry makes progress
2706+
lastReceivedDocument = null;
2707+
2708+
streamActive.resolve(/* active= */ true);
2709+
} else {
2710+
logger(
2711+
'Query._stream',
2712+
tag,
2713+
'Query failed with retryable stream error however no progress was made receiving ' +
2714+
'documents, so the stream is being closed.'
2715+
);
2716+
stream.destroy(err);
2717+
streamActive.resolve(/* active= */ false);
26762718
}
2677-
streamActive.resolve(/* active= */ true);
26782719
});
26792720
} else {
26802721
logger(
@@ -3320,48 +3361,33 @@ export class AggregateQuery<
33203361
// catch below.
33213362
const request = this.toProto(transactionId);
33223363

3323-
let streamActive: Deferred<boolean>;
3324-
do {
3325-
streamActive = new Deferred<boolean>();
3326-
const backendStream = await firestore.requestStream(
3327-
'runAggregationQuery',
3328-
/* bidirectional= */ false,
3329-
request,
3330-
tag
3331-
);
3332-
stream.on('close', () => {
3333-
backendStream.resume();
3334-
backendStream.end();
3335-
});
3336-
backendStream.on('error', err => {
3337-
backendStream.unpipe(stream);
3338-
// If a non-transactional query failed, attempt to restart.
3339-
// Transactional queries are retried via the transaction runner.
3340-
if (
3341-
!transactionId &&
3342-
!isPermanentRpcError(err, 'runAggregationQuery')
3343-
) {
3344-
logger(
3345-
'AggregateQuery._stream',
3346-
tag,
3347-
'AggregateQuery failed with retryable stream error:',
3348-
err
3349-
);
3350-
streamActive.resolve(/* active= */ true);
3351-
} else {
3352-
logger(
3353-
'AggregateQuery._stream',
3354-
tag,
3355-
'AggregateQuery failed with stream error:',
3356-
err
3357-
);
3358-
stream.destroy(err);
3359-
streamActive.resolve(/* active= */ false);
3360-
}
3361-
});
3364+
const backendStream = await firestore.requestStream(
3365+
'runAggregationQuery',
3366+
/* bidirectional= */ false,
3367+
request,
3368+
tag
3369+
);
3370+
stream.on('close', () => {
33623371
backendStream.resume();
3363-
backendStream.pipe(stream);
3364-
} while (await streamActive.promise);
3372+
backendStream.end();
3373+
});
3374+
backendStream.on('error', err => {
3375+
// TODO(group-by) When group-by queries are supported for aggregates
3376+
// consider implementing retries if the stream is making progress
3377+
// receiving results for groups. See the use of lastReceivedDocument
3378+
// in the retry strategy for runQuery.
3379+
3380+
backendStream.unpipe(stream);
3381+
logger(
3382+
'AggregateQuery._stream',
3383+
tag,
3384+
'AggregateQuery failed with stream error:',
3385+
err
3386+
);
3387+
stream.destroy(err);
3388+
});
3389+
backendStream.resume();
3390+
backendStream.pipe(stream);
33653391
})
33663392
.catch(e => stream.destroy(e));
33673393

dev/src/util.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,21 @@ export function getRetryCodes(methodName: string): number[] {
178178
return getServiceConfig(methodName)?.retry?.retryCodes ?? [];
179179
}
180180

181+
/**
182+
* Gets the total timeout in milliseconds from the retry settings in
183+
* the service config for the given RPC. If the total timeout is not
184+
* set, then `0` is returned.
185+
*
* @param methodName Name of the RPC whose service config is looked up.
* @returns The configured `totalTimeoutMillis`, or `0` when it is absent.
*
186+
* @private
187+
* @internal
188+
*/
189+
export function getTotalTimeout(methodName: string): number {
190+
// Optional chaining means a missing config, `retry`, or `backoffSettings`
// entry all fall through to the `0` default below.
return (
191+
getServiceConfig(methodName)?.retry?.backoffSettings?.totalTimeoutMillis ??
192+
0
193+
);
194+
}
195+
181196
/**
182197
* Returns the backoff setting from the service configuration.
183198
* @private

dev/test/aggregateQuery.ts

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,19 +126,31 @@ describe('aggregate query interface', () => {
126126
});
127127
});
128128

129-
it('handles stream exception at initialization', () => {
129+
it('handles stream exception at initialization', async () => {
130+
let attempts = 0;
130131
const query = firestore.collection('collectionId').count();
131132

132133
query._stream = () => {
134+
++attempts;
133135
throw new Error('Expected error');
134136
};
135137

136-
return expect(query.get()).to.eventually.rejectedWith('Expected error');
138+
await query
139+
.get()
140+
.then(() => {
141+
throw new Error('Unexpected success in Promise');
142+
})
143+
.catch(err => {
144+
expect(err.message).to.equal('Expected error');
145+
expect(attempts).to.equal(1);
146+
});
137147
});
138148

139149
it('handles stream exception during initialization', async () => {
150+
let attempts = 0;
140151
const overrides: ApiOverride = {
141152
runAggregationQuery: () => {
153+
++attempts;
142154
return stream(new Error('Expected error'));
143155
},
144156
};
@@ -152,6 +164,31 @@ describe('aggregate query interface', () => {
152164
})
153165
.catch(err => {
154166
expect(err.message).to.equal('Expected error');
167+
expect(attempts).to.equal(5);
168+
});
169+
});
170+
171+
it('handles message without result during initialization', async () => {
172+
let attempts = 0;
173+
const overrides: ApiOverride = {
174+
runAggregationQuery: () => {
175+
++attempts;
176+
return stream({readTime: {seconds: 5, nanos: 6}});
177+
},
178+
};
179+
firestore = await createInstance(overrides);
180+
181+
const query = firestore.collection('collectionId').count();
182+
await query
183+
.get()
184+
.then(() => {
185+
throw new Error('Unexpected success in Promise');
186+
})
187+
.catch(err => {
188+
expect(err.message).to.equal(
189+
'RunAggregationQueryResponse is missing result'
190+
);
191+
expect(attempts).to.equal(1);
155192
});
156193
});
157194
});

0 commit comments

Comments
 (0)