Skip to content

Commit f7c53ae

Browse files
committed
Consistently handle exceptions in reactive streams
* Fixed `PublisherCoroutine` and `rxObservable` ignoring cancellations. * Fatal exceptions are not treated in a special manner by us anymore. Instead, we follow the requirement in the reactive streams specification that, in case some method of `Subscriber` throws, that subscriber MUST be considered cancelled, and the exception MUST be reported in some place other than `onError`. * Fixed `trySend` sometimes throwing in `PublisherCoroutine` and `rxObservable`. * When an exception happens inside a cancellation handler, we now consistently throw the original exception passed to the handler, with the new exception added as suppressed.
1 parent 12f4dbc commit f7c53ae

22 files changed

+422
-342
lines changed

reactive/kotlinx-coroutines-jdk9/test/IntegrationTest.kt

+21-1
Original file line numberDiff line numberDiff line change
@@ -129,4 +129,24 @@ class IntegrationTest(
129129
assertEquals(n, last)
130130
}
131131

132-
}
132+
}
133+
134+
internal suspend inline fun <reified E: Throwable> assertCallsExceptionHandlerWith(
135+
crossinline operation: suspend (CoroutineExceptionHandler) -> Unit): E
136+
{
137+
val caughtExceptions = mutableListOf<Throwable>()
138+
val exceptionHandler = object: AbstractCoroutineContextElement(CoroutineExceptionHandler),
139+
CoroutineExceptionHandler
140+
{
141+
override fun handleException(context: CoroutineContext, exception: Throwable) {
142+
caughtExceptions += exception
143+
}
144+
}
145+
return withContext(exceptionHandler) {
146+
operation(exceptionHandler)
147+
caughtExceptions.single().let {
148+
assertTrue(it is E, it.toString())
149+
it
150+
}
151+
}
152+
}

reactive/kotlinx-coroutines-jdk9/test/PublishTest.kt

+35-34
Original file line numberDiff line numberDiff line change
@@ -123,42 +123,43 @@ class PublishTest : TestBase() {
123123

124124
@Test
125125
fun testOnNextError() = runTest {
126+
val latch = CompletableDeferred<Unit>()
126127
expect(1)
127-
val publisher = flowPublish(currentDispatcher()) {
128-
expect(4)
129-
try {
130-
send("OK")
131-
} catch(e: Throwable) {
132-
expect(6)
133-
assert(e is TestException)
134-
}
128+
assertCallsExceptionHandlerWith<TestException> { exceptionHandler ->
129+
val publisher = flowPublish(currentDispatcher() + exceptionHandler) {
130+
expect(4)
131+
try {
132+
send("OK")
133+
} catch(e: Throwable) {
134+
expect(6)
135+
assert(e is TestException)
136+
latch.complete(Unit)
137+
}
138+
}
139+
expect(2)
140+
publisher.subscribe(object : JFlow.Subscriber<String> {
141+
override fun onComplete() {
142+
expectUnreached()
143+
}
144+
145+
override fun onSubscribe(s: JFlow.Subscription) {
146+
expect(3)
147+
s.request(1)
148+
}
149+
150+
override fun onNext(t: String) {
151+
expect(5)
152+
assertEquals("OK", t)
153+
throw TestException()
154+
}
155+
156+
override fun onError(t: Throwable) {
157+
expectUnreached()
158+
}
159+
})
160+
latch.await()
135161
}
136-
expect(2)
137-
val latch = CompletableDeferred<Unit>()
138-
publisher.subscribe(object : JFlow.Subscriber<String> {
139-
override fun onComplete() {
140-
expectUnreached()
141-
}
142-
143-
override fun onSubscribe(s: JFlow.Subscription) {
144-
expect(3)
145-
s.request(1)
146-
}
147-
148-
override fun onNext(t: String) {
149-
expect(5)
150-
assertEquals("OK", t)
151-
throw TestException()
152-
}
153-
154-
override fun onError(t: Throwable) {
155-
expect(7)
156-
assert(t is TestException)
157-
latch.complete(Unit)
158-
}
159-
})
160-
latch.await()
161-
finish(8)
162+
finish(7)
162163
}
163164

164165
@Test

reactive/kotlinx-coroutines-reactive/src/Publish.kt

+91-64
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import kotlinx.coroutines.selects.*
1010
import kotlinx.coroutines.sync.*
1111
import org.reactivestreams.*
1212
import kotlin.coroutines.*
13-
import kotlin.internal.*
1413

1514
/**
1615
* Creates cold reactive [Publisher] that runs a given [block] in a coroutine.
@@ -69,34 +68,32 @@ public class PublisherCoroutine<in T>(
6968
) : AbstractCoroutine<Unit>(parentContext, false, true), ProducerScope<T>, Subscription, SelectClause2<T, SendChannel<T>> {
7069
override val channel: SendChannel<T> get() = this
7170

72-
// Mutex is locked when either nRequested == 0 or while subscriber.onXXX is being invoked
71+
// Mutex is locked when either nRequested == 0, unless `isCompleted`, or while subscriber.onXXX is being invoked
7372
private val mutex = Mutex(locked = true)
7473
private val _nRequested = atomic(0L) // < 0 when closed (CLOSED or SIGNALLED)
7574

7675
@Volatile
77-
private var cancelled = false // true when Subscription.cancel() is invoked
76+
private var cancelled = false // true after Subscription.cancel() is invoked
7877

7978
override val isClosedForSend: Boolean get() = isCompleted
8079
override fun close(cause: Throwable?): Boolean = cancelCoroutine(cause)
8180
override fun invokeOnClose(handler: (Throwable?) -> Unit): Nothing =
8281
throw UnsupportedOperationException("PublisherCoroutine doesn't support invokeOnClose")
8382

84-
override fun trySend(element: T): ChannelResult<Unit> {
85-
if (!mutex.tryLock()) return ChannelResult.failure()
86-
doLockedNext(element)
87-
return ChannelResult.success(Unit)
88-
}
83+
// TODO: will throw if `null` is passed -- is throwing this kind of programmer-induced errors okay?
84+
override fun trySend(element: T): ChannelResult<Unit> =
85+
if (!mutex.tryLock()) {
86+
ChannelResult.failure()
87+
} else {
88+
when (val throwable = doLockedNext(element)) {
89+
null -> ChannelResult.success(Unit)
90+
else -> ChannelResult.closed(throwable)
91+
}
92+
}
8993

9094
public override suspend fun send(element: T) {
91-
// fast-path -- try send without suspension
92-
if (offer(element)) return
93-
// slow-path does suspend
94-
return sendSuspend(element)
95-
}
96-
97-
private suspend fun sendSuspend(element: T) {
9895
mutex.lock()
99-
doLockedNext(element)
96+
doLockedNext(element)?.let { throw it }
10097
}
10198

10299
override val onSend: SelectClause2<T, SendChannel<T>>
@@ -106,13 +103,13 @@ public class PublisherCoroutine<in T>(
106103
@Suppress("PARAMETER_NAME_CHANGED_ON_OVERRIDE")
107104
override fun <R> registerSelectClause2(select: SelectInstance<R>, element: T, block: suspend (SendChannel<T>) -> R) {
108105
mutex.onLock.registerSelectClause2(select, null) {
109-
doLockedNext(element)
106+
doLockedNext(element)?.let { throw it }
110107
block(this)
111108
}
112109
}
113110

114111
/*
115-
* This code is not trivial because of the two properties:
112+
* This code is not trivial because of the following properties:
116113
* 1. It ensures conformance to the reactive specification that mandates that onXXX invocations should not
117114
* be concurrent. It uses Mutex to protect all onXXX invocation and ensure conformance even when multiple
118115
* coroutines are invoking `send` function.
@@ -121,27 +118,60 @@ public class PublisherCoroutine<in T>(
121118
* globally-scoped coroutine that is invoking `send` outside of this context. Without extra precaution this may
122119
* lead to `onNext` that is concurrent with `onComplete/onError`, so that is why signalling for
123120
* `onComplete/onError` is also done under the same mutex.
121+
* 3. The reactive specification forbids emitting more elements than requested, so `onNext` is forbidden until the
122+
* subscriber actually requests some elements. This is implemented by the mutex being locked when emitting
123+
* elements is not permitted (`_nRequested.value == 0`).
124124
*/
125125

126-
// assert: mutex.isLocked()
127-
private fun doLockedNext(elem: T) {
128-
// check if already closed for send, note that isActive becomes false as soon as cancel() is invoked,
129-
// because the job is cancelled, so this check also ensure conformance to the reactive specification's
130-
// requirement that after cancellation requested we don't call onXXX
126+
/**
127+
* Attempts to emit a value to the subscriber and, if back-pressure permits this, unlock the mutex.
128+
*
129+
* Requires that the caller has locked the mutex before this invocation.
130+
*
131+
* If the channel is closed, returns the corresponding [Throwable]; otherwise, returns `null` to denote success.
132+
*
133+
* @throws NullPointerException if the passed element is `null`
134+
*/
135+
private fun doLockedNext(elem: T): Throwable? {
136+
if (elem == null) {
137+
throw NullPointerException("Can not emit null")
138+
}
139+
/** This guards against the case when the caller of this function managed to lock the mutex not because some
140+
* elements were requested--and thus it is permitted to call `onNext`--but because the channel was closed.
141+
*
142+
* It may look like there is a race condition here between `isActive` and a concurrent cancellation, but it's
143+
* okay for a cancellation to happen during `onNext`, as the reactive spec only requires that we *eventually*
144+
* stop signalling the subscriber. */
131145
if (!isActive) {
132146
unlockAndCheckCompleted()
133-
throw getCancellationException()
147+
return getCancellationException()
134148
}
135-
// notify subscriber
149+
// notify the subscriber
136150
try {
137151
subscriber.onNext(elem)
138-
} catch (e: Throwable) {
139-
// If onNext fails with exception, then we cancel coroutine (with this exception) and then rethrow it
140-
// to abort the corresponding send/offer invocation. From the standpoint of coroutines machinery,
141-
// this failure is essentially equivalent to a failure of a child coroutine.
142-
cancelCoroutine(e)
152+
} catch (cause: Throwable) {
153+
/** The reactive streams spec forbids the subscribers from throwing from [Subscriber.onNext] unless the
154+
* element is `null`, which we check not to be the case. Therefore, we report this exception to the handler
155+
* for uncaught exceptions and consider the subscription cancelled, as mandated by
156+
* https://github.com/reactive-streams/reactive-streams-jvm/blob/v1.0.3/README.md#2.13.
157+
*
158+
* Some reactive implementations, like RxJava or Reactor, are known to throw from [Subscriber.onNext] if the
159+
* execution encounters an exception they consider to be "fatal", like [VirtualMachineError] or
160+
* [ThreadDeath]. Us using the handler for the undeliverable exceptions to signal "fatal" exceptions is
161+
* inconsistent with RxJava and Reactor, which attempt to bubble the exception up the call chain as soon as
162+
* possible. However, we can't do much better here, as simply throwing from all methods indiscriminately
163+
* would violate the contracts we place on them. */
164+
cancelled = true
165+
val causeDelivered = close(cause)
143166
unlockAndCheckCompleted()
144-
throw e
167+
return if (causeDelivered) {
168+
// `cause` is the reason this channel is closed
169+
cause
170+
} else {
171+
// Someone else closed the channel during `onNext`. We report `cause` as an undeliverable exception.
172+
exceptionOnCancelHandler(cause, context)
173+
getCancellationException()
174+
}
145175
}
146176
// now update nRequested
147177
while (true) { // lock-free loop on nRequested
@@ -152,12 +182,13 @@ public class PublisherCoroutine<in T>(
152182
if (_nRequested.compareAndSet(current, updated)) {
153183
if (updated == 0L) {
154184
// return to keep locked due to back-pressure
155-
return
185+
return null
156186
}
157187
break // unlock if updated > 0
158188
}
159189
}
160190
unlockAndCheckCompleted()
191+
return null
161192
}
162193

163194
private fun unlockAndCheckCompleted() {
@@ -177,38 +208,31 @@ public class PublisherCoroutine<in T>(
177208
// assert: mutex.isLocked() & isCompleted
178209
private fun doLockedSignalCompleted(cause: Throwable?, handled: Boolean) {
179210
try {
180-
if (_nRequested.value >= CLOSED) {
181-
_nRequested.value = SIGNALLED // we'll signal onError/onCompleted (that the final state -- no CAS needed)
182-
// Specification requires that after cancellation requested we don't call onXXX
183-
if (cancelled) {
184-
// If the parent had failed to handle our exception, then we must not lose this exception
185-
if (cause != null && !handled) exceptionOnCancelHandler(cause, context)
186-
return
187-
}
188-
211+
if (_nRequested.value == SIGNALLED)
212+
return
213+
_nRequested.value = SIGNALLED // we'll signal onError/onCompleted (the final state, so no CAS needed)
214+
// Specification requires that after the cancellation is requested we eventually stop calling onXXX
215+
if (cancelled) {
216+
// If the parent had failed to handle our exception, then we must not lose this exception
217+
if (cause != null && !handled) exceptionOnCancelHandler(cause, context)
218+
return
219+
}
220+
if (cause == null) {
189221
try {
190-
if (cause != null && cause !is CancellationException) {
191-
/*
192-
* Reactive frameworks have two types of exceptions: regular and fatal.
193-
* Regular are passed to onError.
194-
* Fatal can be passed to onError, but even the standard implementations **can just swallow it** (e.g. see #1297).
195-
* Such behaviour is inconsistent, leads to silent failures and we can't possibly know whether
196-
* the cause will be handled by onError (and moreover, it depends on whether a fatal exception was
197-
* thrown by subscriber or upstream).
198-
* To make behaviour consistent and least surprising, we always handle fatal exceptions
199-
* by coroutines machinery, anyway, they should not be present in regular program flow,
200-
* thus our goal here is just to expose it as soon as possible.
201-
*/
202-
subscriber.onError(cause)
203-
if (!handled && cause.isFatal()) {
204-
exceptionOnCancelHandler(cause, context)
205-
}
206-
} else {
207-
subscriber.onComplete()
208-
}
222+
subscriber.onComplete()
209223
} catch (e: Throwable) {
210224
handleCoroutineException(context, e)
211225
}
226+
} else {
227+
try {
228+
// This can't be the cancellation exception from `cancel`, as then `cancelled` would be `true`.
229+
subscriber.onError(cause)
230+
} catch (e: Throwable) {
231+
if (e !== cause) {
232+
cause.addSuppressed(e)
233+
}
234+
handleCoroutineException(context, cause)
235+
}
212236
}
213237
} finally {
214238
mutex.unlock()
@@ -217,20 +241,25 @@ public class PublisherCoroutine<in T>(
217241

218242
override fun request(n: Long) {
219243
if (n <= 0) {
220-
// Specification requires IAE for n <= 0
244+
// Specification requires to call onError with IAE for n <= 0
221245
cancelCoroutine(IllegalArgumentException("non-positive subscription request $n"))
222246
return
223247
}
224248
while (true) { // lock-free loop for nRequested
225249
val cur = _nRequested.value
226-
if (cur < 0) return // already closed for send, ignore requests
250+
if (cur < 0) return // already closed for send, ignore requests, as mandated by the reactive streams spec
227251
var upd = cur + n
228252
if (upd < 0 || n == Long.MAX_VALUE)
229253
upd = Long.MAX_VALUE
230254
if (cur == upd) return // nothing to do
231255
if (_nRequested.compareAndSet(cur, upd)) {
232256
// unlock the mutex when we don't have back-pressure anymore
233257
if (cur == 0L) {
258+
/** In a sense, after a successful CAS, it is this invocation, not the coroutine itself, that owns
259+
* the lock, given that `upd` is necessarily strictly positive. Thus, no other operation has the
260+
* right to lower the value on [_nRequested], it can only grow or become [CLOSED]. Therefore, it is
261+
* impossible for any other operations to assume that they own the lock without actually acquiring
262+
* it. */
234263
unlockAndCheckCompleted()
235264
}
236265
return
@@ -271,8 +300,6 @@ public class PublisherCoroutine<in T>(
271300
cancelled = true
272301
super.cancel(null)
273302
}
274-
275-
private fun Throwable.isFatal() = this is VirtualMachineError || this is ThreadDeath || this is LinkageError
276303
}
277304

278305
@Deprecated(

0 commit comments

Comments
 (0)