Skip to content

Commit

Permalink
Clear transient disconnect timeouts per spec
Browse files Browse the repository at this point in the history
Based on the spec referenced in 20f21c7.

Resolves #48.
  • Loading branch information
lawrence-forooghian committed Oct 30, 2024
1 parent f48c14a commit 1118faa
Show file tree
Hide file tree
Showing 2 changed files with 147 additions and 17 deletions.
46 changes: 41 additions & 5 deletions Sources/AblyChat/RoomLifecycleManager.swift
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ internal actor RoomLifecycleManager<Contributor: RoomLifecycleContributor> {
/// - the manager has performed all status changes provoked by the state change
/// - the manager has performed all contributor actions provoked by the state change, namely calls to ``RoomLifecycleContributorChannel.detach()`` or ``RoomLifecycleContributor.emitDiscontinuity(_:)``
/// - the manager has recorded all transient disconnect timeouts provoked by the state change (you can retrieve this information using ``testsOnly_hasTransientDisconnectTimeout(for:) or ``testsOnly_idOfTransientDisconnectTimeout(for:)``)
/// - the manager has performed all transient disconnect timeouts cancellations by the state change (you can retrieve this information using ``testsOnly_hasTransientDisconnectTimeout(for:) or ``testsOnly_idOfTransientDisconnectTimeout(for:)``)
internal func testsOnly_subscribeToHandledContributorStateChanges() -> Subscription<ARTChannelStateChange> {
let subscription = Subscription<ARTChannelStateChange>(bufferingPolicy: .unbounded)
stateChangeHandledSubscriptions.append(subscription)
Expand All @@ -318,6 +319,10 @@ internal actor RoomLifecycleManager<Contributor: RoomLifecycleContributor> {
contributorAnnotations[contributor].hasTransientDisconnectTimeout
}

internal var testsOnly_hasTransientDisconnectTimeoutForAnyContributor: Bool {
contributors.contains { testsOnly_hasTransientDisconnectTimeout(for: $0) }
}

internal func testsOnly_idOfTransientDisconnectTimeout(for contributor: Contributor) -> UUID? {
contributorAnnotations[contributor].transientDisconnectTimeout?.id
}
Expand Down Expand Up @@ -364,11 +369,16 @@ internal actor RoomLifecycleManager<Contributor: RoomLifecycleContributor> {

contributorAnnotations[contributor].pendingDiscontinuityEvents.append(reason)
}
} else if status != .attached {
if await (contributors.async.map { await $0.channel.state }.allSatisfy { $0 == .attached }) {
// CHA-RL4b8
logger.log(message: "Now that all contributors are ATTACHED, transitioning room to ATTACHED", level: .info)
changeStatus(to: .attached)
} else {
// CHA-RL4b10
clearTransientDisconnectTimeouts(for: contributor)

if status != .attached {
if await (contributors.async.map { await $0.channel.state }.allSatisfy { $0 == .attached }) {
// CHA-RL4b8
logger.log(message: "Now that all contributors are ATTACHED, transitioning room to ATTACHED", level: .info)
changeStatus(to: .attached)
}
}
}
case .failed:
Expand All @@ -379,6 +389,7 @@ internal actor RoomLifecycleManager<Contributor: RoomLifecycleContributor> {
preconditionFailure("FAILED state change event should have a reason")
}

clearTransientDisconnectTimeouts()
changeStatus(to: .failed(error: reason))

// TODO: CHA-RL4b5 is a bit unclear about how to handle failure, and whether they can be detached concurrently (asked in https://github.com/ably/specification/pull/200/files#r1777471810)
Expand All @@ -398,6 +409,8 @@ internal actor RoomLifecycleManager<Contributor: RoomLifecycleContributor> {
preconditionFailure("SUSPENDED state change event should have a reason")
}

clearTransientDisconnectTimeouts()

changeStatus(to: .suspended(error: reason))
}
case .attaching:
Expand All @@ -409,6 +422,8 @@ internal actor RoomLifecycleManager<Contributor: RoomLifecycleContributor> {
transientDisconnectTimeout.task = Task {
do {
try await clock.sleep(timeInterval: 5)
// clock.sleep throws an error if the Task gets cancelled, but for my peace of mind and to make my intentions clear (i.e. that the status change is not meant to happen if the Task is cancelled, let’s have this too
try Task.checkCancellation()
} catch {
logger.log(message: "Transient disconnect timeout \(transientDisconnectTimeout.id) for \(contributor) was interrupted, error \(error)", level: .debug)
}
Expand All @@ -429,6 +444,22 @@ internal actor RoomLifecycleManager<Contributor: RoomLifecycleContributor> {
#endif
}

private func clearTransientDisconnectTimeouts(for contributor: Contributor) {
guard let transientDisconnectTimeout = contributorAnnotations[contributor].transientDisconnectTimeout else {
return
}

logger.log(message: "Clearing transient disconnect timeout \(transientDisconnectTimeout.id) for \(contributor)", level: .debug)
transientDisconnectTimeout.task?.cancel()
contributorAnnotations[contributor].transientDisconnectTimeout = nil
}

private func clearTransientDisconnectTimeouts() {
for contributor in contributors {
clearTransientDisconnectTimeouts(for: contributor)
}
}

// MARK: - Operation handling

/// Whether the room lifecycle manager currently has a room lifecycle operation in progress.
Expand Down Expand Up @@ -614,6 +645,9 @@ internal actor RoomLifecycleManager<Contributor: RoomLifecycleContributor> {
}
}

// CHA-RL1g3
clearTransientDisconnectTimeouts()

// CHA-RL1g1
changeStatus(to: .attached)

Expand Down Expand Up @@ -684,6 +718,7 @@ internal actor RoomLifecycleManager<Contributor: RoomLifecycleContributor> {
}

// CHA-RL2e
clearTransientDisconnectTimeouts()
changeStatus(to: .detaching(detachOperationID: operationID))

// CHA-RL2f
Expand Down Expand Up @@ -774,6 +809,7 @@ internal actor RoomLifecycleManager<Contributor: RoomLifecycleContributor> {
}

// CHA-RL3l
clearTransientDisconnectTimeouts()
changeStatus(to: .releasing(releaseOperationID: operationID))

// CHA-RL3d
Expand Down
118 changes: 106 additions & 12 deletions Tests/AblyChatTests/RoomLifecycleManagerTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,23 @@ struct RoomLifecycleManagerTests {
}
}

// @spec CHA-RL1g3
@Test
func attach_uponSuccess_clearsTransientDisconnectTimeouts() async throws {
// Given: A RoomLifecycleManager, all of whose contributors’ calls to `attach` succeed
let contributors = (1 ... 3).map { _ in createContributor(attachBehavior: .complete(.success)) }
let manager = await createManager(
forTestingWhatHappensWhenHasTransientDisconnectTimeoutForTheseContributorIDs: [contributors[1].id],
contributors: contributors
)

// When: `performAttachOperation()` is called on the lifecycle manager
try await manager.performAttachOperation()

// Then: It clears all transient disconnect timeouts
#expect(await !manager.testsOnly_hasTransientDisconnectTimeoutForAnyContributor)
}

// @spec CHA-RL1h2
// @specOneOf(1/2) CHA-RL1h1 - tests that an error gets thrown when channel attach fails due to entering SUSPENDED (TODO: but I don’t yet fully understand the meaning of CHA-RL1h1; outstanding question https://github.com/ably/specification/pull/200/files#r1765476610)
// @specPartial CHA-RL1h3 - Have tested the failure of the operation and the error that’s thrown. Have not yet implemented the "enter the recovery loop" (TODO: https://github.com/ably-labs/ably-chat-swift/issues/50)
Expand Down Expand Up @@ -511,22 +528,29 @@ struct RoomLifecycleManagerTests {
}
}

// @specPartial CHA-RL2e - Haven’t implemented the part that refers to "transient disconnect timeouts"; TODO do this (https://github.com/ably-labs/ably-chat-swift/issues/48)
// @spec CHA-RL2e
@Test
func detach_transitionsToDetaching() async throws {
// Given: A RoomLifecycleManager, with a contributor on whom calling `detach()` will not complete until after the "Then" part of this test (the motivation for this is to suppress the room from transitioning to DETACHED, so that we can assert its current state as being DETACHING)
let contributorDetachOperation = SignallableChannelOperation()

let manager = await createManager(contributors: [createContributor(detachBehavior: contributorDetachOperation.behavior)])
let contributor = createContributor(detachBehavior: contributorDetachOperation.behavior)

let manager = await createManager(
// We set a transient disconnect timeout, just so we can check that it gets cleared, as the spec point specifies
forTestingWhatHappensWhenHasTransientDisconnectTimeoutForTheseContributorIDs: [contributor.id],
contributors: [contributor]
)
let statusChangeSubscription = await manager.onChange(bufferingPolicy: .unbounded)
async let statusChange = statusChangeSubscription.first { _ in true }

// When: `performDetachOperation()` is called on the lifecycle manager
async let _ = try await manager.performDetachOperation()

// Then: It emits a status change to DETACHING, and its current state is DETACHING
// Then: It emits a status change to DETACHING, its current state is DETACHING, and it clears transient disconnect timeouts
#expect(try #require(await statusChange).current == .detaching)
#expect(await manager.current == .detaching)
#expect(await !manager.testsOnly_hasTransientDisconnectTimeoutForAnyContributor)

// Post-test: Now that we’ve seen the DETACHING state, allow the contributor `detach` call to complete
contributorDetachOperation.complete(result: .success)
Expand Down Expand Up @@ -720,22 +744,29 @@ struct RoomLifecycleManagerTests {
#expect(await contributor.channel.detachCallCount == 1)
}

// @specPartial CHA-RL3l - Haven’t implemented the part that refers to "transient disconnect timeouts"; TODO do this (https://github.com/ably-labs/ably-chat-swift/issues/48)
// @spec CHA-RL3l
@Test
func release_transitionsToReleasing() async throws {
// Given: A RoomLifecycleManager, with a contributor on whom calling `detach()` will not complete until after the "Then" part of this test (the motivation for this is to suppress the room from transitioning to RELEASED, so that we can assert its current state as being RELEASING)
let contributorDetachOperation = SignallableChannelOperation()

let manager = await createManager(contributors: [createContributor(detachBehavior: contributorDetachOperation.behavior)])
let contributor = createContributor(detachBehavior: contributorDetachOperation.behavior)

let manager = await createManager(
// We set a transient disconnect timeout, just so we can check that it gets cleared, as the spec point specifies
forTestingWhatHappensWhenHasTransientDisconnectTimeoutForTheseContributorIDs: [contributor.id],
contributors: [contributor]
)
let statusChangeSubscription = await manager.onChange(bufferingPolicy: .unbounded)
async let statusChange = statusChangeSubscription.first { _ in true }

// When: `performReleaseOperation()` is called on the lifecycle manager
async let _ = await manager.performReleaseOperation()

// Then: It emits a status change to RELEASING, and its current state is RELEASING
// Then: It emits a status change to RELEASING, its current state is RELEASING, and it clears transient disconnect timeouts
#expect(try #require(await statusChange).current == .releasing)
#expect(await manager.current == .releasing)
#expect(await !manager.testsOnly_hasTransientDisconnectTimeoutForAnyContributor)

// Post-test: Now that we’ve seen the RELEASING state, allow the contributor `detach` call to complete
contributorDetachOperation.complete(result: .success)
Expand Down Expand Up @@ -959,17 +990,23 @@ struct RoomLifecycleManagerTests {
#expect(pendingDiscontinuityEvent === contributorStateChange.reason)
}

// @specPartial CHA-RL4b5 - Haven’t implemented the part that refers to "transient disconnect timeouts"; TODO do this (https://github.com/ably-labs/ably-chat-swift/issues/48)
// @spec CHA-RL4b5
@Test
func contributorFailedEvent_withNoOperationInProgress() async throws {
// Given: A RoomLifecycleManager, with no room lifecycle operation in progress
let contributors = [
// TODO: The .success is currently arbitrary since the spec doesn’t say what to do if detach fails (have asked in https://github.com/ably/specification/pull/200#discussion_r1777471810)
createContributor(detachBehavior: .success),
createContributor(detachBehavior: .success),
createContributor(detachBehavior: .success),
]
let manager = await createManager(
forTestingWhatHappensWhenCurrentlyIn: .initialized, // case arbitrary, just care that no operation is in progress
forTestingWhatHappensWhenHasTransientDisconnectTimeoutForTheseContributorIDs: [
// Give 2 of the 3 contributors a transient disconnect timeout, so we can test that _all_ such timeouts get cleared (as the spec point specifies), not just those for the FAILED contributor
contributors[0].id,
contributors[1].id,
],
contributors: contributors
)

Expand All @@ -992,12 +1029,15 @@ struct RoomLifecycleManagerTests {
// Then:
// - the room status transitions to failed, with the error of the status change being the `reason` of the contributor FAILED event
// - and it calls `detach` on all contributors
// - it clears all transient disconnect timeouts
_ = try #require(await failedStatusChange)
#expect(await manager.current.isFailed)

for contributor in contributors {
#expect(await contributor.channel.detachCallCount == 1)
}

#expect(await !manager.testsOnly_hasTransientDisconnectTimeoutForAnyContributor)
}

// @spec CHA-RL4b6
Expand Down Expand Up @@ -1081,6 +1121,44 @@ struct RoomLifecycleManagerTests {
#expect(await !manager.testsOnly_hasTransientDisconnectTimeout(for: contributor))
}

// @spec CHA-RL4b10
@Test
func contributorAttachedEvent_withNoOperationInProgress_clearsTransientDisconnectTimeouts() async throws {
// Given: A RoomLifecycleManager, with no room lifecycle operation in progress
let contributorThatWillEmitAttachedStateChange = createContributor()
let contributors = [
contributorThatWillEmitAttachedStateChange,
createContributor(),
createContributor(),
]
let manager = await createManager(
forTestingWhatHappensWhenCurrentlyIn: .initialized, // case arbitrary, just care that no operation is in progress
forTestingWhatHappensWhenHasTransientDisconnectTimeoutForTheseContributorIDs: [
// Give 2 of the 3 contributors a transient disconnect timeout, so we can test that only the timeout for the ATTACHED contributor gets cleared, not all of them
contributorThatWillEmitAttachedStateChange.id,
contributors[1].id,
],
contributors: contributors
)

// When: A contributor emits a state change to ATTACHED
let contributorAttachedStateChange = ARTChannelStateChange(
current: .attached,
previous: .attaching, // arbitrary
event: .attached,
reason: nil // arbitrary
)

await waitForManager(manager, toHandleContributorStateChange: contributorAttachedStateChange) {
await contributorThatWillEmitAttachedStateChange.channel.emitStateChange(contributorAttachedStateChange)
}

// Then: The manager clears any transient disconnect timeout for that contributor
#expect(await !manager.testsOnly_hasTransientDisconnectTimeout(for: contributorThatWillEmitAttachedStateChange))
// check the timeout for the other contributors didn’t get cleared
#expect(await manager.testsOnly_hasTransientDisconnectTimeout(for: contributors[1]))
}

// @specOneOf(1/2) CHA-RL4b8
@Test
func contributorAttachedEvent_withNoOperationInProgress_roomNotAttached_allContributorsAttached() async throws {
Expand Down Expand Up @@ -1146,14 +1224,24 @@ struct RoomLifecycleManagerTests {
#expect(await manager.current == initialManagerStatus.toRoomLifecycle)
}

// @specPartial CHA-RL4b9 - Haven’t implemented the part that refers to "transient disconnect timeouts"; TODO do this (https://github.com/ably-labs/ably-chat-swift/issues/48). Nor have I implemented "the room enters the RETRY loop"; TODO do this (https://github.com/ably-labs/ably-chat-swift/issues/51)
// @specPartial CHA-RL4b9 - Haven’t implemented "the room enters the RETRY loop"; TODO do this (https://github.com/ably-labs/ably-chat-swift/issues/51)
@Test
func contributorSuspendedEvent_withNoOperationInProgress() async throws {
// Given: A RoomLifecycleManager with no lifecycle operation in progress
let contributor = createContributor()
let contributorThatWillEmitStateChange = createContributor()
let contributors = [
contributorThatWillEmitStateChange,
createContributor(),
createContributor(),
]
let manager = await createManager(
forTestingWhatHappensWhenCurrentlyIn: .initialized, // case arbitrary, just care that no operation is in progress
contributors: [contributor]
// Give 2 of the 3 contributors a transient disconnect timeout, so we can test that _all_ such timeouts get cleared (as the spec point specifies), not just those for the SUSPENDED contributor
forTestingWhatHappensWhenHasTransientDisconnectTimeoutForTheseContributorIDs: [
contributorThatWillEmitStateChange.id,
contributors[1].id,
],
contributors: [contributorThatWillEmitStateChange]
)

let roomStatusSubscription = await manager.onChange(bufferingPolicy: .unbounded)
Expand All @@ -1169,12 +1257,18 @@ struct RoomLifecycleManagerTests {
resumed: false // arbitrary
)

await contributor.channel.emitStateChange(contributorStateChange)
await waitForManager(manager, toHandleContributorStateChange: contributorStateChange) {
await contributorThatWillEmitStateChange.channel.emitStateChange(contributorStateChange)
}

// Then: The room transitions to SUSPENDED, and this state change has error equal to the contributor state change’s `reason`
// Then:
// - The room transitions to SUSPENDED, and this state change has error equal to the contributor state change’s `reason`
// - All transient disconnect timeouts are cancelled
let suspendedRoomStatusChange = try #require(await maybeSuspendedRoomStatusChange)
#expect(suspendedRoomStatusChange.error === contributorStateChangeReason)

#expect(await manager.current == .suspended(error: contributorStateChangeReason))

#expect(await !manager.testsOnly_hasTransientDisconnectTimeoutForAnyContributor)
}
}

0 comments on commit 1118faa

Please sign in to comment.