
Commit 63e30e3

fix: stuck queries when too many events with same timestamp (#586)
* fix: stuck queries when too many events with same timestamp
* ci: add mima filters
* ci: pin sqlserver to the previous version
* test: same timestamp, overlapping seq numbers
* suggestions from code review
* track previous timestamp to only filter by seq nr when same timestamp
* ci: update docker compose command for yugabyte tests
1 parent 6fc1f7e commit 63e30e3
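
The core problem: the events-by-slices query pages by db_timestamp only, restarting each query at the latest seen timestamp with a LIMIT of the buffer size. When more events than the buffer size share one timestamp, every restart returns the same page and the query never advances. The fix threads a fromSeqNr down to the DAOs so that, when the next query starts at the same timestamp as the previous one, rows below the highest seen sequence number for that timestamp are skipped. A minimal sketch of the idea, with illustrative Event values and an in-memory stand-in for the SQL paging (not code from this commit):

final case class Event(dbTimestamp: Long, seqNr: Long)

val bufferSize = 3
// more events than bufferSize on timestamp 100, then one event on timestamp 101
val events = Vector(Event(100, 1), Event(100, 2), Event(100, 3), Event(100, 4), Event(101, 1))

// before: the next query restarts at the latest seen timestamp only
def pageByTimestamp(from: Long): Vector[Event] =
  events.filter(_.dbTimestamp >= from).sortBy(e => (e.dbTimestamp, e.seqNr)).take(bufferSize)

// after: when restarting at the same timestamp, also skip below the highest seen seq_nr,
// mirroring the added SQL condition AND (db_timestamp != ? OR seq_nr >= ?)
def pageByTimestampAndSeqNr(from: Long, fromSeqNr: Long): Vector[Event] =
  events
    .filter(e => e.dbTimestamp >= from && (e.dbTimestamp != from || e.seqNr >= fromSeqNr))
    .sortBy(e => (e.dbTimestamp, e.seqNr))
    .take(bufferSize)

// stuck: every restart from timestamp 100 returns the same three rows
assert(pageByTimestamp(100) == Vector(Event(100, 1), Event(100, 2), Event(100, 3)))
// unstuck: restarting from (timestamp 100, highest seen seq_nr 3) makes progress; the boundary
// row (100, 3) is read again and is expected to be dropped by the existing seen-event tracking
assert(pageByTimestampAndSeqNr(100, 3) == Vector(Event(100, 3), Event(100, 4), Event(101, 1)))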

File tree

10 files changed: +206 -17 lines changed


.github/workflows/build-test.yml
+1 -1

@@ -203,7 +203,7 @@ jobs:

       - name: Start DB
         run: |-
-          docker-compose -f docker/docker-compose-yugabyte.yml up -d
+          docker compose -f docker/docker-compose-yugabyte.yml up -d
           # TODO: could we poll the port instead of sleep?
           sleep 10
           docker exec -i yb-tserver-n1 /home/yugabyte/bin/ysqlsh -h yb-tserver-n1 -t < ddl-scripts/create_tables_yugabyte.sql

MiMa filters (new file)
+3 -0

@@ -0,0 +1,3 @@
+# internals: fromSeqNr added to rowsBySlices
+ProblemFilters.exclude[DirectMissingMethodProblem]("akka.persistence.r2dbc.internal.BySliceQuery#Dao.rowsBySlices")
+ProblemFilters.exclude[ReversedMissingMethodProblem]("akka.persistence.r2dbc.internal.BySliceQuery#Dao.rowsBySlices")
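
These two excludes are needed because the internal Dao trait in BySliceQuery.scala gains the fromSeqNr parameter on rowsBySlices (see the diff below): MiMa reports the old signature as a DirectMissingMethodProblem, because that method no longer exists, and the new signature as a ReversedMissingMethodProblem, because existing implementers of the trait would not provide it. Since Dao is internal API, filtering the warnings away is the intended resolution.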

core/src/main/scala/akka/persistence/r2dbc/internal/BySliceQuery.scala
+46 -7

@@ -31,7 +31,19 @@ import org.slf4j.Logger

   object QueryState {
     val empty: QueryState =
-      QueryState(TimestampOffset.Zero, 0, 0, 0, 0, backtrackingCount = 0, TimestampOffset.Zero, 0, 0, Buckets.empty)
+      QueryState(
+        latest = TimestampOffset.Zero,
+        rowCount = 0,
+        rowCountSinceBacktracking = 0,
+        queryCount = 0,
+        idleCount = 0,
+        backtrackingCount = 0,
+        latestBacktracking = TimestampOffset.Zero,
+        latestBacktrackingSeenCount = 0,
+        backtrackingExpectFiltered = 0,
+        buckets = Buckets.empty,
+        previous = TimestampOffset.Zero,
+        previousBacktracking = TimestampOffset.Zero)
   }

   final case class QueryState(
@@ -44,7 +56,9 @@ import org.slf4j.Logger
       latestBacktracking: TimestampOffset,
       latestBacktrackingSeenCount: Int,
       backtrackingExpectFiltered: Int,
-      buckets: Buckets) {
+      buckets: Buckets,
+      previous: TimestampOffset,
+      previousBacktracking: TimestampOffset) {

     def backtracking: Boolean = backtrackingCount > 0

@@ -56,6 +70,10 @@ import org.slf4j.Logger
       if (backtracking) latestBacktracking.timestamp
       else latest.timestamp

+    def nextQueryFromSeqNr: Option[Long] =
+      if (backtracking) highestSeenSeqNr(previousBacktracking, latestBacktracking)
+      else highestSeenSeqNr(previous, latest)
+
     def nextQueryToTimestamp(atLeastNumberOfEvents: Int): Option[Instant] = {
       buckets.findTimeForLimit(nextQueryFromTimestamp, atLeastNumberOfEvents) match {
         case Some(t) =>
@@ -70,6 +88,12 @@ import org.slf4j.Logger
     }
   }

+  // only filter by highest seen seq nr when the next query is the same timestamp (or when unknown for initial queries)
+  private def highestSeenSeqNr(previous: TimestampOffset, latest: TimestampOffset): Option[Long] =
+    Option.when((previous == TimestampOffset.Zero || previous.timestamp == latest.timestamp) && latest.seen.nonEmpty) {
+      latest.seen.values.max
+    }
+
   object Buckets {
     type EpochSeconds = Long
     type Count = Long
@@ -157,6 +181,7 @@ import org.slf4j.Logger
         minSlice: Int,
         maxSlice: Int,
         fromTimestamp: Instant,
+        fromSeqNr: Option[Long], // for events with same timestamp as `fromTimestamp`
         toTimestamp: Option[Instant],
         behindCurrentTime: FiniteDuration,
         backtracking: Boolean): Source[SerializedRow, NotUsed]
@@ -212,7 +237,10 @@ import org.slf4j.Logger
         // so continue until rowCount is 0. That means an extra query at the end to make sure there are no
         // more to fetch.
         if (state.queryCount == 0L || state.rowCount > 0) {
-          val newState = state.copy(rowCount = 0, queryCount = state.queryCount + 1)
+          val newState = state.copy(rowCount = 0, queryCount = state.queryCount + 1, previous = state.latest)
+
+          val fromTimestamp = state.latest.timestamp
+          val fromSeqNr = highestSeenSeqNr(state.previous, state.latest)

           val toTimestamp = newState.nextQueryToTimestamp(settings.querySettings.bufferSize) match {
             case Some(t) =>
@@ -228,7 +256,7 @@ import org.slf4j.Logger
               state.queryCount,
               minSlice,
               maxSlice,
-              state.latest.timestamp,
+              fromTimestamp,
               toTimestamp,
               state.rowCount)

@@ -238,7 +266,8 @@ import org.slf4j.Logger
               entityType,
               minSlice,
               maxSlice,
-              state.latest.timestamp,
+              fromTimestamp,
+              fromSeqNr,
               toTimestamp = Some(toTimestamp),
               behindCurrentTime = Duration.Zero,
               backtracking = false)
@@ -312,7 +341,11 @@ import org.slf4j.Logger
             s"Unexpected offset [$offset] before latestBacktracking [${state.latestBacktracking}].")

           val newSeenCount =
-            if (offset.timestamp == state.latestBacktracking.timestamp) state.latestBacktrackingSeenCount + 1 else 1
+            if (offset.timestamp == state.latestBacktracking.timestamp &&
+              highestSeenSeqNr(state.previousBacktracking, offset) ==
+              highestSeenSeqNr(state.previousBacktracking, state.latestBacktracking))
+              state.latestBacktrackingSeenCount + 1
+            else 1

           state.copy(
             latestBacktracking = offset,
@@ -420,6 +453,7 @@ import org.slf4j.Logger
         else settings.querySettings.behindCurrentTime

         val fromTimestamp = newState.nextQueryFromTimestamp
+        val fromSeqNr = newState.nextQueryFromSeqNr
         val toTimestamp = newState.nextQueryToTimestamp(settings.querySettings.bufferSize)

         if (log.isDebugEnabled()) {
@@ -446,14 +480,19 @@ import org.slf4j.Logger
           else s"Found [${state.rowCount}] rows in previous query.")
         }

-        newState ->
+        val newStateWithPrevious =
+          if (newState.backtracking) newState.copy(previousBacktracking = newState.latestBacktracking)
+          else newState.copy(previous = newState.latest)
+
+        newStateWithPrevious ->
           Some(
             dao
               .rowsBySlices(
                 entityType,
                 minSlice,
                 maxSlice,
                 fromTimestamp,
+                fromSeqNr,
                 toTimestamp,
                 behindCurrentTime,
                 backtracking = newState.backtracking)
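
QueryState now also remembers the offset that the previous query started from (previous, and previousBacktracking for the backtracking query), so the sequence-number filter only kicks in when the next query would start at the same timestamp again, or for initial queries where the previous offset is still TimestampOffset.Zero. A small sketch of that decision, using a simplified stand-in for TimestampOffset (illustrative types and values, not the real ones):

import java.time.Instant

// simplified stand-in: timestamp plus the seen seq_nr per persistence id at that timestamp
final case class Offset(timestamp: Instant, seen: Map[String, Long])
val Zero = Offset(Instant.EPOCH, Map.empty)

def highestSeenSeqNr(previous: Offset, latest: Offset): Option[Long] =
  Option.when((previous == Zero || previous.timestamp == latest.timestamp) && latest.seen.nonEmpty) {
    latest.seen.values.max
  }

val t = Instant.parse("2024-01-01T00:00:00Z")
val latest = Offset(t, Map("pid-a" -> 7, "pid-b" -> 5))

// previous query started at an earlier timestamp: no seq_nr filter, plain db_timestamp >= ?
highestSeenSeqNr(Offset(t.minusSeconds(1), Map("pid-a" -> 3)), latest) // None
// previous query started at the same timestamp: filter from the highest seen seq_nr
highestSeenSeqNr(Offset(t, Map("pid-a" -> 3)), latest) // Some(7)
// unknown previous (initial queries): also filter, as the comment in the diff notes
highestSeenSeqNr(Zero, latest) // Some(7)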

core/src/main/scala/akka/persistence/r2dbc/internal/h2/H2QueryDao.scala
+5 -1

@@ -24,13 +24,17 @@ private[r2dbc] class H2QueryDao(executorProvider: R2dbcExecutorProvider) extends
   override protected lazy val log: Logger = LoggerFactory.getLogger(classOf[H2QueryDao])

   override protected def eventsBySlicesRangeSql(
+      fromSeqNrParam: Boolean,
       toDbTimestampParam: Boolean,
       behindCurrentTime: FiniteDuration,
       backtracking: Boolean,
       minSlice: Int,
       maxSlice: Int): String = {
     // not caching, too many combinations

+    def fromSeqNrParamCondition =
+      if (fromSeqNrParam) "AND (db_timestamp != ? OR seq_nr >= ?)" else ""
+
     def toDbTimestampParamCondition =
       if (toDbTimestampParam) "AND db_timestamp <= ?" else ""

@@ -51,7 +55,7 @@ private[r2dbc] class H2QueryDao(executorProvider: R2dbcExecutorProvider) extends
       FROM ${journalTable(minSlice)}
       WHERE entity_type = ?
       AND ${sliceCondition(minSlice, maxSlice)}
-      AND db_timestamp >= ? $toDbTimestampParamCondition $behindCurrentTimeIntervalCondition
+      AND db_timestamp >= ? $fromSeqNrParamCondition $toDbTimestampParamCondition $behindCurrentTimeIntervalCondition
       AND deleted = false
       ORDER BY db_timestamp, seq_nr
       LIMIT ?"""

core/src/main/scala/akka/persistence/r2dbc/internal/postgres/PostgresDurableStateDao.scala
+1 -0

@@ -653,6 +653,7 @@ private[r2dbc] class PostgresDurableStateDao(executorProvider: R2dbcExecutorProv
       minSlice: Int,
       maxSlice: Int,
       fromTimestamp: Instant,
+      fromSeqNr: Option[Long],
       toTimestamp: Option[Instant],
       behindCurrentTime: FiniteDuration,
       backtracking: Boolean): Source[SerializedStateRow, NotUsed] = {

core/src/main/scala/akka/persistence/r2dbc/internal/postgres/PostgresQueryDao.scala
+25 -5

@@ -69,13 +69,22 @@ private[r2dbc] class PostgresQueryDao(executorProvider: R2dbcExecutorProvider) e
     "SELECT CURRENT_TIMESTAMP AS db_timestamp"

   protected def eventsBySlicesRangeSql(
+      fromSeqNrParam: Boolean,
       toDbTimestampParam: Boolean,
       behindCurrentTime: FiniteDuration,
       backtracking: Boolean,
       minSlice: Int,
       maxSlice: Int): String = {
     // not caching, too many combinations

+    // If more events than the buffer size are all on the same timestamp, then the query will get stuck on that same
+    // timestamp. Avoid this by also starting from the highest seen sequence number for that timestamp, using the fact
+    // that events are ordered by db_timestamp, seq_nr. Note that sequence numbers are per persistence id, so a later
+    // timestamp can have an earlier sequence number. Add a logical conditional only when db_timestamp = fromTimestamp
+    // to also filter for seq_nr >= fromSeqNr. Expressed in a logically equivalent form, where A -> B === ~A v B.
+    def fromSeqNrParamCondition =
+      if (fromSeqNrParam) "AND (db_timestamp != ? OR seq_nr >= ?)" else ""
+
     def toDbTimestampParamCondition =
       if (toDbTimestampParam) "AND db_timestamp <= ?" else ""

@@ -96,7 +105,7 @@ private[r2dbc] class PostgresQueryDao(executorProvider: R2dbcExecutorProvider) e
       FROM ${journalTable(minSlice)}
       WHERE entity_type = ?
       AND ${sliceCondition(minSlice, maxSlice)}
-      AND db_timestamp >= ? $toDbTimestampParamCondition $behindCurrentTimeIntervalCondition
+      AND db_timestamp >= ? $fromSeqNrParamCondition $toDbTimestampParamCondition $behindCurrentTimeIntervalCondition
       AND deleted = false
       ORDER BY db_timestamp, seq_nr
       LIMIT ?"""
@@ -209,16 +218,25 @@ private[r2dbc] class PostgresQueryDao(executorProvider: R2dbcExecutorProvider) e
       stmt: Statement,
       entityType: String,
       fromTimestamp: Instant,
+      fromSeqNr: Option[Long],
       toTimestamp: Option[Instant]): Statement = {
     stmt
       .bind(0, entityType)
       .bindTimestamp(1, fromTimestamp)
+    val index1 = 2
+    val index2 = fromSeqNr match {
+      case Some(seqNr) =>
+        stmt.bindTimestamp(index1, fromTimestamp)
+        stmt.bind(index1 + 1, seqNr)
+        index1 + 2
+      case None => index1
+    }
     toTimestamp match {
       case Some(until) =>
-        stmt.bindTimestamp(2, until)
-        stmt.bind(3, settings.querySettings.bufferSize)
+        stmt.bindTimestamp(index2, until)
+        stmt.bind(index2 + 1, settings.querySettings.bufferSize)
       case None =>
-        stmt.bind(2, settings.querySettings.bufferSize)
+        stmt.bind(index2, settings.querySettings.bufferSize)
     }
   }

@@ -227,6 +245,7 @@ private[r2dbc] class PostgresQueryDao(executorProvider: R2dbcExecutorProvider) e
       minSlice: Int,
       maxSlice: Int,
       fromTimestamp: Instant,
+      fromSeqNr: Option[Long],
       toTimestamp: Option[Instant],
       behindCurrentTime: FiniteDuration,
       backtracking: Boolean): Source[SerializedJournalRow, NotUsed] = {
@@ -241,12 +260,13 @@ private[r2dbc] class PostgresQueryDao(executorProvider: R2dbcExecutorProvider) e
         val stmt = connection
           .createStatement(
             eventsBySlicesRangeSql(
+              fromSeqNrParam = fromSeqNr.isDefined,
              toDbTimestampParam = toTimestamp.isDefined,
              behindCurrentTime,
              backtracking,
              minSlice,
              maxSlice))
-        bindEventsBySlicesRangeSql(stmt, entityType, fromTimestamp, toTimestamp)
+        bindEventsBySlicesRangeSql(stmt, entityType, fromTimestamp, fromSeqNr, toTimestamp)
       },
       row =>
         if (backtracking)
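
The condition added here, AND (db_timestamp != ? OR seq_nr >= ?), is the implication "db_timestamp = fromTimestamp implies seq_nr >= fromSeqNr" rewritten via A -> B === ~A v B, as the comment above spells out; fromTimestamp is therefore bound a second time for the inequality. Because the Postgres and H2 statements use positional parameters, every later bind index shifts by two when fromSeqNr is present. A small sketch (hypothetical helper, not part of the commit) of the resulting parameter order:

// Positional parameter order for eventsBySlicesRangeSql, depending on which optional
// conditions are rendered; indexes are 0-based as in bindEventsBySlicesRangeSql.
def parameterOrder(fromSeqNrDefined: Boolean, toTimestampDefined: Boolean): Seq[String] =
  Seq("entityType", "fromTimestamp") ++
    (if (fromSeqNrDefined) Seq("fromTimestamp", "fromSeqNr") else Nil) ++
    (if (toTimestampDefined) Seq("toTimestamp") else Nil) :+
    "bufferSize"

parameterOrder(fromSeqNrDefined = true, toTimestampDefined = true)
// -> Seq(entityType, fromTimestamp, fromTimestamp, fromSeqNr, toTimestamp, bufferSize), indexes 0 to 5
parameterOrder(fromSeqNrDefined = false, toTimestampDefined = false)
// -> Seq(entityType, fromTimestamp, bufferSize), indexes 0 to 2, as before this change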

core/src/main/scala/akka/persistence/r2dbc/internal/postgres/PostgresSnapshotDao.scala
+1 -0

@@ -388,6 +388,7 @@ private[r2dbc] class PostgresSnapshotDao(executorProvider: R2dbcExecutorProvider
       minSlice: Int,
       maxSlice: Int,
       fromTimestamp: Instant,
+      fromSeqNr: Option[Long],
       toTimestamp: Option[Instant],
       behindCurrentTime: FiniteDuration,
       backtracking: Boolean): Source[SerializedSnapshotRow, NotUsed] = {

core/src/main/scala/akka/persistence/r2dbc/internal/sqlserver/SqlServerQueryDao.scala
+7 -1

@@ -112,13 +112,17 @@ private[r2dbc] class SqlServerQueryDao(executorProvider: R2dbcExecutorProvider)
   }

   override protected def eventsBySlicesRangeSql(
+      fromSeqNrParam: Boolean,
       toDbTimestampParam: Boolean,
       behindCurrentTime: FiniteDuration,
       backtracking: Boolean,
       minSlice: Int,
       maxSlice: Int): String = {
     // not caching, too many combinations

+    def fromSeqNrParamCondition =
+      if (fromSeqNrParam) "AND (db_timestamp != @from OR seq_nr >= @fromSeqNr)" else ""
+
     def toDbTimestampParamCondition =
       if (toDbTimestampParam) "AND db_timestamp <= @until" else ""

@@ -139,7 +143,7 @@ private[r2dbc] class SqlServerQueryDao(executorProvider: R2dbcExecutorProvider)
       FROM ${journalTable(minSlice)}
       WHERE entity_type = @entityType
       AND ${sliceCondition(minSlice, maxSlice)}
-      AND db_timestamp >= @from $toDbTimestampParamCondition $behindCurrentTimeIntervalCondition
+      AND db_timestamp >= @from $fromSeqNrParamCondition $toDbTimestampParamCondition $behindCurrentTimeIntervalCondition
       AND deleted = $sqlFalse
       ORDER BY db_timestamp, seq_nr"""
   }
@@ -148,11 +152,13 @@ private[r2dbc] class SqlServerQueryDao(executorProvider: R2dbcExecutorProvider)
       stmt: Statement,
       entityType: String,
       fromTimestamp: Instant,
+      fromSeqNr: Option[Long],
       toTimestamp: Option[Instant]): Statement = {
     stmt
       .bind("@limit", settings.querySettings.bufferSize)
       .bind("@entityType", entityType)
       .bindTimestamp("@from", fromTimestamp)
+    fromSeqNr.foreach(seqNr => stmt.bind("@fromSeqNr", seqNr))
     toTimestamp.foreach(timestamp => stmt.bindTimestamp("@until", timestamp))
     stmt
   }
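
The SQL Server variant uses named parameters, so none of the index arithmetic from the Postgres DAO is needed here: @fromSeqNr is simply bound when fromSeqNr is defined, matching the AND (db_timestamp != @from OR seq_nr >= @fromSeqNr) fragment that is only rendered in that case, and @from is referenced twice in the SQL but bound once.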
