@@ -19,7 +19,11 @@ type t = {
1919
2020let getActiveSource = sourceManager => sourceManager .activeSource
2121
22- let makeGetHeightRetryInterval = (~initialRetryInterval , ~backoffMultiplicative , ~maxRetryInterval ) => {
22+ let makeGetHeightRetryInterval = (
23+ ~initialRetryInterval ,
24+ ~backoffMultiplicative ,
25+ ~maxRetryInterval ,
26+ ) => {
2327 (~retry : int ) => {
2428 let backoff = if retry === 0 {
2529 1
@@ -35,7 +39,11 @@ let make = (
3539 ~maxPartitionConcurrency ,
3640 ~newBlockFallbackStallTimeout = 20_000 ,
3741 ~stalledPollingInterval = 5_000 ,
38- ~getHeightRetryInterval = makeGetHeightRetryInterval (~initialRetryInterval = 1000 , ~backoffMultiplicative = 2 , ~maxRetryInterval = 60_000 ),
42+ ~getHeightRetryInterval = makeGetHeightRetryInterval (
43+ ~initialRetryInterval = 1000 ,
44+ ~backoffMultiplicative = 2 ,
45+ ~maxRetryInterval = 60_000 ,
46+ ),
3947) => {
4048 let initialActiveSource = switch sources -> Js .Array2 .find (source => source .sourceFor === Sync ) {
4149 | None => Js .Exn .raiseError ("Invalid configuration, no data-source for historical sync provided" )
@@ -244,6 +252,10 @@ let getNextSyncSource = (
244252 sourceManager ,
245253 // This is needed to include the Fallback source in the rotation
246254 ~initialSource ,
255+ // After multiple failures, start returning fallback sources as well.
256+ // But don't try them when the main sync sources fail because of invalid configuration.
257+ // Note: the logic might be changed in the future
258+ ~attemptFallbacks = false ,
247259) => {
248260 let before = []
249261 let after = []
@@ -256,7 +268,7 @@ let getNextSyncSource = (
256268 } else if (
257269 switch source .sourceFor {
258270 | Sync => true
259- | Fallback => source === initialSource
271+ | Fallback => attemptFallbacks || source === initialSource
260272 }
261273 ) {
262274 (hasActive .contents ? after : before )-> Array .push (source )
@@ -291,11 +303,13 @@ let executeQuery = async (sourceManager: t, ~query: FetchState.query, ~currentBl
291303 },
292304 )
293305 let responseRef = ref (None )
306+ let retryRef = ref (0 )
294307 let initialSource = sourceManager .activeSource
295308
296309 while responseRef .contents -> Option .isNone {
297310 let source = sourceManager .activeSource
298311 let toBlock = toBlockRef .contents
312+ let retry = retryRef .contents
299313
300314 let logger = Logging .createChild (
301315 ~params = {
@@ -306,6 +320,7 @@ let executeQuery = async (sourceManager: t, ~query: FetchState.query, ~currentBl
306320 "fromBlock" : query .fromBlock ,
307321 "toBlock" : toBlock ,
308322 "addresses" : addresses ,
323+ "retry" : retry ,
309324 },
310325 )
311326
@@ -317,6 +332,7 @@ let executeQuery = async (sourceManager: t, ~query: FetchState.query, ~currentBl
317332 ~partitionId = query .partitionId ,
318333 ~currentBlockHeight ,
319334 ~selection = query .selection ,
335+ ~retry ,
320336 ~logger ,
321337 )
322338 logger -> Logging .childTrace ({
@@ -339,7 +355,10 @@ let executeQuery = async (sourceManager: t, ~query: FetchState.query, ~currentBl
339355 let notAlreadyDeleted = sourceManager .sources -> Utils .Set .delete (source )
340356
341357 if nextSource === source {
342- exn -> ErrorHandling .mkLogAndRaise (~logger , ~msg = "The indexer doesn't have data-sources which can continue fetching. Please, check the error logs or reach out to the Envio team." )
358+ exn -> ErrorHandling .mkLogAndRaise (
359+ ~logger ,
360+ ~msg = "The indexer doesn't have data-sources which can continue fetching. Please, check the error logs or reach out to the Envio team." ,
361+ )
343362 } else {
344363 // In case there are multiple partitions
345364 // failing at the same time. Log only once
@@ -361,33 +380,60 @@ let executeQuery = async (sourceManager: t, ~query: FetchState.query, ~currentBl
361380 "source" : nextSource .name ,
362381 })
363382 sourceManager .activeSource = nextSource
383+ retryRef := 0
364384 }
365385 }
366386 | FailedGettingItems ({attemptedToBlock , retry : WithSuggestedToBlock ({toBlock })}) =>
367387 logger -> Logging .childTrace ({
368- "msg" : "Failed getting data for the block range. Retrying with the suggested block range from response." ,
388+ "msg" : "Failed getting data for the block range. Immediately retrying with the suggested block range from response." ,
369389 "toBlock" : attemptedToBlock ,
370390 "suggestedToBlock" : toBlock ,
371391 })
372392 toBlockRef := Some (toBlock )
373- | FailedGettingItems ({exn , attemptedToBlock , retry : WithBackoff ({backoffMillis })}) =>
374- let nextSource = sourceManager -> getNextSyncSource (~initialSource )
375- let hasAnotherSyncSource = nextSource !== source
376- logger -> Logging .childTrace ({
377- "msg" : ` Failed getting data for the block range. Will try smaller block range for the next attempt.` ,
393+ retryRef := 0
394+ | FailedGettingItems ({exn , attemptedToBlock , retry : WithBackoff ({message , backoffMillis })}) =>
395+ // Starting from the 11th failure (retry=10)
396+ // include fallback sources for switch
397+ // (previously it would consider only sync sources or the initial one)
398+ // It is a little bit tricky to find the right number,
399+ // because the meaning of the error differs between RPC and HyperSync,
400+ // but since Fallback was initially designed to be used only for height checks,
401+ // just keep the value high
402+ let attemptFallbacks = retry >= 10
403+
404+ let nextSource = switch retry {
405+ // Don't attempt a switch on the first two failures
406+ | 0 | 1 => source
407+ | _ =>
408+ // Then try to switch every second failure
409+ if retry -> mod (2 ) === 0 {
410+ sourceManager -> getNextSyncSource (~initialSource , ~attemptFallbacks )
411+ } else {
412+ source
413+ }
414+ }
415+
416+ // Start displaying warnings after 4 failures
417+ let log = retry >= 4 ? Logging .childWarn : Logging .childTrace
418+ logger -> log ({
419+ "msg" : message ,
378420 "toBlock" : attemptedToBlock ,
379421 "backOffMilliseconds" : backoffMillis ,
380- "err" : exn ,
422+ "retry" : retry ,
423+ "err" : exn -> ErrorHandling .prettifyExn ,
381424 })
382- if hasAnotherSyncSource {
425+
426+ let shouldSwitch = nextSource !== source
427+ if shouldSwitch {
383428 logger -> Logging .childInfo ({
384429 "msg" : "Switching to another data-source" ,
385430 "source" : nextSource .name ,
386431 })
387432 sourceManager .activeSource = nextSource
388433 } else {
389- await Utils .delay (backoffMillis )
434+ await Utils .delay (Pervasives . min ( backoffMillis , 60_000 ) )
390435 }
436+ retryRef := retryRef .contents + 1
391437 }
392438 // TODO: Handle more error cases and hang/retry instead of throwing
393439 | exn => exn -> ErrorHandling .mkLogAndRaise (~logger , ~msg = "Failed to fetch block Range" )
0 commit comments