Skip to content

Commit adb7bbc

Browse files
authored
Merge pull request #82 from keep-network/balance-monitoring-retries
Balance monitoring retries: We want to use a retry mechanism for single executions of the balance check. Retries will be performed for the duration of retryTimeout, with a warning logged on each error. When the timeout is hit, an error will be logged and the retries stopped. The next balance check will be triggered at the next tick.
2 parents cecc0d5 + 71ed95d commit adb7bbc

File tree

6 files changed

+522
-6
lines changed

6 files changed

+522
-6
lines changed

pkg/chain/celo/celoutil/balance_monitor.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@ package celoutil
22

33
import (
44
"context"
5+
"time"
6+
57
"github.com/celo-org/celo-blockchain/common"
68
"github.com/keep-network/keep-common/pkg/chain/celo"
79
"github.com/keep-network/keep-common/pkg/chain/ethlike"
8-
"time"
910
)
1011

1112
// BalanceSource provides a balance info for the given address.
@@ -38,16 +39,19 @@ func NewBalanceMonitor(balanceSource BalanceSource) *BalanceMonitor {
3839
// Observe starts a process which checks the address balance with the given
3940
// tick and triggers an alert in case the balance falls below the
4041
// alert threshold value.
42+
// The balance check will be retried in case of an error up to the retry timeout.
4143
func (bm *BalanceMonitor) Observe(
4244
ctx context.Context,
4345
address common.Address,
4446
alertThreshold *celo.Wei,
4547
tick time.Duration,
48+
retryTimeout time.Duration,
4649
) {
4750
bm.delegate.Observe(
4851
ctx,
4952
ethlike.Address(address),
5053
&alertThreshold.Token,
5154
tick,
55+
retryTimeout,
5256
)
5357
}

pkg/chain/ethereum/ethutil/balance_monitor.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@ package ethutil
22

33
import (
44
"context"
5+
"time"
6+
57
"github.com/ethereum/go-ethereum/common"
68
"github.com/keep-network/keep-common/pkg/chain/ethereum"
79
"github.com/keep-network/keep-common/pkg/chain/ethlike"
8-
"time"
910
)
1011

1112
// BalanceSource provides a balance info for the given address.
@@ -38,16 +39,19 @@ func NewBalanceMonitor(balanceSource BalanceSource) *BalanceMonitor {
3839
// Observe starts a process which checks the address balance with the given
3940
// tick and triggers an alert in case the balance falls below the
4041
// alert threshold value.
42+
// The balance check will be retried in case of an error up to the retry timeout.
4143
func (bm *BalanceMonitor) Observe(
4244
ctx context.Context,
4345
address common.Address,
4446
alertThreshold *ethereum.Wei,
4547
tick time.Duration,
48+
retryTimeout time.Duration,
4649
) {
4750
bm.delegate.Observe(
4851
ctx,
4952
ethlike.Address(address),
5053
&alertThreshold.Token,
5154
tick,
55+
retryTimeout,
5256
)
5357
}

pkg/chain/ethlike/balance_monitor.go

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@ package ethlike
22

33
import (
44
"context"
5+
"fmt"
56
"time"
7+
8+
"github.com/keep-network/keep-common/pkg/wrappers"
69
)
710

811
// BalanceSource provides a balance info for the given address.
@@ -22,17 +25,26 @@ func NewBalanceMonitor(balanceSource BalanceSource) *BalanceMonitor {
2225
// Observe starts a process which checks the address balance with the given
2326
// tick and triggers an alert in case the balance falls below the
2427
// alert threshold value.
28+
// The balance check will be retried in case of an error up to the retry timeout.
2529
func (bm *BalanceMonitor) Observe(
2630
ctx context.Context,
2731
address Address,
2832
alertThreshold *Token,
2933
tick time.Duration,
34+
retryTimeout time.Duration,
3035
) {
31-
check := func() {
36+
check := func(ctx context.Context) error {
3237
balance, err := bm.balanceSource(address)
3338
if err != nil {
34-
logger.Errorf("balance monitor error: [%v]", err)
35-
return
39+
wrappedErr := fmt.Errorf(
40+
"failed to get balance for account [%s]: [%w]",
41+
address.TerminalString(),
42+
err,
43+
)
44+
45+
logger.Warning(wrappedErr)
46+
47+
return wrappedErr
3648
}
3749

3850
if balance.Cmp(alertThreshold.Int) == -1 {
@@ -43,16 +55,29 @@ func (bm *BalanceMonitor) Observe(
4355
alertThreshold.Text(10),
4456
)
4557
}
58+
59+
return nil
4660
}
4761

4862
go func() {
4963
ticker := time.NewTicker(tick)
5064
defer ticker.Stop()
5165

66+
checkBalance := func() {
67+
err := wrappers.DoWithDefaultRetry(retryTimeout, check)
68+
if err != nil {
69+
logger.Errorf("balance monitor error: [%v]", err)
70+
}
71+
}
72+
73+
// Initial balance check at monitoring start.
74+
checkBalance()
75+
5276
for {
5377
select {
78+
// Balance check at ticks.
5479
case <-ticker.C:
55-
check()
80+
checkBalance()
5681
case <-ctx.Done():
5782
return
5883
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
package ethlike
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"math/big"
7+
"sync"
8+
"time"
9+
10+
"testing"
11+
12+
"github.com/ipfs/go-log"
13+
)
14+
15+
func TestBalanceMonitor_Retries(t *testing.T) {
16+
log.SetDebugLogging()
17+
18+
attemptsCount := 0
19+
expectedAttempts := 3
20+
21+
wg := &sync.WaitGroup{}
22+
wg.Add(expectedAttempts)
23+
24+
balanceSource := func(address Address) (*Token, error) {
25+
attemptsCount++
26+
wg.Done()
27+
28+
if attemptsCount < expectedAttempts {
29+
return nil, fmt.Errorf("not this time")
30+
}
31+
32+
return &Token{big.NewInt(10)}, nil
33+
}
34+
35+
balanceMonitor := NewBalanceMonitor(balanceSource)
36+
37+
address := Address{1, 2}
38+
alertThreshold := &Token{big.NewInt(15)}
39+
tick := 1 * time.Minute
40+
retryTimeout := 5 * time.Second
41+
42+
balanceMonitor.Observe(
43+
context.Background(),
44+
address,
45+
alertThreshold,
46+
tick,
47+
retryTimeout,
48+
)
49+
50+
wg.Wait()
51+
52+
if expectedAttempts != attemptsCount {
53+
t.Errorf(
54+
"unexpected retries count\nexpected: %d\nactual: %d",
55+
expectedAttempts,
56+
attemptsCount,
57+
)
58+
}
59+
}

pkg/wrappers/wrappers.go

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
package wrappers
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"math/rand"
7+
"time"
8+
)
9+
10+
// DoWithRetry executes the provided doFn as long as it returns an error or until
11+
// a timeout is hit. It applies exponential backoff wait of backoffTime * 2^n
12+
// before nth retry of doFn. In case the calculated backoff is longer than
13+
// backoffMax, the backoffMax wait is applied.
14+
func DoWithRetry(
15+
backoffTime time.Duration,
16+
backoffMax time.Duration,
17+
timeout time.Duration,
18+
doFn func(ctx context.Context) error,
19+
) error {
20+
ctx, cancel := context.WithTimeout(context.Background(), timeout)
21+
defer cancel()
22+
23+
var err error
24+
for {
25+
select {
26+
case <-ctx.Done():
27+
return fmt.Errorf(
28+
"retry timeout [%v] exceeded; most recent error: [%w]",
29+
timeout,
30+
err,
31+
)
32+
default:
33+
err = doFn(ctx)
34+
if err == nil {
35+
return nil
36+
}
37+
38+
timedOut := backoffWait(ctx, backoffTime)
39+
if timedOut {
40+
return fmt.Errorf(
41+
"retry timeout [%v] exceeded; most recent error: [%w]",
42+
timeout,
43+
err,
44+
)
45+
}
46+
47+
backoffTime = calculateBackoff(
48+
backoffTime,
49+
backoffMax,
50+
)
51+
}
52+
}
53+
}
54+
55+
const (
56+
// DefaultDoBackoffTime is the default value of backoff time used by
57+
// DoWithDefaultRetry function.
58+
DefaultDoBackoffTime = 1 * time.Second
59+
60+
// DefaultDoMaxBackoffTime is the default value of max backoff time used by
61+
// DoWithDefaultRetry function.
62+
DefaultDoMaxBackoffTime = 120 * time.Second
63+
)
64+
65+
// DoWithDefaultRetry executes the provided doFn as long as it returns an error or
66+
// until a timeout is hit. It applies exponential backoff wait of
67+
// DefaultBackoffTime * 2^n before nth retry of doFn. In case the calculated
68+
// backoff is longer than DefaultMaxBackoffTime, the DefaultMaxBackoffTime is
69+
// applied.
70+
func DoWithDefaultRetry(
71+
timeout time.Duration,
72+
doFn func(ctx context.Context) error,
73+
) error {
74+
return DoWithRetry(
75+
DefaultDoBackoffTime,
76+
DefaultDoMaxBackoffTime,
77+
timeout,
78+
doFn,
79+
)
80+
}
81+
82+
// ConfirmWithTimeout executes the provided confirmFn until it returns true or
83+
// until it fails or until a timeout is hit. It applies exponential backoff wait
84+
// of backoffTime * 2^n before nth execution of confirmFn. In case the
85+
// calculated backoff is longer than backoffMax, the backoffMax is applied.
86+
// In case confirmFn returns an error, ConfirmWithTimeout exits with the same
87+
// error immediately. This is different from DoWithRetry behavior as the use
88+
// case for this function is different. ConfirmWithTimeout is intended to be
89+
// used to confirm a chain state and not to try to enforce a successful
90+
// execution of some function.
91+
func ConfirmWithTimeout(
92+
backoffTime time.Duration,
93+
backoffMax time.Duration,
94+
timeout time.Duration,
95+
confirmFn func(ctx context.Context) (bool, error),
96+
) (bool, error) {
97+
ctx, cancel := context.WithTimeout(context.Background(), timeout)
98+
defer cancel()
99+
100+
for {
101+
select {
102+
case <-ctx.Done():
103+
return false, nil
104+
default:
105+
ok, err := confirmFn(ctx)
106+
if err == nil && ok {
107+
return true, nil
108+
}
109+
if err != nil {
110+
return false, err
111+
}
112+
113+
timedOut := backoffWait(ctx, backoffTime)
114+
if timedOut {
115+
return false, nil
116+
}
117+
118+
backoffTime = calculateBackoff(
119+
backoffTime,
120+
backoffMax,
121+
)
122+
}
123+
}
124+
}
125+
126+
const (
127+
// DefaultConfirmBackoffTime is the default value of backoff time used by
128+
// ConfirmWithTimeoutDefaultBackoff function.
129+
DefaultConfirmBackoffTime = 5 * time.Second
130+
131+
// DefaultConfirmMaxBackoffTime is the default value of max backoff time
132+
// used by ConfirmWithTimeoutDefaultBackoff function.
133+
DefaultConfirmMaxBackoffTime = 10 * time.Second
134+
)
135+
136+
// ConfirmWithTimeoutDefaultBackoff executed the provided confirmFn until it
137+
// returns true or until it fails or until timeout is hit. It applies
138+
// backoff wait of DefaultConfirmBackoffTime * 2^n before nth execution of
139+
// confirmFn. In case the calculated backoff is longer than
140+
// DefaultConfirmMaxBackoffTime, DefaultConfirmMaxBackoffTime is applied.
141+
// In case confirmFn returns an error, ConfirmWithTimeoutDefaultBackoff exits
142+
// with the same error immediately. This is different from DoWithDefaultRetry
143+
// behavior as the use case for this function is different.
144+
// ConfirmWithTimeoutDefaultBackoff is intended to be used to confirm a chain
145+
// state and not to try to enforce a successful execution of some function.
146+
func ConfirmWithTimeoutDefaultBackoff(
147+
timeout time.Duration,
148+
confirmFn func(ctx context.Context) (bool, error),
149+
) (bool, error) {
150+
return ConfirmWithTimeout(
151+
DefaultConfirmBackoffTime,
152+
DefaultConfirmMaxBackoffTime,
153+
timeout,
154+
confirmFn,
155+
)
156+
}
157+
158+
// calculateBackoff returns the backoff to apply after backoffPrev: the
// previous backoff doubled, extended by a random jitter of up to 10% of the
// doubled value, and capped at backoffMax.
//
// A non-positive backoffPrev is treated as no backoff, so a misconfigured
// negative value cannot make the jitter calculation panic. The doubling and
// the jitter addition are guarded against int64 overflow; in both cases the
// result is backoffMax.
func calculateBackoff(
	backoffPrev time.Duration,
	backoffMax time.Duration,
) time.Duration {
	if backoffPrev <= 0 {
		if backoffMax < 0 {
			return backoffMax
		}
		return 0
	}

	// If doubling would exceed the cap, return the cap right away. Comparing
	// against half of the cap avoids overflowing on the multiplication.
	if backoffPrev > backoffMax/2 {
		return backoffMax
	}

	backoff := backoffPrev * 2

	// #nosec G404
	// we are fine with not using cryptographically secure random integer,
	// it is just exponential backoff jitter
	jitter := time.Duration(rand.Int63n(backoff.Nanoseconds()/10 + 1))

	// Same as backoff+jitter > backoffMax, but safe against overflow.
	if jitter > backoffMax-backoff {
		return backoffMax
	}

	return backoff + jitter
}
179+
180+
// backoffWait blocks until waitTime elapses or ctx is done. It returns true
// when the wait was ended by ctx being done and false when the timer fired.
func backoffWait(ctx context.Context, waitTime time.Duration) bool {
	waitTimer := time.NewTimer(waitTime)
	defer waitTimer.Stop()

	interrupted := false

	select {
	case <-waitTimer.C:
		// Full wait elapsed.
	case <-ctx.Done():
		interrupted = true
	}

	return interrupted
}

0 commit comments

Comments
 (0)