Skip to content

Commit febd2db

Browse files
authored
Improve worker shutdown logic (#77)
Signed-off-by: Fabian Martinez <[email protected]>
1 parent 0c4afbc commit febd2db

File tree

5 files changed

+299
-10
lines changed

5 files changed

+299
-10
lines changed

CHANGELOG.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1515

1616
### Changed
1717

18-
- Make WaitForOrchestrationXXX gRPC APIs resilient ([#80](https://github.com/microsoft/durabletask-go/pull/81)) - by [@famarting](https://github.com/famarting)
18+
- Make WaitForOrchestrationXXX gRPC APIs resilient ([#80](https://github.com/microsoft/durabletask-go/pull/80)) - by [@famarting](https://github.com/famarting)
19+
- Improve worker shutdown logic ([#77](https://github.com/microsoft/durabletask-go/pull/77)) - by [@famarting](https://github.com/famarting)
1920

2021
## [v0.5.0] - 2024-06-28
2122

backend/taskhub.go

+18-2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package backend
22

33
import (
44
"context"
5+
"sync"
56
)
67

78
type TaskHubWorker interface {
@@ -50,7 +51,22 @@ func (w *taskHubWorker) Shutdown(ctx context.Context) error {
5051
}
5152

5253
w.logger.Info("workers stopping and draining...")
53-
w.orchestrationWorker.StopAndDrain()
54-
w.activityWorker.StopAndDrain()
54+
defer w.logger.Info("finished stopping and draining workers!")
55+
56+
wg := sync.WaitGroup{}
57+
wg.Add(1)
58+
go func() {
59+
defer wg.Done()
60+
w.orchestrationWorker.StopAndDrain()
61+
}()
62+
63+
wg.Add(1)
64+
go func() {
65+
defer wg.Done()
66+
w.activityWorker.StopAndDrain()
67+
}()
68+
69+
wg.Wait()
70+
5571
return nil
5672
}

backend/worker.go

+27-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"errors"
66
"sync"
7+
"sync/atomic"
78
"time"
89

910
"github.com/cenkalti/backoff/v4"
@@ -45,6 +46,7 @@ type worker struct {
4546
cancel context.CancelFunc
4647
processor TaskProcessor
4748
waiting bool
49+
stop atomic.Bool
4850
}
4951

5052
type NewTaskWorkerOptions func(*WorkerOptions)
@@ -89,6 +91,8 @@ func (w *worker) Start(ctx context.Context) {
8991
ctx, cancel := context.WithCancel(ctx)
9092
w.cancel = cancel
9193

94+
w.stop.Store(false)
95+
9296
go func() {
9397
var b backoff.BackOff = &backoff.ExponentialBackOff{
9498
InitialInterval: 50 * time.Millisecond,
@@ -190,6 +194,11 @@ func (w *worker) ProcessNext(ctx context.Context) (bool, error) {
190194
}
191195

192196
func (w *worker) StopAndDrain() {
197+
w.logger.Debugf("%v: stop and drain...", w.Name())
198+
defer w.logger.Debugf("%v: finished stop and drain...", w.Name())
199+
200+
w.stop.Store(true)
201+
193202
// Cancel the background poller and dispatcher(s)
194203
if w.cancel != nil {
195204
w.cancel()
@@ -206,20 +215,37 @@ func (w *worker) processWorkItem(ctx context.Context, wi WorkItem) {
206215

207216
w.logger.Debugf("%v: processing work item: %s", w.Name(), wi)
208217

218+
if w.stop.Load() {
219+
if err := w.processor.AbandonWorkItem(context.Background(), wi); err != nil {
220+
w.logger.Errorf("%v: failed to abandon work item: %v", w.Name(), err)
221+
}
222+
return
223+
}
224+
209225
if err := w.processor.ProcessWorkItem(ctx, wi); err != nil {
210226
if errors.Is(err, ctx.Err()) {
211227
w.logger.Warnf("%v: abandoning work item due to cancellation", w.Name())
212228
} else {
213229
w.logger.Errorf("%v: failed to process work item: %v", w.Name(), err)
214230
}
231+
if w.stop.Load() {
232+
ctx = context.Background()
233+
}
215234
if err := w.processor.AbandonWorkItem(ctx, wi); err != nil {
216235
w.logger.Errorf("%v: failed to abandon work item: %v", w.Name(), err)
217236
}
218237
return
219238
}
220239

221240
if err := w.processor.CompleteWorkItem(ctx, wi); err != nil {
222-
w.logger.Errorf("%v: failed to complete work item: %v", w.Name(), err)
241+
if errors.Is(err, ctx.Err()) {
242+
w.logger.Warnf("%v: failed to complete work item due to cancellation", w.Name())
243+
} else {
244+
w.logger.Errorf("%v: failed to complete work item: %v", w.Name(), err)
245+
}
246+
if w.stop.Load() {
247+
ctx = context.Background()
248+
}
223249
if err := w.processor.AbandonWorkItem(ctx, wi); err != nil {
224250
w.logger.Errorf("%v: failed to abandon work item: %v", w.Name(), err)
225251
}

tests/mocks/task.go

+128
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
package mocks
2+
3+
import (
4+
context "context"
5+
"errors"
6+
"sync"
7+
"sync/atomic"
8+
"time"
9+
10+
backend "github.com/microsoft/durabletask-go/backend"
11+
)
12+
13+
// Compile-time assertion that *TestTaskProcessor satisfies backend.TaskProcessor.
var _ backend.TaskProcessor = &TestTaskProcessor{}
14+
15+
// TestTaskProcessor implements a dummy task processor useful for testing.
//
// It keeps an in-memory queue of pending work items and records every item
// that is abandoned or completed, so tests can inspect the outcome afterwards.
// Each list is guarded by its own mutex.
16+
type TestTaskProcessor struct {
17+
// name is the value reported by Name().
name string
18+
19+
// processingBlocked, while true, makes ProcessWorkItem wait until it is
// cleared or the caller's context is cancelled.
processingBlocked atomic.Bool
20+
21+
// workItemMu guards workItems.
workItemMu sync.Mutex
22+
// workItems is the pending queue: AddWorkItems appends, FetchWorkItem pops from the front.
workItems []backend.WorkItem
23+
24+
// abandonedWorkItemMu guards abandonedWorkItems.
abandonedWorkItemMu sync.Mutex
25+
// abandonedWorkItems records every item passed to AbandonWorkItem, in call order.
abandonedWorkItems []backend.WorkItem
26+
27+
// completedWorkItemMu guards completedWorkItems.
completedWorkItemMu sync.Mutex
28+
// completedWorkItems records every item passed to CompleteWorkItem, in call order.
completedWorkItems []backend.WorkItem
29+
}
30+
31+
// NewTestTaskPocessor returns a TestTaskProcessor that reports the given name
// from Name(). All other fields keep their zero values: processing is not
// blocked and the pending, abandoned and completed lists are empty.
//
// NOTE(review): the identifier is spelled "Pocessor" (missing an "r"). It is
// exported, so renaming it would break callers; consider adding a correctly
// spelled alias in a follow-up.
func NewTestTaskPocessor(name string) *TestTaskProcessor {
32+
return &TestTaskProcessor{
33+
name: name,
34+
}
35+
}
36+
37+
// BlockProcessing sets the processingBlocked flag. While it is set,
// ProcessWorkItem does not return until UnblockProcessing is called or the
// context passed to ProcessWorkItem is cancelled.
func (t *TestTaskProcessor) BlockProcessing() {
38+
t.processingBlocked.Store(true)
39+
}
40+
41+
// UnblockProcessing clears the processingBlocked flag, letting any
// ProcessWorkItem call that is currently waiting return nil on its next poll.
func (t *TestTaskProcessor) UnblockProcessing() {
42+
t.processingBlocked.Store(false)
43+
}
44+
45+
// PendingWorkItems returns a snapshot of the work items that have been added
// but not yet fetched. The result is always a non-nil slice.
func (t *TestTaskProcessor) PendingWorkItems() []backend.WorkItem {
46+
t.workItemMu.Lock()
47+
defer t.workItemMu.Unlock()
48+
49+
// Return a copy of the slice so callers cannot observe or modify the
// internal queue after the lock is released.
50+
return append([]backend.WorkItem{}, t.workItems...)
51+
}
52+
53+
// AbandonedWorkItems returns a snapshot of the work items recorded by
// AbandonWorkItem so far. The result is always a non-nil slice.
func (t *TestTaskProcessor) AbandonedWorkItems() []backend.WorkItem {
54+
t.abandonedWorkItemMu.Lock()
55+
defer t.abandonedWorkItemMu.Unlock()
56+
57+
// Return a copy of the slice so callers cannot observe or modify the
// internal list after the lock is released.
58+
return append([]backend.WorkItem{}, t.abandonedWorkItems...)
59+
}
60+
61+
// CompletedWorkItems returns a snapshot of the work items recorded by
// CompleteWorkItem so far. The result is always a non-nil slice.
func (t *TestTaskProcessor) CompletedWorkItems() []backend.WorkItem {
62+
t.completedWorkItemMu.Lock()
63+
defer t.completedWorkItemMu.Unlock()
64+
65+
// Return a copy of the slice so callers cannot observe or modify the
// internal list after the lock is released.
66+
return append([]backend.WorkItem{}, t.completedWorkItems...)
67+
}
68+
69+
// AddWorkItems appends the given work items to the end of the pending queue,
// preserving their order. They become available to FetchWorkItem.
func (t *TestTaskProcessor) AddWorkItems(wis ...backend.WorkItem) {
70+
t.workItemMu.Lock()
71+
defer t.workItemMu.Unlock()
72+
73+
t.workItems = append(t.workItems, wis...)
74+
}
75+
76+
// Name returns the name supplied to NewTestTaskPocessor.
func (t *TestTaskProcessor) Name() string {
77+
return t.name
78+
}
79+
80+
// FetchWorkItem removes and returns the oldest pending work item.
//
// The context argument is ignored. When the queue is empty it returns
// (nil, backend.ErrNoWorkItems).
func (t *TestTaskProcessor) FetchWorkItem(context.Context) (backend.WorkItem, error) {
81+
t.workItemMu.Lock()
82+
defer t.workItemMu.Unlock()
83+
84+
if len(t.workItems) == 0 {
85+
return nil, backend.ErrNoWorkItems
86+
}
87+
88+
// Pop the first item. With i fixed at 0 the append shifts the remaining
// elements down by one within the same backing array.
89+
i := 0
90+
wi := t.workItems[i]
91+
t.workItems = append(t.workItems[:i], t.workItems[i+1:]...)
92+
93+
return wi, nil
94+
}
95+
96+
// ProcessWorkItem simulates processing of a work item; wi itself is not inspected.
//
// If processing is not blocked it returns nil immediately. Otherwise it polls
// once per millisecond until either processing is unblocked (returns nil) or
// ctx is done (returns a fixed dummy error).
//
// Note that the error returned on cancellation is a fresh errors.New value,
// not ctx.Err(), so errors.Is(err, ctx.Err()) is false for it.
func (t *TestTaskProcessor) ProcessWorkItem(ctx context.Context, wi backend.WorkItem) error {
97+
// Fast path: nothing is blocking, succeed right away.
if !t.processingBlocked.Load() {
98+
return nil
99+
}
100+
// wait for context cancellation or until processing is unblocked
101+
for {
102+
select {
103+
case <-ctx.Done():
104+
return errors.New("dummy error processing work item")
105+
default:
106+
if !t.processingBlocked.Load() {
107+
return nil
108+
}
109+
// Short sleep so the poll loop does not spin.
time.Sleep(time.Millisecond)
110+
}
111+
}
112+
}
113+
114+
// AbandonWorkItem records wi in the abandoned list so tests can retrieve it
// via AbandonedWorkItems. The context is not used and the returned error is
// always nil.
func (t *TestTaskProcessor) AbandonWorkItem(ctx context.Context, wi backend.WorkItem) error {
115+
t.abandonedWorkItemMu.Lock()
116+
defer t.abandonedWorkItemMu.Unlock()
117+
118+
t.abandonedWorkItems = append(t.abandonedWorkItems, wi)
119+
return nil
120+
}
121+
122+
// CompleteWorkItem records wi in the completed list so tests can retrieve it
// via CompletedWorkItems. The context is not used and the returned error is
// always nil.
func (t *TestTaskProcessor) CompleteWorkItem(ctx context.Context, wi backend.WorkItem) error {
123+
t.completedWorkItemMu.Lock()
124+
defer t.completedWorkItemMu.Unlock()
125+
126+
t.completedWorkItems = append(t.completedWorkItems, wi)
127+
return nil
128+
}

0 commit comments

Comments
 (0)