@@ -183,6 +183,7 @@ func (w *peerWorkManager) workDispatcher() {
183
183
timeout <- chan time.Time
184
184
rem int
185
185
errChan chan error
186
+ cancelChan chan struct {}
186
187
}
187
188
188
189
// We set up a batch index counter to keep track of batches that still
@@ -309,7 +310,20 @@ Loop:
309
310
// turns out to be an error.
310
311
batchNum := currentQueries [result .job .index ]
311
312
delete (currentQueries , result .job .index )
312
- batch := currentBatches [batchNum ]
313
+
314
+ // In case the batch is already canceled we return
315
+ // early.
316
+ batch , ok := currentBatches [batchNum ]
317
+ if ! ok {
318
+ log .Warnf ("Query(%d) result from peer %v " +
319
+ "discarded with retries %d, because " +
320
+ "batch already canceled: %v" ,
321
+ result .job .index ,
322
+ result .peer .Addr (),
323
+ result .job .tries , result .err )
324
+
325
+ continue Loop
326
+ }
313
327
314
328
switch {
315
329
// If the query ended because it was canceled, drop it.
@@ -322,30 +336,34 @@ Loop:
322
336
// was canceled, forward the error on the
323
337
// batch's error channel. We do this since a
324
338
// cancellation applies to the whole batch.
325
- if batch != nil {
326
- batch .errChan <- result .err
327
- delete (currentBatches , batchNum )
339
+ batch .errChan <- result .err
340
+ delete (currentBatches , batchNum )
328
341
329
- log .Debugf ("Canceled batch %v" ,
330
- batchNum )
331
- continue Loop
332
- }
342
+ log .Debugf ("Canceled batch %v" , batchNum )
343
+ continue Loop
333
344
334
345
// If the query ended with any other error, put it back
335
346
// into the work queue if it has not reached the
336
347
// maximum number of retries.
337
348
case result .err != nil :
338
- // Punish the peer for the failed query.
339
- w .cfg .Ranking .Punish (result .peer .Addr ())
349
+ // Refresh peer rank on disconnect.
350
+ if result .err == ErrPeerDisconnected {
351
+ w .cfg .Ranking .ResetRanking (
352
+ result .peer .Addr (),
353
+ )
354
+ } else {
355
+ // Punish the peer for the failed query.
356
+ w .cfg .Ranking .Punish (result .peer .Addr ())
357
+ }
340
358
341
- if batch != nil && ! batch .noRetryMax {
359
+ if ! batch .noRetryMax {
342
360
result .job .tries ++
343
361
}
344
362
345
363
// Check if this query has reached its maximum
346
364
// number of retries. If so, remove it from the
347
365
// batch and don't reschedule it.
348
- if batch != nil && ! batch .noRetryMax &&
366
+ if ! batch .noRetryMax &&
349
367
result .job .tries >= batch .maxRetries {
350
368
351
369
log .Warnf ("Query(%d) from peer %v " +
@@ -380,11 +398,6 @@ Loop:
380
398
result .job .timeout = newTimeout
381
399
}
382
400
383
- // Refresh peer rank on disconnect.
384
- if result .err == ErrPeerDisconnected {
385
- w .cfg .Ranking .ResetRanking (result .peer .Addr ())
386
- }
387
-
388
401
heap .Push (work , result .job )
389
402
currentQueries [result .job .index ] = batchNum
390
403
@@ -396,42 +409,47 @@ Loop:
396
409
397
410
// Decrement the number of queries remaining in
398
411
// the batch.
399
- if batch != nil {
400
- batch .rem --
401
- log .Tracef ("Remaining jobs for batch " +
402
- "%v: %v " , batchNum , batch .rem )
403
-
404
- // If this was the last query in flight
405
- // for this batch, we can notify that
406
- // it finished, and delete it.
407
- if batch .rem == 0 {
408
- batch .errChan <- nil
409
- delete (currentBatches , batchNum )
410
-
411
- log .Tracef ("Batch %v done" ,
412
- batchNum )
413
- continue Loop
414
- }
412
+ batch .rem --
413
+ log .Tracef ("Remaining jobs for batch " +
414
+ "%v: %v " , batchNum , batch .rem )
415
+
416
+ // If this was the last query in flight
417
+ // for this batch, we can notify that
418
+ // it finished, and delete it.
419
+ if batch .rem == 0 {
420
+ batch .errChan <- nil
421
+ delete (currentBatches , batchNum )
422
+
423
+ log .Tracef ("Batch %v done" ,
424
+ batchNum )
425
+ continue Loop
415
426
}
416
427
}
417
428
418
429
// If the total timeout for this batch has passed,
419
430
// return an error.
420
- if batch != nil {
421
- select {
422
- case <- batch .timeout :
423
- batch .errChan <- ErrQueryTimeout
424
- delete (currentBatches , batchNum )
431
+ select {
432
+ case <- batch .timeout :
433
+ batch .errChan <- ErrQueryTimeout
434
+ delete (currentBatches , batchNum )
435
+
436
+ // When deleting the particular batch
437
+ // number we need to make sure to cancel
438
+ // all queued and ongoing queryJobs
439
+ // to not waste resources when the batch
440
+ // call is already canceled.
441
+ if batch .cancelChan != nil {
442
+ close (batch .cancelChan )
443
+ }
425
444
426
- log .Warnf ("Query(%d) failed with " +
427
- "error: %v. Timing out." ,
428
- result .job .index , result .err )
445
+ log .Warnf ("Query(%d) failed with " +
446
+ "error: %v. Timing out." ,
447
+ result .job .index , result .err )
429
448
430
- log .Debugf ("Batch %v timed out" ,
431
- batchNum )
449
+ log .Warnf ("Batch %v timed out" ,
450
+ batchNum )
432
451
433
- default :
434
- }
452
+ default :
435
453
}
436
454
437
455
// A new batch of queries where scheduled.
@@ -442,13 +460,17 @@ Loop:
442
460
log .Debugf ("Adding new batch(%d) of %d queries to " +
443
461
"work queue" , batchIndex , len (batch .requests ))
444
462
463
+ // Internal cancel channel of a batch request.
464
+ cancelChan := make (chan struct {})
465
+
445
466
for _ , q := range batch .requests {
446
467
heap .Push (work , & queryJob {
447
- index : queryIndex ,
448
- timeout : minQueryTimeout ,
449
- encoding : batch .options .encoding ,
450
- cancelChan : batch .options .cancelChan ,
451
- Request : q ,
468
+ index : queryIndex ,
469
+ timeout : minQueryTimeout ,
470
+ encoding : batch .options .encoding ,
471
+ cancelChan : batch .options .cancelChan ,
472
+ internalCancelChan : cancelChan ,
473
+ Request : q ,
452
474
})
453
475
currentQueries [queryIndex ] = batchIndex
454
476
queryIndex ++
@@ -457,9 +479,12 @@ Loop:
457
479
currentBatches [batchIndex ] = & batchProgress {
458
480
noRetryMax : batch .options .noRetryMax ,
459
481
maxRetries : batch .options .numRetries ,
460
- timeout : time .After (batch .options .timeout ),
482
+ timeout : time .After (
483
+ batch .options .timeout ,
484
+ ),
461
485
rem : len (batch .requests ),
462
486
errChan : batch .errChan ,
487
+ cancelChan : cancelChan ,
463
488
}
464
489
batchIndex ++
465
490
0 commit comments