@@ -17,6 +17,7 @@ package schedulers
1717import (
1818 "context"
1919 "testing"
20+ "time"
2021
2122 "github.com/stretchr/testify/require"
2223 "github.com/stretchr/testify/suite"
@@ -207,3 +208,108 @@ func TestEvictSlowStoreBatch(t *testing.T) {
207208 re .Equal (5 , persistValue .Batch )
208209 re .NoError (failpoint .Disable ("github.com/tikv/pd/pkg/schedule/schedulers/transientRecoveryGap" ))
209210}
211+
212+ func TestRecoveryTime (t * testing.T ) {
213+ re := require .New (t )
214+ cancel , _ , tc , oc := prepareSchedulersTest ()
215+ defer cancel ()
216+
217+ // Add stores 1, 2, 3 with different leader counts
218+ tc .AddLeaderStore (1 , 10 )
219+ tc .AddLeaderStore (2 , 0 )
220+ tc .AddLeaderStore (3 , 0 )
221+
222+ // Add regions with leader in store 1
223+ for i := range 10 {
224+ tc .AddLeaderRegion (uint64 (i ), 1 , 2 , 3 )
225+ }
226+
227+ storage := storage .NewStorageWithMemoryBackend ()
228+ es , err := CreateScheduler (types .EvictSlowStoreScheduler , oc , storage ,
229+ ConfigSliceDecoder (types .EvictSlowStoreScheduler , []string {}), nil )
230+ re .NoError (err )
231+ bs , err := CreateScheduler (types .BalanceLeaderScheduler , oc , storage ,
232+ ConfigSliceDecoder (types .BalanceLeaderScheduler , []string {}), nil )
233+ re .NoError (err )
234+
235+ var recoveryTimeInSec uint64 = 1
236+ recoveryTime := 1 * time .Second
237+ es .(* evictSlowStoreScheduler ).conf .RecoveryDurationGap = recoveryTimeInSec
238+
239+ // Mark store 1 as slow
240+ storeInfo := tc .GetStore (1 )
241+ slowStore := storeInfo .Clone (func (store * core.StoreInfo ) {
242+ store .GetStoreStats ().SlowScore = 100
243+ })
244+ tc .PutStore (slowStore )
245+
246+ // Verify store is marked for eviction
247+ ops , _ := es .Schedule (tc , false )
248+ re .NotEmpty (ops )
249+ re .Equal (types .EvictSlowStoreScheduler .String (), ops [0 ].Desc ())
250+ re .Equal (uint64 (1 ), es .(* evictSlowStoreScheduler ).conf .evictStore ())
251+
252+ // Store recovers from being slow
253+ time .Sleep (recoveryTime )
254+ recoveredStore := storeInfo .Clone (func (store * core.StoreInfo ) {
255+ store .GetStoreStats ().SlowScore = 0
256+ })
257+ tc .PutStore (recoveredStore )
258+
259+ // Should not recover immediately due to recovery time window
260+ for range 10 {
261+ // trigger recovery check
262+ es .Schedule (tc , false )
263+ ops , _ = bs .Schedule (tc , false )
264+ re .Empty (ops )
265+ re .Equal (uint64 (1 ), es .(* evictSlowStoreScheduler ).conf .evictStore ())
266+ }
267+
268+ // Store is slow again before recovery time is over
269+ time .Sleep (recoveryTime / 2 )
270+ slowStore = storeInfo .Clone (func (store * core.StoreInfo ) {
271+ store .GetStoreStats ().SlowScore = 100
272+ })
273+ tc .PutStore (slowStore )
274+ time .Sleep (recoveryTime / 2 )
275+ // Should not recover due to recovery time window recalculation
276+ for range 10 {
277+ // trigger recovery check
278+ es .Schedule (tc , false )
279+ ops , _ = bs .Schedule (tc , false )
280+ re .Empty (ops )
281+ re .Equal (uint64 (1 ), es .(* evictSlowStoreScheduler ).conf .evictStore ())
282+ }
283+
284+ // Store recovers from being slow
285+ time .Sleep (recoveryTime )
286+ recoveredStore = storeInfo .Clone (func (store * core.StoreInfo ) {
287+ store .GetStoreStats ().SlowScore = 0
288+ })
289+ tc .PutStore (recoveredStore )
290+
291+ // Should not recover immediately due to recovery time window
292+ for range 10 {
293+ // trigger recovery check
294+ es .Schedule (tc , false )
295+ ops , _ = bs .Schedule (tc , false )
296+ re .Empty (ops )
297+ re .Equal (uint64 (1 ), es .(* evictSlowStoreScheduler ).conf .evictStore ())
298+ }
299+
300+ // Should now recover
301+ time .Sleep (recoveryTime )
302+ // trigger recovery check
303+ es .Schedule (tc , false )
304+
305+ ops , _ = bs .Schedule (tc , false )
306+ re .Empty (ops )
307+ re .Empty (es .(* evictSlowStoreScheduler ).conf .evictStore ())
308+
309+ // Verify persistence
310+ var persistValue evictSlowStoreSchedulerConfig
311+ err = es .(* evictSlowStoreScheduler ).conf .load (& persistValue )
312+ re .NoError (err )
313+ re .Zero (persistValue .evictStore ())
314+ re .True (persistValue .readyForRecovery ())
315+ }
0 commit comments