
Commit 00c153b

rcu: Let non-offloaded idle CPUs with callbacks defer tick
When a CPU goes idle, rcu_needs_cpu() is invoked to determine whether or
not RCU needs the scheduler-clock tick to keep interrupting.  Right now,
RCU keeps the tick on for a given idle CPU if there are any non-offloaded
callbacks queued on that CPU.

But if all of these callbacks are waiting for a grace period to finish,
there is no point in scheduling a tick before that grace period has any
reasonable chance of completing.  This commit therefore delays the tick
in the case where all the callbacks are waiting for a specific grace
period to elapse.  In theory, this should result in a 50-70% reduction
in RCU-induced scheduling-clock ticks on mostly-idle CPUs.  In practice,
TBD.

Signed-off-by: Paul E. McKenney <[email protected]>
Cc: Peter Zijlstra <[email protected]>

1 parent 1d7d256 commit 00c153b
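
The deferral arithmetic that the new rcu_needs_cpu() in kernel/rcu/tree.c (below) performs can be illustrated with a minimal user-space sketch. This is not kernel code: the tick period and the jiffies values are assumptions chosen for the example, and the real code derives the wait from jiffies_till_first_fqs and jiffies_till_next_fqs and uses time_after() to handle jiffies wrap.

/*
 * Minimal user-space sketch (not kernel code) of the deferral arithmetic:
 * if every queued callback is waiting on a future grace period, the tick
 * can be pushed out to roughly one or two force-quiescent-state intervals
 * past the last scheduler-clock tick.  The constants and sample values
 * below are assumptions for illustration only.
 */
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_TICK_NSEC 1000000ULL	/* assume HZ=1000, so 1 ms per jiffy */

/* Return 1 if the tick is needed right away, else 0 and fill in *nextevt. */
static int defer_tick_example(uint64_t basemono, unsigned long j,
			      unsigned long jlast, unsigned long jwait,
			      uint64_t *nextevt)
{
	/* A grace period or two has already passed since the last tick. */
	if (j > jlast + jwait)
		return 1;
	/* Otherwise defer the next event by the remaining jiffies. */
	*nextevt = basemono + EXAMPLE_TICK_NSEC * (jlast + jwait - j);
	return 0;
}

int main(void)
{
	uint64_t nextevt = 0;
	/* Last tick 2 jiffies ago; assume a 7-jiffy wait (e.g. 3 + 3 + 1). */
	int need = defer_tick_example(1000000000ULL, 102, 100, 7, &nextevt);

	printf("tick needed now: %d, next event at %llu ns\n",
	       need, (unsigned long long)nextevt);
	return 0;
}

With these assumed values the idle CPU's next event lands 5 ms out rather than at the next jiffy, which is the mechanism behind the commit's expected reduction in scheduling-clock ticks.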

4 files changed, 43 insertions(+), 15 deletions(-)

include/linux/rcutiny.h (+1 -1)

@@ -133,7 +133,7 @@ static inline void rcu_softirq_qs(void)
 		rcu_tasks_qs(current, (preempt)); \
 	} while (0)
 
-static inline int rcu_needs_cpu(void)
+static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
 {
 	return 0;
 }

include/linux/rcutree.h (+1 -1)

@@ -19,7 +19,7 @@
 
 void rcu_softirq_qs(void);
 void rcu_note_context_switch(bool preempt);
-int rcu_needs_cpu(void);
+int rcu_needs_cpu(u64 basemono, u64 *nextevt);
 void rcu_cpu_stall_reset(void);
 
 /*

kernel/rcu/tree.c (+35 -9)

@@ -670,12 +670,40 @@ void __rcu_irq_enter_check_tick(void)
  * scheduler-clock interrupt.
  *
  * Just check whether or not this CPU has non-offloaded RCU callbacks
- * queued.
+ * queued that need immediate attention.
  */
-int rcu_needs_cpu(void)
+int rcu_needs_cpu(u64 basemono, u64 *nextevt)
 {
-	return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
-		!rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
+	unsigned long j;
+	unsigned long jlast;
+	unsigned long jwait;
+	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+	struct rcu_segcblist *rsclp = &rdp->cblist;
+
+	// Disabled, empty, or offloaded means nothing to do.
+	if (!rcu_segcblist_is_enabled(rsclp) ||
+	    rcu_segcblist_empty(rsclp) || rcu_rdp_is_offloaded(rdp)) {
+		*nextevt = KTIME_MAX;
+		return 0;
+	}
+
+	// Callbacks ready to invoke or that have not already been
+	// assigned a grace period need immediate attention.
+	if (!rcu_segcblist_segempty(rsclp, RCU_DONE_TAIL) ||
+	    !rcu_segcblist_segempty(rsclp, RCU_NEXT_TAIL))
+		return 1;
+
+	// There are callbacks waiting for some later grace period.
+	// Wait for about a grace period or two since the last tick, at which
+	// point there is high probability that this CPU will need to do some
+	// work for RCU.
+	j = jiffies;
+	jlast = __this_cpu_read(rcu_data.last_sched_clock);
+	jwait = READ_ONCE(jiffies_till_first_fqs) + READ_ONCE(jiffies_till_next_fqs) + 1;
+	if (time_after(j, jlast + jwait))
+		return 1;
+	*nextevt = basemono + TICK_NSEC * (jlast + jwait - j);
+	return 0;
 }
 
 /*
@@ -2318,11 +2346,9 @@ void rcu_sched_clock_irq(int user)
 {
 	unsigned long j;
 
-	if (IS_ENABLED(CONFIG_PROVE_RCU)) {
-		j = jiffies;
-		WARN_ON_ONCE(time_before(j, __this_cpu_read(rcu_data.last_sched_clock)));
-		__this_cpu_write(rcu_data.last_sched_clock, j);
-	}
+	j = jiffies;
+	WARN_ON_ONCE(time_before(j, __this_cpu_read(rcu_data.last_sched_clock)));
+	__this_cpu_write(rcu_data.last_sched_clock, j);
 	trace_rcu_utilization(TPS("Start scheduler-tick"));
 	lockdep_assert_irqs_disabled();
 	raw_cpu_inc(rcu_data.ticks_this_gp);

kernel/time/tick-sched.c (+6 -4)

@@ -784,7 +784,7 @@ static inline bool local_timer_softirq_pending(void)
 
 static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 {
-	u64 basemono, next_tick, delta, expires;
+	u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
 	unsigned long basejiff;
 	unsigned int seq;
 
@@ -807,7 +807,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 	 * minimal delta which brings us back to this place
 	 * immediately. Lather, rinse and repeat...
 	 */
-	if (rcu_needs_cpu() || arch_needs_cpu() ||
+	if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
 	    irq_work_needs_cpu() || local_timer_softirq_pending()) {
 		next_tick = basemono + TICK_NSEC;
 	} else {
@@ -818,8 +818,10 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 		 * disabled this also looks at the next expiring
 		 * hrtimer.
 		 */
-		next_tick = get_next_timer_interrupt(basejiff, basemono);
-		ts->next_timer = next_tick;
+		next_tmr = get_next_timer_interrupt(basejiff, basemono);
+		ts->next_timer = next_tmr;
+		/* Take the next rcu event into account */
+		next_tick = next_rcu < next_tmr ? next_rcu : next_tmr;
 	}
 
 	/*
