Message ID | 1389949444-14821-4-git-send-email-daniel.lezcano@linaro.org |
---|---|
State | New |
Headers | show |
On 01/17/2014 03:26 PM, Peter Zijlstra wrote: > On Fri, Jan 17, 2014 at 10:04:04AM +0100, Daniel Lezcano wrote: >> @@ -2679,11 +2715,8 @@ need_resched: >> >> pre_schedule(rq, prev); >> >> - if (unlikely(!rq->nr_running)) >> - rq->idle_stamp = idle_balance(rq) ? 0 : rq_clock(rq); >> - >> put_prev_task(rq, prev); >> - next = pick_next_task(rq); >> + next = pick_next_task_or_idle(rq); >> clear_tsk_need_resched(prev); >> clear_preempt_need_resched(); >> rq->skip_clock_update = 0; > > I have vague memories that we need to have idle_balance() before > put_prev_task(), but I can't recollect why this would be so. > > That said, if I resurrect these patches: > > https://lkml.org/lkml/2012/6/14/271 > > I suppose we could write something like: > > struct task_struct *pick_next_task(struct rq *rq, struct task_struct *prev) > { > const struct sched_class *class; > struct task_struct *p; > > again: > if (likely(rq->nr_running)) { > > if (likely(rq->nr_running == rq->cfs.h_nr_running)) > return fair_sched_class.pick_next_task(rq, prev); > > for_each_class(class) { > p = class->pick_next_task(rq, prev); > if (p) > return p; > } > } > > if (idle_balance(rq)) > goto again; > > rq->idle_stamp = rq_clock(rq); > > return idle_sched_class.pick_next_task(rq, prev); > } > > Which keeps idle_balance() before put_prev_task(), and by using > idle_sched_clas.pick_next_task() doesn't rape the idle class interface > like you did :-) But put_prev_task is called before pick_next_task, so idle_balance() is called after now, no ?
On 01/17/2014 03:09 PM, Peter Zijlstra wrote: > On Fri, Jan 17, 2014 at 10:04:04AM +0100, Daniel Lezcano wrote: >> + schedstat_inc(rq, sched_goidle); >> +#ifdef CONFIG_SMP >> + /* Trigger the post schedule to do an idle_enter for CFS */ >> + rq->post_schedule = 1; >> +#endif >> + return rq->idle; > > Urgh, that retains the stupid idle crap like it is. > > I've not yet tested this, but is there a reason something like the below > couldn't work? I tested it by running hackbench and it seems to work as expected. > --- > Subject: sched: Clean up idle task SMP logic > From: Peter Zijlstra <peterz@infradead.org> > Date: Fri Jan 17 14:54:02 CET 2014 > > The idle post_schedule hook is just a vile waste of time, fix it proper. > > Signed-off-by: Peter Zijlstra <peterz@infradead.org> > --- > kernel/sched/fair.c | 5 +++-- > kernel/sched/idle_task.c | 21 ++++++--------------- > 2 files changed, 9 insertions(+), 17 deletions(-) > > Index: linux-2.6/kernel/sched/fair.c > =================================================================== > --- linux-2.6.orig/kernel/sched/fair.c > +++ linux-2.6/kernel/sched/fair.c > @@ -2416,7 +2416,8 @@ void idle_exit_fair(struct rq *this_rq) > update_rq_runnable_avg(this_rq, 0); > } > > -#else > +#else /* CONFIG_SMP */ > + > static inline void update_entity_load_avg(struct sched_entity *se, > int update_cfs_rq) {} > static inline void update_rq_runnable_avg(struct rq *rq, int runnable) {} > @@ -2428,7 +2429,7 @@ static inline void dequeue_entity_load_a > int sleep) {} > static inline void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, > int force_update) {} > -#endif > +#endif /* CONFIG_SMP */ > > static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) > { > Index: linux-2.6/kernel/sched/idle_task.c > =================================================================== > --- linux-2.6.orig/kernel/sched/idle_task.c > +++ linux-2.6/kernel/sched/idle_task.c > @@ -13,18 +13,8 @@ select_task_rq_idle(struct task_struct * > { > 
return task_cpu(p); /* IDLE tasks as never migrated */ > } > - > -static void pre_schedule_idle(struct rq *rq, struct task_struct *prev) > -{ > - idle_exit_fair(rq); > - rq_last_tick_reset(rq); > -} > - > -static void post_schedule_idle(struct rq *rq) > -{ > - idle_enter_fair(rq); > -} > #endif /* CONFIG_SMP */ > + > /* > * Idle tasks are unconditionally rescheduled: > */ > @@ -37,8 +27,7 @@ static struct task_struct *pick_next_tas > { > schedstat_inc(rq, sched_goidle); > #ifdef CONFIG_SMP > - /* Trigger the post schedule to do an idle_enter for CFS */ > - rq->post_schedule = 1; > + idle_enter_fair(rq); > #endif > return rq->idle; > } > @@ -58,6 +47,10 @@ dequeue_task_idle(struct rq *rq, struct > > static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) > { > +#ifdef CONFIG_SMP > + idle_exit_fair(rq); > + rq_last_tick_reset(rq); > +#endif > } > > static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued) > @@ -101,8 +94,6 @@ const struct sched_class idle_sched_clas > > #ifdef CONFIG_SMP > .select_task_rq = select_task_rq_idle, > - .pre_schedule = pre_schedule_idle, > - .post_schedule = post_schedule_idle, > #endif > > .set_curr_task = set_curr_task_idle, >
On 01/17/2014 04:23 PM, Peter Zijlstra wrote: > On Fri, Jan 17, 2014 at 04:06:45PM +0100, Daniel Lezcano wrote: >>> I suppose we could write something like: >>> >>> struct task_struct *pick_next_task(struct rq *rq, struct task_struct *prev) >>> { >>> const struct sched_class *class; >>> struct task_struct *p; >>> >>> again: >>> if (likely(rq->nr_running)) { >>> >>> if (likely(rq->nr_running == rq->cfs.h_nr_running)) >>> return fair_sched_class.pick_next_task(rq, prev); >>> >>> for_each_class(class) { >>> p = class->pick_next_task(rq, prev); >>> if (p) >>> return p; >>> } >>> } >>> >>> if (idle_balance(rq)) >>> goto again; >>> >>> rq->idle_stamp = rq_clock(rq); >>> >>> return idle_sched_class.pick_next_task(rq, prev); >>> } >>> >>> Which keeps idle_balance() before put_prev_task(), and by using >>> idle_sched_clas.pick_next_task() doesn't rape the idle class interface >>> like you did :-) >> >> But put_prev_task is called before pick_next_task, so idle_balance() is >> called after now, no ? > > No, put_prev_task() is called by the pick_next_task() that returns a > task. So in the idle case above, the idle_sched_class.pick_next_task() > will do the required put_prev_task(). Ah, ok. Let me try it.
On 01/17/2014 04:33 PM, Peter Zijlstra wrote: > On Fri, Jan 17, 2014 at 04:26:13PM +0100, Daniel Lezcano wrote: >> >> Ah, ok. Let me try it. >> > > http://programming.kicks-ass.net/sekrit/patches.tar.bz2 > > has a queue that applies to tip/master. > > The patches as on lkml need a little help in applying. > > They've not been near a compiler yet though :/ Thanks a lot for the series. That makes my life easier :) I did a small compilation fix with the pick_next_task function for the deadline scheduler and currently hackbench is running. I will give you the results in a moment.
On 01/17/2014 04:33 PM, Peter Zijlstra wrote: > On Fri, Jan 17, 2014 at 04:26:13PM +0100, Daniel Lezcano wrote: >> >> Ah, ok. Let me try it. >> > > http://programming.kicks-ass.net/sekrit/patches.tar.bz2 > > has a queue that applies to tip/master. > > The patches as on lkml need a little help in applying. > > They've not been near a compiler yet though :/ > Here are the results: Col1 : tip/sched/core Col2 : the patchset above Col3 : the patchset above + idle task shortcut hackbench -s 4096 -l 1000 -g 10 -f 40 -T 33.306 32.720 31.902 32.344 32.139 32.214 33.342 33.281 33.056 33.319 33.421 31.789 32.325 32.540 31.941 33.701 32.978 32.229 34.981 32.418 30.218 32.379 31.656 31.717 32.135 32.241 32.812 32.531 32.790 31.967 avg: 33.036 32.618 31.984 hackbench -p -s 4096 -l 1000 -g 10 -f 40 27.595 27.601 26.331 25.192 29.336 30.222 26.057 28.579 28.351 28.397 29.419 28.554 27.628 25.045 30.470 28.976 28.027 29.823 28.764 29.361 26.902 27.632 30.101 26.285 29.129 29.248 26.975 26.020 25.047 29.324 avg: 27.539 28.176 28.324 hackbench -P -s 4096 -l 1000 -g 10 -f 40 32.788 32.057 30.691 33.158 33.251 34.315 32.713 33.076 32.880 32.488 32.878 32.792 31.240 33.003 32.958 32.493 33.096 32.244 33.940 33.660 31.550 34.733 33.777 31.829 32.598 34.117 33.714 34.091 33.196 31.336 avg: 33.024 33.211 32.431
On 01/17/2014 04:33 PM, Peter Zijlstra wrote: > On Fri, Jan 17, 2014 at 04:26:13PM +0100, Daniel Lezcano wrote: >> >> Ah, ok. Let me try it. >> > > http://programming.kicks-ass.net/sekrit/patches.tar.bz2 > > has a queue that applies to tip/master. > > The patches as on lkml need a little help in applying. > > They've not been near a compiler yet though :/ Hi Peter, do you want me to resend the full series with your patchset included ? Thanks -- Daniel
On 01/21/2014 10:06 AM, Peter Zijlstra wrote: > On Tue, Jan 21, 2014 at 09:41:31AM +0100, Daniel Lezcano wrote: >> On 01/17/2014 04:33 PM, Peter Zijlstra wrote: >>> On Fri, Jan 17, 2014 at 04:26:13PM +0100, Daniel Lezcano wrote: >>>> >>>> Ah, ok. Let me try it. >>>> >>> >>> http://programming.kicks-ass.net/sekrit/patches.tar.bz2 >>> >>> has a queue that applies to tip/master. >>> >>> The patches as on lkml need a little help in applying. >>> >>> They've not been near a compiler yet though :/ >> >> Hi Peter, >> >> do you want me to resend the full serie with your patchset included ? > > Nah, I'll send it out, I wanted PJT or Ben to look over one more patch I > did, meant to do that yesterday but you know how those things go :-) Yeah :) Thanks ! -- Daniel
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b7e3635..c7a8f4e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2586,7 +2586,43 @@ pick_next_task(struct rq *rq) return p; } - BUG(); /* the idle class will always have a runnable task */ + /* + * We must have at least one task to run, the idle task is + * returned by the shortcut in pick_next_task_or_idle. If we + * fall here, we don't have any task to run, so something is + * wrong when we thought the cpu was not going to idle. + */ + BUG(); +} + +static inline struct task_struct *pick_next_task_or_idle(struct rq *rq) +{ + if (likely(rq->nr_running)) + return pick_next_task(rq); + + rq->idle_stamp = 0; + + /* + * If there is a task balanced on this cpu, pick the next task, + * otherwise fall in the optimization by picking the idle task + * directly. + */ + if (idle_balance(rq)) + return pick_next_task(rq); + + rq->idle_stamp = rq_clock(rq); + + /* + * Optimization: pick_next_task will return rq->idle in any case but + * after walking through the different sched domains. Let's add a + * shortcut to directly return the idle task. + */ + schedstat_inc(rq, sched_goidle); +#ifdef CONFIG_SMP + /* Trigger the post schedule to do an idle_enter for CFS */ + rq->post_schedule = 1; +#endif + return rq->idle; } /* @@ -2679,11 +2715,8 @@ need_resched: pre_schedule(rq, prev); - if (unlikely(!rq->nr_running)) - rq->idle_stamp = idle_balance(rq) ? 
0 : rq_clock(rq); - put_prev_task(rq, prev); - next = pick_next_task(rq); + next = pick_next_task_or_idle(rq); clear_tsk_need_resched(prev); clear_preempt_need_resched(); rq->skip_clock_update = 0; diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c index 516c3d9..0b4c94b 100644 --- a/kernel/sched/idle_task.c +++ b/kernel/sched/idle_task.c @@ -33,16 +33,6 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl resched_task(rq->idle); } -static struct task_struct *pick_next_task_idle(struct rq *rq) -{ - schedstat_inc(rq, sched_goidle); -#ifdef CONFIG_SMP - /* Trigger the post schedule to do an idle_enter for CFS */ - rq->post_schedule = 1; -#endif - return rq->idle; -} - /* * It is not legal to sleep in the idle task - print a warning * message if some code attempts to do it: @@ -60,6 +50,11 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) { } +static struct task_struct *pick_next_task_idle(struct rq *rq) +{ + return NULL; +} + static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued) { }
With the previous patch, we have no ambiguity on going to idle. So we can pick directly the idle task instead of looking up all the domains which will in any case return no task except the idle_task. Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org> --- kernel/sched/core.c | 43 ++++++++++++++++++++++++++++++++++++++----- kernel/sched/idle_task.c | 15 +++++---------- 2 files changed, 43 insertions(+), 15 deletions(-)