脚本宝典收集整理的这篇文章主要介绍了Linux内核源码—CFS调度(4.20.17),脚本宝典觉得挺不错的,现在分享给大家,也给大家做个参考。
每个 cpu 都有一个对应的运行队列 rq,在 rq 中维护着不同调度策略的调度队列。
struct rq { ... struct cfs_rq cfs; struct rt_rq rt; struct dl_rq dl; ... };
cfs的调度队列通过红黑树维护,在 cfs_rq 的数据结构中,struct rb_root_cached tasks_timeline 包含了红黑树 struct rb_root rb_root 和 最左叶子节点缓存 struct rb_node *rb_leftmost 。
struct cfs_rq { struct load_weight load; //CFS运行队列的负载权重值 unsigned long runnable_weight; unsigned int nr_running; unsigned int h_nr_running; u64 exec_clock; u64 min_vruntime; #ifndef CONFIG_64BIT u64 min_vruntime_copy; #endif struct rb_root_cached tasks_timeline; //红黑树,维护调度实体 /* * 'curr' points to currently running entity on this cfs_rq. * It is set to NULL otherwise (i.e when none are currently running). */ struct sched_entity *curr; //当前运行的调度实体 struct sched_entity *next; //下一个调度实体 struct sched_entity *last; //队列中最后的调度实体 struct sched_entity *skip; //跳过的调度实体 #ifdef CONFIG_SCHED_DEBUG unsigned int nr_spread_over; #endif #ifdef CONFIG_SMP /* * CFS load tracking */ struct sched_avg avg; #ifndef CONFIG_64BIT u64 load_last_update_time_copy; #endif struct { raw_spinlock_t lock ____cacheline_aligned; int nr; unsigned long load_avg; unsigned long util_avg; unsigned long runnable_sum; } removed; #ifdef CONFIG_FAIR_GROUP_SCHED unsigned long tg_load_avg_contrib; long propagate; long prop_runnable_sum; /* * h_load = weight * f(tg) * * Where f(tg) is the recursive weight fraction assigned to * this group. */ unsigned long h_load; u64 last_h_load_update; struct sched_entity *h_load_next; #endif /* CONFIG_FAIR_GROUP_SCHED */ #endif /* CONFIG_SMP */ #ifdef CONFIG_FAIR_GROUP_SCHED struct rq *rq; /* CPU runqueue to which this cfs_rq is attached */ /* * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in * a hierarchy). Non-leaf lrqs hold other higher schedulable entities * (like users, containers etc.) * * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a CPU. * This list is used during load balance. */ int on_list; struct list_head leaf_cfs_rq_list; struct task_group *tg; /* group that "owns" this runqueue */ #ifdef CONFIG_CFS_BANDWIDTH int runtime_enabled; int expires_seq; u64 runtime_expires; s64 runtime_remaining; u64 throttled_clock; u64 throttled_clock_task; u64 throttled_clock_task_time; int throttled; int throttle_count; struct list_head throttled_list; #endif /* CONFIG_CFS_BANDWIDTH */ #endif /* CONFIG_FAIR_GROUP_SCHED */ };
那么CFS是根据什么来对任务进行排序呢?----------》虚拟运行时间 vruntime。
update_curr 函数(/kernel/sched/fair.c)实现了 vruntime 的更新,其步骤是计算出当前进程的运行时间 delta_exec,再结合当前可运行进程总数对delta_exec 进行加权运算。
static void update_curr(struct cfs_rq *cfs_rq) { struct sched_entity *curr = cfs_rq->curr; //获取当前调度实体 u64 now = rq_clock_task(rq_of(cfs_rq)); //获取当前时间 u64 delta_exec; if (unlikely(!curr)) return; delta_exec = now - curr->exec_start; //计算当前进程已执行的时间,exec_start是调度实体的开始执行时间 if (unlikely((s64)delta_exec <= 0)) return; curr->exec_start = now; schedstat_set(curr->statistics.exec_max, max(delta_exec, curr->statistics.exec_max)); curr->sum_exec_runtime += delta_exec; //修改调度实体已执行总时间 schedstat_add(cfs_rq->exec_clock, delta_exec); curr->vruntime += calc_delta_fair(delta_exec, curr); //修改调度实体虚拟运行时间 update_min_vruntime(cfs_rq); if (entity_is_task(curr)) { //如果调度实体是task,也要给它的调度组记录执行时间 struct task_struct *curtask = task_of(curr); trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime); cgroup_account_cputime(curtask, delta_exec); account_group_exec_runtime(curtask, delta_exec); } account_cfs_rq_runtime(cfs_rq, delta_exec); }
calc_delta_fair(delta_exec, curr) 实现了虚拟运行时间的计算:
虚拟运行时间 = delta_exec * NICE_0_LOAD / 当前进程的权重
而具体在 __calc_delta 中,是通过(delta_exec * (weight * lw->inv_weight)) >> WMULT_SHIFT 实现的,通过左移和右移避免浮点运算。
从公式可以得出,如果一个进程的虚拟运行时间越小,说明实际运行的时间越少或者是进程的权重大,那么就应该具有更高的优先度。而红黑树维护的就是进程的 vruntime 值,每次选择 vruntime 最小的进程执行,该节点缓存在了最左叶子节点 struct rb_node *rb_leftmost 中。
static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se) { if (unlikely(se->load.weight != NICE_0_LOAD)) delta = __calc_delta(delta, NICE_0_LOAD, &se->load); return delta; }
在进程变为可运行状态(被唤醒)或者是通过 fork() 调用第一次创建进程时,需要将进程插入红黑树,调用 __enqueue_entity 实现这一过程。删除节点也是同样的道理。
static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) { struct rb_node **link = &cfs_rq->tasks_timeline.rb_root.rb_node; //红黑树根节点 struct rb_node *parent = NULL; struct sched_entity *entry; bool leftmost = true; /* * Find the right place in the rbtree: */ while (*link) { parent = *link; entry = rb_entry(parent, struct sched_entity, run_node); //rb_entry 只是 container_of 的封装而已,找到首地址 /* * We dont care about collisions. Nodes with * the same key stay together. */ if (entity_before(se, entry)) { link = &parent->rb_left; } else { link = &parent->rb_right; leftmost = false; } } rb_link_node(&se->run_node, parent, link); //在红黑树中插入节点 rb_insert_color_cached(&se->run_node, //设置节点的颜色 &cfs_rq->tasks_timeline, leftmost); }
进程调度的主要入口点是函数 schedule(/kernel/sched/core.c),它通过 pick_next_task() 函数选择下一个进程,如果选出来的进程与当前运行进程不一致,则调用 context_switch() 函数进行上下文切换。
static void __sched notrace __schedule(bool preempt) { cpu = smp_processor_id(); rq = cpu_rq(cpu); prev = rq->curr; //获取当前运行进程 ... next = pick_next_task(rq, prev, &rf); clear_tsk_need_resched(prev); clear_preempt_need_resched(); if (likely(prev != next)) { ... rq = context_switch(rq, prev, next, &rf); } else { rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); rq_unlock_irq(rq, &rf); } ... }
pick_next_task() 函数的实现并不复杂,这里用到了一点优化,如果所有的可运行进程都在 cfs 中,那么就可以直接调用 cfs 的 pick_next_task(), 否则就需要按照调度器的优先级来选择。
static inline struct task_struct * pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) { const struct sched_class *class; struct task_struct *p; /* * Optimization: we know that if all tasks are in the fair class we can * call that function directly, but only if the @prev task wasn't of a * higher scheduling class, because otherwise those loose the * opportunity to pull in more work from other CPUs. */ if (likely((prev->sched_class == &idle_sched_class || prev->sched_class == &fair_sched_class) && rq->nr_running == rq->cfs.h_nr_running)) { p = fair_sched_class.pick_next_task(rq, prev, rf); if (unlikely(p == RETRY_TASK)) goto again; /* Assumes fair_sched_class->next == idle_sched_class */ if (unlikely(!p)) p = idle_sched_class.pick_next_task(rq, prev, rf); return p; } again: for_each_class(class) { p = class->pick_next_task(rq, prev, rf); if (p) { if (unlikely(p == RETRY_TASK)) goto again; return p; } } /* The idle class should always have a runnable task: */ BUG(); }
References:
以上是脚本宝典为你收集整理的Linux内核源码—CFS调度(4.20.17)全部内容,希望文章能够帮你解决Linux内核源码—CFS调度(4.20.17)所遇到的问题。
本图文内容来源于网友网络收集整理提供,作为学习参考使用,版权属于原作者。
如您有任何意见或建议可联系处理。小编QQ:384754419,请注明来意。