/*	$NetBSD: kern_runq.c,v 1.45 2016/07/07 06:55:43 msaitoh Exp $	*/

/*
 * Copyright (c) 2007, 2008 Mindaugas Rasiukevicius <rmind at NetBSD org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.45 2016/07/07 06:55:43 msaitoh Exp $");

#include "opt_dtrace.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bitops.h>
#include <sys/cpu.h>
#include <sys/idle.h>
#include <sys/intr.h>
#include <sys/kmem.h>
#include <sys/lwp.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/evcnt.h>
/*
 * Priority related definitions.
 */
#define	PRI_TS_COUNT	(NPRI_USER)
#define	PRI_RT_COUNT	(PRI_COUNT - PRI_TS_COUNT)
#define	PRI_HTS_RANGE	(PRI_TS_COUNT / 10)

#define	PRI_HIGHEST_TS	(MAXPRI_USER)

/*
 * Bits per map.
 */
#define	BITMAP_BITS	(32)
#define	BITMAP_SHIFT	(5)
#define	BITMAP_MSB	(0x80000000U)
#define	BITMAP_MASK	(BITMAP_BITS - 1)
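
/*
 * Each priority level has one bit in the per-CPU bitmap: priority p maps
 * to word (p >> BITMAP_SHIFT) and to bit (BITMAP_MSB >> (p & BITMAP_MASK)),
 * e.g. priority 35 maps to bit 0x10000000 of r_bitmap[1].  Higher
 * priorities therefore occupy less significant bits within a word, so
 * ffs() on the highest non-zero word yields the highest runnable priority
 * (see sched_enqueue() and sched_dequeue()).
 */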

/*
 * Structures, runqueue.
 */

const int	schedppq = 1;

typedef struct {
	TAILQ_HEAD(, lwp) q_head;
} queue_t;

typedef struct {
	/* Bitmap */
	uint32_t	r_bitmap[PRI_COUNT >> BITMAP_SHIFT];
	/* Counters */
	u_int		r_count;	/* Count of threads */
	u_int		r_avgcount;	/* Average count of threads */
	u_int		r_mcount;	/* Count of migratable threads */
	/* Runqueues */
	queue_t		r_rt_queue[PRI_RT_COUNT];
	queue_t		r_ts_queue[PRI_TS_COUNT];
	/* Event counters */
	struct evcnt	r_ev_pull;
	struct evcnt	r_ev_push;
	struct evcnt	r_ev_stay;
	struct evcnt	r_ev_localize;
} runqueue_t;

static void *	sched_getrq(runqueue_t *, const pri_t);
#ifdef MULTIPROCESSOR
static lwp_t *	sched_catchlwp(struct cpu_info *);
static void	sched_balance(void *);
#endif

/*
 * Preemption control.
 */
int		sched_upreempt_pri = 0;
#ifdef __HAVE_PREEMPTION
# ifdef DEBUG
int		sched_kpreempt_pri = 0;
# else
int		sched_kpreempt_pri = PRI_USER_RT;
# endif
#else
int		sched_kpreempt_pri = 1000;
#endif
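/*
 * The thresholds above are used by sched_enqueue(): an enqueued LWP whose
 * effective priority is at or above sched_kpreempt_pri requests kernel
 * preemption (RESCHED_KPREEMPT), one at or above sched_upreempt_pri
 * requests an immediate user preemption (RESCHED_IMMED), and anything
 * lower only gets a lazy reschedule (RESCHED_LAZY).
 */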

/*
 * Migration and balancing.
 */
static u_int	cacheht_time;		/* Cache hotness time */
static u_int	min_catch;		/* Minimum LWP count for catching */
static u_int	balance_period;		/* Balance period */
static struct cpu_info *worker_ci;	/* Victim CPU */
#ifdef MULTIPROCESSOR
static struct callout balance_ch;	/* Balancer callout */
#endif

#ifdef KDTRACE_HOOKS
struct lwp *curthread;
#endif

void
runq_init(void)
{

	/* Balancing */
	worker_ci = curcpu();
	cacheht_time = mstohz(3);		/* ~3 ms */
	balance_period = mstohz(300);		/* ~300 ms */

	/* Minimum count of LWPs for catching */
	min_catch = 1;

	/* Initialize the balancing callout and schedule it */
#ifdef MULTIPROCESSOR
	callout_init(&balance_ch, CALLOUT_MPSAFE);
	callout_setfunc(&balance_ch, sched_balance, NULL);
	callout_schedule(&balance_ch, balance_period);
#endif
}

void
sched_cpuattach(struct cpu_info *ci)
{
	runqueue_t *ci_rq;
	void *rq_ptr;
	u_int i, size;

	if (ci->ci_schedstate.spc_lwplock == NULL) {
		ci->ci_schedstate.spc_lwplock =
		    mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
	}
	if (ci == lwp0.l_cpu) {
		/* Initialize the scheduler structure of the primary LWP */
		lwp0.l_mutex = ci->ci_schedstate.spc_lwplock;
	}
	if (ci->ci_schedstate.spc_mutex != NULL) {
		/* Already initialized. */
		return;
	}

	/* Allocate the run queue */
	size = roundup2(sizeof(runqueue_t), coherency_unit) + coherency_unit;
	rq_ptr = kmem_zalloc(size, KM_SLEEP);
	if (rq_ptr == NULL) {
		panic("sched_cpuattach: could not allocate the runqueue");
	}
	ci_rq = (void *)(roundup2((uintptr_t)(rq_ptr), coherency_unit));

	/* Initialize run queues */
	ci->ci_schedstate.spc_mutex =
	    mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
	for (i = 0; i < PRI_RT_COUNT; i++)
		TAILQ_INIT(&ci_rq->r_rt_queue[i].q_head);
	for (i = 0; i < PRI_TS_COUNT; i++)
		TAILQ_INIT(&ci_rq->r_ts_queue[i].q_head);

	ci->ci_schedstate.spc_sched_info = ci_rq;

	evcnt_attach_dynamic(&ci_rq->r_ev_pull, EVCNT_TYPE_MISC, NULL,
	    cpu_name(ci), "runqueue pull");
	evcnt_attach_dynamic(&ci_rq->r_ev_push, EVCNT_TYPE_MISC, NULL,
	    cpu_name(ci), "runqueue push");
	evcnt_attach_dynamic(&ci_rq->r_ev_stay, EVCNT_TYPE_MISC, NULL,
	    cpu_name(ci), "runqueue stay");
	evcnt_attach_dynamic(&ci_rq->r_ev_localize, EVCNT_TYPE_MISC, NULL,
	    cpu_name(ci), "runqueue localize");
}

/*
 * Control of the runqueue.
 */

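/*
 * sched_getrq: return the queue head for the given priority level.
 * Time-sharing priorities (0 .. PRI_HIGHEST_TS) index r_ts_queue
 * directly; real-time priorities above that range index r_rt_queue.
 */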
static inline void *
sched_getrq(runqueue_t *ci_rq, const pri_t prio)
{

	KASSERT(prio < PRI_COUNT);
	return (prio <= PRI_HIGHEST_TS) ?
	    &ci_rq->r_ts_queue[prio].q_head :
	    &ci_rq->r_rt_queue[prio - PRI_HIGHEST_TS - 1].q_head;
}

void
sched_enqueue(struct lwp *l, bool swtch)
{
	runqueue_t *ci_rq;
	struct schedstate_percpu *spc;
	TAILQ_HEAD(, lwp) *q_head;
	const pri_t eprio = lwp_eprio(l);
	struct cpu_info *ci;
	int type;

	ci = l->l_cpu;
	spc = &ci->ci_schedstate;
	ci_rq = spc->spc_sched_info;
	KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	/* Update the last run time on switch */
	if (__predict_true(swtch == true))
		l->l_rticksum += (hardclock_ticks - l->l_rticks);
	else if (l->l_rticks == 0)
		l->l_rticks = hardclock_ticks;

	/* Enqueue the thread */
	q_head = sched_getrq(ci_rq, eprio);
	if (TAILQ_EMPTY(q_head)) {
		u_int i;
		uint32_t q;

		/* Mark bit */
		i = eprio >> BITMAP_SHIFT;
		q = BITMAP_MSB >> (eprio & BITMAP_MASK);
		KASSERT((ci_rq->r_bitmap[i] & q) == 0);
		ci_rq->r_bitmap[i] |= q;
	}
	TAILQ_INSERT_TAIL(q_head, l, l_runq);
	ci_rq->r_count++;
	if ((l->l_pflag & LP_BOUND) == 0)
		ci_rq->r_mcount++;

	/*
	 * Update the runqueue's highest priority if this thread's
	 * priority is higher.
	 */
	if (eprio > spc->spc_maxpriority)
		spc->spc_maxpriority = eprio;

	sched_newts(l);

	/*
	 * Wake the chosen CPU or cause a preemption if the newly
	 * enqueued thread has higher priority.  Don't cause a
	 * preemption if the thread is yielding (swtch).
	 */
	if (!swtch && eprio > spc->spc_curpriority) {
		if (eprio >= sched_kpreempt_pri)
			type = RESCHED_KPREEMPT;
		else if (eprio >= sched_upreempt_pri)
			type = RESCHED_IMMED;
		else
			type = RESCHED_LAZY;
		cpu_need_resched(ci, type);
	}
}

void
sched_dequeue(struct lwp *l)
{
	runqueue_t *ci_rq;
	TAILQ_HEAD(, lwp) *q_head;
	struct schedstate_percpu *spc;
	const pri_t eprio = lwp_eprio(l);

	spc = &l->l_cpu->ci_schedstate;
	ci_rq = spc->spc_sched_info;
	KASSERT(lwp_locked(l, spc->spc_mutex));

	KASSERT(eprio <= spc->spc_maxpriority);
	KASSERT(ci_rq->r_bitmap[eprio >> BITMAP_SHIFT] != 0);
	KASSERT(ci_rq->r_count > 0);

	if (spc->spc_migrating == l)
		spc->spc_migrating = NULL;

	ci_rq->r_count--;
	if ((l->l_pflag & LP_BOUND) == 0)
		ci_rq->r_mcount--;

	q_head = sched_getrq(ci_rq, eprio);
	TAILQ_REMOVE(q_head, l, l_runq);
	if (TAILQ_EMPTY(q_head)) {
		u_int i;
		uint32_t q;

		/* Unmark bit */
		i = eprio >> BITMAP_SHIFT;
		q = BITMAP_MSB >> (eprio & BITMAP_MASK);
		KASSERT((ci_rq->r_bitmap[i] & q) != 0);
		ci_rq->r_bitmap[i] &= ~q;

		/*
		 * Update the runqueue's highest priority if this was
		 * the last thread in the highest-priority queue.
		 */
		if (eprio != spc->spc_maxpriority)
			return;

		do {
			if (ci_rq->r_bitmap[i] != 0) {
				q = ffs(ci_rq->r_bitmap[i]);
				spc->spc_maxpriority =
				    (i << BITMAP_SHIFT) + (BITMAP_BITS - q);
				return;
			}
		} while (i--);

		/* If nothing was found, set the lowest value */
		spc->spc_maxpriority = 0;
	}
}

/*
 * Migration and balancing.
 */
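
/*
 * Three mechanisms move LWPs between CPUs: sched_takecpu() chooses the
 * CPU on which an LWP should be enqueued, sched_catchlwp() lets an idle
 * CPU pull a runnable LWP from another CPU's runqueue, and the
 * sched_balance() callout periodically records the busiest CPU
 * (worker_ci) as the victim for such pulls.
 */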

#ifdef MULTIPROCESSOR

/* Estimate whether the LWP is cache-hot */
static inline bool
lwp_cache_hot(const struct lwp *l)
{

	if (__predict_false(l->l_slptime || l->l_rticks == 0))
		return false;

	return (hardclock_ticks - l->l_rticks <= cacheht_time);
}

/* Check whether the LWP can migrate to the chosen CPU */
static inline bool
sched_migratable(const struct lwp *l, struct cpu_info *ci)
{
	const struct schedstate_percpu *spc = &ci->ci_schedstate;
	KASSERT(lwp_locked(__UNCONST(l), NULL));

	/* Is the CPU offline? */
	if (__predict_false(spc->spc_flags & SPCF_OFFLINE))
		return false;

	/* Is an affinity set? */
	if (__predict_false(l->l_affinity))
		return kcpuset_isset(l->l_affinity, cpu_index(ci));

	/* Is there a processor-set? */
	return (spc->spc_psid == l->l_psid);
}

/*
 * sched_takecpu: choose the CPU on which to run the LWP.
 * Return a different CPU if migrating it is worthwhile.
 */
struct cpu_info *
sched_takecpu(struct lwp *l)
{
	struct cpu_info *ci, *tci, *pivot, *next;
	struct schedstate_percpu *spc;
	runqueue_t *ci_rq, *ici_rq;
	pri_t eprio, lpri, pri;

	KASSERT(lwp_locked(l, NULL));

	/* If the thread is strictly bound, do not consider other CPUs */
	ci = l->l_cpu;
	if (l->l_pflag & LP_BOUND)
		return ci;

	spc = &ci->ci_schedstate;
	ci_rq = spc->spc_sched_info;

	/* Consider staying only if the thread is in the appropriate processor-set */
	if (__predict_true(spc->spc_psid == l->l_psid)) {
		/* If this thread's CPU is idle, run there */
		if (ci_rq->r_count == 0) {
			ci_rq->r_ev_stay.ev_count++;
			return ci;
		}
		/* Stay if the thread is cache-hot */
		eprio = lwp_eprio(l);
		if (__predict_true(l->l_stat != LSIDL) &&
		    lwp_cache_hot(l) && eprio >= spc->spc_curpriority) {
			ci_rq->r_ev_stay.ev_count++;
			return ci;
		}
	} else {
		eprio = lwp_eprio(l);
	}

	/* Run on the current CPU if the thread's priority is higher */
	ci = curcpu();
	spc = &ci->ci_schedstate;
	if (eprio > spc->spc_curpriority && sched_migratable(l, ci)) {
		ci_rq = spc->spc_sched_info;
		ci_rq->r_ev_localize.ev_count++;
		return ci;
	}

	/*
	 * Look for the CPU with the lowest priority thread.  In case of
	 * equal priority, choose the CPU with the fewest threads.
	 */
	pivot = l->l_cpu;
	ci = pivot;
	tci = pivot;
	lpri = PRI_COUNT;
	do {
		if ((next = cpu_lookup(cpu_index(ci) + 1)) == NULL) {
			/* Reached the end, start from the beginning. */
			next = cpu_lookup(0);
		}
		spc = &ci->ci_schedstate;
		ici_rq = spc->spc_sched_info;
		pri = MAX(spc->spc_curpriority, spc->spc_maxpriority);
		if (pri > lpri)
			continue;

		if (pri == lpri && ci_rq->r_count < ici_rq->r_count)
			continue;

		if (!sched_migratable(l, ci))
			continue;

		lpri = pri;
		tci = ci;
		ci_rq = ici_rq;
	} while (ci = next, ci != pivot);

	ci_rq = tci->ci_schedstate.spc_sched_info;
	ci_rq->r_ev_push.ev_count++;

	return tci;
}

/*
 * sched_catchlwp: try to catch an LWP from the runqueue of another CPU.
 */
static struct lwp *
sched_catchlwp(struct cpu_info *ci)
{
	struct cpu_info *curci = curcpu();
	struct schedstate_percpu *spc, *curspc;
	TAILQ_HEAD(, lwp) *q_head;
	runqueue_t *ci_rq;
	struct lwp *l;

	curspc = &curci->ci_schedstate;
	spc = &ci->ci_schedstate;
	KASSERT(curspc->spc_psid == spc->spc_psid);

	ci_rq = spc->spc_sched_info;
	if (ci_rq->r_mcount < min_catch) {
		spc_unlock(ci);
		return NULL;
	}

	/* Take the highest priority thread */
	q_head = sched_getrq(ci_rq, spc->spc_maxpriority);
	l = TAILQ_FIRST(q_head);

	for (;;) {
		/* Check the first and next result from the queue */
		if (l == NULL) {
			break;
		}
		KASSERTMSG(l->l_stat == LSRUN, "%s l %p (%s) l_stat %d",
		    ci->ci_data.cpu_name,
		    l, (l->l_name ? l->l_name : l->l_proc->p_comm), l->l_stat);

		/* Look for a thread that is allowed to migrate */
		if ((l->l_pflag & LP_BOUND) || lwp_cache_hot(l) ||
		    !sched_migratable(l, curci)) {
			l = TAILQ_NEXT(l, l_runq);
			continue;
		}

		/* Grab the thread and move it to the local run queue */
		sched_dequeue(l);

		/*
		 * If the LWP is still context switching, we may need to
		 * spin-wait before changing its CPU.
		 */
		if (__predict_false(l->l_ctxswtch != 0)) {
			u_int count;
			count = SPINLOCK_BACKOFF_MIN;
			while (l->l_ctxswtch)
				SPINLOCK_BACKOFF(count);
		}
		l->l_cpu = curci;
		ci_rq->r_ev_pull.ev_count++;
		lwp_unlock_to(l, curspc->spc_mutex);
		sched_enqueue(l, false);
		return l;
	}
	spc_unlock(ci);

	return l;
}

/*
 * sched_balance: periodic calculations for balancing.  Maintain a decaying
 * average of migratable threads for each CPU and remember the busiest CPU
 * as worker_ci, the victim for idle CPUs to pull from.
 */
static void
sched_balance(void *nocallout)
{
	struct cpu_info *ci, *hci;
	runqueue_t *ci_rq;
	CPU_INFO_ITERATOR cii;
	u_int highest;

	hci = curcpu();
	highest = 0;

	/* Take the counts without locking */
	for (CPU_INFO_FOREACH(cii, ci)) {
		ci_rq = ci->ci_schedstate.spc_sched_info;

		/* Decaying average of migratable threads */
		ci_rq->r_avgcount = (ci_rq->r_avgcount + ci_rq->r_mcount) >> 1;

		/* Look for the CPU with the highest average */
		if (ci_rq->r_avgcount > highest) {
			hci = ci;
			highest = ci_rq->r_avgcount;
		}
	}

	/* Update the worker */
	worker_ci = hci;

	if (nocallout == NULL)
		callout_schedule(&balance_ch, balance_period);
}

/*
 * sched_idle: called from each CPU's idle loop.  Finish any pending LWP
 * migration first; then, if the local runqueue is empty, run the balancer
 * and try to catch an LWP from the busiest CPU.
 */
void
sched_idle(void)
{
	struct cpu_info *ci = curcpu(), *tci = NULL;
	struct schedstate_percpu *spc, *tspc;
	runqueue_t *ci_rq;
	bool dlock = false;

	/* Check if there is a migrating LWP */
	spc = &ci->ci_schedstate;
	if (spc->spc_migrating == NULL)
		goto no_migration;

	spc_lock(ci);
	for (;;) {
		struct lwp *l;

		l = spc->spc_migrating;
		if (l == NULL)
			break;

		/*
		 * If this is a second attempt and the target CPU has
		 * changed, drop the old lock.
		 */
		if (dlock == true && tci != l->l_target_cpu) {
			KASSERT(tci != NULL);
			spc_unlock(tci);
			dlock = false;
		}

		/*
		 * Nothing to do if the destination has changed to the
		 * local CPU, or the migration was done by another CPU.
		 */
		tci = l->l_target_cpu;
		if (tci == NULL || tci == ci) {
			spc->spc_migrating = NULL;
			l->l_target_cpu = NULL;
			break;
		}
		tspc = &tci->ci_schedstate;

		/*
		 * Double-lock the runqueues; we do this only once.  Locks
		 * are taken in ascending cpu_info order to avoid deadlock:
		 * if that order would be violated, try-lock the target and,
		 * on failure, drop our lock and reacquire both in order.
		 */
		if (dlock == false) {
			dlock = true;
			if (ci < tci) {
				spc_lock(tci);
			} else if (!mutex_tryenter(tspc->spc_mutex)) {
				spc_unlock(ci);
				spc_lock(tci);
				spc_lock(ci);
				/* Check the situation again. */
				continue;
			}
		}

		/* Migrate the thread */
		KASSERT(l->l_stat == LSRUN);
		spc->spc_migrating = NULL;
		l->l_target_cpu = NULL;
		sched_dequeue(l);
		l->l_cpu = tci;
		lwp_setlock(l, tspc->spc_mutex);
		sched_enqueue(l, false);
		break;
	}
	if (dlock == true) {
		KASSERT(tci != NULL);
		spc_unlock(tci);
	}
	spc_unlock(ci);

no_migration:
	ci_rq = spc->spc_sched_info;
	if ((spc->spc_flags & SPCF_OFFLINE) != 0 || ci_rq->r_count != 0) {
		return;
	}

	/* Reset the counter, and call the balancer */
	ci_rq->r_avgcount = 0;
	sched_balance(ci);
	tci = worker_ci;
	tspc = &tci->ci_schedstate;
	if (ci == tci || spc->spc_psid != tspc->spc_psid)
		return;
	spc_dlock(ci, tci);
	(void)sched_catchlwp(tci);
	spc_unlock(ci);
}

#else

/*
 * stubs for !MULTIPROCESSOR
 */

struct cpu_info *
sched_takecpu(struct lwp *l)
{

	return l->l_cpu;
}

void
sched_idle(void)
{

}
#endif	/* MULTIPROCESSOR */

/*
 * Scheduling statistics and balancing.
 */
void
sched_lwp_stats(struct lwp *l)
{
	int batch;

	KASSERT(lwp_locked(l, NULL));

	/* Update sleep time */
	if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
	    l->l_stat == LSSUSPENDED)
		l->l_slptime++;

	/*
	 * Mark the thread as CPU-bound (LW_BATCH) if its run time exceeds
	 * its sleep time.  Treat it as a batch thread only if it was
	 * already marked on the previous pass.
	 */
	batch = (l->l_rticksum > l->l_slpticksum);
	if (batch != 0) {
		if ((l->l_flag & LW_BATCH) == 0)
			batch = 0;
		l->l_flag |= LW_BATCH;
	} else
		l->l_flag &= ~LW_BATCH;

	/*
	 * A CPU-bound thread that never sleeps would monopolize the CPU.
	 * In that case reset the last sleep tick count and check it on the
	 * next pass: if it is still zero, pick a migration target and
	 * clear the batch flag.
	 */
	if (batch && (l->l_slptime + l->l_slpticksum) == 0) {
		if (l->l_slpticks == 0) {
			if (l->l_target_cpu == NULL &&
			    (l->l_stat == LSRUN || l->l_stat == LSONPROC)) {
				struct cpu_info *ci = sched_takecpu(l);
				l->l_target_cpu = (ci != l->l_cpu) ? ci : NULL;
			}
			l->l_flag &= ~LW_BATCH;
		} else {
			l->l_slpticks = 0;
		}
	}

	/* Reset the time sums */
	l->l_slpticksum = 0;
	l->l_rticksum = 0;

	/* Scheduler-specific hook */
	sched_pstats_hook(l, batch);
#ifdef KDTRACE_HOOKS
	curthread = l;
#endif
}

/*
 * Scheduler mill: select the next LWP to run on this CPU.
 */
struct lwp *
sched_nextlwp(void)
{
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc;
	TAILQ_HEAD(, lwp) *q_head;
	runqueue_t *ci_rq;
	struct lwp *l;

	/* Return to the idle LWP if there is a migrating thread */
	spc = &ci->ci_schedstate;
	if (__predict_false(spc->spc_migrating != NULL))
		return NULL;
	ci_rq = spc->spc_sched_info;

#ifdef MULTIPROCESSOR
	/* If the runqueue is empty, try to catch a thread from another CPU */
	if (__predict_false(ci_rq->r_count == 0)) {
		struct schedstate_percpu *cspc;
		struct cpu_info *cci;

		/* However, offline CPUs should not do this */
		if (__predict_false(spc->spc_flags & SPCF_OFFLINE))
			return NULL;

		/* Reset the counter, and call the balancer */
		ci_rq->r_avgcount = 0;
		sched_balance(ci);
		cci = worker_ci;
		cspc = &cci->ci_schedstate;
		if (ci == cci || spc->spc_psid != cspc->spc_psid ||
		    !mutex_tryenter(cci->ci_schedstate.spc_mutex))
			return NULL;
		return sched_catchlwp(cci);
	}
#else
	if (__predict_false(ci_rq->r_count == 0))
		return NULL;
#endif

	/* Take the highest priority thread */
	KASSERT(ci_rq->r_bitmap[spc->spc_maxpriority >> BITMAP_SHIFT]);
	q_head = sched_getrq(ci_rq, spc->spc_maxpriority);
	l = TAILQ_FIRST(q_head);
	KASSERT(l != NULL);

	sched_oncpu(l);
	l->l_rticks = hardclock_ticks;

	return l;
}

/*
 * sched_curcpu_runnable_p: return true if curcpu() should exit the idle loop.
 */

bool
sched_curcpu_runnable_p(void)
{
	const struct cpu_info *ci;
	const struct schedstate_percpu *spc;
	const runqueue_t *ci_rq;
	bool rv;

	kpreempt_disable();
	ci = curcpu();
	spc = &ci->ci_schedstate;
	ci_rq = spc->spc_sched_info;

#ifndef __HAVE_FAST_SOFTINTS
	if (ci->ci_data.cpu_softints) {
		kpreempt_enable();
		return true;
	}
#endif

	rv = (ci_rq->r_count != 0) ? true : false;
	kpreempt_enable();

	return rv;
}

/*
 * Sysctl nodes and initialization.
 */

SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
{
	const struct sysctlnode *node = NULL;

	sysctl_createv(clog, 0, NULL, &node,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "sched",
		SYSCTL_DESCR("Scheduler options"),
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);

	if (node == NULL)
		return;

	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "cacheht_time",
		SYSCTL_DESCR("Cache hotness time (in ticks)"),
		NULL, 0, &cacheht_time, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "balance_period",
		SYSCTL_DESCR("Balance period (in ticks)"),
		NULL, 0, &balance_period, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "min_catch",
		SYSCTL_DESCR("Minimal count of threads for catching"),
		NULL, 0, &min_catch, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "timesoftints",
		SYSCTL_DESCR("Track CPU time for soft interrupts"),
		NULL, 0, &softint_timing, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "kpreempt_pri",
		SYSCTL_DESCR("Minimum priority to trigger kernel preemption"),
		NULL, 0, &sched_kpreempt_pri, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "upreempt_pri",
		SYSCTL_DESCR("Minimum priority to trigger user preemption"),
		NULL, 0, &sched_upreempt_pri, 0,
		CTL_CREATE, CTL_EOL);
}

/*
 * Debugging.
 */

#ifdef DDB

void
sched_print_runqueue(void (*pr)(const char *, ...))
{
	runqueue_t *ci_rq;
	struct cpu_info *ci, *tci;
	struct schedstate_percpu *spc;
	struct lwp *l;
	struct proc *p;
	CPU_INFO_ITERATOR cii;

	for (CPU_INFO_FOREACH(cii, ci)) {
		int i;

		spc = &ci->ci_schedstate;
		ci_rq = spc->spc_sched_info;

		(*pr)("Run-queue (CPU = %u):\n", ci->ci_index);
		(*pr)(" pid.lid = %d.%d, r_count = %u, r_avgcount = %u, "
		    "maxpri = %d, mlwp = %p\n",
#ifdef MULTIPROCESSOR
		    ci->ci_curlwp->l_proc->p_pid, ci->ci_curlwp->l_lid,
#else
		    curlwp->l_proc->p_pid, curlwp->l_lid,
#endif
		    ci_rq->r_count, ci_rq->r_avgcount, spc->spc_maxpriority,
		    spc->spc_migrating);
		i = (PRI_COUNT >> BITMAP_SHIFT) - 1;
		do {
			uint32_t q;
			q = ci_rq->r_bitmap[i];
			(*pr)(" bitmap[%d] => [ %d (0x%x) ]\n", i, ffs(q), q);
		} while (i--);
	}

	(*pr)(" %5s %4s %4s %10s %3s %18s %4s %4s %s\n",
	    "LID", "PRI", "EPRI", "FL", "ST", "LWP", "CPU", "TCI", "LRTICKS");

	PROCLIST_FOREACH(p, &allproc) {
		(*pr)(" /- %d (%s)\n", (int)p->p_pid, p->p_comm);
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			ci = l->l_cpu;
			tci = l->l_target_cpu;
			(*pr)(" | %5d %4u %4u 0x%8.8x %3s %18p %4u %4d %u\n",
			    (int)l->l_lid, l->l_priority, lwp_eprio(l),
			    l->l_flag, l->l_stat == LSRUN ? "RQ" :
			    (l->l_stat == LSSLEEP ? "SQ" : "-"),
			    l, ci->ci_index, (tci ? tci->ci_index : -1),
			    (u_int)(hardclock_ticks - l->l_rticks));
		}
	}
}

#endif