/*	$NetBSD: kern_sleepq.c,v 1.51 2016/07/03 14:24:58 christos Exp $	*/

/*-
 * Copyright (c) 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Sleep queue implementation, used by turnstiles and general sleep/wakeup
 * interfaces.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.51 2016/07/03 14:24:58 christos Exp $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/cpu.h>
#include <sys/intr.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/systm.h>
#include <sys/sleepq.h>
#include <sys/ktrace.h>

/*
 * for sleepq_abort:
 * During autoconfiguration or after a panic, a sleep will simply lower the
 * priority briefly to allow interrupts, then return.  The priority to be
 * used (IPL_SAFEPRI) is machine-dependent, thus this value is initialized and
 * maintained in the machine-dependent layers.  This priority will typically
 * be 0, or the lowest priority that is safe for use on the interrupt stack;
 * it can be made higher to block network software interrupts after panics.
 */
#ifndef	IPL_SAFEPRI
#define	IPL_SAFEPRI	0
#endif
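
/*
 * Example: a port that wants to block network software interrupts after
 * a panic could raise the value in its machine-dependent headers.  This
 * is an illustrative sketch only; the header and level are port-specific:
 *
 *	#define	IPL_SAFEPRI	IPL_SOFTNET
 */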

static int	sleepq_sigtoerror(lwp_t *, int);

/* General purpose sleep table, used by mtsleep() and condition variables. */
sleeptab_t	sleeptab __cacheline_aligned;

/*
 * sleeptab_init:
 *
 *	Initialize a sleep table.
 */
void
sleeptab_init(sleeptab_t *st)
{
	sleepq_t *sq;
	int i;

	for (i = 0; i < SLEEPTAB_HASH_SIZE; i++) {
		sq = &st->st_queues[i].st_queue;
		st->st_queues[i].st_mutex =
		    mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
		sleepq_init(sq);
	}
}

/*
 * sleepq_init:
 *
 *	Prepare a sleep queue for use.
 */
void
sleepq_init(sleepq_t *sq)
{

	TAILQ_INIT(sq);
}

/*
 * sleepq_remove:
 *
 *	Remove an LWP from a sleep queue and wake it up.
 */
void
sleepq_remove(sleepq_t *sq, lwp_t *l)
{
	struct schedstate_percpu *spc;
	struct cpu_info *ci;

	KASSERT(lwp_locked(l, NULL));

	TAILQ_REMOVE(sq, l, l_sleepchain);
	l->l_syncobj = &sched_syncobj;
	l->l_wchan = NULL;
	l->l_sleepq = NULL;
	l->l_flag &= ~LW_SINTR;

	ci = l->l_cpu;
	spc = &ci->ci_schedstate;

	/*
	 * If not sleeping, the LWP must have been stopped or suspended.
	 * Let whoever holds it in that state set it running again.
	 */
	if (l->l_stat != LSSLEEP) {
		KASSERT(l->l_stat == LSSTOP || l->l_stat == LSSUSPENDED);
		lwp_setlock(l, spc->spc_lwplock);
		return;
	}

	/*
	 * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
	 * about to call mi_switch(), in which case it will yield.
	 */
	if ((l->l_pflag & LP_RUNNING) != 0) {
		l->l_stat = LSONPROC;
		l->l_slptime = 0;
		lwp_setlock(l, spc->spc_lwplock);
		return;
	}

	/* Update sleep time delta; call the scheduler's wake-up handler */
	l->l_slpticksum += (hardclock_ticks - l->l_slpticks);
	sched_wakeup(l);

	/* Look for a CPU to wake up */
	l->l_cpu = sched_takecpu(l);
	ci = l->l_cpu;
	spc = &ci->ci_schedstate;

	/*
	 * Set it running.
	 */
	spc_lock(ci);
	lwp_setlock(l, spc->spc_mutex);
	sched_setrunnable(l);
	l->l_stat = LSRUN;
	l->l_slptime = 0;
	sched_enqueue(l, false);
	spc_unlock(ci);
}

/*
 * sleepq_insert:
 *
 *	Insert an LWP into the sleep queue, optionally sorting by priority.
 */
static void
sleepq_insert(sleepq_t *sq, lwp_t *l, syncobj_t *sobj)
{

	if ((sobj->sobj_flag & SOBJ_SLEEPQ_SORTED) != 0) {
		lwp_t *l2;
		const int pri = lwp_eprio(l);

		TAILQ_FOREACH(l2, sq, l_sleepchain) {
			if (lwp_eprio(l2) < pri) {
				TAILQ_INSERT_BEFORE(l2, l, l_sleepchain);
				return;
			}
		}
	}

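	/*
	 * Either the queue is unsorted, or no lower-priority LWP was
	 * found above; in the sorted case, appending at the tail keeps
	 * the queue ordered from highest to lowest effective priority.
	 */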
	if ((sobj->sobj_flag & SOBJ_SLEEPQ_LIFO) != 0)
		TAILQ_INSERT_HEAD(sq, l, l_sleepchain);
	else
		TAILQ_INSERT_TAIL(sq, l, l_sleepchain);
}

/*
 * sleepq_enqueue:
 *
 *	Enter an LWP into the sleep queue and prepare for sleep.  The sleep
 *	queue must already be locked, and any interlock (such as the kernel
 *	lock) must have been released (see sleeptab_lookup(), sleepq_enter()).
 */
void
sleepq_enqueue(sleepq_t *sq, wchan_t wchan, const char *wmesg, syncobj_t *sobj)
{
	lwp_t *l = curlwp;

	KASSERT(lwp_locked(l, NULL));
	KASSERT(l->l_stat == LSONPROC);
	KASSERT(l->l_wchan == NULL && l->l_sleepq == NULL);

	l->l_syncobj = sobj;
	l->l_wchan = wchan;
	l->l_sleepq = sq;
	l->l_wmesg = wmesg;
	l->l_slptime = 0;
	l->l_stat = LSSLEEP;
	l->l_sleeperr = 0;

	sleepq_insert(sq, l, sobj);

	/* Record the time at which the LWP went to sleep. */
	l->l_slpticks = hardclock_ticks;
	sched_slept(l);
}
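
/*
 * Example: a typical caller sequence (an illustrative sketch, not a
 * verbatim in-tree consumer; "ident" stands for some wait channel and
 * "timo" for a timeout in ticks):
 *
 *	sleepq_t *sq;
 *	kmutex_t *mp;
 *	int error;
 *
 *	sq = sleeptab_lookup(&sleeptab, ident, &mp);
 *	sleepq_enter(sq, curlwp, mp);
 *	sleepq_enqueue(sq, ident, "example", &sleep_syncobj);
 *	error = sleepq_block(timo, true);
 */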

/*
 * sleepq_block:
 *
 *	After any intermediate step such as releasing an interlock, switch.
 *	sleepq_block() may return early under exceptional conditions, for
 *	example if the LWP's containing process is exiting.
 *
 *	timo is a timeout in ticks.  timo = 0 specifies an infinite timeout.
 */
int
sleepq_block(int timo, bool catch_p)
{
	int error = 0, sig;
	struct proc *p;
	lwp_t *l = curlwp;
	bool early = false;
	int biglocks = l->l_biglocks;

	ktrcsw(1, 0);

	/*
	 * If sleeping interruptibly, check for pending signals, exits or
	 * core dump events.
	 */
	if (catch_p) {
		l->l_flag |= LW_SINTR;
		if ((l->l_flag & (LW_CANCELLED|LW_WEXIT|LW_WCORE)) != 0) {
			l->l_flag &= ~LW_CANCELLED;
			error = EINTR;
			early = true;
		} else if ((l->l_flag & LW_PENDSIG) != 0 && sigispending(l, 0))
			early = true;
	}

	if (early) {
		/* lwp_unsleep() will release the lock */
		lwp_unsleep(l, true);
	} else {
		if (timo) {
			callout_schedule(&l->l_timeout_ch, timo);
		}
		mi_switch(l);

		/* The LWP and sleep queue are now unlocked. */
		if (timo) {
			/*
			 * Even if the callout appears to have fired, we need to
			 * stop it in order to synchronise with other CPUs.
			 */
			if (callout_halt(&l->l_timeout_ch, NULL))
				error = EWOULDBLOCK;
		}
	}

	if (catch_p && error == 0) {
		p = l->l_proc;
		if ((l->l_flag & (LW_CANCELLED | LW_WEXIT | LW_WCORE)) != 0)
			error = EINTR;
		else if ((l->l_flag & LW_PENDSIG) != 0) {
			/*
			 * Acquiring p_lock may cause us to recurse
			 * through the sleep path and back into this
			 * routine, but is safe because LWPs sleeping
			 * on locks are non-interruptible.  We will
			 * not recurse again.
			 */
			mutex_enter(p->p_lock);
			if (((sig = sigispending(l, 0)) != 0 &&
			    (sigprop[sig] & SA_STOP) == 0) ||
			    (sig = issignal(l)) != 0)
				error = sleepq_sigtoerror(l, sig);
			mutex_exit(p->p_lock);
		}
	}

	ktrcsw(0, 0);
	if (__predict_false(biglocks != 0)) {
		KERNEL_LOCK(biglocks, NULL);
	}
	return error;
}
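
/*
 * Example: interpreting the return value of an interruptible, timed
 * sleep (a sketch; mstohz() converts milliseconds to ticks, and a timo
 * of 0 would mean no timeout):
 *
 *	error = sleepq_block(mstohz(100), true);
 *	if (error == EWOULDBLOCK)
 *		...			timed out
 *	else if (error == EINTR || error == ERESTART)
 *		...			interrupted by a signal
 *	else
 *		...			awoken normally
 */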

/*
 * sleepq_wake:
 *
 *	Wake zero or more LWPs blocked on a single wait channel.
 */
void
sleepq_wake(sleepq_t *sq, wchan_t wchan, u_int expected, kmutex_t *mp)
{
	lwp_t *l, *next;

	KASSERT(mutex_owned(mp));

	for (l = TAILQ_FIRST(sq); l != NULL; l = next) {
		KASSERT(l->l_sleepq == sq);
		KASSERT(l->l_mutex == mp);
		next = TAILQ_NEXT(l, l_sleepchain);
		if (l->l_wchan != wchan)
			continue;
		sleepq_remove(sq, l);
		if (--expected == 0)
			break;
	}

	mutex_spin_exit(mp);
}
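
/*
 * Example: waking waiters on a wait channel "ident" (a sketch; the
 * sleep queue mutex acquired by sleeptab_lookup() is released by
 * sleepq_wake() itself).  Passing 1 wakes at most one LWP; passing
 * (u_int)-1 effectively wakes them all:
 *
 *	sq = sleeptab_lookup(&sleeptab, ident, &mp);
 *	sleepq_wake(sq, ident, 1, mp);
 */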

/*
 * sleepq_unsleep:
 *
 *	Remove an LWP from its sleep queue and set it runnable again.
 *	sleepq_unsleep() is called with the LWP's mutex held, and will
 *	always release it.
 */
void
sleepq_unsleep(lwp_t *l, bool cleanup)
{
	sleepq_t *sq = l->l_sleepq;
	kmutex_t *mp = l->l_mutex;

	KASSERT(lwp_locked(l, mp));
	KASSERT(l->l_wchan != NULL);

	sleepq_remove(sq, l);
	if (cleanup) {
		mutex_spin_exit(mp);
	}
}

/*
 * sleepq_timeout:
 *
 *	Entered via the callout(9) subsystem to time out an LWP that is on a
 *	sleep queue.
 */
void
sleepq_timeout(void *arg)
{
	lwp_t *l = arg;

	/*
	 * Lock the LWP.  Assuming it's still on the sleep queue, its
	 * current mutex will also be the sleep queue mutex.
	 */
	lwp_lock(l);

	if (l->l_wchan == NULL) {
		/* Somebody beat us to it. */
		lwp_unlock(l);
		return;
	}

	lwp_unsleep(l, true);
}
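
/*
 * The callout that fires sleepq_timeout() is bound to each LWP when it
 * is set up, roughly as follows (a sketch; the actual code lives in the
 * LWP creation path):
 *
 *	callout_init(&l->l_timeout_ch, CALLOUT_MPSAFE);
 *	callout_setfunc(&l->l_timeout_ch, sleepq_timeout, l);
 */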

/*
 * sleepq_sigtoerror:
 *
 *	Given a signal number, interpret and return an error code.
 */
static int
sleepq_sigtoerror(lwp_t *l, int sig)
{
	struct proc *p = l->l_proc;
	int error;

	KASSERT(mutex_owned(p->p_lock));

	/*
	 * If the signal's handler was installed without SA_RESTART, the
	 * interrupted syscall must not restart: return EINTR.  Otherwise
	 * allow it to restart with ERESTART.
	 */
	if ((SIGACTION(p, sig).sa_flags & SA_RESTART) == 0)
		error = EINTR;
	else
		error = ERESTART;

	return error;
}

/*
 * sleepq_abort:
 *
 *	After a panic or during autoconfiguration, lower the interrupt
 *	priority level to give pending interrupts a chance to run, and
 *	then return.  Called if sleepq_dontsleep() returns non-zero;
 *	always returns zero.
 */
int
sleepq_abort(kmutex_t *mtx, int unlock)
{
	int s;

	s = splhigh();
	splx(IPL_SAFEPRI);
	splx(s);
	if (mtx != NULL && unlock != 0)
		mutex_exit(mtx);

	return 0;
}
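
/*
 * Example: the usual guard at a sleep entry point, bailing out during
 * autoconfiguration or after a panic (a sketch of the caller's side):
 *
 *	if (sleepq_dontsleep(curlwp))
 *		return sleepq_abort(mtx, unlock);
 */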

/*
 * sleepq_reinsert:
 *
 *	Move the LWP to its new position in the sleep queue after a
 *	possible change of its effective priority.
 */
static void
sleepq_reinsert(sleepq_t *sq, lwp_t *l)
{

	KASSERT(l->l_sleepq == sq);
	if ((l->l_syncobj->sobj_flag & SOBJ_SLEEPQ_SORTED) == 0) {
		return;
	}

	/*
	 * Don't let the sleep queue become empty, even briefly.
	 * cv_signal() and cv_broadcast() inspect it without the
	 * sleep queue lock held and need to see a non-empty queue
	 * head if there are waiters.
	 */
	if (TAILQ_FIRST(sq) == l && TAILQ_NEXT(l, l_sleepchain) == NULL) {
		return;
	}
	TAILQ_REMOVE(sq, l, l_sleepchain);
	sleepq_insert(sq, l, l->l_syncobj);
}

/*
 * sleepq_changepri:
 *
 *	Adjust the priority of an LWP residing on a sleepq.
 */
void
sleepq_changepri(lwp_t *l, pri_t pri)
{
	sleepq_t *sq = l->l_sleepq;

	KASSERT(lwp_locked(l, NULL));

	l->l_priority = pri;
	sleepq_reinsert(sq, l);
}

/*
 * sleepq_lendpri:
 *
 *	Adjust the lent priority of an LWP residing on a sleepq.
 */
void
sleepq_lendpri(lwp_t *l, pri_t pri)
{
	sleepq_t *sq = l->l_sleepq;

	KASSERT(lwp_locked(l, NULL));

	l->l_inheritedprio = pri;
	l->l_auxprio = MAX(l->l_inheritedprio, l->l_protectprio);
	sleepq_reinsert(sq, l);
}