/*	$NetBSD: kern_mutex.c,v 1.63 2016/07/07 06:55:43 msaitoh Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel mutex implementation, modeled after those found in Solaris,
 * a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	Richard McDougall.
 */

#define	__MUTEX_PRIVATE

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_mutex.c,v 1.63 2016/07/07 06:55:43 msaitoh Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/proc.h>
#include <sys/mutex.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/kernel.h>
#include <sys/intr.h>
#include <sys/lock.h>
#include <sys/types.h>

#include <dev/lockstat.h>

#include <machine/lock.h>

/*
 * When not running a debug kernel, spin mutexes are not much
 * more than an splraiseipl() and splx() pair.
 */
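
/*
 * For illustration only (not part of the build): in that configuration,
 * a hypothetical critical section under a spin mutex reduces to roughly:
 *
 *	s = splraiseipl(MUTEX_SPIN_IPL(mtx));	// block same/lower IPL code
 *	...critical section...
 *	splx(s);				// restore the previous IPL
 *
 * The MUTEX_SPIN_SPLRAISE()/MUTEX_SPIN_SPLRESTORE() macros below add
 * per-CPU bookkeeping so that nested spin mutexes restore the original
 * IPL only at the outermost release.
 */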

#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
#define	FULL
#endif

/*
 * Debugging support.
 */

#define	MUTEX_WANTLOCK(mtx)					\
	LOCKDEBUG_WANTLOCK(MUTEX_DEBUG_P(mtx), (mtx),		\
	    (uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_LOCKED(mtx)					\
	LOCKDEBUG_LOCKED(MUTEX_DEBUG_P(mtx), (mtx), NULL,	\
	    (uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_UNLOCKED(mtx)					\
	LOCKDEBUG_UNLOCKED(MUTEX_DEBUG_P(mtx), (mtx),		\
	    (uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_ABORT(mtx, msg)					\
	mutex_abort(mtx, __func__, msg)

#if defined(LOCKDEBUG)

#define	MUTEX_DASSERT(mtx, cond)				\
do {								\
	if (!(cond))						\
		MUTEX_ABORT(mtx, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0)

#else	/* LOCKDEBUG */

#define	MUTEX_DASSERT(mtx, cond)	/* nothing */

#endif	/* LOCKDEBUG */

#if defined(DIAGNOSTIC)

#define	MUTEX_ASSERT(mtx, cond)					\
do {								\
	if (!(cond))						\
		MUTEX_ABORT(mtx, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0)

#else	/* DIAGNOSTIC */

#define	MUTEX_ASSERT(mtx, cond)	/* nothing */

#endif	/* DIAGNOSTIC */

/*
 * Some architectures can't use __cpu_simple_lock as-is, so allow a way
 * for them to use an alternate definition.
 */
#ifndef MUTEX_SPINBIT_LOCK_INIT
#define	MUTEX_SPINBIT_LOCK_INIT(mtx)	__cpu_simple_lock_init(&(mtx)->mtx_lock)
#endif
#ifndef MUTEX_SPINBIT_LOCKED_P
#define	MUTEX_SPINBIT_LOCKED_P(mtx)	__SIMPLELOCK_LOCKED_P(&(mtx)->mtx_lock)
#endif
#ifndef MUTEX_SPINBIT_LOCK_TRY
#define	MUTEX_SPINBIT_LOCK_TRY(mtx)	__cpu_simple_lock_try(&(mtx)->mtx_lock)
#endif
#ifndef MUTEX_SPINBIT_LOCK_UNLOCK
#define	MUTEX_SPINBIT_LOCK_UNLOCK(mtx)	__cpu_simple_unlock(&(mtx)->mtx_lock)
#endif

#ifndef MUTEX_INITIALIZE_SPIN_IPL
#define	MUTEX_INITIALIZE_SPIN_IPL(mtx, ipl)			\
	((mtx)->mtx_ipl = makeiplcookie((ipl)))
#endif

/*
 * Spin mutex SPL save / restore.
 */

#define	MUTEX_SPIN_SPLRAISE(mtx)				\
do {								\
	struct cpu_info *x__ci;					\
	int x__cnt, s;						\
	s = splraiseipl(MUTEX_SPIN_IPL(mtx));			\
	x__ci = curcpu();					\
	x__cnt = x__ci->ci_mtx_count--;				\
	__insn_barrier();					\
	if (x__cnt == 0)					\
		x__ci->ci_mtx_oldspl = (s);			\
} while (/* CONSTCOND */ 0)

#define	MUTEX_SPIN_SPLRESTORE(mtx)				\
do {								\
	struct cpu_info *x__ci = curcpu();			\
	int s = x__ci->ci_mtx_oldspl;				\
	__insn_barrier();					\
	if (++(x__ci->ci_mtx_count) == 0)			\
		splx(s);					\
} while (/* CONSTCOND */ 0)
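
/*
 * Illustrative sketch (hypothetical caller; the counter semantics follow
 * directly from the macros above).  ci_mtx_count lets spin mutexes nest:
 * only the outermost acquisition saves the old SPL, and only the
 * outermost release restores it:
 *
 *	mutex_spin_enter(&a);	// ci_mtx_count 0 -> -1, old SPL saved
 *	mutex_spin_enter(&b);	// ci_mtx_count -1 -> -2, SPL raised only
 *	mutex_spin_exit(&b);	// ci_mtx_count -2 -> -1, SPL unchanged
 *	mutex_spin_exit(&a);	// ci_mtx_count -1 -> 0, splx(old SPL)
 */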

/*
 * For architectures that provide 'simple' mutexes: they provide a
 * CAS function that is either MP-safe, or does not need to be MP
 * safe.  Adaptive mutexes on these architectures do not require an
 * additional interlock.
 */

#ifdef __HAVE_SIMPLE_MUTEXES

#define	MUTEX_OWNER(owner)					\
	(owner & MUTEX_THREAD)
#define	MUTEX_HAS_WAITERS(mtx)					\
	(((int)(mtx)->mtx_owner & MUTEX_BIT_WAITERS) != 0)

#define	MUTEX_INITIALIZE_ADAPTIVE(mtx, dodebug)			\
do {								\
	if (!dodebug)						\
		(mtx)->mtx_owner |= MUTEX_BIT_NODEBUG;		\
} while (/* CONSTCOND */ 0)

#define	MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl)		\
do {								\
	(mtx)->mtx_owner = MUTEX_BIT_SPIN;			\
	if (!dodebug)						\
		(mtx)->mtx_owner |= MUTEX_BIT_NODEBUG;		\
	MUTEX_INITIALIZE_SPIN_IPL((mtx), (ipl));		\
	MUTEX_SPINBIT_LOCK_INIT((mtx));				\
} while (/* CONSTCOND */ 0)

#define	MUTEX_DESTROY(mtx)					\
do {								\
	(mtx)->mtx_owner = MUTEX_THREAD;			\
} while (/* CONSTCOND */ 0)

#define	MUTEX_SPIN_P(mtx)					\
	(((mtx)->mtx_owner & MUTEX_BIT_SPIN) != 0)
#define	MUTEX_ADAPTIVE_P(mtx)					\
	(((mtx)->mtx_owner & MUTEX_BIT_SPIN) == 0)

#define	MUTEX_DEBUG_P(mtx)	(((mtx)->mtx_owner & MUTEX_BIT_NODEBUG) == 0)
#if defined(LOCKDEBUG)
#define	MUTEX_OWNED(owner)	(((owner) & ~MUTEX_BIT_NODEBUG) != 0)
#define	MUTEX_INHERITDEBUG(n, o)	(n) |= (o) & MUTEX_BIT_NODEBUG
#else /* defined(LOCKDEBUG) */
#define	MUTEX_OWNED(owner)	((owner) != 0)
#define	MUTEX_INHERITDEBUG(n, o)	/* nothing */
#endif /* defined(LOCKDEBUG) */
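
/*
 * A sketch of the adaptive lock word, for reference only.  The
 * authoritative bit assignments live in sys/mutex.h and vary by
 * architecture; what matters here is that the owner is an LWP pointer
 * whose low bits are always clear, so they can double as flags:
 *
 *	mtx_owner == (owning LWP address, i.e. owner & MUTEX_THREAD)
 *		     | MUTEX_BIT_WAITERS  (an LWP is sleeping on the lock)
 *		     | MUTEX_BIT_SPIN     (spin rather than adaptive mutex)
 *		     | MUTEX_BIT_NODEBUG  (LOCKDEBUG is not tracking it)
 *
 * A zero owner field means "unowned"; that is what MUTEX_ACQUIRE()'s
 * compare-and-swap tests against.
 */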

static inline int
MUTEX_ACQUIRE(kmutex_t *mtx, uintptr_t curthread)
{
	int rv;
	uintptr_t oldown = 0;
	uintptr_t newown = curthread;

	MUTEX_INHERITDEBUG(oldown, mtx->mtx_owner);
	MUTEX_INHERITDEBUG(newown, oldown);
	rv = MUTEX_CAS(&mtx->mtx_owner, oldown, newown);
	MUTEX_RECEIVE(mtx);
	return rv;
}

static inline int
MUTEX_SET_WAITERS(kmutex_t *mtx, uintptr_t owner)
{
	int rv;
	rv = MUTEX_CAS(&mtx->mtx_owner, owner, owner | MUTEX_BIT_WAITERS);
	MUTEX_RECEIVE(mtx);
	return rv;
}

static inline void
MUTEX_RELEASE(kmutex_t *mtx)
{
	uintptr_t newown;

	MUTEX_GIVE(mtx);
	newown = 0;
	MUTEX_INHERITDEBUG(newown, mtx->mtx_owner);
	mtx->mtx_owner = newown;
}
#endif	/* __HAVE_SIMPLE_MUTEXES */

/*
 * Patch in stubs via strong alias where they are not available.
 */

#if defined(LOCKDEBUG)
#undef	__HAVE_MUTEX_STUBS
#undef	__HAVE_SPIN_MUTEX_STUBS
#endif

#ifndef __HAVE_MUTEX_STUBS
__strong_alias(mutex_enter,mutex_vector_enter);
__strong_alias(mutex_exit,mutex_vector_exit);
#endif

#ifndef __HAVE_SPIN_MUTEX_STUBS
__strong_alias(mutex_spin_enter,mutex_vector_enter);
__strong_alias(mutex_spin_exit,mutex_vector_exit);
#endif

static void	mutex_abort(kmutex_t *, const char *, const char *);
static void	mutex_dump(volatile void *);

lockops_t mutex_spin_lockops = {
	"Mutex",
	LOCKOPS_SPIN,
	mutex_dump
};

lockops_t mutex_adaptive_lockops = {
	"Mutex",
	LOCKOPS_SLEEP,
	mutex_dump
};

syncobj_t mutex_syncobj = {
	SOBJ_SLEEPQ_SORTED,
	turnstile_unsleep,
	turnstile_changepri,
	sleepq_lendpri,
	(void *)mutex_owner,
};

/*
 * mutex_dump:
 *
 * Dump the contents of a mutex structure.
 */
void
mutex_dump(volatile void *cookie)
{
	volatile kmutex_t *mtx = cookie;

	printf_nolog("owner field : %#018lx wait/spin: %16d/%d\n",
	    (long)MUTEX_OWNER(mtx->mtx_owner), MUTEX_HAS_WAITERS(mtx),
	    MUTEX_SPIN_P(mtx));
}

/*
 * mutex_abort:
 *
 * Dump information about an error and panic the system.  This
 * generates a lot of machine code in the DIAGNOSTIC case, so
 * we ask the compiler to not inline it.
 */
void __noinline
mutex_abort(kmutex_t *mtx, const char *func, const char *msg)
{

	LOCKDEBUG_ABORT(mtx, (MUTEX_SPIN_P(mtx) ?
	    &mutex_spin_lockops : &mutex_adaptive_lockops), func, msg);
}

/*
 * mutex_init:
 *
 * Initialize a mutex for use.  Note that adaptive mutexes are in
 * essence spin mutexes that can sleep to avoid deadlock and wasting
 * CPU time.  We can't easily provide a type of mutex that always
 * sleeps - see comments in mutex_vector_enter() about releasing
 * mutexes unlocked.
 */
void
mutex_init(kmutex_t *mtx, kmutex_type_t type, int ipl)
{
	bool dodebug;

	memset(mtx, 0, sizeof(*mtx));

	switch (type) {
	case MUTEX_ADAPTIVE:
		KASSERT(ipl == IPL_NONE);
		break;
	case MUTEX_DEFAULT:
	case MUTEX_DRIVER:
		if (ipl == IPL_NONE || ipl == IPL_SOFTCLOCK ||
		    ipl == IPL_SOFTBIO || ipl == IPL_SOFTNET ||
		    ipl == IPL_SOFTSERIAL) {
			type = MUTEX_ADAPTIVE;
		} else {
			type = MUTEX_SPIN;
		}
		break;
	default:
		break;
	}

	switch (type) {
	case MUTEX_NODEBUG:
		dodebug = LOCKDEBUG_ALLOC(mtx, NULL,
		    (uintptr_t)__builtin_return_address(0));
		MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl);
		break;
	case MUTEX_ADAPTIVE:
		dodebug = LOCKDEBUG_ALLOC(mtx, &mutex_adaptive_lockops,
		    (uintptr_t)__builtin_return_address(0));
		MUTEX_INITIALIZE_ADAPTIVE(mtx, dodebug);
		break;
	case MUTEX_SPIN:
		dodebug = LOCKDEBUG_ALLOC(mtx, &mutex_spin_lockops,
		    (uintptr_t)__builtin_return_address(0));
		MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl);
		break;
	default:
		panic("mutex_init: impossible type");
		break;
	}
}
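
/*
 * Typical usage, as a hedged sketch (the 'sc_lock' field and the IPL
 * choice are hypothetical):
 *
 *	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);  // adaptive
 *	mutex_enter(&sc->sc_lock);
 *	...critical section (acquisition may sleep if contended)...
 *	mutex_exit(&sc->sc_lock);
 *	mutex_destroy(&sc->sc_lock);
 *
 * Passing a hard-interrupt IPL such as IPL_VM instead of IPL_NONE would
 * make the same call produce a spin mutex, per the switch above.
 */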

/*
 * mutex_destroy:
 *
 * Tear down a mutex.
 */
void
mutex_destroy(kmutex_t *mtx)
{

	if (MUTEX_ADAPTIVE_P(mtx)) {
		MUTEX_ASSERT(mtx, !MUTEX_OWNED(mtx->mtx_owner) &&
		    !MUTEX_HAS_WAITERS(mtx));
	} else {
		MUTEX_ASSERT(mtx, !MUTEX_SPINBIT_LOCKED_P(mtx));
	}

	LOCKDEBUG_FREE(MUTEX_DEBUG_P(mtx), mtx);
	MUTEX_DESTROY(mtx);
}

#ifdef MULTIPROCESSOR
/*
 * mutex_oncpu:
 *
 * Return true if an adaptive mutex owner is running on a CPU in the
 * system.  If the target is waiting on the kernel big lock, then we
 * must release it.  This is necessary to avoid deadlock.
 */
static bool
mutex_oncpu(uintptr_t owner)
{
	struct cpu_info *ci;
	lwp_t *l;

	KASSERT(kpreempt_disabled());

	if (!MUTEX_OWNED(owner)) {
		return false;
	}

	/*
	 * See lwp_dtor() for why dereferencing the LWP pointer is safe.
	 * We must have kernel preemption disabled for that.
	 */
	l = (lwp_t *)MUTEX_OWNER(owner);
	ci = l->l_cpu;

	if (ci && ci->ci_curlwp == l) {
		/* Target is running; do we need to block? */
		return (ci->ci_biglock_wanted != l);
	}

	/* Not running.  It may be safe to block now. */
	return false;
}
#endif	/* MULTIPROCESSOR */

/*
 * mutex_vector_enter:
 *
 * Support routine for mutex_enter() that must handle all cases.  In
 * the LOCKDEBUG case, mutex_enter() is always aliased here, even if
 * fast-path stubs are available.  If a mutex_spin_enter() stub is
 * not available, then it is also aliased directly here.
 */
void
mutex_vector_enter(kmutex_t *mtx)
{
	uintptr_t owner, curthread;
	turnstile_t *ts;
#ifdef MULTIPROCESSOR
	u_int count;
#endif
	LOCKSTAT_COUNTER(spincnt);
	LOCKSTAT_COUNTER(slpcnt);
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_FLAG(lsflag);

	/*
	 * Handle spin mutexes.
	 */
	if (MUTEX_SPIN_P(mtx)) {
#if defined(LOCKDEBUG) && defined(MULTIPROCESSOR)
		u_int spins = 0;
#endif
		MUTEX_SPIN_SPLRAISE(mtx);
		MUTEX_WANTLOCK(mtx);
#ifdef FULL
		if (MUTEX_SPINBIT_LOCK_TRY(mtx)) {
			MUTEX_LOCKED(mtx);
			return;
		}
#if !defined(MULTIPROCESSOR)
		MUTEX_ABORT(mtx, "locking against myself");
#else /* !MULTIPROCESSOR */

		LOCKSTAT_ENTER(lsflag);
		LOCKSTAT_START_TIMER(lsflag, spintime);
		count = SPINLOCK_BACKOFF_MIN;

		/*
		 * Spin, testing the lock word and applying exponential
		 * backoff to reduce cache line ping-ponging between CPUs.
		 */
		do {
			if (panicstr != NULL)
				break;
			while (MUTEX_SPINBIT_LOCKED_P(mtx)) {
				SPINLOCK_BACKOFF(count);
#ifdef LOCKDEBUG
				if (SPINLOCK_SPINOUT(spins))
					MUTEX_ABORT(mtx, "spinout");
#endif	/* LOCKDEBUG */
			}
		} while (!MUTEX_SPINBIT_LOCK_TRY(mtx));

		if (count != SPINLOCK_BACKOFF_MIN) {
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_EVENT(lsflag, mtx,
			    LB_SPIN_MUTEX | LB_SPIN, 1, spintime);
		}
		LOCKSTAT_EXIT(lsflag);
#endif	/* !MULTIPROCESSOR */
#endif	/* FULL */
		MUTEX_LOCKED(mtx);
		return;
	}

	curthread = (uintptr_t)curlwp;

	MUTEX_DASSERT(mtx, MUTEX_ADAPTIVE_P(mtx));
	MUTEX_ASSERT(mtx, curthread != 0);
	MUTEX_WANTLOCK(mtx);

	if (panicstr == NULL) {
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}

	LOCKSTAT_ENTER(lsflag);

	/*
	 * Adaptive mutex; spin trying to acquire the mutex.  If we
	 * determine that the owner is not running on a processor,
	 * then we stop spinning, and sleep instead.
	 */
	KPREEMPT_DISABLE(curlwp);
	for (owner = mtx->mtx_owner;;) {
		if (!MUTEX_OWNED(owner)) {
			/*
			 * Mutex owner clear could mean two things:
			 *
			 *	* The mutex has been released.
			 *	* The owner field hasn't been set yet.
			 *
			 * Try to acquire it again.  If that fails,
			 * we'll just loop again.
			 */
			if (MUTEX_ACQUIRE(mtx, curthread))
				break;
			owner = mtx->mtx_owner;
			continue;
		}
		if (__predict_false(panicstr != NULL)) {
			KPREEMPT_ENABLE(curlwp);
			return;
		}
		if (__predict_false(MUTEX_OWNER(owner) == curthread)) {
			MUTEX_ABORT(mtx, "locking against myself");
		}
#ifdef MULTIPROCESSOR
		/*
		 * Check to see if the owner is running on a processor.
		 * If so, then we should just spin, as the owner will
		 * likely release the lock very soon.
		 */
		if (mutex_oncpu(owner)) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			count = SPINLOCK_BACKOFF_MIN;
			do {
				KPREEMPT_ENABLE(curlwp);
				SPINLOCK_BACKOFF(count);
				KPREEMPT_DISABLE(curlwp);
				owner = mtx->mtx_owner;
			} while (mutex_oncpu(owner));
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if (!MUTEX_OWNED(owner))
				continue;
		}
#endif

		ts = turnstile_lookup(mtx);

		/*
		 * Once we have the turnstile chain interlock, mark the
		 * mutex as having waiters.  If that fails, spin again:
		 * chances are that the mutex has been released.
		 */
		if (!MUTEX_SET_WAITERS(mtx, owner)) {
			turnstile_exit(mtx);
			owner = mtx->mtx_owner;
			continue;
		}

#ifdef MULTIPROCESSOR
		/*
		 * mutex_exit() is permitted to release the mutex without
		 * any interlocking instructions, and the following can
		 * occur as a result:
		 *
		 * CPU 1: MUTEX_SET_WAITERS()   CPU 2: mutex_exit()
		 * ---------------------------- ----------------------------
		 *        ..                    acquire cache line
		 *        ..                    test for waiters
		 *  acquire cache line    <-    lose cache line
		 *  lock cache line              ..
		 *  verify mutex is held         ..
		 *  set waiters                  ..
		 *  unlock cache line            ..
		 *  lose cache line        ->    acquire cache line
		 *        ..                    clear lock word, waiters
		 *  return success
		 *
		 * There is another race that can occur: a third CPU could
		 * acquire the mutex as soon as it is released.  Since
		 * adaptive mutexes are primarily spin mutexes, this is not
		 * something that we need to worry about too much.  What we
		 * do need to ensure is that the waiters bit gets set.
		 *
		 * To allow the unlocked release, we need to make some
		 * assumptions here:
		 *
		 * o Release is the only non-atomic/unlocked operation
		 *   that can be performed on the mutex.  (It must still
		 *   be atomic on the local CPU, e.g. in case interrupted
		 *   or preempted).
		 *
		 * o At any given time, MUTEX_SET_WAITERS() can only ever
		 *   be in progress on one CPU in the system - guaranteed
		 *   by the turnstile chain lock.
		 *
		 * o No other operations other than MUTEX_SET_WAITERS()
		 *   and release can modify a mutex with a non-zero
		 *   owner field.
		 *
		 * o The result of a successful MUTEX_SET_WAITERS() call
		 *   is an unbuffered write that is immediately visible
		 *   to all other processors in the system.
		 *
		 * o If the holding LWP switches away, it posts a store
		 *   fence before changing curlwp, ensuring that any
		 *   overwrite of the mutex waiters flag by mutex_exit()
		 *   completes before the modification of curlwp becomes
		 *   visible to this CPU.
		 *
		 * o mi_switch() posts a store fence before setting curlwp
		 *   and before resuming execution of an LWP.
		 *
		 * o _kernel_lock() posts a store fence before setting
		 *   curcpu()->ci_biglock_wanted, and after clearing it.
		 *   This ensures that any overwrite of the mutex waiters
		 *   flag by mutex_exit() completes before the modification
		 *   of ci_biglock_wanted becomes visible.
		 *
		 * We now post a read memory barrier (after setting the
		 * waiters field) and check the lock holder's status again.
		 * Some of the possible outcomes (not an exhaustive list):
		 *
		 * 1. The on-CPU check returns true: the holding LWP is
		 *    running again.  The lock may be released soon and
		 *    we should spin.  Importantly, we can't trust the
		 *    value of the waiters flag.
		 *
		 * 2. The on-CPU check returns false: the holding LWP is
		 *    not running.  We now have the opportunity to check
		 *    if mutex_exit() has blatted the modifications made
		 *    by MUTEX_SET_WAITERS().
		 *
		 * 3. The on-CPU check returns false: the holding LWP may
		 *    or may not be running.  It has context switched at
		 *    some point during our check.  Again, we have the
		 *    chance to see if the waiters bit is still set or
		 *    has been overwritten.
		 *
		 * 4. The on-CPU check returns false: the holding LWP is
		 *    running on a CPU, but wants the big lock.  It's OK
		 *    to check the waiters field in this case.
		 *
		 * 5. The has-waiters check fails: the mutex has been
		 *    released, the waiters flag cleared and another LWP
		 *    now owns the mutex.
		 *
		 * 6. The has-waiters check fails: the mutex has been
		 *    released.
		 *
		 * If the waiters bit is not set it's unsafe to go to sleep,
		 * as we might never be awoken.
		 */
		if ((membar_consumer(), mutex_oncpu(owner)) ||
		    (membar_consumer(), !MUTEX_HAS_WAITERS(mtx))) {
			turnstile_exit(mtx);
			owner = mtx->mtx_owner;
			continue;
		}
#endif	/* MULTIPROCESSOR */

		LOCKSTAT_START_TIMER(lsflag, slptime);

		turnstile_block(ts, TS_WRITER_Q, mtx, &mutex_syncobj);

		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_COUNT(slpcnt, 1);

		owner = mtx->mtx_owner;
	}
	KPREEMPT_ENABLE(curlwp);

	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SLEEP1,
	    slpcnt, slptime);
	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SPIN,
	    spincnt, spintime);
	LOCKSTAT_EXIT(lsflag);

	MUTEX_DASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
	MUTEX_LOCKED(mtx);
}

/*
 * mutex_vector_exit:
 *
 * Support routine for mutex_exit() that handles all cases.
 */
void
mutex_vector_exit(kmutex_t *mtx)
{
	turnstile_t *ts;
	uintptr_t curthread;

	if (MUTEX_SPIN_P(mtx)) {
#ifdef FULL
		if (__predict_false(!MUTEX_SPINBIT_LOCKED_P(mtx))) {
			if (panicstr != NULL)
				return;
			MUTEX_ABORT(mtx, "exiting unheld spin mutex");
		}
		MUTEX_UNLOCKED(mtx);
		MUTEX_SPINBIT_LOCK_UNLOCK(mtx);
#endif
		MUTEX_SPIN_SPLRESTORE(mtx);
		return;
	}

	if (__predict_false((uintptr_t)panicstr | cold)) {
		MUTEX_UNLOCKED(mtx);
		MUTEX_RELEASE(mtx);
		return;
	}

	curthread = (uintptr_t)curlwp;
	MUTEX_DASSERT(mtx, curthread != 0);
	MUTEX_ASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
	MUTEX_UNLOCKED(mtx);
#if !defined(LOCKDEBUG)
	__USE(curthread);
#endif

#ifdef LOCKDEBUG
	/*
	 * Avoid having to take the turnstile chain lock every time
	 * around.  Raise the priority level to splhigh() in order
	 * to disable preemption and so make the following atomic.
	 */
	{
		int s = splhigh();
		if (!MUTEX_HAS_WAITERS(mtx)) {
			MUTEX_RELEASE(mtx);
			splx(s);
			return;
		}
		splx(s);
	}
#endif

	/*
	 * Get this lock's turnstile.  This gets the interlock on
	 * the sleep queue.  Once we have that, we can clear the
	 * lock.  If there was no turnstile for the lock, there
	 * were no waiters remaining.
	 */
	ts = turnstile_lookup(mtx);

	if (ts == NULL) {
		MUTEX_RELEASE(mtx);
		turnstile_exit(mtx);
	} else {
		MUTEX_RELEASE(mtx);
		turnstile_wakeup(ts, TS_WRITER_Q,
		    TS_WAITERS(ts, TS_WRITER_Q), NULL);
	}
}

#ifndef	__HAVE_SIMPLE_MUTEXES
/*
 * mutex_wakeup:
 *
 * Support routine for mutex_exit() that wakes up all waiters.
 * We assume that the mutex has been released, but it need not
 * be.
 */
void
mutex_wakeup(kmutex_t *mtx)
{
	turnstile_t *ts;

	ts = turnstile_lookup(mtx);
	if (ts == NULL) {
		turnstile_exit(mtx);
		return;
	}
	MUTEX_CLEAR_WAITERS(mtx);
	turnstile_wakeup(ts, TS_WRITER_Q, TS_WAITERS(ts, TS_WRITER_Q), NULL);
}
#endif	/* !__HAVE_SIMPLE_MUTEXES */

/*
 * mutex_owned:
 *
 * Return true if the current LWP (adaptive) or CPU (spin)
 * holds the mutex.
 */
int
mutex_owned(kmutex_t *mtx)
{

	if (mtx == NULL)
		return 0;
	if (MUTEX_ADAPTIVE_P(mtx))
		return MUTEX_OWNER(mtx->mtx_owner) == (uintptr_t)curlwp;
#ifdef FULL
	return MUTEX_SPINBIT_LOCKED_P(mtx);
#else
	return 1;
#endif
}
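
/*
 * For illustration (hypothetical caller): mutex_owned() is meant for
 * assertions, e.g.
 *
 *	KASSERT(mutex_owned(&sc->sc_lock));
 *
 * Note that for spin mutexes in the !FULL case it always returns 1,
 * so it must not be used to decide whether it is safe to take a lock.
 */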

/*
 * mutex_owner:
 *
 * Return the current owner of an adaptive mutex.  Used for
 * priority inheritance.
 */
lwp_t *
mutex_owner(kmutex_t *mtx)
{

	MUTEX_ASSERT(mtx, MUTEX_ADAPTIVE_P(mtx));
	return (struct lwp *)MUTEX_OWNER(mtx->mtx_owner);
}

/*
 * mutex_tryenter:
 *
 * Try to acquire the mutex; return non-zero if we did.
 */
int
mutex_tryenter(kmutex_t *mtx)
{
	uintptr_t curthread;

	/*
	 * Handle spin mutexes.
	 */
	if (MUTEX_SPIN_P(mtx)) {
		MUTEX_SPIN_SPLRAISE(mtx);
#ifdef FULL
		if (MUTEX_SPINBIT_LOCK_TRY(mtx)) {
			MUTEX_WANTLOCK(mtx);
			MUTEX_LOCKED(mtx);
			return 1;
		}
		MUTEX_SPIN_SPLRESTORE(mtx);
#else
		MUTEX_WANTLOCK(mtx);
		MUTEX_LOCKED(mtx);
		return 1;
#endif
	} else {
		curthread = (uintptr_t)curlwp;
		MUTEX_ASSERT(mtx, curthread != 0);
		if (MUTEX_ACQUIRE(mtx, curthread)) {
			MUTEX_WANTLOCK(mtx);
			MUTEX_LOCKED(mtx);
			MUTEX_DASSERT(mtx,
			    MUTEX_OWNER(mtx->mtx_owner) == curthread);
			return 1;
		}
	}

	return 0;
}
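
/*
 * A common mutex_tryenter() pattern, sketched with a hypothetical
 * caller: take the lock only if that can be done without blocking,
 * and otherwise defer the work:
 *
 *	if (mutex_tryenter(&sc->sc_lock)) {
 *		...update shared state...
 *		mutex_exit(&sc->sc_lock);
 *	} else {
 *		...busy: retry later or queue the work...
 *	}
 */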

#if defined(__HAVE_SPIN_MUTEX_STUBS) || defined(FULL)
/*
 * mutex_spin_retry:
 *
 * Support routine for mutex_spin_enter().  Assumes that the caller
 * has already raised the SPL, and adjusted counters.
 */
void
mutex_spin_retry(kmutex_t *mtx)
{
#ifdef MULTIPROCESSOR
	u_int count;
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_FLAG(lsflag);
#ifdef LOCKDEBUG
	u_int spins = 0;
#endif	/* LOCKDEBUG */

	MUTEX_WANTLOCK(mtx);

	LOCKSTAT_ENTER(lsflag);
	LOCKSTAT_START_TIMER(lsflag, spintime);
	count = SPINLOCK_BACKOFF_MIN;

	/*
	 * Spin, testing the lock word and applying exponential
	 * backoff to reduce cache line ping-ponging between CPUs.
	 */
	do {
		if (panicstr != NULL)
			break;
		while (MUTEX_SPINBIT_LOCKED_P(mtx)) {
			SPINLOCK_BACKOFF(count);
#ifdef LOCKDEBUG
			if (SPINLOCK_SPINOUT(spins))
				MUTEX_ABORT(mtx, "spinout");
#endif	/* LOCKDEBUG */
		}
	} while (!MUTEX_SPINBIT_LOCK_TRY(mtx));

	LOCKSTAT_STOP_TIMER(lsflag, spintime);
	LOCKSTAT_EVENT(lsflag, mtx, LB_SPIN_MUTEX | LB_SPIN, 1, spintime);
	LOCKSTAT_EXIT(lsflag);

	MUTEX_LOCKED(mtx);
#else	/* MULTIPROCESSOR */
	MUTEX_ABORT(mtx, "locking against myself");
#endif	/* MULTIPROCESSOR */
}
#endif	/* defined(__HAVE_SPIN_MUTEX_STUBS) || defined(FULL) */