1/* $NetBSD: kern_condvar.c,v 1.35 2015/08/07 06:22:12 uebayasi Exp $ */
2
3/*-
4 * Copyright (c) 2006, 2007, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32/*
33 * Kernel condition variable implementation.
34 */
35
36#include <sys/cdefs.h>
37__KERNEL_RCSID(0, "$NetBSD: kern_condvar.c,v 1.35 2015/08/07 06:22:12 uebayasi Exp $");
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/lwp.h>
42#include <sys/condvar.h>
43#include <sys/sleepq.h>
44#include <sys/lockdebug.h>
45#include <sys/cpu.h>
46
47/*
48 * Accessors for the private contents of the kcondvar_t data type.
49 *
50 * cv_opaque[0] sleepq...
51 * cv_opaque[1] ...pointers
52 * cv_opaque[2] description for ps(1)
53 *
54 * cv_opaque[0..1] is protected by the interlock passed to cv_wait() (enqueue
55 * only), and the sleep queue lock acquired with sleeptab_lookup() (enqueue
56 * and dequeue).
57 *
58 * cv_opaque[2] (the wmesg) is static and does not change throughout the life
59 * of the CV.
60 */
/* View cv_opaque[0..1] as the sleep queue head belonging to the CV. */
#define	CV_SLEEPQ(cv)		((sleepq_t *)(cv)->cv_opaque)
/* Fetch the wait message pointer kept in cv_opaque[2]. */
#define	CV_WMESG(cv)		((const char *)(cv)->cv_opaque[2])
/*
 * Store a wait message pointer in cv_opaque[2].  The whole expansion is
 * parenthesized so the assignment remains one expression no matter what
 * operators surround the macro at the point of use.
 */
#define	CV_SET_WMESG(cv, v)	((cv)->cv_opaque[2] = __UNCONST(v))

/* True unless the CV's wait message is the "nodebug" marker. */
#define	CV_DEBUG_P(cv)	(CV_WMESG(cv) != nodebug)
/* Return address of the function this macro is expanded in. */
#define	CV_RA		((uintptr_t)__builtin_return_address(0))
67
68static void cv_unsleep(lwp_t *, bool);
69static void cv_wakeup_one(kcondvar_t *);
70static void cv_wakeup_all(kcondvar_t *);
71
72static syncobj_t cv_syncobj = {
73 SOBJ_SLEEPQ_SORTED,
74 cv_unsleep,
75 sleepq_changepri,
76 sleepq_lendpri,
77 syncobj_noowner,
78};
79
80lockops_t cv_lockops = {
81 "Condition variable",
82 LOCKOPS_CV,
83 NULL
84};
85
/*
 * Marker wait messages, compared by address and never by content.
 *
 * deadcv:  installed by cv_destroy() on DIAGNOSTIC kernels and rejected
 *          by cv_is_valid().
 * nodebug: installed by cv_init() when LOCKDEBUG_ALLOC() reports false;
 *          tested by CV_DEBUG_P().
 */
static const char deadcv[] = "deadcv";
#if defined(LOCKDEBUG)
static const char nodebug[] = "nodebug";
#endif
90
91/*
92 * cv_init:
93 *
94 * Initialize a condition variable for use.
95 */
96void
97cv_init(kcondvar_t *cv, const char *wmesg)
98{
99#ifdef LOCKDEBUG
100 bool dodebug;
101
102 dodebug = LOCKDEBUG_ALLOC(cv, &cv_lockops,
103 (uintptr_t)__builtin_return_address(0));
104 if (!dodebug) {
105 /* XXX This will break vfs_lockf. */
106 wmesg = nodebug;
107 }
108#endif
109 KASSERT(wmesg != NULL);
110 CV_SET_WMESG(cv, wmesg);
111 sleepq_init(CV_SLEEPQ(cv));
112}
113
114/*
115 * cv_destroy:
116 *
117 * Tear down a condition variable.
118 */
119void
120cv_destroy(kcondvar_t *cv)
121{
122
123 LOCKDEBUG_FREE(CV_DEBUG_P(cv), cv);
124#ifdef DIAGNOSTIC
125 KASSERT(cv_is_valid(cv));
126 CV_SET_WMESG(cv, deadcv);
127#endif
128}
129
130/*
131 * cv_enter:
132 *
133 * Look up and lock the sleep queue corresponding to the given
134 * condition variable, and increment the number of waiters.
135 */
136static inline void
137cv_enter(kcondvar_t *cv, kmutex_t *mtx, lwp_t *l)
138{
139 sleepq_t *sq;
140 kmutex_t *mp;
141
142 KASSERT(cv_is_valid(cv));
143 KASSERT(!cpu_intr_p());
144 KASSERT((l->l_pflag & LP_INTR) == 0 || panicstr != NULL);
145
146 LOCKDEBUG_LOCKED(CV_DEBUG_P(cv), cv, mtx, CV_RA, 0);
147
148 l->l_kpriority = true;
149 mp = sleepq_hashlock(cv);
150 sq = CV_SLEEPQ(cv);
151 sleepq_enter(sq, l, mp);
152 sleepq_enqueue(sq, cv, CV_WMESG(cv), &cv_syncobj);
153 mutex_exit(mtx);
154 KASSERT(cv_has_waiters(cv));
155}
156
157/*
158 * cv_exit:
159 *
160 * After resuming execution, check to see if we have been restarted
161 * as a result of cv_signal(). If we have, but cannot take the
162 * wakeup (because of eg a pending Unix signal or timeout) then try
163 * to ensure that another LWP sees it. This is necessary because
164 * there may be multiple waiters, and at least one should take the
165 * wakeup if possible.
166 */
167static inline int
168cv_exit(kcondvar_t *cv, kmutex_t *mtx, lwp_t *l, const int error)
169{
170
171 mutex_enter(mtx);
172 if (__predict_false(error != 0))
173 cv_signal(cv);
174
175 LOCKDEBUG_UNLOCKED(CV_DEBUG_P(cv), cv, CV_RA, 0);
176 KASSERT(cv_is_valid(cv));
177
178 return error;
179}
180
181/*
182 * cv_unsleep:
183 *
184 * Remove an LWP from the condition variable and sleep queue. This
185 * is called when the LWP has not been awoken normally but instead
186 * interrupted: for example, when a signal is received. Must be
187 * called with the LWP locked, and must return it unlocked.
188 */
189static void
190cv_unsleep(lwp_t *l, bool cleanup)
191{
192 kcondvar_t *cv __diagused;
193
194 cv = (kcondvar_t *)(uintptr_t)l->l_wchan;
195
196 KASSERT(l->l_wchan == (wchan_t)cv);
197 KASSERT(l->l_sleepq == CV_SLEEPQ(cv));
198 KASSERT(cv_is_valid(cv));
199 KASSERT(cv_has_waiters(cv));
200
201 sleepq_unsleep(l, cleanup);
202}
203
204/*
205 * cv_wait:
206 *
207 * Wait non-interruptably on a condition variable until awoken.
208 */
209void
210cv_wait(kcondvar_t *cv, kmutex_t *mtx)
211{
212 lwp_t *l = curlwp;
213
214 KASSERT(mutex_owned(mtx));
215
216 cv_enter(cv, mtx, l);
217 (void)sleepq_block(0, false);
218 (void)cv_exit(cv, mtx, l, 0);
219}
220
221/*
222 * cv_wait_sig:
223 *
224 * Wait on a condition variable until a awoken or a signal is received.
225 * Will also return early if the process is exiting. Returns zero if
226 * awoken normally, ERESTART if a signal was received and the system
227 * call is restartable, or EINTR otherwise.
228 */
229int
230cv_wait_sig(kcondvar_t *cv, kmutex_t *mtx)
231{
232 lwp_t *l = curlwp;
233 int error;
234
235 KASSERT(mutex_owned(mtx));
236
237 cv_enter(cv, mtx, l);
238 error = sleepq_block(0, true);
239 return cv_exit(cv, mtx, l, error);
240}
241
242/*
243 * cv_timedwait:
244 *
245 * Wait on a condition variable until awoken or the specified timeout
246 * expires. Returns zero if awoken normally or EWOULDBLOCK if the
247 * timeout expired.
248 *
249 * timo is a timeout in ticks. timo = 0 specifies an infinite timeout.
250 */
251int
252cv_timedwait(kcondvar_t *cv, kmutex_t *mtx, int timo)
253{
254 lwp_t *l = curlwp;
255 int error;
256
257 KASSERT(mutex_owned(mtx));
258
259 cv_enter(cv, mtx, l);
260 error = sleepq_block(timo, false);
261 return cv_exit(cv, mtx, l, error);
262}
263
264/*
265 * cv_timedwait_sig:
266 *
267 * Wait on a condition variable until a timeout expires, awoken or a
268 * signal is received. Will also return early if the process is
269 * exiting. Returns zero if awoken normally, EWOULDBLOCK if the
270 * timeout expires, ERESTART if a signal was received and the system
271 * call is restartable, or EINTR otherwise.
272 *
273 * timo is a timeout in ticks. timo = 0 specifies an infinite timeout.
274 */
275int
276cv_timedwait_sig(kcondvar_t *cv, kmutex_t *mtx, int timo)
277{
278 lwp_t *l = curlwp;
279 int error;
280
281 KASSERT(mutex_owned(mtx));
282
283 cv_enter(cv, mtx, l);
284 error = sleepq_block(timo, true);
285 return cv_exit(cv, mtx, l, error);
286}
287
288/*
289 * cv_signal:
290 *
291 * Wake the highest priority LWP waiting on a condition variable.
292 * Must be called with the interlocking mutex held.
293 */
294void
295cv_signal(kcondvar_t *cv)
296{
297
298 /* LOCKDEBUG_WAKEUP(CV_DEBUG_P(cv), cv, CV_RA); */
299 KASSERT(cv_is_valid(cv));
300
301 if (__predict_false(!TAILQ_EMPTY(CV_SLEEPQ(cv))))
302 cv_wakeup_one(cv);
303}
304
305static void __noinline
306cv_wakeup_one(kcondvar_t *cv)
307{
308 sleepq_t *sq;
309 kmutex_t *mp;
310 lwp_t *l;
311
312 KASSERT(cv_is_valid(cv));
313
314 mp = sleepq_hashlock(cv);
315 sq = CV_SLEEPQ(cv);
316 l = TAILQ_FIRST(sq);
317 if (l == NULL) {
318 mutex_spin_exit(mp);
319 return;
320 }
321 KASSERT(l->l_sleepq == sq);
322 KASSERT(l->l_mutex == mp);
323 KASSERT(l->l_wchan == cv);
324 sleepq_remove(sq, l);
325 mutex_spin_exit(mp);
326
327 KASSERT(cv_is_valid(cv));
328}
329
330/*
331 * cv_broadcast:
332 *
333 * Wake all LWPs waiting on a condition variable. Must be called
334 * with the interlocking mutex held.
335 */
336void
337cv_broadcast(kcondvar_t *cv)
338{
339
340 /* LOCKDEBUG_WAKEUP(CV_DEBUG_P(cv), cv, CV_RA); */
341 KASSERT(cv_is_valid(cv));
342
343 if (__predict_false(!TAILQ_EMPTY(CV_SLEEPQ(cv))))
344 cv_wakeup_all(cv);
345}
346
347static void __noinline
348cv_wakeup_all(kcondvar_t *cv)
349{
350 sleepq_t *sq;
351 kmutex_t *mp;
352 lwp_t *l, *next;
353
354 KASSERT(cv_is_valid(cv));
355
356 mp = sleepq_hashlock(cv);
357 sq = CV_SLEEPQ(cv);
358 for (l = TAILQ_FIRST(sq); l != NULL; l = next) {
359 KASSERT(l->l_sleepq == sq);
360 KASSERT(l->l_mutex == mp);
361 KASSERT(l->l_wchan == cv);
362 next = TAILQ_NEXT(l, l_sleepchain);
363 sleepq_remove(sq, l);
364 }
365 mutex_spin_exit(mp);
366
367 KASSERT(cv_is_valid(cv));
368}
369
370/*
371 * cv_has_waiters:
372 *
373 * For diagnostic assertions: return non-zero if a condition
374 * variable has waiters.
375 */
376bool
377cv_has_waiters(kcondvar_t *cv)
378{
379
380 return !TAILQ_EMPTY(CV_SLEEPQ(cv));
381}
382
383/*
384 * cv_is_valid:
385 *
386 * For diagnostic assertions: return non-zero if a condition
387 * variable appears to be valid. No locks need be held.
388 */
389bool
390cv_is_valid(kcondvar_t *cv)
391{
392
393 return CV_WMESG(cv) != deadcv && CV_WMESG(cv) != NULL;
394}
395