1/* $NetBSD: linux_futex.c,v 1.35 2016/08/15 09:20:11 maxv Exp $ */
2
3/*-
4 * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by Emmanuel Dreyfus
17 * 4. The name of the author may not be used to endorse or promote
18 * products derived from this software without specific prior written
19 * permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS''
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33
34#include <sys/cdefs.h>
35__KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.35 2016/08/15 09:20:11 maxv Exp $");
36
37#include <sys/param.h>
38#include <sys/time.h>
39#include <sys/systm.h>
40#include <sys/proc.h>
41#include <sys/lwp.h>
42#include <sys/queue.h>
43#include <sys/condvar.h>
44#include <sys/mutex.h>
45#include <sys/kmem.h>
46#include <sys/kernel.h>
47#include <sys/atomic.h>
48
49#include <compat/linux/common/linux_types.h>
50#include <compat/linux/common/linux_emuldata.h>
51#include <compat/linux/common/linux_exec.h>
52#include <compat/linux/common/linux_signal.h>
53#include <compat/linux/common/linux_futex.h>
54#include <compat/linux/common/linux_sched.h>
55#include <compat/linux/common/linux_machdep.h>
56#include <compat/linux/linux_syscallargs.h>
57
58struct futex;
59
60struct waiting_proc {
61 struct futex *wp_futex;
62 kcondvar_t wp_futex_cv;
63 TAILQ_ENTRY(waiting_proc) wp_list;
64 bool wp_onlist;
65};
66struct futex {
67 void *f_uaddr;
68 int f_refcount;
69 uint32_t f_bitset;
70 LIST_ENTRY(futex) f_list;
71 TAILQ_HEAD(, waiting_proc) f_waiting_proc;
72};
73
74static LIST_HEAD(futex_list, futex) futex_list;
75static kmutex_t futex_lock;
76
77#define FUTEX_LOCK mutex_enter(&futex_lock)
78#define FUTEX_UNLOCK mutex_exit(&futex_lock)
79#define FUTEX_LOCKASSERT KASSERT(mutex_owned(&futex_lock))
80
81#define FUTEX_SYSTEM_LOCK KERNEL_LOCK(1, NULL)
82#define FUTEX_SYSTEM_UNLOCK KERNEL_UNLOCK_ONE(0)
83
84#ifdef DEBUG_LINUX_FUTEX
85int debug_futex = 1;
86#define FUTEXPRINTF(a) do { if (debug_futex) printf a; } while (0)
87#else
88#define FUTEXPRINTF(a)
89#endif
90
91void
92linux_futex_init(void)
93{
94 FUTEXPRINTF(("%s: initializing futex\n", __func__));
95 mutex_init(&futex_lock, MUTEX_DEFAULT, IPL_NONE);
96}
97
98void
99linux_futex_fini(void)
100{
101 FUTEXPRINTF(("%s: destroying futex\n", __func__));
102 mutex_destroy(&futex_lock);
103}
104
105static struct waiting_proc *futex_wp_alloc(void);
106static void futex_wp_free(struct waiting_proc *);
107static struct futex *futex_get(void *, uint32_t);
108static void futex_ref(struct futex *);
109static void futex_put(struct futex *);
110static int futex_sleep(struct futex **, lwp_t *, int, struct waiting_proc *);
111static int futex_wake(struct futex *, int, struct futex *, int);
112static int futex_atomic_op(lwp_t *, int, void *);
113
114int
115linux_sys_futex(struct lwp *l, const struct linux_sys_futex_args *uap, register_t *retval)
116{
117 /* {
118 syscallarg(int *) uaddr;
119 syscallarg(int) op;
120 syscallarg(int) val;
121 syscallarg(const struct linux_timespec *) timeout;
122 syscallarg(int *) uaddr2;
123 syscallarg(int) val3;
124 } */
125 struct linux_timespec lts;
126 struct timespec ts = { 0, 0 };
127 int error;
128
129 if ((SCARG(uap, op) & LINUX_FUTEX_CMD_MASK) == LINUX_FUTEX_WAIT &&
130 SCARG(uap, timeout) != NULL) {
131 if ((error = copyin(SCARG(uap, timeout),
132 &lts, sizeof(lts))) != 0) {
133 return error;
134 }
135 linux_to_native_timespec(&ts, &lts);
136 }
137 return linux_do_futex(l, uap, retval, &ts);
138}
139
140int
141linux_do_futex(struct lwp *l, const struct linux_sys_futex_args *uap, register_t *retval, struct timespec *ts)
142{
143 /* {
144 syscallarg(int *) uaddr;
145 syscallarg(int) op;
146 syscallarg(int) val;
147 syscallarg(const struct linux_timespec *) timeout;
148 syscallarg(int *) uaddr2;
149 syscallarg(int) val3;
150 } */
151 int val, val3;
152 int ret;
153 int error = 0;
154 struct futex *f;
155 struct futex *newf;
156 int tout;
157 struct futex *f2;
158 struct waiting_proc *wp;
159 int op_ret, cmd;
160 clockid_t clk;
161
162 cmd = SCARG(uap, op) & LINUX_FUTEX_CMD_MASK;
163 val3 = SCARG(uap, val3);
164
165 if (SCARG(uap, op) & LINUX_FUTEX_CLOCK_REALTIME) {
166 switch (cmd) {
167 case LINUX_FUTEX_WAIT_BITSET:
168 case LINUX_FUTEX_WAIT:
169 clk = CLOCK_REALTIME;
170 break;
171 default:
172 return ENOSYS;
173 }
174 } else
175 clk = CLOCK_MONOTONIC;
176
177 /*
178 * Our implementation provides only private futexes. Most of the apps
179 * should use private futexes but don't claim so. Therefore we treat
180 * all futexes as private by clearing the FUTEX_PRIVATE_FLAG. It works
181 * in most cases (ie. when futexes are not shared on file descriptor
182 * or between different processes).
183 *
184 * Note that we don't handle bitsets at all at the moment. We need
185 * to move from refcounting uaddr's to handling multiple futex entries
186 * pointing to the same uaddr, but having possibly different bitmask.
187 * Perhaps move to an implementation where each uaddr has a list of
188 * futexes.
189 */
190 switch (cmd) {
191 case LINUX_FUTEX_WAIT:
192 val3 = FUTEX_BITSET_MATCH_ANY;
193 /*FALLTHROUGH*/
194 case LINUX_FUTEX_WAIT_BITSET:
195 if ((error = ts2timo(clk, 0, ts, &tout, NULL)) != 0) {
196 if (error != ETIMEDOUT)
197 return error;
198 /*
199 * If the user process requests a non null timeout,
200 * make sure we do not turn it into an infinite
201 * timeout because tout is 0.
202 *
203 * We use a minimal timeout of 1/hz. Maybe it would make
204 * sense to just return ETIMEDOUT without sleeping.
205 */
206 if (SCARG(uap, timeout) != NULL)
207 tout = 1;
208 else
209 tout = 0;
210 }
211 FUTEX_SYSTEM_LOCK;
212 if ((error = copyin(SCARG(uap, uaddr),
213 &val, sizeof(val))) != 0) {
214 FUTEX_SYSTEM_UNLOCK;
215 return error;
216 }
217
218 if (val != SCARG(uap, val)) {
219 FUTEX_SYSTEM_UNLOCK;
220 return EWOULDBLOCK;
221 }
222
223 FUTEXPRINTF(("FUTEX_WAIT %d.%d: val = %d, uaddr = %p, "
224 "*uaddr = %d, timeout = %lld.%09ld\n",
225 l->l_proc->p_pid, l->l_lid, SCARG(uap, val),
226 SCARG(uap, uaddr), val, (long long)ts->tv_sec,
227 ts->tv_nsec));
228
229
230 wp = futex_wp_alloc();
231 FUTEX_LOCK;
232 f = futex_get(SCARG(uap, uaddr), val3);
233 ret = futex_sleep(&f, l, tout, wp);
234 futex_put(f);
235 FUTEX_UNLOCK;
236 futex_wp_free(wp);
237
238 FUTEXPRINTF(("FUTEX_WAIT %d.%d: uaddr = %p, "
239 "ret = %d\n", l->l_proc->p_pid, l->l_lid,
240 SCARG(uap, uaddr), ret));
241
242 FUTEX_SYSTEM_UNLOCK;
243 switch (ret) {
244 case EWOULDBLOCK: /* timeout */
245 return ETIMEDOUT;
246 break;
247 case EINTR: /* signal */
248 return EINTR;
249 break;
250 case 0: /* FUTEX_WAKE received */
251 FUTEXPRINTF(("FUTEX_WAIT %d.%d: uaddr = %p, got it\n",
252 l->l_proc->p_pid, l->l_lid, SCARG(uap, uaddr)));
253 return 0;
254 break;
255 default:
256 FUTEXPRINTF(("FUTEX_WAIT: unexpected ret = %d\n", ret));
257 break;
258 }
259
260 /* NOTREACHED */
261 break;
262
263 case LINUX_FUTEX_WAKE:
264 val = FUTEX_BITSET_MATCH_ANY;
265 /*FALLTHROUGH*/
266 case LINUX_FUTEX_WAKE_BITSET:
267 /*
268 * XXX: Linux is able cope with different addresses
269 * corresponding to the same mapped memory in the sleeping
270 * and the waker process(es).
271 */
272 FUTEXPRINTF(("FUTEX_WAKE %d.%d: uaddr = %p, val = %d\n",
273 l->l_proc->p_pid, l->l_lid,
274 SCARG(uap, uaddr), SCARG(uap, val)));
275
276 FUTEX_SYSTEM_LOCK;
277 FUTEX_LOCK;
278 f = futex_get(SCARG(uap, uaddr), val3);
279 *retval = futex_wake(f, SCARG(uap, val), NULL, 0);
280 futex_put(f);
281 FUTEX_UNLOCK;
282 FUTEX_SYSTEM_UNLOCK;
283
284 break;
285
286 case LINUX_FUTEX_CMP_REQUEUE:
287 FUTEX_SYSTEM_LOCK;
288
289 if ((error = copyin(SCARG(uap, uaddr),
290 &val, sizeof(val))) != 0) {
291 FUTEX_SYSTEM_UNLOCK;
292 return error;
293 }
294
295 if (val != val3) {
296 FUTEX_SYSTEM_UNLOCK;
297 return EAGAIN;
298 }
299
300 FUTEXPRINTF(("FUTEX_CMP_REQUEUE %d.%d: uaddr = %p, val = %d, "
301 "uaddr2 = %p, val2 = %d\n",
302 l->l_proc->p_pid, l->l_lid,
303 SCARG(uap, uaddr), SCARG(uap, val), SCARG(uap, uaddr2),
304 (int)(unsigned long)SCARG(uap, timeout)));
305
306 FUTEX_LOCK;
307 f = futex_get(SCARG(uap, uaddr), val3);
308 newf = futex_get(SCARG(uap, uaddr2), val3);
309 *retval = futex_wake(f, SCARG(uap, val), newf,
310 (int)(unsigned long)SCARG(uap, timeout));
311 futex_put(f);
312 futex_put(newf);
313 FUTEX_UNLOCK;
314
315 FUTEX_SYSTEM_UNLOCK;
316 break;
317
318 case LINUX_FUTEX_REQUEUE:
319 FUTEX_SYSTEM_LOCK;
320
321 FUTEXPRINTF(("FUTEX_REQUEUE %d.%d: uaddr = %p, val = %d, "
322 "uaddr2 = %p, val2 = %d\n",
323 l->l_proc->p_pid, l->l_lid,
324 SCARG(uap, uaddr), SCARG(uap, val), SCARG(uap, uaddr2),
325 (int)(unsigned long)SCARG(uap, timeout)));
326
327 FUTEX_LOCK;
328 f = futex_get(SCARG(uap, uaddr), val3);
329 newf = futex_get(SCARG(uap, uaddr2), val3);
330 *retval = futex_wake(f, SCARG(uap, val), newf,
331 (int)(unsigned long)SCARG(uap, timeout));
332 futex_put(f);
333 futex_put(newf);
334 FUTEX_UNLOCK;
335
336 FUTEX_SYSTEM_UNLOCK;
337 break;
338
339 case LINUX_FUTEX_FD:
340 FUTEXPRINTF(("%s: unimplemented op %d\n", __func__, cmd));
341 return ENOSYS;
342 case LINUX_FUTEX_WAKE_OP:
343 FUTEX_SYSTEM_LOCK;
344
345 FUTEXPRINTF(("FUTEX_WAKE_OP %d.%d: uaddr = %p, op = %d, "
346 "val = %d, uaddr2 = %p, val2 = %d\n",
347 l->l_proc->p_pid, l->l_lid,
348 SCARG(uap, uaddr), cmd, SCARG(uap, val),
349 SCARG(uap, uaddr2),
350 (int)(unsigned long)SCARG(uap, timeout)));
351
352 FUTEX_LOCK;
353 f = futex_get(SCARG(uap, uaddr), val3);
354 f2 = futex_get(SCARG(uap, uaddr2), val3);
355 FUTEX_UNLOCK;
356
357 /*
358 * This function returns positive number as results and
359 * negative as errors
360 */
361 op_ret = futex_atomic_op(l, val3, SCARG(uap, uaddr2));
362 FUTEX_LOCK;
363 if (op_ret < 0) {
364 futex_put(f);
365 futex_put(f2);
366 FUTEX_UNLOCK;
367 FUTEX_SYSTEM_UNLOCK;
368 return -op_ret;
369 }
370
371 ret = futex_wake(f, SCARG(uap, val), NULL, 0);
372 futex_put(f);
373 if (op_ret > 0) {
374 op_ret = 0;
375 /*
376 * Linux abuses the address of the timespec parameter
377 * as the number of retries
378 */
379 op_ret += futex_wake(f2,
380 (int)(unsigned long)SCARG(uap, timeout), NULL, 0);
381 ret += op_ret;
382 }
383 futex_put(f2);
384 FUTEX_UNLOCK;
385 FUTEX_SYSTEM_UNLOCK;
386 *retval = ret;
387 break;
388 default:
389 FUTEXPRINTF(("%s: unknown op %d\n", __func__, cmd));
390 return ENOSYS;
391 }
392 return 0;
393}
394
395static struct waiting_proc *
396futex_wp_alloc(void)
397{
398 struct waiting_proc *wp;
399
400 wp = kmem_zalloc(sizeof(*wp), KM_SLEEP);
401 cv_init(&wp->wp_futex_cv, "futex");
402 return wp;
403}
404
405static void
406futex_wp_free(struct waiting_proc *wp)
407{
408
409 cv_destroy(&wp->wp_futex_cv);
410 kmem_free(wp, sizeof(*wp));
411}
412
413static struct futex *
414futex_get(void *uaddr, uint32_t bitset)
415{
416 struct futex *f;
417
418 FUTEX_LOCKASSERT;
419
420 LIST_FOREACH(f, &futex_list, f_list) {
421 if (f->f_uaddr == uaddr) {
422 f->f_refcount++;
423 return f;
424 }
425 }
426
427 /* Not found, create it */
428 f = kmem_zalloc(sizeof(*f), KM_SLEEP);
429 f->f_uaddr = uaddr;
430 f->f_bitset = bitset;
431 f->f_refcount = 1;
432 TAILQ_INIT(&f->f_waiting_proc);
433 LIST_INSERT_HEAD(&futex_list, f, f_list);
434
435 return f;
436}
437
438static void
439futex_ref(struct futex *f)
440{
441
442 FUTEX_LOCKASSERT;
443
444 f->f_refcount++;
445}
446
447static void
448futex_put(struct futex *f)
449{
450
451 FUTEX_LOCKASSERT;
452
453 f->f_refcount--;
454 if (f->f_refcount == 0) {
455 KASSERT(TAILQ_EMPTY(&f->f_waiting_proc));
456 LIST_REMOVE(f, f_list);
457 kmem_free(f, sizeof(*f));
458 }
459}
460
461static int
462futex_sleep(struct futex **fp, lwp_t *l, int timeout, struct waiting_proc *wp)
463{
464 struct futex *f;
465 int ret;
466
467 FUTEX_LOCKASSERT;
468
469 f = *fp;
470 wp->wp_futex = f;
471 TAILQ_INSERT_TAIL(&f->f_waiting_proc, wp, wp_list);
472 wp->wp_onlist = true;
473 ret = cv_timedwait_sig(&wp->wp_futex_cv, &futex_lock, timeout);
474
475 /*
476 * we may have been requeued to a different futex before we were
477 * woken up, so let the caller know which futex to put. if we were
478 * woken by futex_wake() then it took us off the waiting list,
479 * but if our sleep was interrupted or timed out then we might
480 * need to take ourselves off the waiting list.
481 */
482
483 f = wp->wp_futex;
484 if (wp->wp_onlist) {
485 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
486 }
487 *fp = f;
488 return ret;
489}
490
491static int
492futex_wake(struct futex *f, int n, struct futex *newf, int n2)
493{
494 struct waiting_proc *wp;
495 int count = 0;
496
497 FUTEX_LOCKASSERT;
498
499 /*
500 * wake up up to n threads waiting on this futex.
501 */
502
503 while (n--) {
504 wp = TAILQ_FIRST(&f->f_waiting_proc);
505 if (wp == NULL)
506 return count;
507
508 KASSERT(f == wp->wp_futex);
509 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
510 wp->wp_onlist = false;
511 cv_signal(&wp->wp_futex_cv);
512 count++;
513 }
514 if (newf == NULL)
515 return count;
516
517 /*
518 * then requeue up to n2 additional threads to newf
519 * (without waking them up).
520 */
521
522 while (n2--) {
523 wp = TAILQ_FIRST(&f->f_waiting_proc);
524 if (wp == NULL)
525 return count;
526
527 KASSERT(f == wp->wp_futex);
528 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
529 futex_put(f);
530
531 wp->wp_futex = newf;
532 futex_ref(newf);
533 TAILQ_INSERT_TAIL(&newf->f_waiting_proc, wp, wp_list);
534 count++;
535 }
536 return count;
537}
538
539static int
540futex_atomic_op(lwp_t *l, int encoded_op, void *uaddr)
541{
542 const int op = (encoded_op >> 28) & 7;
543 const int cmp = (encoded_op >> 24) & 15;
544 const int cmparg = (encoded_op << 20) >> 20;
545 int oparg = (encoded_op << 8) >> 20;
546 int error, oldval, cval;
547
548 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
549 oparg = 1 << oparg;
550
551 /* XXX: linux verifies access here and returns EFAULT */
552
553 if (copyin(uaddr, &cval, sizeof(int)) != 0)
554 return -EFAULT;
555
556 for (;;) {
557 int nval;
558
559 switch (op) {
560 case FUTEX_OP_SET:
561 nval = oparg;
562 break;
563 case FUTEX_OP_ADD:
564 nval = cval + oparg;
565 break;
566 case FUTEX_OP_OR:
567 nval = cval | oparg;
568 break;
569 case FUTEX_OP_ANDN:
570 nval = cval & ~oparg;
571 break;
572 case FUTEX_OP_XOR:
573 nval = cval ^ oparg;
574 break;
575 default:
576 return -ENOSYS;
577 }
578
579 error = ucas_int(uaddr, cval, nval, &oldval);
580 if (error || oldval == cval) {
581 break;
582 }
583 cval = oldval;
584 }
585
586 if (error)
587 return -EFAULT;
588
589 switch (cmp) {
590 case FUTEX_OP_CMP_EQ:
591 return (oldval == cmparg);
592 case FUTEX_OP_CMP_NE:
593 return (oldval != cmparg);
594 case FUTEX_OP_CMP_LT:
595 return (oldval < cmparg);
596 case FUTEX_OP_CMP_GE:
597 return (oldval >= cmparg);
598 case FUTEX_OP_CMP_LE:
599 return (oldval <= cmparg);
600 case FUTEX_OP_CMP_GT:
601 return (oldval > cmparg);
602 default:
603 return -ENOSYS;
604 }
605}
606
607int
608linux_sys_set_robust_list(struct lwp *l,
609 const struct linux_sys_set_robust_list_args *uap, register_t *retval)
610{
611 /* {
612 syscallarg(struct linux_robust_list_head *) head;
613 syscallarg(size_t) len;
614 } */
615 struct linux_emuldata *led;
616
617 if (SCARG(uap, len) != sizeof(struct linux_robust_list_head))
618 return EINVAL;
619 led = l->l_emuldata;
620 led->led_robust_head = SCARG(uap, head);
621 *retval = 0;
622 return 0;
623}
624
625int
626linux_sys_get_robust_list(struct lwp *l,
627 const struct linux_sys_get_robust_list_args *uap, register_t *retval)
628{
629 /* {
630 syscallarg(int) pid;
631 syscallarg(struct linux_robust_list_head **) head;
632 syscallarg(size_t *) len;
633 } */
634 struct proc *p;
635 struct linux_emuldata *led;
636 struct linux_robust_list_head *head;
637 size_t len;
638 int error = 0;
639
640 p = l->l_proc;
641 if (!SCARG(uap, pid)) {
642 led = l->l_emuldata;
643 head = led->led_robust_head;
644 } else {
645 mutex_enter(p->p_lock);
646 l = lwp_find(p, SCARG(uap, pid));
647 if (l != NULL) {
648 led = l->l_emuldata;
649 head = led->led_robust_head;
650 }
651 mutex_exit(p->p_lock);
652 if (l == NULL) {
653 return ESRCH;
654 }
655 }
656#ifdef __arch64__
657 if (p->p_flag & PK_32) {
658 uint32_t u32;
659
660 u32 = 12;
661 error = copyout(&u32, SCARG(uap, len), sizeof(u32));
662 if (error)
663 return error;
664 u32 = (uint32_t)(uintptr_t)head;
665 return copyout(&u32, SCARG(uap, head), sizeof(u32));
666 }
667#endif
668
669 len = sizeof(*head);
670 error = copyout(&len, SCARG(uap, len), sizeof(len));
671 if (error)
672 return error;
673 return copyout(&head, SCARG(uap, head), sizeof(head));
674}
675
676static int
677handle_futex_death(void *uaddr, pid_t pid, int pi)
678{
679 int uval, nval, mval;
680 struct futex *f;
681
682retry:
683 if (copyin(uaddr, &uval, sizeof(uval)))
684 return EFAULT;
685
686 if ((uval & FUTEX_TID_MASK) == pid) {
687 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
688 nval = atomic_cas_32(uaddr, uval, mval);
689
690 if (nval == -1)
691 return EFAULT;
692
693 if (nval != uval)
694 goto retry;
695
696 if (!pi && (uval & FUTEX_WAITERS)) {
697 FUTEX_LOCK;
698 f = futex_get(uaddr, FUTEX_BITSET_MATCH_ANY);
699 futex_wake(f, 1, NULL, 0);
700 FUTEX_UNLOCK;
701 }
702 }
703
704 return 0;
705}
706
707static int
708fetch_robust_entry(struct lwp *l, struct linux_robust_list **entry,
709 struct linux_robust_list **head, int *pi)
710{
711 unsigned long uentry;
712
713#ifdef __arch64__
714 if (l->l_proc->p_flag & PK_32) {
715 uint32_t u32;
716
717 if (copyin(head, &u32, sizeof(u32)))
718 return EFAULT;
719 uentry = (unsigned long)u32;
720 } else
721#endif
722 if (copyin(head, &uentry, sizeof(uentry)))
723 return EFAULT;
724
725 *entry = (void *)(uentry & ~1UL);
726 *pi = uentry & 1;
727
728 return 0;
729}
730
731/* This walks the list of robust futexes, releasing them. */
732void
733release_futexes(struct lwp *l)
734{
735 struct linux_robust_list_head head;
736 struct linux_robust_list *entry, *next_entry = NULL, *pending;
737 unsigned int limit = 2048, pi, next_pi, pip;
738 struct linux_emuldata *led;
739 unsigned long futex_offset;
740 int rc;
741
742 led = l->l_emuldata;
743 if (led->led_robust_head == NULL)
744 return;
745
746#ifdef __arch64__
747 if (l->l_proc->p_flag & PK_32) {
748 uint32_t u32s[3];
749
750 if (copyin(led->led_robust_head, u32s, sizeof(u32s)))
751 return;
752
753 head.list.next = (void *)(uintptr_t)u32s[0];
754 head.futex_offset = (unsigned long)u32s[1];
755 head.pending_list = (void *)(uintptr_t)u32s[2];
756 } else
757#endif
758 if (copyin(led->led_robust_head, &head, sizeof(head)))
759 return;
760
761 if (fetch_robust_entry(l, &entry, &head.list.next, &pi))
762 return;
763
764#ifdef __arch64__
765 if (l->l_proc->p_flag & PK_32) {
766 uint32_t u32;
767
768 if (copyin(led->led_robust_head, &u32, sizeof(u32)))
769 return;
770
771 head.futex_offset = (unsigned long)u32;
772 futex_offset = head.futex_offset;
773 } else
774#endif
775 if (copyin(&head.futex_offset, &futex_offset, sizeof(unsigned long)))
776 return;
777
778 if (fetch_robust_entry(l, &pending, &head.pending_list, &pip))
779 return;
780
781 while (entry != &head.list) {
782 rc = fetch_robust_entry(l, &next_entry, &entry->next, &next_pi);
783
784 if (entry != pending)
785 if (handle_futex_death((char *)entry + futex_offset,
786 l->l_lid, pi))
787 return;
788
789 if (rc)
790 return;
791
792 entry = next_entry;
793 pi = next_pi;
794
795 if (!--limit)
796 break;
797
798 yield(); /* XXX why? */
799 }
800
801 if (pending)
802 handle_futex_death((char *)pending + futex_offset,
803 l->l_lid, pip);
804}
805