1 | /* $NetBSD: uipc_usrreq.c,v 1.181 2016/10/31 15:05:05 maxv Exp $ */ |
2 | |
3 | /*- |
4 | * Copyright (c) 1998, 2000, 2004, 2008, 2009 The NetBSD Foundation, Inc. |
5 | * All rights reserved. |
6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, |
9 | * NASA Ames Research Center, and by Andrew Doran. |
10 | * |
11 | * Redistribution and use in source and binary forms, with or without |
12 | * modification, are permitted provided that the following conditions |
13 | * are met: |
14 | * 1. Redistributions of source code must retain the above copyright |
15 | * notice, this list of conditions and the following disclaimer. |
16 | * 2. Redistributions in binary form must reproduce the above copyright |
17 | * notice, this list of conditions and the following disclaimer in the |
18 | * documentation and/or other materials provided with the distribution. |
19 | * |
20 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
22 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
23 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
24 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
30 | * POSSIBILITY OF SUCH DAMAGE. |
31 | */ |
32 | |
33 | /* |
34 | * Copyright (c) 1982, 1986, 1989, 1991, 1993 |
35 | * The Regents of the University of California. All rights reserved. |
36 | * |
37 | * Redistribution and use in source and binary forms, with or without |
38 | * modification, are permitted provided that the following conditions |
39 | * are met: |
40 | * 1. Redistributions of source code must retain the above copyright |
41 | * notice, this list of conditions and the following disclaimer. |
42 | * 2. Redistributions in binary form must reproduce the above copyright |
43 | * notice, this list of conditions and the following disclaimer in the |
44 | * documentation and/or other materials provided with the distribution. |
45 | * 3. Neither the name of the University nor the names of its contributors |
46 | * may be used to endorse or promote products derived from this software |
47 | * without specific prior written permission. |
48 | * |
49 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
50 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
51 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
52 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
53 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
54 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
55 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
56 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
57 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
58 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
59 | * SUCH DAMAGE. |
60 | * |
61 | * @(#)uipc_usrreq.c 8.9 (Berkeley) 5/14/95 |
62 | */ |
63 | |
64 | /* |
65 | * Copyright (c) 1997 Christopher G. Demetriou. All rights reserved. |
66 | * |
67 | * Redistribution and use in source and binary forms, with or without |
68 | * modification, are permitted provided that the following conditions |
69 | * are met: |
70 | * 1. Redistributions of source code must retain the above copyright |
71 | * notice, this list of conditions and the following disclaimer. |
72 | * 2. Redistributions in binary form must reproduce the above copyright |
73 | * notice, this list of conditions and the following disclaimer in the |
74 | * documentation and/or other materials provided with the distribution. |
75 | * 3. All advertising materials mentioning features or use of this software |
76 | * must display the following acknowledgement: |
77 | * This product includes software developed by the University of |
78 | * California, Berkeley and its contributors. |
79 | * 4. Neither the name of the University nor the names of its contributors |
80 | * may be used to endorse or promote products derived from this software |
81 | * without specific prior written permission. |
82 | * |
83 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
84 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
85 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
86 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
87 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
88 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
89 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
90 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
91 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
92 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
93 | * SUCH DAMAGE. |
94 | * |
95 | * @(#)uipc_usrreq.c 8.9 (Berkeley) 5/14/95 |
96 | */ |
97 | |
98 | #include <sys/cdefs.h> |
99 | __KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.181 2016/10/31 15:05:05 maxv Exp $" ); |
100 | |
101 | #include <sys/param.h> |
102 | #include <sys/systm.h> |
103 | #include <sys/proc.h> |
104 | #include <sys/filedesc.h> |
105 | #include <sys/domain.h> |
106 | #include <sys/protosw.h> |
107 | #include <sys/socket.h> |
108 | #include <sys/socketvar.h> |
109 | #include <sys/unpcb.h> |
110 | #include <sys/un.h> |
111 | #include <sys/namei.h> |
112 | #include <sys/vnode.h> |
113 | #include <sys/file.h> |
114 | #include <sys/stat.h> |
115 | #include <sys/mbuf.h> |
116 | #include <sys/kauth.h> |
117 | #include <sys/kmem.h> |
118 | #include <sys/atomic.h> |
119 | #include <sys/uidinfo.h> |
120 | #include <sys/kernel.h> |
121 | #include <sys/kthread.h> |
122 | |
123 | #ifdef COMPAT_70 |
124 | #include <compat/sys/socket.h> |
125 | #endif |
126 | |
127 | /* |
128 | * Unix communications domain. |
129 | * |
130 | * TODO: |
131 | * RDM |
132 | * rethink name space problems |
133 | * need a proper out-of-band |
134 | * |
135 | * Notes on locking: |
136 | * |
137 | * The generic rules noted in uipc_socket2.c apply. In addition: |
138 | * |
139 | * o We have a global lock, uipc_lock. |
140 | * |
141 | * o All datagram sockets are locked by uipc_lock. |
142 | * |
143 | * o For stream socketpairs, the two endpoints are created sharing the same |
144 | * independent lock. Sockets presented to PRU_CONNECT2 must already have |
145 | * matching locks. |
146 | * |
147 | * o Stream sockets created via socket() start life with their own |
148 | * independent lock. |
149 | * |
150 | * o Stream connections to a named endpoint are slightly more complicated. |
151 | * Sockets that have called listen() have their lock pointer mutated to |
152 | * the global uipc_lock. When establishing a connection, the connecting |
153 | * socket also has its lock mutated to uipc_lock, which matches the head |
154 | * (listening socket). We create a new socket for accept() to return, and |
155 | * that also shares the head's lock. Until the connection is completely |
156 | * done on both ends, all three sockets are locked by uipc_lock. Once the |
157 | * connection is complete, the association with the head's lock is broken. |
158 | * The connecting socket and the socket returned from accept() have their |
159 | * lock pointers mutated away from uipc_lock, and back to the connecting |
160 | * socket's original, independent lock. The head continues to be locked |
161 | * by uipc_lock. |
162 | * |
163 | * o If uipc_lock is determined to be a significant source of contention, |
164 | * it could easily be hashed out. It is difficult to simply make it an |
165 | * independent lock because of visibility / garbage collection issues: |
166 | * if a socket has been associated with a lock at any point, that lock |
167 | * must remain valid until the socket is no longer visible in the system. |
168 | * The lock must not be freed or otherwise destroyed until any sockets |
169 | * that had referenced it have also been destroyed. |
170 | */ |
/*
 * Address used for a socket that has no bound name: its length covers
 * only the fixed part of sockaddr_un (no path bytes) and its family is
 * AF_LOCAL.  unp_output() and unp_setaddr() fall back to it.
 */
const struct sockaddr_un sun_noname = {
	.sun_len = offsetof(struct sockaddr_un, sun_path),
	.sun_family = AF_LOCAL,
};
ino_t unp_ino; /* next fake inode number; handed out by unp_stat() */

/* Forward declarations of file-local helpers. */
static struct mbuf * unp_addsockcred(struct lwp *, struct mbuf *);
static void unp_discard_later(file_t *);
static void unp_discard_now(file_t *);
static void unp_disconnect1(struct unpcb *);
static bool unp_drop(struct unpcb *, int);
static int unp_internalize(struct mbuf **);
static void unp_mark(file_t *);
static void unp_scan(struct mbuf *, void (*)(file_t *), int);
static void unp_shutdown1(struct unpcb *);
static void unp_thread(void *);
static void unp_thread_kick(void);

/* Domain-wide lock; allocated in uipc_init(). */
static kmutex_t *uipc_lock;

/*
 * unp_thread_cv and unp_thread_lwp are set up in uipc_init() for the
 * "unpgc" kernel thread.
 * NOTE(review): unp_thread_discard and unp_defer are presumably state
 * for that same thread; their users are not visible here -- confirm.
 */
static kcondvar_t unp_thread_cv;
static lwp_t *unp_thread_lwp;
static SLIST_HEAD(,file) unp_thread_discard;
static int unp_defer;
195 | |
196 | /* |
197 | * Initialize Unix protocols. |
198 | */ |
199 | void |
200 | uipc_init(void) |
201 | { |
202 | int error; |
203 | |
204 | uipc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); |
205 | cv_init(&unp_thread_cv, "unpgc" ); |
206 | |
207 | error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, unp_thread, |
208 | NULL, &unp_thread_lwp, "unpgc" ); |
209 | if (error != 0) |
210 | panic("uipc_init %d" , error); |
211 | } |
212 | |
/*
 * A connection succeeded: disassociate both endpoints from the head's
 * lock, and make them share their own lock.  There is a race here: for
 * a very brief time one endpoint will be locked by a different lock
 * than the other end.  However, since the current thread holds the old
 * lock (the listening socket's lock, the head) access can still only be
 * made to one side of the connection.
 *
 * so:  endpoint whose saved unp_streamlock becomes the pair's lock; it
 *      must no longer be on a listen queue (so_head == NULL).
 * so2: the other endpoint; it must not own a stream lock.  If it is
 *      still queued on a head (so_head != NULL) nothing is changed.
 *
 * Both sockets must be locked by the same lock on entry (asserted to be
 * uipc_lock), and the caller must also hold so's unp_streamlock.
 */
static void
unp_setpeerlocks(struct socket *so, struct socket *so2)
{
	struct unpcb *unp;
	kmutex_t *lock;

	KASSERT(solocked2(so, so2));

	/*
	 * Bail out if either end of the socket is not yet fully
	 * connected or accepted.  We only break the lock association
	 * with the head when the pair of sockets stand completely
	 * on their own.
	 */
	KASSERT(so->so_head == NULL);
	if (so2->so_head != NULL)
		return;

	/*
	 * Drop references to old lock.  A third reference (from the
	 * queue head) must be held as we still hold its lock.  Bonus:
	 * we don't need to worry about garbage collecting the lock.
	 * One reference is dropped for each of the two endpoints.
	 */
	lock = so->so_lock;
	KASSERT(lock == uipc_lock);
	mutex_obj_free(lock);
	mutex_obj_free(lock);

	/*
	 * Grab stream lock from the initiator and share between the two
	 * endpoints.  Issue memory barrier to ensure all modifications
	 * become globally visible before the lock change.  so2 is
	 * assumed not to have a stream lock, because it was created
	 * purely for the server side to accept this connection and
	 * started out life using the domain-wide lock.
	 */
	unp = sotounpcb(so);
	KASSERT(unp->unp_streamlock != NULL);
	KASSERT(sotounpcb(so2)->unp_streamlock == NULL);
	lock = unp->unp_streamlock;
	unp->unp_streamlock = NULL;
	/* Take an extra reference on the lock before so2 starts using it. */
	mutex_obj_hold(lock);
	membar_exit();
	/*
	 * possible race if lock is not held - see the comment in
	 * unp_accept().
	 */
	KASSERT(mutex_owned(lock));
	solockreset(so, lock);
	solockreset(so2, lock);
}
272 | |
273 | /* |
274 | * Reset a socket's lock back to the domain-wide lock. |
275 | */ |
276 | static void |
277 | unp_resetlock(struct socket *so) |
278 | { |
279 | kmutex_t *olock, *nlock; |
280 | struct unpcb *unp; |
281 | |
282 | KASSERT(solocked(so)); |
283 | |
284 | olock = so->so_lock; |
285 | nlock = uipc_lock; |
286 | if (olock == nlock) |
287 | return; |
288 | unp = sotounpcb(so); |
289 | KASSERT(unp->unp_streamlock == NULL); |
290 | unp->unp_streamlock = olock; |
291 | mutex_obj_hold(nlock); |
292 | mutex_enter(nlock); |
293 | solockreset(so, nlock); |
294 | mutex_exit(olock); |
295 | } |
296 | |
297 | static void |
298 | unp_free(struct unpcb *unp) |
299 | { |
300 | if (unp->unp_addr) |
301 | free(unp->unp_addr, M_SONAME); |
302 | if (unp->unp_streamlock != NULL) |
303 | mutex_obj_free(unp->unp_streamlock); |
304 | kmem_free(unp, sizeof(*unp)); |
305 | } |
306 | |
/*
 * Deliver a datagram from unp to its connected peer.
 *
 * m:       data to deliver.
 * control: optional control chain (may be NULL); credentials are added
 *          to it when the peer has asked for them.
 * unp:     sending control block.
 *
 * The sender's bound address (or sun_noname if it has none) is recorded
 * with the datagram.  Ownership of m and control always passes to this
 * function: they are either queued on the peer's receive buffer or
 * released here, so the caller must not touch them afterwards.
 *
 * Returns 0 on success, ECONNREFUSED if there is no peer, or ENOBUFS if
 * the peer's receive buffer could not take the datagram.
 */
static int
unp_output(struct mbuf *m, struct mbuf *control, struct unpcb *unp)
{
	struct socket *so2;
	const struct sockaddr_un *sun;

	/* XXX: server side closed the socket */
	if (unp->unp_conn == NULL) {
		/*
		 * Release the mbufs just like the ENOBUFS path below;
		 * the caller does not free them after calling us, so
		 * returning without doing so would leak them.
		 */
		unp_dispose(control);
		m_freem(control);
		m_freem(m);
		return ECONNREFUSED;
	}
	so2 = unp->unp_conn->unp_socket;

	KASSERT(solocked(so2));

	if (unp->unp_addr)
		sun = unp->unp_addr;
	else
		sun = &sun_noname;
	if (unp->unp_conn->unp_flags & UNP_WANTCRED)
		control = unp_addsockcred(curlwp, control);
#ifdef COMPAT_SOCKCRED70
	if (unp->unp_conn->unp_flags & UNP_OWANTCRED)
		control = compat_70_unp_addsockcred(curlwp, control);
#endif
	if (sbappendaddr(&so2->so_rcv, (const struct sockaddr *)sun, m,
	    control) == 0) {
		/* Receiver is full: count the overflow and drop everything. */
		so2->so_rcv.sb_overflowed++;
		unp_dispose(control);
		m_freem(control);
		m_freem(m);
		return (ENOBUFS);
	} else {
		sorwakeup(so2);
		return (0);
	}
}
342 | |
343 | static void |
344 | unp_setaddr(struct socket *so, struct sockaddr *nam, bool peeraddr) |
345 | { |
346 | const struct sockaddr_un *sun = NULL; |
347 | struct unpcb *unp; |
348 | |
349 | KASSERT(solocked(so)); |
350 | unp = sotounpcb(so); |
351 | |
352 | if (peeraddr) { |
353 | if (unp->unp_conn && unp->unp_conn->unp_addr) |
354 | sun = unp->unp_conn->unp_addr; |
355 | } else { |
356 | if (unp->unp_addr) |
357 | sun = unp->unp_addr; |
358 | } |
359 | if (sun == NULL) |
360 | sun = &sun_noname; |
361 | |
362 | memcpy(nam, sun, sun->sun_len); |
363 | } |
364 | |
365 | static int |
366 | unp_rcvd(struct socket *so, int flags, struct lwp *l) |
367 | { |
368 | struct unpcb *unp = sotounpcb(so); |
369 | struct socket *so2; |
370 | u_int newhiwat; |
371 | |
372 | KASSERT(solocked(so)); |
373 | KASSERT(unp != NULL); |
374 | |
375 | switch (so->so_type) { |
376 | |
377 | case SOCK_DGRAM: |
378 | panic("uipc 1" ); |
379 | /*NOTREACHED*/ |
380 | |
381 | case SOCK_SEQPACKET: /* FALLTHROUGH */ |
382 | case SOCK_STREAM: |
383 | #define rcv (&so->so_rcv) |
384 | #define snd (&so2->so_snd) |
385 | if (unp->unp_conn == 0) |
386 | break; |
387 | so2 = unp->unp_conn->unp_socket; |
388 | KASSERT(solocked2(so, so2)); |
389 | /* |
390 | * Adjust backpressure on sender |
391 | * and wakeup any waiting to write. |
392 | */ |
393 | snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt; |
394 | unp->unp_mbcnt = rcv->sb_mbcnt; |
395 | newhiwat = snd->sb_hiwat + unp->unp_cc - rcv->sb_cc; |
396 | (void)chgsbsize(so2->so_uidinfo, |
397 | &snd->sb_hiwat, newhiwat, RLIM_INFINITY); |
398 | unp->unp_cc = rcv->sb_cc; |
399 | sowwakeup(so2); |
400 | #undef snd |
401 | #undef rcv |
402 | break; |
403 | |
404 | default: |
405 | panic("uipc 2" ); |
406 | } |
407 | |
408 | return 0; |
409 | } |
410 | |
411 | static int |
412 | unp_recvoob(struct socket *so, struct mbuf *m, int flags) |
413 | { |
414 | KASSERT(solocked(so)); |
415 | |
416 | return EOPNOTSUPP; |
417 | } |
418 | |
/*
 * Send data (and optional control information) on a Unix-domain socket.
 *
 * so:      sending socket; locked on entry and on return.
 * m:       data to send (must not be NULL).
 * nam:     destination address, or NULL to use the existing connection.
 *          Only consulted for SOCK_DGRAM.
 * control: optional control chain; it is passed through
 *          unp_internalize() with the socket lock dropped.
 * l:       sending lwp, used for connecting and for credentials.
 *
 * For SOCK_DGRAM with nam set, the socket is connected for the duration
 * of the send and disconnected afterwards; EISCONN is returned if it is
 * already connected, and ENOTCONN if nam is NULL and it is not connected.
 * For SOCK_STREAM/SOCK_SEQPACKET the data is appended to the peer's
 * receive buffer and the sender's buffer limits are reduced by the
 * amount now queued at the peer; ENOTCONN is returned without a peer.
 *
 * On the error returns that happen before delivery, m and control are
 * freed here.  Returns 0 or an errno value.
 */
static int
unp_send(struct socket *so, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, struct lwp *l)
{
	struct unpcb *unp = sotounpcb(so);
	int error = 0;
	u_int newhiwat;
	struct socket *so2;

	KASSERT(solocked(so));
	KASSERT(unp != NULL);
	KASSERT(m != NULL);

	/*
	 * Note: unp_internalize() rejects any control message
	 * other than SCM_RIGHTS, and only allows one.  This
	 * has the side-effect of preventing a caller from
	 * forging SCM_CREDS.
	 */
	if (control) {
		/* The socket lock is dropped around unp_internalize(). */
		sounlock(so);
		error = unp_internalize(&control);
		solock(so);
		if (error != 0) {
			m_freem(control);
			m_freem(m);
			return error;
		}
	}

	switch (so->so_type) {

	case SOCK_DGRAM: {
		KASSERT(so->so_lock == uipc_lock);
		if (nam) {
			if ((so->so_state & SS_ISCONNECTED) != 0)
				error = EISCONN;
			else {
				/*
				 * Note: once connected, the
				 * socket's lock must not be
				 * dropped until we have sent
				 * the message and disconnected.
				 * This is necessary to prevent
				 * intervening control ops, like
				 * another connection.
				 */
				error = unp_connect(so, nam, l);
			}
		} else {
			if ((so->so_state & SS_ISCONNECTED) == 0)
				error = ENOTCONN;
		}
		if (error) {
			/* Control was internalized above: dispose, then free. */
			unp_dispose(control);
			m_freem(control);
			m_freem(m);
			return error;
		}
		error = unp_output(m, control, unp);
		/* Undo the temporary connection made for this datagram. */
		if (nam)
			unp_disconnect1(unp);
		break;
	}

	case SOCK_SEQPACKET: /* FALLTHROUGH */
	case SOCK_STREAM:
#define rcv (&so2->so_rcv)
#define snd (&so->so_snd)
		if (unp->unp_conn == NULL) {
			error = ENOTCONN;
			break;
		}
		so2 = unp->unp_conn->unp_socket;
		KASSERT(solocked2(so, so2));
		if (unp->unp_conn->unp_flags & UNP_WANTCRED) {
			/*
			 * Credentials are passed only once on
			 * SOCK_STREAM and SOCK_SEQPACKET.
			 */
			unp->unp_conn->unp_flags &= ~UNP_WANTCRED;
			control = unp_addsockcred(l, control);
		}
#ifdef COMPAT_SOCKCRED70
		if (unp->unp_conn->unp_flags & UNP_OWANTCRED) {
			/*
			 * Credentials are passed only once on
			 * SOCK_STREAM and SOCK_SEQPACKET.
			 */
			unp->unp_conn->unp_flags &= ~UNP_OWANTCRED;
			control = compat_70_unp_addsockcred(l, control);
		}
#endif
		/*
		 * Send to paired receive port, and then reduce
		 * send buffer hiwater marks to maintain backpressure.
		 * Wake up readers.
		 */
		if (control) {
			/*
			 * A non-zero return means the buffer took the
			 * control chain; clear the pointer so it is not
			 * released below.
			 * NOTE(review): when sbappendcontrol() returns 0,
			 * only control is released below; m is not freed
			 * on this path and error stays 0 -- confirm
			 * whether sbappendcontrol() can fail here.
			 */
			if (sbappendcontrol(rcv, m, control) != 0)
				control = NULL;
		} else {
			switch(so->so_type) {
			case SOCK_SEQPACKET:
				sbappendrecord(rcv, m);
				break;
			case SOCK_STREAM:
				sbappend(rcv, m);
				break;
			default:
				panic("uipc_usrreq" );
				break;
			}
		}
		/*
		 * Charge the sender for what the peer's receive buffer
		 * gained since the values last recorded in the peer's
		 * unp_mbcnt / unp_cc.
		 */
		snd->sb_mbmax -=
		    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
		unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
		newhiwat = snd->sb_hiwat -
		    (rcv->sb_cc - unp->unp_conn->unp_cc);
		(void)chgsbsize(so->so_uidinfo,
		    &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
		unp->unp_conn->unp_cc = rcv->sb_cc;
		sorwakeup(so2);
#undef snd
#undef rcv
		/* Control that was not queued is disposed of and freed. */
		if (control != NULL) {
			unp_dispose(control);
			m_freem(control);
		}
		break;

	default:
		panic("uipc 4" );
	}

	return error;
}
556 | |
557 | static int |
558 | unp_sendoob(struct socket *so, struct mbuf *m, struct mbuf * control) |
559 | { |
560 | KASSERT(solocked(so)); |
561 | |
562 | m_freem(m); |
563 | m_freem(control); |
564 | |
565 | return EOPNOTSUPP; |
566 | } |
567 | |
/*
 * Unix domain socket option processing.
 *
 * op:   PRCO_SETOPT or PRCO_GETOPT; any other value does nothing and
 *       yields 0.
 * so:   socket, locked on entry and on return.
 * sopt: option descriptor; only level 0 is accepted.
 *
 * Settable: LOCAL_CREDS, LOCAL_CONNWAIT (and LOCAL_OCREDS under
 * COMPAT_SOCKCRED70); each sets or clears a bit in unp_flags.
 * Gettable: LOCAL_PEEREID, LOCAL_CREDS (and LOCAL_OCREDS under
 * COMPAT_SOCKCRED70).
 *
 * Returns 0 on success, ENOPROTOOPT for an unknown level or option,
 * EINVAL for LOCAL_PEEREID when the peer ids are not valid, or the
 * error from the sockopt helper.
 */
int
uipc_ctloutput(int op, struct socket *so, struct sockopt *sopt)
{
	struct unpcb *unp = sotounpcb(so);
	int optval = 0, error = 0;

	KASSERT(solocked(so));

	if (sopt->sopt_level != 0) {
		error = ENOPROTOOPT;
	} else switch (op) {

	case PRCO_SETOPT:
		switch (sopt->sopt_name) {
		case LOCAL_CREDS:
		case LOCAL_CONNWAIT:
#ifdef COMPAT_SOCKCRED70
		case LOCAL_OCREDS:
#endif
			/* All settable options take one int, used as a boolean. */
			error = sockopt_getint(sopt, &optval);
			if (error)
				break;
			switch (sopt->sopt_name) {
#define OPTSET(bit) \
	if (optval) \
		unp->unp_flags |= (bit); \
	else \
		unp->unp_flags &= ~(bit);

			case LOCAL_CREDS:
				OPTSET(UNP_WANTCRED);
				break;
			case LOCAL_CONNWAIT:
				OPTSET(UNP_CONNWAIT);
				break;
#ifdef COMPAT_SOCKCRED70
			case LOCAL_OCREDS:
				OPTSET(UNP_OWANTCRED);
				break;
#endif
			}
			break;
#undef OPTSET

		default:
			error = ENOPROTOOPT;
			break;
		}
		break;

	case PRCO_GETOPT:
		/* The socket lock is dropped while the reply is built. */
		sounlock(so);
		switch (sopt->sopt_name) {
		case LOCAL_PEEREID:
			/* Only meaningful once the peer ids have been recorded. */
			if (unp->unp_flags & UNP_EIDSVALID) {
				error = sockopt_set(sopt,
				    &unp->unp_connid, sizeof(unp->unp_connid));
			} else {
				error = EINVAL;
			}
			break;
		case LOCAL_CREDS:
#define OPTBIT(bit) (unp->unp_flags & (bit) ? 1 : 0)

			optval = OPTBIT(UNP_WANTCRED);
			error = sockopt_setint(sopt, optval);
			break;
#ifdef COMPAT_SOCKCRED70
		case LOCAL_OCREDS:
			optval = OPTBIT(UNP_OWANTCRED);
			error = sockopt_setint(sopt, optval);
			break;
#endif
#undef OPTBIT

		default:
			error = ENOPROTOOPT;
			break;
		}
		solock(so);
		break;
	}
	return (error);
}
655 | |
/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 *
 * unp_attach() passes these values to soreserve(): the unpst_* pair for
 * SOCK_STREAM and SOCK_SEQPACKET sockets, the unpdg_* pair for
 * SOCK_DGRAM sockets.
 */
#define PIPSIZ 4096
u_long unpst_sendspace = PIPSIZ;
u_long unpst_recvspace = PIPSIZ;
u_long unpdg_sendspace = 2*1024; /* really max datagram size */
u_long unpdg_recvspace = 4*1024;

/* A non-zero unp_rights makes unp_detach() flush and kick the GC thread. */
u_int unp_rights; /* files in flight */
u_int unp_rights_ratio = 2; /* limit, fraction of maxfiles */
672 | |
673 | static int |
674 | unp_attach(struct socket *so, int proto) |
675 | { |
676 | struct unpcb *unp = sotounpcb(so); |
677 | u_long sndspc, rcvspc; |
678 | int error; |
679 | |
680 | KASSERT(unp == NULL); |
681 | |
682 | switch (so->so_type) { |
683 | case SOCK_SEQPACKET: |
684 | /* FALLTHROUGH */ |
685 | case SOCK_STREAM: |
686 | if (so->so_lock == NULL) { |
687 | so->so_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); |
688 | solock(so); |
689 | } |
690 | sndspc = unpst_sendspace; |
691 | rcvspc = unpst_recvspace; |
692 | break; |
693 | |
694 | case SOCK_DGRAM: |
695 | if (so->so_lock == NULL) { |
696 | mutex_obj_hold(uipc_lock); |
697 | so->so_lock = uipc_lock; |
698 | solock(so); |
699 | } |
700 | sndspc = unpdg_sendspace; |
701 | rcvspc = unpdg_recvspace; |
702 | break; |
703 | |
704 | default: |
705 | panic("unp_attach" ); |
706 | } |
707 | |
708 | if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { |
709 | error = soreserve(so, sndspc, rcvspc); |
710 | if (error) { |
711 | return error; |
712 | } |
713 | } |
714 | |
715 | unp = kmem_zalloc(sizeof(*unp), KM_SLEEP); |
716 | nanotime(&unp->unp_ctime); |
717 | unp->unp_socket = so; |
718 | so->so_pcb = unp; |
719 | |
720 | KASSERT(solocked(so)); |
721 | return 0; |
722 | } |
723 | |
/*
 * Tear down a socket's protocol control block.
 *
 * The socket must be locked.  Any bound vnode is detached and released,
 * the connection to a peer is broken, every control block on the
 * unp_refs list is dropped with ECONNRESET, and finally the control
 * block is freed.  If any file rights are in flight (unp_rights != 0),
 * the receive buffer is flushed first and the garbage collector thread
 * is kicked.
 */
static void
unp_detach(struct socket *so)
{
	struct unpcb *unp;
	vnode_t *vp;

	unp = sotounpcb(so);
	KASSERT(unp != NULL);
	KASSERT(solocked(so));
 retry:
	if ((vp = unp->unp_vnode) != NULL) {
		/* The socket lock is dropped while the vnode is released. */
		sounlock(so);
		/* Acquire v_interlock to protect against unp_connect(). */
		/* XXXAD racy */
		mutex_enter(vp->v_interlock);
		vp->v_socket = NULL;
		mutex_exit(vp->v_interlock);
		vrele(vp);
		solock(so);
		unp->unp_vnode = NULL;
	}
	if (unp->unp_conn)
		unp_disconnect1(unp);
	while (unp->unp_refs) {
		KASSERT(solocked2(so, unp->unp_refs->unp_socket));
		/*
		 * NOTE(review): a true return from unp_drop() presumably
		 * means the lock was released along the way; it is taken
		 * again here and the teardown restarts -- confirm against
		 * unp_drop().
		 */
		if (unp_drop(unp->unp_refs, ECONNRESET)) {
			solock(so);
			goto retry;
		}
	}
	soisdisconnected(so);
	so->so_pcb = NULL;
	if (unp_rights) {
		/*
		 * Normally the receive buffer is flushed later, in sofree,
		 * but if our receive buffer holds references to files that
		 * are now garbage, we will enqueue those file references to
		 * the garbage collector and kick it into action.
		 */
		sorflush(so);
		unp_free(unp);
		unp_thread_kick();
	} else
		unp_free(unp);
}
769 | |
/*
 * Complete an accept on a socket taken from a listen queue.
 *
 * so:  the accepted socket; locked by uipc_lock on entry.
 * nam: receives the peer's address (sun_noname if the peer has none or
 *      is already gone).
 *
 * If the peer is still marked SS_ISCONNECTING it is marked connected
 * here, and the pair is then moved from uipc_lock to the peer's saved
 * stream lock via unp_setpeerlocks().
 *
 * Returns 0, or EINVAL if the socket has no control block.
 */
static int
unp_accept(struct socket *so, struct sockaddr *nam)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	KASSERT(solocked(so));
	KASSERT(nam != NULL);

	/* XXX code review required to determine if unp can ever be NULL */
	if (unp == NULL)
		return EINVAL;

	KASSERT(so->so_lock == uipc_lock);
	/*
	 * Mark the initiating STREAM socket as connected *ONLY*
	 * after it's been accepted.  This prevents a client from
	 * overrunning a server and receiving ECONNREFUSED.
	 */
	if (unp->unp_conn == NULL) {
		/*
		 * This will use the empty socket and will not
		 * allocate.
		 */
		unp_setaddr(so, nam, true);
		return 0;
	}
	so2 = unp->unp_conn->unp_socket;
	if (so2->so_state & SS_ISCONNECTING) {
		KASSERT(solocked2(so, so->so_head));
		KASSERT(solocked2(so2, so->so_head));
		soisconnected(so2);
	}
	/*
	 * If the connection is fully established, break the
	 * association with uipc_lock and give the connected
	 * pair a separate lock to share.
	 * There is a race here: sotounpcb(so2)->unp_streamlock
	 * is not locked, so when changing so2->so_lock
	 * another thread can grab it while so->so_lock is still
	 * pointing to the (locked) uipc_lock.
	 * This should be harmless, except that this makes
	 * solocked2() and solocked() unreliable.
	 * Another problem is that unp_setaddr() expects the
	 * socket locked.  Grabbing sotounpcb(so2)->unp_streamlock
	 * fixes both issues.
	 */
	mutex_enter(sotounpcb(so2)->unp_streamlock);
	unp_setpeerlocks(so2, so);
	/*
	 * Only now return peer's address, as we may need to
	 * block in order to allocate memory.
	 *
	 * XXX Minor race: connection can be broken while
	 * lock is dropped in unp_setaddr().  We will return
	 * error == 0 and sun_noname as the peer address.
	 */
	unp_setaddr(so, nam, true);
	/* so_lock now points to unp_streamlock */
	mutex_exit(so2->so_lock);
	return 0;
}
832 | |
833 | static int |
834 | unp_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp) |
835 | { |
836 | return EOPNOTSUPP; |
837 | } |
838 | |
839 | static int |
840 | unp_stat(struct socket *so, struct stat *ub) |
841 | { |
842 | struct unpcb *unp; |
843 | struct socket *so2; |
844 | |
845 | KASSERT(solocked(so)); |
846 | |
847 | unp = sotounpcb(so); |
848 | if (unp == NULL) |
849 | return EINVAL; |
850 | |
851 | ub->st_blksize = so->so_snd.sb_hiwat; |
852 | switch (so->so_type) { |
853 | case SOCK_SEQPACKET: /* FALLTHROUGH */ |
854 | case SOCK_STREAM: |
855 | if (unp->unp_conn == 0) |
856 | break; |
857 | |
858 | so2 = unp->unp_conn->unp_socket; |
859 | KASSERT(solocked2(so, so2)); |
860 | ub->st_blksize += so2->so_rcv.sb_cc; |
861 | break; |
862 | default: |
863 | break; |
864 | } |
865 | ub->st_dev = NODEV; |
866 | if (unp->unp_ino == 0) |
867 | unp->unp_ino = unp_ino++; |
868 | ub->st_atimespec = ub->st_mtimespec = ub->st_ctimespec = unp->unp_ctime; |
869 | ub->st_ino = unp->unp_ino; |
870 | return (0); |
871 | } |
872 | |
873 | static int |
874 | unp_peeraddr(struct socket *so, struct sockaddr *nam) |
875 | { |
876 | KASSERT(solocked(so)); |
877 | KASSERT(sotounpcb(so) != NULL); |
878 | KASSERT(nam != NULL); |
879 | |
880 | unp_setaddr(so, nam, true); |
881 | return 0; |
882 | } |
883 | |
884 | static int |
885 | unp_sockaddr(struct socket *so, struct sockaddr *nam) |
886 | { |
887 | KASSERT(solocked(so)); |
888 | KASSERT(sotounpcb(so) != NULL); |
889 | KASSERT(nam != NULL); |
890 | |
891 | unp_setaddr(so, nam, false); |
892 | return 0; |
893 | } |
894 | |
895 | /* |
896 | * we only need to perform this allocation until syscalls other than |
897 | * bind are adjusted to use sockaddr_big. |
898 | */ |
899 | static struct sockaddr_un * |
900 | makeun_sb(struct sockaddr *nam, size_t *addrlen) |
901 | { |
902 | struct sockaddr_un *sun; |
903 | |
904 | *addrlen = nam->sa_len + 1; |
905 | sun = malloc(*addrlen, M_SONAME, M_WAITOK); |
906 | memcpy(sun, nam, nam->sa_len); |
907 | *(((char *)sun) + nam->sa_len) = '\0'; |
908 | return sun; |
909 | } |
910 | |
/*
 * Bind a socket to a path in the file system by creating a VSOCK vnode.
 *
 * so:  socket, locked on entry and on return.
 * nam: address holding the path to bind to.
 * l:   calling lwp; supplies the umask and the recorded pid/euid/egid.
 *
 * The socket lock is dropped for the name lookup and vnode creation;
 * UNP_BUSY is set for that period so that a concurrent bind is refused.
 *
 * Returns 0 on success, EINVAL if the socket is already bound to a
 * vnode, EALREADY if another bind is in progress, ENOMEM if the path
 * buffer cannot be created, EADDRINUSE if the path already exists, or
 * the error from namei() / VOP_CREATE().
 */
static int
unp_bind(struct socket *so, struct sockaddr *nam, struct lwp *l)
{
	struct sockaddr_un *sun;
	struct unpcb *unp;
	vnode_t *vp;
	struct vattr vattr;
	size_t addrlen;
	int error;
	struct pathbuf *pb;
	struct nameidata nd;
	proc_t *p;

	unp = sotounpcb(so);

	KASSERT(solocked(so));
	KASSERT(unp != NULL);
	KASSERT(nam != NULL);

	if (unp->unp_vnode != NULL)
		return (EINVAL);
	if ((unp->unp_flags & UNP_BUSY) != 0) {
		/*
		 * EALREADY may not be strictly accurate, but since this
		 * is a major application error it's hardly a big deal.
		 */
		return (EALREADY);
	}
	/* Mark the control block busy, then drop the lock for the lookup. */
	unp->unp_flags |= UNP_BUSY;
	sounlock(so);

	p = l->l_proc;
	/* NUL-terminated private copy of the address; freed at "bad". */
	sun = makeun_sb(nam, &addrlen);

	pb = pathbuf_create(sun->sun_path);
	if (pb == NULL) {
		error = ENOMEM;
		goto bad;
	}
	NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT | TRYEMULROOT, pb);

	/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	if ((error = namei(&nd)) != 0) {
		pathbuf_destroy(pb);
		goto bad;
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* The name already exists: abort the create and clean up. */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		pathbuf_destroy(pb);
		error = EADDRINUSE;
		goto bad;
	}
	vattr_null(&vattr);
	vattr.va_type = VSOCK;
	/* Permissions are ACCESSPERMS with the process umask bits cleared. */
	vattr.va_mode = ACCESSPERMS & ~(p->p_cwdi->cwdi_cmask);
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	if (error) {
		vput(nd.ni_dvp);
		pathbuf_destroy(pb);
		goto bad;
	}
	vp = nd.ni_vp;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	solock(so);
	/* Link vnode and socket; the control block now owns sun. */
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_addrlen = addrlen;
	unp->unp_addr = sun;
	/* Record the binder's pid and effective ids. */
	unp->unp_connid.unp_pid = p->p_pid;
	unp->unp_connid.unp_euid = kauth_cred_geteuid(l->l_cred);
	unp->unp_connid.unp_egid = kauth_cred_getegid(l->l_cred);
	unp->unp_flags |= UNP_EIDSBIND;
	VOP_UNLOCK(vp);
	vput(nd.ni_dvp);
	unp->unp_flags &= ~UNP_BUSY;
	pathbuf_destroy(pb);
	return (0);

 bad:
	/* Common failure exit: free the copy, retake the lock, clear busy. */
	free(sun, M_SONAME);
	solock(so);
	unp->unp_flags &= ~UNP_BUSY;
	return (error);
}
1001 | |
1002 | static int |
1003 | unp_listen(struct socket *so, struct lwp *l) |
1004 | { |
1005 | struct unpcb *unp = sotounpcb(so); |
1006 | |
1007 | KASSERT(solocked(so)); |
1008 | KASSERT(unp != NULL); |
1009 | |
1010 | /* |
1011 | * If the socket can accept a connection, it must be |
1012 | * locked by uipc_lock. |
1013 | */ |
1014 | unp_resetlock(so); |
1015 | if (unp->unp_vnode == NULL) |
1016 | return EINVAL; |
1017 | |
1018 | return 0; |
1019 | } |
1020 | |
/*
 * pr_disconnect entry point: hand the socket's pcb to unp_disconnect1().
 * Called with the socket lock held; always returns 0.
 */
static int
unp_disconnect(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	KASSERT(solocked(so));
	KASSERT(unp != NULL);

	unp_disconnect1(unp);
	return 0;
}
1030 | |
/*
 * pr_shutdown entry point: mark the socket as unable to send any more,
 * then let unp_shutdown1() deal with the peer.  Called with the socket
 * lock held; always returns 0.
 */
static int
unp_shutdown(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	KASSERT(solocked(so));
	KASSERT(unp != NULL);

	socantsendmore(so);
	unp_shutdown1(unp);
	return 0;
}
1041 | |
1042 | static int |
1043 | unp_abort(struct socket *so) |
1044 | { |
1045 | KASSERT(solocked(so)); |
1046 | KASSERT(sotounpcb(so) != NULL); |
1047 | |
1048 | (void)unp_drop(sotounpcb(so), ECONNABORTED); |
1049 | KASSERT(so->so_head == NULL); |
1050 | KASSERT(so->so_pcb != NULL); |
1051 | unp_detach(so); |
1052 | return 0; |
1053 | } |
1054 | |
1055 | static int |
1056 | unp_connect1(struct socket *so, struct socket *so2, struct lwp *l) |
1057 | { |
1058 | struct unpcb *unp = sotounpcb(so); |
1059 | struct unpcb *unp2; |
1060 | |
1061 | if (so2->so_type != so->so_type) |
1062 | return EPROTOTYPE; |
1063 | |
1064 | /* |
1065 | * All three sockets involved must be locked by same lock: |
1066 | * |
1067 | * local endpoint (so) |
1068 | * remote endpoint (so2) |
1069 | * queue head (so2->so_head, only if PR_CONNREQUIRED) |
1070 | */ |
1071 | KASSERT(solocked2(so, so2)); |
1072 | KASSERT(so->so_head == NULL); |
1073 | if (so2->so_head != NULL) { |
1074 | KASSERT(so2->so_lock == uipc_lock); |
1075 | KASSERT(solocked2(so2, so2->so_head)); |
1076 | } |
1077 | |
1078 | unp2 = sotounpcb(so2); |
1079 | unp->unp_conn = unp2; |
1080 | |
1081 | if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) { |
1082 | unp2->unp_connid.unp_pid = l->l_proc->p_pid; |
1083 | unp2->unp_connid.unp_euid = kauth_cred_geteuid(l->l_cred); |
1084 | unp2->unp_connid.unp_egid = kauth_cred_getegid(l->l_cred); |
1085 | unp2->unp_flags |= UNP_EIDSVALID; |
1086 | if (unp2->unp_flags & UNP_EIDSBIND) { |
1087 | unp->unp_connid = unp2->unp_connid; |
1088 | unp->unp_flags |= UNP_EIDSVALID; |
1089 | } |
1090 | } |
1091 | |
1092 | switch (so->so_type) { |
1093 | |
1094 | case SOCK_DGRAM: |
1095 | unp->unp_nextref = unp2->unp_refs; |
1096 | unp2->unp_refs = unp; |
1097 | soisconnected(so); |
1098 | break; |
1099 | |
1100 | case SOCK_SEQPACKET: /* FALLTHROUGH */ |
1101 | case SOCK_STREAM: |
1102 | |
1103 | /* |
1104 | * SOCK_SEQPACKET and SOCK_STREAM cases are handled by callers |
1105 | * which are unp_connect() or unp_connect2(). |
1106 | */ |
1107 | |
1108 | break; |
1109 | |
1110 | default: |
1111 | panic("unp_connect1" ); |
1112 | } |
1113 | |
1114 | return 0; |
1115 | } |
1116 | |
/*
 * Connect the local socket so to the socket bound at the filesystem
 * path carried in nam.
 *
 * The socket lock on so is dropped while the path is looked up and is
 * held again on return.  UNP_BUSY is set on the pcb for that window; a
 * caller that finds it already set gets EALREADY.
 *
 * For PR_CONNREQUIRED protocols the target must have SO_ACCEPTCONN set;
 * a new socket is created on it with sonewconn() and that socket
 * becomes the peer.
 *
 * Returns 0 on success, otherwise ENOMEM (no pathbuf), the namei() or
 * VOP_ACCESS() error, ENOTSOCK (path is not a VSOCK vnode),
 * ECONNREFUSED (no socket behind the vnode, target not accepting, or
 * sonewconn() failed), EPROTOTYPE (socket types differ) or the error
 * from unp_connect1().
 */
int
unp_connect(struct socket *so, struct sockaddr *nam, struct lwp *l)
{
	struct sockaddr_un *sun;
	vnode_t *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	struct unpcb *lunp = NULL;	/* pcb of the listening socket */
	size_t addrlen;
	int error;
	struct pathbuf *pb;
	struct nameidata nd;

	unp = sotounpcb(so);
	if ((unp->unp_flags & UNP_BUSY) != 0) {
		/*
		 * EALREADY may not be strictly accurate, but since this
		 * is a major application error it's hardly a big deal.
		 */
		return (EALREADY);
	}
	unp->unp_flags |= UNP_BUSY;
	sounlock(so);

	sun = makeun_sb(nam, &addrlen);
	pb = pathbuf_create(sun->sun_path);
	if (pb == NULL) {
		error = ENOMEM;
		goto bad2;
	}

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);

	if ((error = namei(&nd)) != 0) {
		pathbuf_destroy(pb);
		goto bad2;
	}
	vp = nd.ni_vp;
	pathbuf_destroy(pb);
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	if ((error = VOP_ACCESS(vp, VWRITE, l->l_cred)) != 0)
		goto bad;
	/* Acquire v_interlock to protect against unp_detach(). */
	mutex_enter(vp->v_interlock);
	so2 = vp->v_socket;
	if (so2 == NULL) {
		mutex_exit(vp->v_interlock);
		error = ECONNREFUSED;
		goto bad;
	}
	if (so->so_type != so2->so_type) {
		mutex_exit(vp->v_interlock);
		error = EPROTOTYPE;
		goto bad;
	}
	solock(so);
	unp_resetlock(so);
	mutex_exit(vp->v_interlock);
	if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) {
		/*
		 * This may seem somewhat fragile but is OK: if we can
		 * see SO_ACCEPTCONN set on the endpoint, then it must
		 * be locked by the domain-wide uipc_lock.
		 */
		KASSERT((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    so2->so_lock == uipc_lock);
		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, false)) == NULL) {
			error = ECONNREFUSED;
			sounlock(so);
			goto bad;
		}
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		/* The new socket inherits a copy of the listener's address. */
		if (unp2->unp_addr) {
			unp3->unp_addr = malloc(unp2->unp_addrlen,
			    M_SONAME, M_WAITOK);
			memcpy(unp3->unp_addr, unp2->unp_addr,
			    unp2->unp_addrlen);
			unp3->unp_addrlen = unp2->unp_addrlen;
		}
		unp3->unp_flags = unp2->unp_flags;
		/* Remember the listener; so2 is about to be redirected. */
		lunp = unp2;
		so2 = so3;
	}
	error = unp_connect1(so, so2, l);
	if (error) {
		sounlock(so);
		goto bad;
	}
	/*
	 * unp_connect1() stamps the connecting LWP's ids on the new
	 * server-side pcb and, because that pcb inherited UNP_EIDSBIND
	 * above, copies those same ids back into unp.  The ids the
	 * connector should see for its peer are the ones unp_bind()
	 * recorded on the listening pcb, so install those instead.
	 */
	if (lunp != NULL && (lunp->unp_flags & UNP_EIDSBIND) != 0) {
		unp->unp_connid = lunp->unp_connid;
		unp->unp_flags |= UNP_EIDSVALID;
	}
	unp2 = sotounpcb(so2);
	switch (so->so_type) {

	/*
	 * SOCK_DGRAM and default cases are handled in prior call to
	 * unp_connect1(), do not add a default case without fixing
	 * unp_connect1().
	 */

	case SOCK_SEQPACKET: /* FALLTHROUGH */
	case SOCK_STREAM:
		unp2->unp_conn = unp;
		if ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT)
			soisconnecting(so);
		else
			soisconnected(so);
		soisconnected(so2);
		/*
		 * If the connection is fully established, break the
		 * association with uipc_lock and give the connected
		 * pair a separate lock to share.
		 */
		KASSERT(so2->so_head != NULL);
		unp_setpeerlocks(so, so2);
		break;

	}
	sounlock(so);
	/* Success falls through the cleanup below with error == 0. */
 bad:
	vput(vp);
 bad2:
	free(sun, M_SONAME);
	solock(so);
	unp->unp_flags &= ~UNP_BUSY;
	return (error);
}
1244 | |
1245 | int |
1246 | unp_connect2(struct socket *so, struct socket *so2) |
1247 | { |
1248 | struct unpcb *unp = sotounpcb(so); |
1249 | struct unpcb *unp2; |
1250 | int error = 0; |
1251 | |
1252 | KASSERT(solocked2(so, so2)); |
1253 | |
1254 | error = unp_connect1(so, so2, curlwp); |
1255 | if (error) |
1256 | return error; |
1257 | |
1258 | unp2 = sotounpcb(so2); |
1259 | switch (so->so_type) { |
1260 | |
1261 | /* |
1262 | * SOCK_DGRAM and default cases are handled in prior call to |
1263 | * unp_connect1(), do not add a default case without fixing |
1264 | * unp_connect1(). |
1265 | */ |
1266 | |
1267 | case SOCK_SEQPACKET: /* FALLTHROUGH */ |
1268 | case SOCK_STREAM: |
1269 | unp2->unp_conn = unp; |
1270 | if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) { |
1271 | unp->unp_connid = unp2->unp_connid; |
1272 | unp->unp_flags |= UNP_EIDSVALID; |
1273 | } |
1274 | soisconnected(so); |
1275 | soisconnected(so2); |
1276 | break; |
1277 | |
1278 | } |
1279 | return error; |
1280 | } |
1281 | |
1282 | static void |
1283 | unp_disconnect1(struct unpcb *unp) |
1284 | { |
1285 | struct unpcb *unp2 = unp->unp_conn; |
1286 | struct socket *so; |
1287 | |
1288 | if (unp2 == 0) |
1289 | return; |
1290 | unp->unp_conn = 0; |
1291 | so = unp->unp_socket; |
1292 | switch (so->so_type) { |
1293 | case SOCK_DGRAM: |
1294 | if (unp2->unp_refs == unp) |
1295 | unp2->unp_refs = unp->unp_nextref; |
1296 | else { |
1297 | unp2 = unp2->unp_refs; |
1298 | for (;;) { |
1299 | KASSERT(solocked2(so, unp2->unp_socket)); |
1300 | if (unp2 == 0) |
1301 | panic("unp_disconnect1" ); |
1302 | if (unp2->unp_nextref == unp) |
1303 | break; |
1304 | unp2 = unp2->unp_nextref; |
1305 | } |
1306 | unp2->unp_nextref = unp->unp_nextref; |
1307 | } |
1308 | unp->unp_nextref = 0; |
1309 | so->so_state &= ~SS_ISCONNECTED; |
1310 | break; |
1311 | |
1312 | case SOCK_SEQPACKET: /* FALLTHROUGH */ |
1313 | case SOCK_STREAM: |
1314 | KASSERT(solocked2(so, unp2->unp_socket)); |
1315 | soisdisconnected(so); |
1316 | unp2->unp_conn = 0; |
1317 | soisdisconnected(unp2->unp_socket); |
1318 | break; |
1319 | } |
1320 | } |
1321 | |
1322 | static void |
1323 | unp_shutdown1(struct unpcb *unp) |
1324 | { |
1325 | struct socket *so; |
1326 | |
1327 | switch(unp->unp_socket->so_type) { |
1328 | case SOCK_SEQPACKET: /* FALLTHROUGH */ |
1329 | case SOCK_STREAM: |
1330 | if (unp->unp_conn && (so = unp->unp_conn->unp_socket)) |
1331 | socantrcvmore(so); |
1332 | break; |
1333 | default: |
1334 | break; |
1335 | } |
1336 | } |
1337 | |
1338 | static bool |
1339 | unp_drop(struct unpcb *unp, int errno) |
1340 | { |
1341 | struct socket *so = unp->unp_socket; |
1342 | |
1343 | KASSERT(solocked(so)); |
1344 | |
1345 | so->so_error = errno; |
1346 | unp_disconnect1(unp); |
1347 | if (so->so_head) { |
1348 | so->so_pcb = NULL; |
1349 | /* sofree() drops the socket lock */ |
1350 | sofree(so); |
1351 | unp_free(unp); |
1352 | return true; |
1353 | } |
1354 | return false; |
1355 | } |
1356 | |
/*
 * Empty placeholder; only compiled if `notdef' is defined.
 */
#ifdef notdef
unp_drain(void)
{

}
#endif
1363 | |
/*
 * Convert the file_t pointers carried in the control message `rights'
 * into file descriptors in the process of LWP `l', rewriting the
 * message in place so that it carries ints instead of pointers.
 *
 * flags: if O_CLOEXEC is set, each new descriptor is marked
 * close-on-exec.
 *
 * Returns 0 on success.  On failure returns EINVAL (a NULL file
 * pointer in the message), EPERM (a directory outside the recipient's
 * root directory) or EMSGSIZE (descriptor allocation failed for a
 * reason other than ENOSPC); in that case every file in the message is
 * discarded and the message is truncated to carry no data.
 */
int
unp_externalize(struct mbuf *rights, struct lwp *l, int flags)
{
	struct cmsghdr * const cm = mtod(rights, struct cmsghdr *);
	struct proc * const p = l->l_proc;
	file_t **rp;
	int error = 0;

	/* Number of file_t pointers that follow the aligned header. */
	const size_t nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(file_t *);
	if (nfds == 0)
		goto noop;

	/* Scratch array for the descriptor numbers allocated below. */
	int * const fdp = kmem_alloc(nfds * sizeof(int), KM_SLEEP);
	rw_enter(&p->p_cwdi->cwdi_lock, RW_READER);

	/* Make sure the recipient should be able to see the files.. */
	rp = (file_t **)CMSG_DATA(cm);
	for (size_t i = 0; i < nfds; i++) {
		file_t * const fp = *rp++;
		if (fp == NULL) {
			error = EINVAL;
			goto out;
		}
		/*
		 * If we are in a chroot'ed directory, and
		 * someone wants to pass us a directory, make
		 * sure it's inside the subtree we're allowed
		 * to access.
		 */
		if (p->p_cwdi->cwdi_rdir != NULL && fp->f_type == DTYPE_VNODE) {
			vnode_t *vp = fp->f_vnode;
			if ((vp->v_type == VDIR) &&
			    !vn_isunder(vp, p->p_cwdi->cwdi_rdir, l)) {
				error = EPERM;
				goto out;
			}
		}
	}

 restart:
	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new files.
	 */
	for (size_t i = 0; i < nfds; i++) {
		if ((error = fd_alloc(p, 0, &fdp[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			while (i-- > 0) {
				fd_abort(p, NULL, fdp[i]);
			}
			/* ENOSPC: try to expand the table and start over. */
			if (error == ENOSPC) {
				fd_tryexpand(p);
				error = 0;
				goto restart;
			}
			/*
			 * This is the error that has historically
			 * been returned, and some callers may
			 * expect it.
			 */
			error = EMSGSIZE;
			goto out;
		}
	}

	/*
	 * Now that adding them has succeeded, update all of the
	 * file passing state and affix the descriptors.
	 */
	rp = (file_t **)CMSG_DATA(cm);
	/* Descriptors are written over the same data area, from its start. */
	int *ofdp = (int *)CMSG_DATA(cm);
	for (size_t i = 0; i < nfds; i++) {
		file_t * const fp = *rp++;
		const int fd = fdp[i];
		atomic_dec_uint(&unp_rights);
		fd_set_exclose(l, fd, (flags & O_CLOEXEC) != 0);
		fd_affix(p, fp, fd);
		/*
		 * Done with this file pointer, replace it with a fd;
		 */
		*ofdp++ = fd;
		mutex_enter(&fp->f_lock);
		fp->f_msgcount--;
		mutex_exit(&fp->f_lock);
		/*
		 * Note that fd_affix() adds a reference to the file.
		 * The file may already have been closed by another
		 * LWP in the process, so we must drop the reference
		 * added by unp_internalize() with closef().
		 */
		closef(fp);
	}

	/*
	 * Adjust length, in case of transition from large file_t
	 * pointers to ints.
	 */
	if (sizeof(file_t *) != sizeof(int)) {
		cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
		rights->m_len = CMSG_SPACE(nfds * sizeof(int));
	}
 out:
	if (__predict_false(error != 0)) {
		/* Failure: discard every file carried by the message. */
		file_t **const fpp = (file_t **)CMSG_DATA(cm);
		for (size_t i = 0; i < nfds; i++)
			unp_discard_now(fpp[i]);
		/*
		 * Truncate the array so that nobody will try to interpret
		 * what is now garbage in it.
		 */
		cm->cmsg_len = CMSG_LEN(0);
		rights->m_len = CMSG_SPACE(0);
	}
	rw_exit(&p->p_cwdi->cwdi_lock);
	kmem_free(fdp, nfds * sizeof(int));

 noop:
	/*
	 * Don't disclose kernel memory in the alignment space.
	 */
	KASSERT(cm->cmsg_len <= rights->m_len);
	memset(&mtod(rights, char *)[cm->cmsg_len], 0, rights->m_len -
	    cm->cmsg_len);
	return error;
}
1492 | |
1493 | static int |
1494 | unp_internalize(struct mbuf **controlp) |
1495 | { |
1496 | filedesc_t *fdescp = curlwp->l_fd; |
1497 | struct mbuf *control = *controlp; |
1498 | struct cmsghdr *newcm, *cm = mtod(control, struct cmsghdr *); |
1499 | file_t **rp, **files; |
1500 | file_t *fp; |
1501 | int i, fd, *fdp; |
1502 | int nfds, error; |
1503 | u_int maxmsg; |
1504 | |
1505 | error = 0; |
1506 | newcm = NULL; |
1507 | |
1508 | /* Sanity check the control message header. */ |
1509 | if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || |
1510 | cm->cmsg_len > control->m_len || |
1511 | cm->cmsg_len < CMSG_ALIGN(sizeof(*cm))) |
1512 | return (EINVAL); |
1513 | |
1514 | /* |
1515 | * Verify that the file descriptors are valid, and acquire |
1516 | * a reference to each. |
1517 | */ |
1518 | nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int); |
1519 | fdp = (int *)CMSG_DATA(cm); |
1520 | maxmsg = maxfiles / unp_rights_ratio; |
1521 | for (i = 0; i < nfds; i++) { |
1522 | fd = *fdp++; |
1523 | if (atomic_inc_uint_nv(&unp_rights) > maxmsg) { |
1524 | atomic_dec_uint(&unp_rights); |
1525 | nfds = i; |
1526 | error = EAGAIN; |
1527 | goto out; |
1528 | } |
1529 | if ((fp = fd_getfile(fd)) == NULL |
1530 | || fp->f_type == DTYPE_KQUEUE) { |
1531 | if (fp) |
1532 | fd_putfile(fd); |
1533 | atomic_dec_uint(&unp_rights); |
1534 | nfds = i; |
1535 | error = EBADF; |
1536 | goto out; |
1537 | } |
1538 | } |
1539 | |
1540 | /* Allocate new space and copy header into it. */ |
1541 | newcm = malloc(CMSG_SPACE(nfds * sizeof(file_t *)), M_MBUF, M_WAITOK); |
1542 | if (newcm == NULL) { |
1543 | error = E2BIG; |
1544 | goto out; |
1545 | } |
1546 | memcpy(newcm, cm, sizeof(struct cmsghdr)); |
1547 | files = (file_t **)CMSG_DATA(newcm); |
1548 | |
1549 | /* |
1550 | * Transform the file descriptors into file_t pointers, in |
1551 | * reverse order so that if pointers are bigger than ints, the |
1552 | * int won't get until we're done. No need to lock, as we have |
1553 | * already validated the descriptors with fd_getfile(). |
1554 | */ |
1555 | fdp = (int *)CMSG_DATA(cm) + nfds; |
1556 | rp = files + nfds; |
1557 | for (i = 0; i < nfds; i++) { |
1558 | fp = fdescp->fd_dt->dt_ff[*--fdp]->ff_file; |
1559 | KASSERT(fp != NULL); |
1560 | mutex_enter(&fp->f_lock); |
1561 | *--rp = fp; |
1562 | fp->f_count++; |
1563 | fp->f_msgcount++; |
1564 | mutex_exit(&fp->f_lock); |
1565 | } |
1566 | |
1567 | out: |
1568 | /* Release descriptor references. */ |
1569 | fdp = (int *)CMSG_DATA(cm); |
1570 | for (i = 0; i < nfds; i++) { |
1571 | fd_putfile(*fdp++); |
1572 | if (error != 0) { |
1573 | atomic_dec_uint(&unp_rights); |
1574 | } |
1575 | } |
1576 | |
1577 | if (error == 0) { |
1578 | if (control->m_flags & M_EXT) { |
1579 | m_freem(control); |
1580 | *controlp = control = m_get(M_WAIT, MT_CONTROL); |
1581 | } |
1582 | MEXTADD(control, newcm, CMSG_SPACE(nfds * sizeof(file_t *)), |
1583 | M_MBUF, NULL, NULL); |
1584 | cm = newcm; |
1585 | /* |
1586 | * Adjust message & mbuf to note amount of space |
1587 | * actually used. |
1588 | */ |
1589 | cm->cmsg_len = CMSG_LEN(nfds * sizeof(file_t *)); |
1590 | control->m_len = CMSG_SPACE(nfds * sizeof(file_t *)); |
1591 | } |
1592 | |
1593 | return error; |
1594 | } |
1595 | |
1596 | struct mbuf * |
1597 | unp_addsockcred(struct lwp *l, struct mbuf *control) |
1598 | { |
1599 | struct sockcred *sc; |
1600 | struct mbuf *m; |
1601 | void *p; |
1602 | |
1603 | m = sbcreatecontrol1(&p, SOCKCREDSIZE(kauth_cred_ngroups(l->l_cred)), |
1604 | SCM_CREDS, SOL_SOCKET, M_WAITOK); |
1605 | if (m == NULL) |
1606 | return control; |
1607 | |
1608 | sc = p; |
1609 | sc->sc_pid = l->l_proc->p_pid; |
1610 | sc->sc_uid = kauth_cred_getuid(l->l_cred); |
1611 | sc->sc_euid = kauth_cred_geteuid(l->l_cred); |
1612 | sc->sc_gid = kauth_cred_getgid(l->l_cred); |
1613 | sc->sc_egid = kauth_cred_getegid(l->l_cred); |
1614 | sc->sc_ngroups = kauth_cred_ngroups(l->l_cred); |
1615 | |
1616 | for (int i = 0; i < sc->sc_ngroups; i++) |
1617 | sc->sc_groups[i] = kauth_cred_group(l->l_cred, i); |
1618 | |
1619 | return m_add(control, m); |
1620 | } |
1621 | |
/*
 * Do a mark-sweep GC of files in the system, to free up any which are
 * caught in flight to an about-to-be-closed socket.  Additionally,
 * process deferred file closures.
 *
 * dp is a dummy file that is temporarily linked into the global file
 * list to remember the scan position whenever filelist_lock has to be
 * dropped.
 *
 * Must be called from the GC thread with filelist_lock held; the lock
 * is dropped and re-taken internally and is held again on return.
 */
static void
unp_gc(file_t *dp)
{
	extern struct domain unixdomain;
	file_t *fp, *np;
	struct socket *so, *so1;
	u_int i, oflags, rflags;
	bool didwork;

	KASSERT(curlwp == unp_thread_lwp);
	KASSERT(mutex_owned(&filelist_lock));

	/*
	 * First, process deferred file closures.
	 */
	while (!SLIST_EMPTY(&unp_thread_discard)) {
		fp = SLIST_FIRST(&unp_thread_discard);
		KASSERT(fp->f_unpcount > 0);
		KASSERT(fp->f_count > 0);
		KASSERT(fp->f_msgcount > 0);
		KASSERT(fp->f_count >= fp->f_unpcount);
		KASSERT(fp->f_count >= fp->f_msgcount);
		KASSERT(fp->f_msgcount >= fp->f_unpcount);
		SLIST_REMOVE_HEAD(&unp_thread_discard, f_unplist);
		/* Take over the pending count, then discard that many times. */
		i = fp->f_unpcount;
		fp->f_unpcount = 0;
		mutex_exit(&filelist_lock);
		for (; i != 0; i--) {
			unp_discard_now(fp);
		}
		mutex_enter(&filelist_lock);
	}

	/*
	 * Clear mark bits.  Ensure that we don't consider new files
	 * entering the file table during this loop (they will not have
	 * FSCAN set).
	 */
	unp_defer = 0;
	LIST_FOREACH(fp, &filehead, f_list) {
		/* Compare-and-swap loop: retry until f_flag is updated. */
		for (oflags = fp->f_flag;; oflags = rflags) {
			rflags = atomic_cas_uint(&fp->f_flag, oflags,
			    (oflags | FSCAN) & ~(FMARK|FDEFER));
			if (__predict_true(oflags == rflags)) {
				break;
			}
		}
	}

	/*
	 * Iterate over the set of sockets, marking ones believed (based on
	 * refcount) to be referenced from a process, and marking for rescan
	 * sockets which are queued on a socket.  Rescan continues descending
	 * and searching for sockets referenced by sockets (FDEFER), until
	 * there are no more socket->socket references to be discovered.
	 */
	do {
		didwork = false;
		for (fp = LIST_FIRST(&filehead); fp != NULL; fp = np) {
			KASSERT(mutex_owned(&filelist_lock));
			np = LIST_NEXT(fp, f_list);
			mutex_enter(&fp->f_lock);
			if ((fp->f_flag & FDEFER) != 0) {
				/* Deferred by unp_mark(): process it now. */
				atomic_and_uint(&fp->f_flag, ~FDEFER);
				unp_defer--;
				if (fp->f_count == 0) {
					/*
					 * XXX: closef() doesn't pay attention
					 * to FDEFER
					 */
					mutex_exit(&fp->f_lock);
					continue;
				}
			} else {
				/*
				 * Skip files that are dead, already marked,
				 * referenced only from messages, or pending
				 * a deferred discard.
				 */
				if (fp->f_count == 0 ||
				    (fp->f_flag & FMARK) != 0 ||
				    fp->f_count == fp->f_msgcount ||
				    fp->f_unpcount != 0) {
					mutex_exit(&fp->f_lock);
					continue;
				}
			}
			atomic_or_uint(&fp->f_flag, FMARK);

			/* Only local-domain sockets that pass rights matter. */
			if (fp->f_type != DTYPE_SOCKET ||
			    (so = fp->f_socket) == NULL ||
			    so->so_proto->pr_domain != &unixdomain ||
			    (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
				mutex_exit(&fp->f_lock);
				continue;
			}

			/* Gain file ref, mark our position, and unlock. */
			didwork = true;
			LIST_INSERT_AFTER(fp, dp, f_list);
			fp->f_count++;
			mutex_exit(&fp->f_lock);
			mutex_exit(&filelist_lock);

			/*
			 * Mark files referenced from sockets queued on the
			 * accept queue as well.
			 */
			solock(so);
			unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
			if ((so->so_options & SO_ACCEPTCONN) != 0) {
				TAILQ_FOREACH(so1, &so->so_q0, so_qe) {
					unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
				}
				TAILQ_FOREACH(so1, &so->so_q, so_qe) {
					unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
				}
			}
			sounlock(so);

			/* Re-lock and restart from where we left off. */
			closef(fp);
			mutex_enter(&filelist_lock);
			np = LIST_NEXT(dp, f_list);
			LIST_REMOVE(dp, f_list);
		}
		/*
		 * Bail early if we did nothing in the loop above.  Could
		 * happen because of concurrent activity causing unp_defer
		 * to get out of sync.
		 */
	} while (unp_defer != 0 && didwork);

	/*
	 * Sweep pass.
	 *
	 * We grab an extra reference to each of the files that are
	 * not otherwise accessible and then free the rights that are
	 * stored in messages on them.
	 */
	for (fp = LIST_FIRST(&filehead); fp != NULL; fp = np) {
		KASSERT(mutex_owned(&filelist_lock));
		np = LIST_NEXT(fp, f_list);
		mutex_enter(&fp->f_lock);

		/*
		 * Ignore non-sockets.
		 * Ignore dead sockets, or sockets with pending close.
		 * Ignore sockets obviously referenced elsewhere.
		 * Ignore sockets marked as referenced by our scan.
		 * Ignore new sockets that did not exist during the scan.
		 */
		if (fp->f_type != DTYPE_SOCKET ||
		    fp->f_count == 0 || fp->f_unpcount != 0 ||
		    fp->f_count != fp->f_msgcount ||
		    (fp->f_flag & (FMARK | FSCAN)) != FSCAN) {
			mutex_exit(&fp->f_lock);
			continue;
		}

		/* Gain file ref, mark our position, and unlock. */
		LIST_INSERT_AFTER(fp, dp, f_list);
		fp->f_count++;
		mutex_exit(&fp->f_lock);
		mutex_exit(&filelist_lock);

		/*
		 * Flush all data from the socket's receive buffer.
		 * This will cause files referenced only by the
		 * socket to be queued for close.
		 */
		so = fp->f_socket;
		solock(so);
		sorflush(so);
		sounlock(so);

		/* Re-lock and restart from where we left off. */
		closef(fp);
		mutex_enter(&filelist_lock);
		np = LIST_NEXT(dp, f_list);
		LIST_REMOVE(dp, f_list);
	}
}
1805 | |
1806 | /* |
1807 | * Garbage collector thread. While SCM_RIGHTS messages are in transit, |
1808 | * wake once per second to garbage collect. Run continually while we |
1809 | * have deferred closes to process. |
1810 | */ |
1811 | static void |
1812 | unp_thread(void *cookie) |
1813 | { |
1814 | file_t *dp; |
1815 | |
1816 | /* Allocate a dummy file for our scans. */ |
1817 | if ((dp = fgetdummy()) == NULL) { |
1818 | panic("unp_thread" ); |
1819 | } |
1820 | |
1821 | mutex_enter(&filelist_lock); |
1822 | for (;;) { |
1823 | KASSERT(mutex_owned(&filelist_lock)); |
1824 | if (SLIST_EMPTY(&unp_thread_discard)) { |
1825 | if (unp_rights != 0) { |
1826 | (void)cv_timedwait(&unp_thread_cv, |
1827 | &filelist_lock, hz); |
1828 | } else { |
1829 | cv_wait(&unp_thread_cv, &filelist_lock); |
1830 | } |
1831 | } |
1832 | unp_gc(dp); |
1833 | } |
1834 | /* NOTREACHED */ |
1835 | } |
1836 | |
1837 | /* |
1838 | * Kick the garbage collector into action if there is something for |
1839 | * it to process. |
1840 | */ |
1841 | static void |
1842 | unp_thread_kick(void) |
1843 | { |
1844 | |
1845 | if (!SLIST_EMPTY(&unp_thread_discard) || unp_rights != 0) { |
1846 | mutex_enter(&filelist_lock); |
1847 | cv_signal(&unp_thread_cv); |
1848 | mutex_exit(&filelist_lock); |
1849 | } |
1850 | } |
1851 | |
1852 | void |
1853 | unp_dispose(struct mbuf *m) |
1854 | { |
1855 | |
1856 | if (m) |
1857 | unp_scan(m, unp_discard_later, 1); |
1858 | } |
1859 | |
1860 | void |
1861 | unp_scan(struct mbuf *m0, void (*op)(file_t *), int discard) |
1862 | { |
1863 | struct mbuf *m; |
1864 | file_t **rp, *fp; |
1865 | struct cmsghdr *cm; |
1866 | int i, qfds; |
1867 | |
1868 | while (m0) { |
1869 | for (m = m0; m; m = m->m_next) { |
1870 | if (m->m_type != MT_CONTROL || |
1871 | m->m_len < sizeof(*cm)) { |
1872 | continue; |
1873 | } |
1874 | cm = mtod(m, struct cmsghdr *); |
1875 | if (cm->cmsg_level != SOL_SOCKET || |
1876 | cm->cmsg_type != SCM_RIGHTS) |
1877 | continue; |
1878 | qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) |
1879 | / sizeof(file_t *); |
1880 | rp = (file_t **)CMSG_DATA(cm); |
1881 | for (i = 0; i < qfds; i++) { |
1882 | fp = *rp; |
1883 | if (discard) { |
1884 | *rp = 0; |
1885 | } |
1886 | (*op)(fp); |
1887 | rp++; |
1888 | } |
1889 | } |
1890 | m0 = m0->m_nextpkt; |
1891 | } |
1892 | } |
1893 | |
1894 | void |
1895 | unp_mark(file_t *fp) |
1896 | { |
1897 | |
1898 | if (fp == NULL) |
1899 | return; |
1900 | |
1901 | /* If we're already deferred, don't screw up the defer count */ |
1902 | mutex_enter(&fp->f_lock); |
1903 | if (fp->f_flag & (FMARK | FDEFER)) { |
1904 | mutex_exit(&fp->f_lock); |
1905 | return; |
1906 | } |
1907 | |
1908 | /* |
1909 | * Minimize the number of deferrals... Sockets are the only type of |
1910 | * file which can hold references to another file, so just mark |
1911 | * other files, and defer unmarked sockets for the next pass. |
1912 | */ |
1913 | if (fp->f_type == DTYPE_SOCKET) { |
1914 | unp_defer++; |
1915 | KASSERT(fp->f_count != 0); |
1916 | atomic_or_uint(&fp->f_flag, FDEFER); |
1917 | } else { |
1918 | atomic_or_uint(&fp->f_flag, FMARK); |
1919 | } |
1920 | mutex_exit(&fp->f_lock); |
1921 | } |
1922 | |
1923 | static void |
1924 | unp_discard_now(file_t *fp) |
1925 | { |
1926 | |
1927 | if (fp == NULL) |
1928 | return; |
1929 | |
1930 | KASSERT(fp->f_count > 0); |
1931 | KASSERT(fp->f_msgcount > 0); |
1932 | |
1933 | mutex_enter(&fp->f_lock); |
1934 | fp->f_msgcount--; |
1935 | mutex_exit(&fp->f_lock); |
1936 | atomic_dec_uint(&unp_rights); |
1937 | (void)closef(fp); |
1938 | } |
1939 | |
1940 | static void |
1941 | unp_discard_later(file_t *fp) |
1942 | { |
1943 | |
1944 | if (fp == NULL) |
1945 | return; |
1946 | |
1947 | KASSERT(fp->f_count > 0); |
1948 | KASSERT(fp->f_msgcount > 0); |
1949 | |
1950 | mutex_enter(&filelist_lock); |
1951 | if (fp->f_unpcount++ == 0) { |
1952 | SLIST_INSERT_HEAD(&unp_thread_discard, fp, f_unplist); |
1953 | } |
1954 | mutex_exit(&filelist_lock); |
1955 | } |
1956 | |
/*
 * User-request dispatch table: maps each pr_* request slot to the
 * unp_* function that implements it.
 */
const struct pr_usrreqs unp_usrreqs = {
	.pr_attach	= unp_attach,
	.pr_detach	= unp_detach,
	.pr_accept	= unp_accept,
	.pr_bind	= unp_bind,
	.pr_listen	= unp_listen,
	.pr_connect	= unp_connect,
	.pr_connect2	= unp_connect2,
	.pr_disconnect	= unp_disconnect,
	.pr_shutdown	= unp_shutdown,
	.pr_abort	= unp_abort,
	.pr_ioctl	= unp_ioctl,
	.pr_stat	= unp_stat,
	.pr_peeraddr	= unp_peeraddr,
	.pr_sockaddr	= unp_sockaddr,
	.pr_rcvd	= unp_rcvd,
	.pr_recvoob	= unp_recvoob,
	.pr_send	= unp_send,
	.pr_sendoob	= unp_sendoob,
};
1977 | |