1 | /* $NetBSD: nfs_srvsocket.c,v 1.4 2009/09/03 20:59:12 tls Exp $ */ |
2 | |
3 | /* |
4 | * Copyright (c) 1989, 1991, 1993, 1995 |
5 | * The Regents of the University of California. All rights reserved. |
6 | * |
7 | * This code is derived from software contributed to Berkeley by |
8 | * Rick Macklem at The University of Guelph. |
9 | * |
10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions |
12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. |
18 | * 3. Neither the name of the University nor the names of its contributors |
19 | * may be used to endorse or promote products derived from this software |
20 | * without specific prior written permission. |
21 | * |
22 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
23 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
24 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
25 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
26 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
27 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
28 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
29 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
30 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
31 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
32 | * SUCH DAMAGE. |
33 | * |
34 | * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 |
35 | */ |
36 | |
37 | /* |
38 | * Socket operations for use by nfs |
39 | */ |
40 | |
41 | #include <sys/cdefs.h> |
42 | __KERNEL_RCSID(0, "$NetBSD: nfs_srvsocket.c,v 1.4 2009/09/03 20:59:12 tls Exp $" ); |
43 | |
44 | #include <sys/param.h> |
45 | #include <sys/systm.h> |
46 | #include <sys/evcnt.h> |
47 | #include <sys/callout.h> |
48 | #include <sys/proc.h> |
49 | #include <sys/mount.h> |
50 | #include <sys/kernel.h> |
51 | #include <sys/kmem.h> |
52 | #include <sys/mbuf.h> |
53 | #include <sys/vnode.h> |
54 | #include <sys/domain.h> |
55 | #include <sys/protosw.h> |
56 | #include <sys/socket.h> |
57 | #include <sys/socketvar.h> |
58 | #include <sys/syslog.h> |
59 | #include <sys/tprintf.h> |
60 | #include <sys/namei.h> |
61 | #include <sys/signal.h> |
62 | #include <sys/signalvar.h> |
63 | #include <sys/kauth.h> |
64 | |
65 | #include <netinet/in.h> |
66 | #include <netinet/tcp.h> |
67 | |
68 | #include <nfs/rpcv2.h> |
69 | #include <nfs/nfsproto.h> |
70 | #include <nfs/nfs.h> |
71 | #include <nfs/xdr_subs.h> |
72 | #include <nfs/nfsm_subs.h> |
73 | #include <nfs/nfsmount.h> |
74 | #include <nfs/nfsnode.h> |
75 | #include <nfs/nfsrtt.h> |
76 | #include <nfs/nfs_var.h> |
77 | |
78 | static void nfsrv_wakenfsd_locked(struct nfssvc_sock *); |
79 | |
80 | int (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *, |
81 | struct nfssvc_sock *, struct lwp *, |
82 | struct mbuf **) = { |
83 | nfsrv_null, |
84 | nfsrv_getattr, |
85 | nfsrv_setattr, |
86 | nfsrv_lookup, |
87 | nfsrv3_access, |
88 | nfsrv_readlink, |
89 | nfsrv_read, |
90 | nfsrv_write, |
91 | nfsrv_create, |
92 | nfsrv_mkdir, |
93 | nfsrv_symlink, |
94 | nfsrv_mknod, |
95 | nfsrv_remove, |
96 | nfsrv_rmdir, |
97 | nfsrv_rename, |
98 | nfsrv_link, |
99 | nfsrv_readdir, |
100 | nfsrv_readdirplus, |
101 | nfsrv_statfs, |
102 | nfsrv_fsinfo, |
103 | nfsrv_pathconf, |
104 | nfsrv_commit, |
105 | nfsrv_noop |
106 | }; |
107 | |
108 | /* |
109 | * Socket upcall routine for the nfsd sockets. |
110 | * The void *arg is a pointer to the "struct nfssvc_sock". |
111 | */ |
112 | void |
113 | nfsrv_soupcall(struct socket *so, void *arg, int events, int waitflag) |
114 | { |
115 | struct nfssvc_sock *slp = (struct nfssvc_sock *)arg; |
116 | |
117 | nfsdsock_setbits(slp, SLP_A_NEEDQ); |
118 | nfsrv_wakenfsd(slp); |
119 | } |
120 | |
121 | void |
122 | nfsrv_rcv(struct nfssvc_sock *slp) |
123 | { |
124 | struct socket *so; |
125 | struct mbuf *m; |
126 | struct mbuf *mp, *nam; |
127 | struct uio auio; |
128 | int flags; |
129 | int error; |
130 | int setflags = 0; |
131 | |
132 | error = nfsdsock_lock(slp, true); |
133 | if (error) { |
134 | setflags |= SLP_A_NEEDQ; |
135 | goto dorecs_unlocked; |
136 | } |
137 | |
138 | nfsdsock_clearbits(slp, SLP_A_NEEDQ); |
139 | |
140 | so = slp->ns_so; |
141 | if (so->so_type == SOCK_STREAM) { |
142 | /* |
143 | * Do soreceive(). |
144 | */ |
145 | auio.uio_resid = 1000000000; |
146 | /* not need to setup uio_vmspace */ |
147 | flags = MSG_DONTWAIT; |
148 | error = (*so->so_receive)(so, &nam, &auio, &mp, NULL, &flags); |
149 | if (error || mp == NULL) { |
150 | if (error == EWOULDBLOCK) |
151 | setflags |= SLP_A_NEEDQ; |
152 | else |
153 | setflags |= SLP_A_DISCONN; |
154 | goto dorecs; |
155 | } |
156 | m = mp; |
157 | m_claimm(m, &nfs_mowner); |
158 | if (slp->ns_rawend) { |
159 | slp->ns_rawend->m_next = m; |
160 | slp->ns_cc += 1000000000 - auio.uio_resid; |
161 | } else { |
162 | slp->ns_raw = m; |
163 | slp->ns_cc = 1000000000 - auio.uio_resid; |
164 | } |
165 | while (m->m_next) |
166 | m = m->m_next; |
167 | slp->ns_rawend = m; |
168 | |
169 | /* |
170 | * Now try and parse record(s) out of the raw stream data. |
171 | */ |
172 | error = nfsrv_getstream(slp, M_WAIT); |
173 | if (error) { |
174 | if (error == EPERM) |
175 | setflags |= SLP_A_DISCONN; |
176 | else |
177 | setflags |= SLP_A_NEEDQ; |
178 | } |
179 | } else { |
180 | do { |
181 | auio.uio_resid = 1000000000; |
182 | /* not need to setup uio_vmspace */ |
183 | flags = MSG_DONTWAIT; |
184 | error = (*so->so_receive)(so, &nam, &auio, &mp, NULL, |
185 | &flags); |
186 | if (mp) { |
187 | if (nam) { |
188 | m = nam; |
189 | m->m_next = mp; |
190 | } else |
191 | m = mp; |
192 | m_claimm(m, &nfs_mowner); |
193 | if (slp->ns_recend) |
194 | slp->ns_recend->m_nextpkt = m; |
195 | else |
196 | slp->ns_rec = m; |
197 | slp->ns_recend = m; |
198 | m->m_nextpkt = (struct mbuf *)0; |
199 | } |
200 | if (error) { |
201 | if ((so->so_proto->pr_flags & PR_CONNREQUIRED) |
202 | && error != EWOULDBLOCK) { |
203 | setflags |= SLP_A_DISCONN; |
204 | goto dorecs; |
205 | } |
206 | } |
207 | } while (mp); |
208 | } |
209 | dorecs: |
210 | nfsdsock_unlock(slp); |
211 | |
212 | dorecs_unlocked: |
213 | if (setflags) { |
214 | nfsdsock_setbits(slp, setflags); |
215 | } |
216 | } |
217 | |
218 | int |
219 | nfsdsock_lock(struct nfssvc_sock *slp, bool waitok) |
220 | { |
221 | |
222 | mutex_enter(&slp->ns_lock); |
223 | while ((~slp->ns_flags & (SLP_BUSY|SLP_VALID)) == 0) { |
224 | if (!waitok) { |
225 | mutex_exit(&slp->ns_lock); |
226 | return EWOULDBLOCK; |
227 | } |
228 | cv_wait(&slp->ns_cv, &slp->ns_lock); |
229 | } |
230 | if ((slp->ns_flags & SLP_VALID) == 0) { |
231 | mutex_exit(&slp->ns_lock); |
232 | return EINVAL; |
233 | } |
234 | KASSERT((slp->ns_flags & SLP_BUSY) == 0); |
235 | slp->ns_flags |= SLP_BUSY; |
236 | mutex_exit(&slp->ns_lock); |
237 | |
238 | return 0; |
239 | } |
240 | |
241 | void |
242 | nfsdsock_unlock(struct nfssvc_sock *slp) |
243 | { |
244 | |
245 | mutex_enter(&slp->ns_lock); |
246 | KASSERT((slp->ns_flags & SLP_BUSY) != 0); |
247 | cv_broadcast(&slp->ns_cv); |
248 | slp->ns_flags &= ~SLP_BUSY; |
249 | mutex_exit(&slp->ns_lock); |
250 | } |
251 | |
252 | int |
253 | nfsdsock_drain(struct nfssvc_sock *slp) |
254 | { |
255 | int error = 0; |
256 | |
257 | mutex_enter(&slp->ns_lock); |
258 | if ((slp->ns_flags & SLP_VALID) == 0) { |
259 | error = EINVAL; |
260 | goto done; |
261 | } |
262 | slp->ns_flags &= ~SLP_VALID; |
263 | while ((slp->ns_flags & SLP_BUSY) != 0) { |
264 | cv_wait(&slp->ns_cv, &slp->ns_lock); |
265 | } |
266 | done: |
267 | mutex_exit(&slp->ns_lock); |
268 | |
269 | return error; |
270 | } |
271 | |
272 | /* |
273 | * Try and extract an RPC request from the mbuf data list received on a |
274 | * stream socket. The "waitflag" argument indicates whether or not it |
275 | * can sleep. |
276 | */ |
277 | int |
278 | nfsrv_getstream(struct nfssvc_sock *slp, int waitflag) |
279 | { |
280 | struct mbuf *m, **mpp; |
281 | struct mbuf *recm; |
282 | u_int32_t recmark; |
283 | int error = 0; |
284 | |
285 | KASSERT((slp->ns_flags & SLP_BUSY) != 0); |
286 | for (;;) { |
287 | if (slp->ns_reclen == 0) { |
288 | if (slp->ns_cc < NFSX_UNSIGNED) { |
289 | break; |
290 | } |
291 | m = slp->ns_raw; |
292 | m_copydata(m, 0, NFSX_UNSIGNED, (void *)&recmark); |
293 | m_adj(m, NFSX_UNSIGNED); |
294 | slp->ns_cc -= NFSX_UNSIGNED; |
295 | recmark = ntohl(recmark); |
296 | slp->ns_reclen = recmark & ~0x80000000; |
297 | if (recmark & 0x80000000) |
298 | slp->ns_sflags |= SLP_S_LASTFRAG; |
299 | else |
300 | slp->ns_sflags &= ~SLP_S_LASTFRAG; |
301 | if (slp->ns_reclen > NFS_MAXPACKET) { |
302 | error = EPERM; |
303 | break; |
304 | } |
305 | } |
306 | |
307 | /* |
308 | * Now get the record part. |
309 | * |
310 | * Note that slp->ns_reclen may be 0. Linux sometimes |
311 | * generates 0-length records. |
312 | */ |
313 | if (slp->ns_cc == slp->ns_reclen) { |
314 | recm = slp->ns_raw; |
315 | slp->ns_raw = slp->ns_rawend = (struct mbuf *)0; |
316 | slp->ns_cc = slp->ns_reclen = 0; |
317 | } else if (slp->ns_cc > slp->ns_reclen) { |
318 | recm = slp->ns_raw; |
319 | m = m_split(recm, slp->ns_reclen, waitflag); |
320 | if (m == NULL) { |
321 | error = EWOULDBLOCK; |
322 | break; |
323 | } |
324 | m_claimm(recm, &nfs_mowner); |
325 | slp->ns_raw = m; |
326 | if (m->m_next == NULL) |
327 | slp->ns_rawend = m; |
328 | slp->ns_cc -= slp->ns_reclen; |
329 | slp->ns_reclen = 0; |
330 | } else { |
331 | break; |
332 | } |
333 | |
334 | /* |
335 | * Accumulate the fragments into a record. |
336 | */ |
337 | mpp = &slp->ns_frag; |
338 | while (*mpp) |
339 | mpp = &((*mpp)->m_next); |
340 | *mpp = recm; |
341 | if (slp->ns_sflags & SLP_S_LASTFRAG) { |
342 | if (slp->ns_recend) |
343 | slp->ns_recend->m_nextpkt = slp->ns_frag; |
344 | else |
345 | slp->ns_rec = slp->ns_frag; |
346 | slp->ns_recend = slp->ns_frag; |
347 | slp->ns_frag = NULL; |
348 | } |
349 | } |
350 | |
351 | return error; |
352 | } |
353 | |
354 | /* |
355 | * Parse an RPC header. |
356 | */ |
357 | int |
358 | nfsrv_dorec(struct nfssvc_sock *slp, struct nfsd *nfsd, |
359 | struct nfsrv_descript **ndp, bool *more) |
360 | { |
361 | struct mbuf *m, *nam; |
362 | struct nfsrv_descript *nd; |
363 | int error; |
364 | |
365 | *ndp = NULL; |
366 | *more = false; |
367 | |
368 | if (nfsdsock_lock(slp, true)) { |
369 | return ENOBUFS; |
370 | } |
371 | m = slp->ns_rec; |
372 | if (m == NULL) { |
373 | nfsdsock_unlock(slp); |
374 | return ENOBUFS; |
375 | } |
376 | slp->ns_rec = m->m_nextpkt; |
377 | if (slp->ns_rec) { |
378 | m->m_nextpkt = NULL; |
379 | *more = true; |
380 | } else { |
381 | slp->ns_recend = NULL; |
382 | } |
383 | nfsdsock_unlock(slp); |
384 | |
385 | if (m->m_type == MT_SONAME) { |
386 | nam = m; |
387 | m = m->m_next; |
388 | nam->m_next = NULL; |
389 | } else |
390 | nam = NULL; |
391 | nd = nfsdreq_alloc(); |
392 | nd->nd_md = nd->nd_mrep = m; |
393 | nd->nd_nam2 = nam; |
394 | nd->nd_dpos = mtod(m, void *); |
395 | error = nfs_getreq(nd, nfsd, true); |
396 | if (error) { |
397 | m_freem(nam); |
398 | nfsdreq_free(nd); |
399 | return (error); |
400 | } |
401 | *ndp = nd; |
402 | nfsd->nfsd_nd = nd; |
403 | return (0); |
404 | } |
405 | |
406 | bool |
407 | nfsrv_timer(void) |
408 | { |
409 | struct timeval tv; |
410 | struct nfssvc_sock *slp; |
411 | u_quad_t cur_usec; |
412 | struct nfsrv_descript *nd; |
413 | bool more; |
414 | |
415 | /* |
416 | * Scan the write gathering queues for writes that need to be |
417 | * completed now. |
418 | */ |
419 | getmicrotime(&tv); |
420 | cur_usec = (u_quad_t)tv.tv_sec * 1000000 + (u_quad_t)tv.tv_usec; |
421 | more = false; |
422 | mutex_enter(&nfsd_lock); |
423 | TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) { |
424 | nd = LIST_FIRST(&slp->ns_tq); |
425 | if (nd != NULL) { |
426 | if (nd->nd_time <= cur_usec) { |
427 | nfsrv_wakenfsd_locked(slp); |
428 | } |
429 | more = true; |
430 | } |
431 | } |
432 | mutex_exit(&nfsd_lock); |
433 | return more; |
434 | } |
435 | |
436 | /* |
437 | * Search for a sleeping nfsd and wake it up. |
438 | * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the |
439 | * running nfsds will go look for the work in the nfssvc_sock list. |
440 | */ |
441 | static void |
442 | nfsrv_wakenfsd_locked(struct nfssvc_sock *slp) |
443 | { |
444 | struct nfsd *nd; |
445 | |
446 | KASSERT(mutex_owned(&nfsd_lock)); |
447 | |
448 | if ((slp->ns_flags & SLP_VALID) == 0) |
449 | return; |
450 | if (slp->ns_gflags & SLP_G_DOREC) |
451 | return; |
452 | nd = SLIST_FIRST(&nfsd_idle_head); |
453 | if (nd) { |
454 | SLIST_REMOVE_HEAD(&nfsd_idle_head, nfsd_idle); |
455 | if (nd->nfsd_slp) |
456 | panic("nfsd wakeup" ); |
457 | slp->ns_sref++; |
458 | KASSERT(slp->ns_sref > 0); |
459 | nd->nfsd_slp = slp; |
460 | cv_signal(&nd->nfsd_cv); |
461 | } else { |
462 | slp->ns_gflags |= SLP_G_DOREC; |
463 | nfsd_head_flag |= NFSD_CHECKSLP; |
464 | TAILQ_INSERT_TAIL(&nfssvc_sockpending, slp, ns_pending); |
465 | } |
466 | } |
467 | |
468 | void |
469 | nfsrv_wakenfsd(struct nfssvc_sock *slp) |
470 | { |
471 | |
472 | mutex_enter(&nfsd_lock); |
473 | nfsrv_wakenfsd_locked(slp); |
474 | mutex_exit(&nfsd_lock); |
475 | } |
476 | |
477 | int |
478 | nfsdsock_sendreply(struct nfssvc_sock *slp, struct nfsrv_descript *nd) |
479 | { |
480 | int error; |
481 | |
482 | if (nd->nd_mrep != NULL) { |
483 | m_freem(nd->nd_mrep); |
484 | nd->nd_mrep = NULL; |
485 | } |
486 | |
487 | mutex_enter(&slp->ns_lock); |
488 | if ((slp->ns_flags & SLP_SENDING) != 0) { |
489 | SIMPLEQ_INSERT_TAIL(&slp->ns_sendq, nd, nd_sendq); |
490 | mutex_exit(&slp->ns_lock); |
491 | return 0; |
492 | } |
493 | KASSERT(SIMPLEQ_EMPTY(&slp->ns_sendq)); |
494 | slp->ns_flags |= SLP_SENDING; |
495 | mutex_exit(&slp->ns_lock); |
496 | |
497 | again: |
498 | error = nfs_send(slp->ns_so, nd->nd_nam2, nd->nd_mreq, NULL, curlwp); |
499 | if (nd->nd_nam2) { |
500 | m_free(nd->nd_nam2); |
501 | } |
502 | nfsdreq_free(nd); |
503 | |
504 | mutex_enter(&slp->ns_lock); |
505 | KASSERT((slp->ns_flags & SLP_SENDING) != 0); |
506 | nd = SIMPLEQ_FIRST(&slp->ns_sendq); |
507 | if (nd != NULL) { |
508 | SIMPLEQ_REMOVE_HEAD(&slp->ns_sendq, nd_sendq); |
509 | mutex_exit(&slp->ns_lock); |
510 | goto again; |
511 | } |
512 | slp->ns_flags &= ~SLP_SENDING; |
513 | mutex_exit(&slp->ns_lock); |
514 | |
515 | return error; |
516 | } |
517 | |
518 | void |
519 | nfsdsock_setbits(struct nfssvc_sock *slp, int bits) |
520 | { |
521 | |
522 | mutex_enter(&slp->ns_alock); |
523 | slp->ns_aflags |= bits; |
524 | mutex_exit(&slp->ns_alock); |
525 | } |
526 | |
527 | void |
528 | nfsdsock_clearbits(struct nfssvc_sock *slp, int bits) |
529 | { |
530 | |
531 | mutex_enter(&slp->ns_alock); |
532 | slp->ns_aflags &= ~bits; |
533 | mutex_exit(&slp->ns_alock); |
534 | } |
535 | |
536 | bool |
537 | nfsdsock_testbits(struct nfssvc_sock *slp, int bits) |
538 | { |
539 | |
540 | return (slp->ns_aflags & bits); |
541 | } |
542 | |