1/* $NetBSD: nfs_srvsocket.c,v 1.4 2009/09/03 20:59:12 tls Exp $ */
2
3/*
4 * Copyright (c) 1989, 1991, 1993, 1995
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Rick Macklem at The University of Guelph.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
35 */
36
37/*
38 * Socket operations for use by nfs
39 */
40
41#include <sys/cdefs.h>
42__KERNEL_RCSID(0, "$NetBSD: nfs_srvsocket.c,v 1.4 2009/09/03 20:59:12 tls Exp $");
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/evcnt.h>
47#include <sys/callout.h>
48#include <sys/proc.h>
49#include <sys/mount.h>
50#include <sys/kernel.h>
51#include <sys/kmem.h>
52#include <sys/mbuf.h>
53#include <sys/vnode.h>
54#include <sys/domain.h>
55#include <sys/protosw.h>
56#include <sys/socket.h>
57#include <sys/socketvar.h>
58#include <sys/syslog.h>
59#include <sys/tprintf.h>
60#include <sys/namei.h>
61#include <sys/signal.h>
62#include <sys/signalvar.h>
63#include <sys/kauth.h>
64
65#include <netinet/in.h>
66#include <netinet/tcp.h>
67
68#include <nfs/rpcv2.h>
69#include <nfs/nfsproto.h>
70#include <nfs/nfs.h>
71#include <nfs/xdr_subs.h>
72#include <nfs/nfsm_subs.h>
73#include <nfs/nfsmount.h>
74#include <nfs/nfsnode.h>
75#include <nfs/nfsrtt.h>
76#include <nfs/nfs_var.h>
77
78static void nfsrv_wakenfsd_locked(struct nfssvc_sock *);
79
80int (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *,
81 struct nfssvc_sock *, struct lwp *,
82 struct mbuf **) = {
83 nfsrv_null,
84 nfsrv_getattr,
85 nfsrv_setattr,
86 nfsrv_lookup,
87 nfsrv3_access,
88 nfsrv_readlink,
89 nfsrv_read,
90 nfsrv_write,
91 nfsrv_create,
92 nfsrv_mkdir,
93 nfsrv_symlink,
94 nfsrv_mknod,
95 nfsrv_remove,
96 nfsrv_rmdir,
97 nfsrv_rename,
98 nfsrv_link,
99 nfsrv_readdir,
100 nfsrv_readdirplus,
101 nfsrv_statfs,
102 nfsrv_fsinfo,
103 nfsrv_pathconf,
104 nfsrv_commit,
105 nfsrv_noop
106};
107
108/*
109 * Socket upcall routine for the nfsd sockets.
110 * The void *arg is a pointer to the "struct nfssvc_sock".
111 */
112void
113nfsrv_soupcall(struct socket *so, void *arg, int events, int waitflag)
114{
115 struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
116
117 nfsdsock_setbits(slp, SLP_A_NEEDQ);
118 nfsrv_wakenfsd(slp);
119}
120
121void
122nfsrv_rcv(struct nfssvc_sock *slp)
123{
124 struct socket *so;
125 struct mbuf *m;
126 struct mbuf *mp, *nam;
127 struct uio auio;
128 int flags;
129 int error;
130 int setflags = 0;
131
132 error = nfsdsock_lock(slp, true);
133 if (error) {
134 setflags |= SLP_A_NEEDQ;
135 goto dorecs_unlocked;
136 }
137
138 nfsdsock_clearbits(slp, SLP_A_NEEDQ);
139
140 so = slp->ns_so;
141 if (so->so_type == SOCK_STREAM) {
142 /*
143 * Do soreceive().
144 */
145 auio.uio_resid = 1000000000;
146 /* not need to setup uio_vmspace */
147 flags = MSG_DONTWAIT;
148 error = (*so->so_receive)(so, &nam, &auio, &mp, NULL, &flags);
149 if (error || mp == NULL) {
150 if (error == EWOULDBLOCK)
151 setflags |= SLP_A_NEEDQ;
152 else
153 setflags |= SLP_A_DISCONN;
154 goto dorecs;
155 }
156 m = mp;
157 m_claimm(m, &nfs_mowner);
158 if (slp->ns_rawend) {
159 slp->ns_rawend->m_next = m;
160 slp->ns_cc += 1000000000 - auio.uio_resid;
161 } else {
162 slp->ns_raw = m;
163 slp->ns_cc = 1000000000 - auio.uio_resid;
164 }
165 while (m->m_next)
166 m = m->m_next;
167 slp->ns_rawend = m;
168
169 /*
170 * Now try and parse record(s) out of the raw stream data.
171 */
172 error = nfsrv_getstream(slp, M_WAIT);
173 if (error) {
174 if (error == EPERM)
175 setflags |= SLP_A_DISCONN;
176 else
177 setflags |= SLP_A_NEEDQ;
178 }
179 } else {
180 do {
181 auio.uio_resid = 1000000000;
182 /* not need to setup uio_vmspace */
183 flags = MSG_DONTWAIT;
184 error = (*so->so_receive)(so, &nam, &auio, &mp, NULL,
185 &flags);
186 if (mp) {
187 if (nam) {
188 m = nam;
189 m->m_next = mp;
190 } else
191 m = mp;
192 m_claimm(m, &nfs_mowner);
193 if (slp->ns_recend)
194 slp->ns_recend->m_nextpkt = m;
195 else
196 slp->ns_rec = m;
197 slp->ns_recend = m;
198 m->m_nextpkt = (struct mbuf *)0;
199 }
200 if (error) {
201 if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
202 && error != EWOULDBLOCK) {
203 setflags |= SLP_A_DISCONN;
204 goto dorecs;
205 }
206 }
207 } while (mp);
208 }
209dorecs:
210 nfsdsock_unlock(slp);
211
212dorecs_unlocked:
213 if (setflags) {
214 nfsdsock_setbits(slp, setflags);
215 }
216}
217
218int
219nfsdsock_lock(struct nfssvc_sock *slp, bool waitok)
220{
221
222 mutex_enter(&slp->ns_lock);
223 while ((~slp->ns_flags & (SLP_BUSY|SLP_VALID)) == 0) {
224 if (!waitok) {
225 mutex_exit(&slp->ns_lock);
226 return EWOULDBLOCK;
227 }
228 cv_wait(&slp->ns_cv, &slp->ns_lock);
229 }
230 if ((slp->ns_flags & SLP_VALID) == 0) {
231 mutex_exit(&slp->ns_lock);
232 return EINVAL;
233 }
234 KASSERT((slp->ns_flags & SLP_BUSY) == 0);
235 slp->ns_flags |= SLP_BUSY;
236 mutex_exit(&slp->ns_lock);
237
238 return 0;
239}
240
241void
242nfsdsock_unlock(struct nfssvc_sock *slp)
243{
244
245 mutex_enter(&slp->ns_lock);
246 KASSERT((slp->ns_flags & SLP_BUSY) != 0);
247 cv_broadcast(&slp->ns_cv);
248 slp->ns_flags &= ~SLP_BUSY;
249 mutex_exit(&slp->ns_lock);
250}
251
252int
253nfsdsock_drain(struct nfssvc_sock *slp)
254{
255 int error = 0;
256
257 mutex_enter(&slp->ns_lock);
258 if ((slp->ns_flags & SLP_VALID) == 0) {
259 error = EINVAL;
260 goto done;
261 }
262 slp->ns_flags &= ~SLP_VALID;
263 while ((slp->ns_flags & SLP_BUSY) != 0) {
264 cv_wait(&slp->ns_cv, &slp->ns_lock);
265 }
266done:
267 mutex_exit(&slp->ns_lock);
268
269 return error;
270}
271
272/*
273 * Try and extract an RPC request from the mbuf data list received on a
274 * stream socket. The "waitflag" argument indicates whether or not it
275 * can sleep.
276 */
277int
278nfsrv_getstream(struct nfssvc_sock *slp, int waitflag)
279{
280 struct mbuf *m, **mpp;
281 struct mbuf *recm;
282 u_int32_t recmark;
283 int error = 0;
284
285 KASSERT((slp->ns_flags & SLP_BUSY) != 0);
286 for (;;) {
287 if (slp->ns_reclen == 0) {
288 if (slp->ns_cc < NFSX_UNSIGNED) {
289 break;
290 }
291 m = slp->ns_raw;
292 m_copydata(m, 0, NFSX_UNSIGNED, (void *)&recmark);
293 m_adj(m, NFSX_UNSIGNED);
294 slp->ns_cc -= NFSX_UNSIGNED;
295 recmark = ntohl(recmark);
296 slp->ns_reclen = recmark & ~0x80000000;
297 if (recmark & 0x80000000)
298 slp->ns_sflags |= SLP_S_LASTFRAG;
299 else
300 slp->ns_sflags &= ~SLP_S_LASTFRAG;
301 if (slp->ns_reclen > NFS_MAXPACKET) {
302 error = EPERM;
303 break;
304 }
305 }
306
307 /*
308 * Now get the record part.
309 *
310 * Note that slp->ns_reclen may be 0. Linux sometimes
311 * generates 0-length records.
312 */
313 if (slp->ns_cc == slp->ns_reclen) {
314 recm = slp->ns_raw;
315 slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
316 slp->ns_cc = slp->ns_reclen = 0;
317 } else if (slp->ns_cc > slp->ns_reclen) {
318 recm = slp->ns_raw;
319 m = m_split(recm, slp->ns_reclen, waitflag);
320 if (m == NULL) {
321 error = EWOULDBLOCK;
322 break;
323 }
324 m_claimm(recm, &nfs_mowner);
325 slp->ns_raw = m;
326 if (m->m_next == NULL)
327 slp->ns_rawend = m;
328 slp->ns_cc -= slp->ns_reclen;
329 slp->ns_reclen = 0;
330 } else {
331 break;
332 }
333
334 /*
335 * Accumulate the fragments into a record.
336 */
337 mpp = &slp->ns_frag;
338 while (*mpp)
339 mpp = &((*mpp)->m_next);
340 *mpp = recm;
341 if (slp->ns_sflags & SLP_S_LASTFRAG) {
342 if (slp->ns_recend)
343 slp->ns_recend->m_nextpkt = slp->ns_frag;
344 else
345 slp->ns_rec = slp->ns_frag;
346 slp->ns_recend = slp->ns_frag;
347 slp->ns_frag = NULL;
348 }
349 }
350
351 return error;
352}
353
354/*
355 * Parse an RPC header.
356 */
357int
358nfsrv_dorec(struct nfssvc_sock *slp, struct nfsd *nfsd,
359 struct nfsrv_descript **ndp, bool *more)
360{
361 struct mbuf *m, *nam;
362 struct nfsrv_descript *nd;
363 int error;
364
365 *ndp = NULL;
366 *more = false;
367
368 if (nfsdsock_lock(slp, true)) {
369 return ENOBUFS;
370 }
371 m = slp->ns_rec;
372 if (m == NULL) {
373 nfsdsock_unlock(slp);
374 return ENOBUFS;
375 }
376 slp->ns_rec = m->m_nextpkt;
377 if (slp->ns_rec) {
378 m->m_nextpkt = NULL;
379 *more = true;
380 } else {
381 slp->ns_recend = NULL;
382 }
383 nfsdsock_unlock(slp);
384
385 if (m->m_type == MT_SONAME) {
386 nam = m;
387 m = m->m_next;
388 nam->m_next = NULL;
389 } else
390 nam = NULL;
391 nd = nfsdreq_alloc();
392 nd->nd_md = nd->nd_mrep = m;
393 nd->nd_nam2 = nam;
394 nd->nd_dpos = mtod(m, void *);
395 error = nfs_getreq(nd, nfsd, true);
396 if (error) {
397 m_freem(nam);
398 nfsdreq_free(nd);
399 return (error);
400 }
401 *ndp = nd;
402 nfsd->nfsd_nd = nd;
403 return (0);
404}
405
406bool
407nfsrv_timer(void)
408{
409 struct timeval tv;
410 struct nfssvc_sock *slp;
411 u_quad_t cur_usec;
412 struct nfsrv_descript *nd;
413 bool more;
414
415 /*
416 * Scan the write gathering queues for writes that need to be
417 * completed now.
418 */
419 getmicrotime(&tv);
420 cur_usec = (u_quad_t)tv.tv_sec * 1000000 + (u_quad_t)tv.tv_usec;
421 more = false;
422 mutex_enter(&nfsd_lock);
423 TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
424 nd = LIST_FIRST(&slp->ns_tq);
425 if (nd != NULL) {
426 if (nd->nd_time <= cur_usec) {
427 nfsrv_wakenfsd_locked(slp);
428 }
429 more = true;
430 }
431 }
432 mutex_exit(&nfsd_lock);
433 return more;
434}
435
436/*
437 * Search for a sleeping nfsd and wake it up.
438 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
439 * running nfsds will go look for the work in the nfssvc_sock list.
440 */
441static void
442nfsrv_wakenfsd_locked(struct nfssvc_sock *slp)
443{
444 struct nfsd *nd;
445
446 KASSERT(mutex_owned(&nfsd_lock));
447
448 if ((slp->ns_flags & SLP_VALID) == 0)
449 return;
450 if (slp->ns_gflags & SLP_G_DOREC)
451 return;
452 nd = SLIST_FIRST(&nfsd_idle_head);
453 if (nd) {
454 SLIST_REMOVE_HEAD(&nfsd_idle_head, nfsd_idle);
455 if (nd->nfsd_slp)
456 panic("nfsd wakeup");
457 slp->ns_sref++;
458 KASSERT(slp->ns_sref > 0);
459 nd->nfsd_slp = slp;
460 cv_signal(&nd->nfsd_cv);
461 } else {
462 slp->ns_gflags |= SLP_G_DOREC;
463 nfsd_head_flag |= NFSD_CHECKSLP;
464 TAILQ_INSERT_TAIL(&nfssvc_sockpending, slp, ns_pending);
465 }
466}
467
/*
 * Wrapper around nfsrv_wakenfsd_locked() for callers that do not hold
 * nfsd_lock: takes the lock, requests the wakeup for "slp", and drops
 * the lock again.
 */
void
nfsrv_wakenfsd(struct nfssvc_sock *slp)
{

	mutex_enter(&nfsd_lock);
	nfsrv_wakenfsd_locked(slp);
	mutex_exit(&nfsd_lock);
}
476
477int
478nfsdsock_sendreply(struct nfssvc_sock *slp, struct nfsrv_descript *nd)
479{
480 int error;
481
482 if (nd->nd_mrep != NULL) {
483 m_freem(nd->nd_mrep);
484 nd->nd_mrep = NULL;
485 }
486
487 mutex_enter(&slp->ns_lock);
488 if ((slp->ns_flags & SLP_SENDING) != 0) {
489 SIMPLEQ_INSERT_TAIL(&slp->ns_sendq, nd, nd_sendq);
490 mutex_exit(&slp->ns_lock);
491 return 0;
492 }
493 KASSERT(SIMPLEQ_EMPTY(&slp->ns_sendq));
494 slp->ns_flags |= SLP_SENDING;
495 mutex_exit(&slp->ns_lock);
496
497again:
498 error = nfs_send(slp->ns_so, nd->nd_nam2, nd->nd_mreq, NULL, curlwp);
499 if (nd->nd_nam2) {
500 m_free(nd->nd_nam2);
501 }
502 nfsdreq_free(nd);
503
504 mutex_enter(&slp->ns_lock);
505 KASSERT((slp->ns_flags & SLP_SENDING) != 0);
506 nd = SIMPLEQ_FIRST(&slp->ns_sendq);
507 if (nd != NULL) {
508 SIMPLEQ_REMOVE_HEAD(&slp->ns_sendq, nd_sendq);
509 mutex_exit(&slp->ns_lock);
510 goto again;
511 }
512 slp->ns_flags &= ~SLP_SENDING;
513 mutex_exit(&slp->ns_lock);
514
515 return error;
516}
517
/*
 * Set the given bits in the socket's ns_aflags word.  The update is
 * performed with ns_alock held.
 */
void
nfsdsock_setbits(struct nfssvc_sock *slp, int bits)
{

	mutex_enter(&slp->ns_alock);
	slp->ns_aflags |= bits;
	mutex_exit(&slp->ns_alock);
}
526
/*
 * Clear the given bits in the socket's ns_aflags word.  The update is
 * performed with ns_alock held.
 */
void
nfsdsock_clearbits(struct nfssvc_sock *slp, int bits)
{

	mutex_enter(&slp->ns_alock);
	slp->ns_aflags &= ~bits;
	mutex_exit(&slp->ns_alock);
}
535
536bool
537nfsdsock_testbits(struct nfssvc_sock *slp, int bits)
538{
539
540 return (slp->ns_aflags & bits);
541}
542