/*	$NetBSD: nfs_clntsocket.c,v 1.5 2016/06/17 14:28:29 christos Exp $	*/

/*
 * Copyright (c) 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 */

/*
 * Socket operations for use by nfs
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nfs_clntsocket.c,v 1.5 2016/06/17 14:28:29 christos Exp $");

#ifdef _KERNEL_OPT
#include "opt_nfs.h"
#include "opt_mbuftrace.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/evcnt.h>
#include <sys/callout.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/mbuf.h>
#include <sys/vnode.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>
#include <sys/tprintf.h>
#include <sys/namei.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/kauth.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfsrtt.h>
#include <nfs/nfs_var.h>

static int nfs_sndlock(struct nfsmount *, struct nfsreq *);
static void nfs_sndunlock(struct nfsmount *);

/*
 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
 * Mark and consolidate the data into a new mbuf list.
 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
 *     small mbufs.
 * For SOCK_STREAM we must be very careful to read an entire record once
 * we have read any of it, even if the system call has been interrupted.
 */
static int
nfs_receive(struct nfsreq *rep, struct mbuf **aname, struct mbuf **mp,
    struct lwp *l)
{
	struct socket *so;
	struct uio auio;
	struct iovec aio;
	struct mbuf *m;
	struct mbuf *control;
	u_int32_t len;
	struct mbuf **getnam;
	int error, sotype, rcvflg;

	/*
	 * Set up arguments for soreceive()
	 */
	*mp = NULL;
	*aname = NULL;
	sotype = rep->r_nmp->nm_sotype;

	/*
	 * For reliable protocols, lock against other senders/receivers
	 * in case a reconnect is necessary.
	 * For SOCK_STREAM, first get the Record Mark to find out how much
	 * more there is to get.
	 * We must lock the socket against other receivers
	 * until we have an entire rpc request/reply.
	 */
	if (sotype != SOCK_DGRAM) {
		error = nfs_sndlock(rep->r_nmp, rep);
		if (error)
			return (error);
tryagain:
		/*
		 * Check for fatal errors and resending request.
		 */
		/*
		 * Ugh: If a reconnect attempt just happened, nm_so
		 * would have changed. NULL indicates a failed
		 * attempt that has essentially shut down this
		 * mount point.
		 */
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
			nfs_sndunlock(rep->r_nmp);
			return (EINTR);
		}
		so = rep->r_nmp->nm_so;
		if (!so) {
			error = nfs_reconnect(rep);
			if (error) {
				nfs_sndunlock(rep->r_nmp);
				return (error);
			}
			goto tryagain;
		}
		while (rep->r_flags & R_MUSTRESEND) {
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			nfsstats.rpcretries++;
			rep->r_rtt = 0;
			rep->r_flags &= ~R_TIMING;
			error = nfs_send(so, rep->r_nmp->nm_nam, m, rep, l);
			if (error) {
				if (error == EINTR || error == ERESTART ||
				    (error = nfs_reconnect(rep)) != 0) {
					nfs_sndunlock(rep->r_nmp);
					return (error);
				}
				goto tryagain;
			}
		}
		nfs_sndunlock(rep->r_nmp);
		if (sotype == SOCK_STREAM) {
			aio.iov_base = (void *) &len;
			aio.iov_len = sizeof(u_int32_t);
			auio.uio_iov = &aio;
			auio.uio_iovcnt = 1;
			auio.uio_rw = UIO_READ;
			auio.uio_offset = 0;
			auio.uio_resid = sizeof(u_int32_t);
			UIO_SETUP_SYSSPACE(&auio);
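			/*
			 * Read the 32-bit record mark first; MSG_WAITALL
			 * asks soreceive() not to return until all
			 * sizeof(u_int32_t) bytes have arrived.
			 */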
			do {
				rcvflg = MSG_WAITALL;
				error = (*so->so_receive)(so, NULL, &auio,
				    NULL, NULL, &rcvflg);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
					/*
					 * if it seems that the server died after it
					 * received our request, set EPIPE so that
					 * we'll reconnect and retransmit requests.
					 */
					if (rep->r_rexmit >= rep->r_nmp->nm_retry) {
						nfsstats.rpctimeouts++;
						error = EPIPE;
					}
				}
			} while (error == EWOULDBLOCK);
			if (!error && auio.uio_resid > 0) {
				/*
				 * Don't log a 0 byte receive; it means
				 * that the socket has been closed, and
				 * can happen during normal operation
				 * (forcible unmount or Solaris server).
				 */
				if (auio.uio_resid != sizeof (u_int32_t))
					log(LOG_INFO,
					    "short receive (%lu/%lu) from nfs server %s\n",
					    (u_long)sizeof(u_int32_t) - auio.uio_resid,
					    (u_long)sizeof(u_int32_t),
					    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
			if (error)
				goto errout;
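			/*
			 * The Sun RPC record mark just read is one big-endian
			 * 32-bit word: the high bit flags the last fragment of
			 * a record and the low 31 bits give the fragment
			 * length, so mask the flag off to recover the length.
			 */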
			len = ntohl(len) & ~0x80000000;
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			if (len > NFS_MAXPACKET) {
				log(LOG_ERR, "%s (%d) from nfs server %s\n",
				    "impossible packet length",
				    len,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EFBIG;
				goto errout;
			}
			auio.uio_resid = len;
			do {
				rcvflg = MSG_WAITALL;
				error = (*so->so_receive)(so, NULL,
				    &auio, mp, NULL, &rcvflg);
			} while (error == EWOULDBLOCK || error == EINTR ||
			    error == ERESTART);
			if (!error && auio.uio_resid > 0) {
				if (len != auio.uio_resid)
					log(LOG_INFO,
					    "short receive (%lu/%d) from nfs server %s\n",
					    (u_long)len - auio.uio_resid, len,
					    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
		} else {
			/*
			 * NB: Since uio_resid is big, MSG_WAITALL is ignored
			 * and soreceive() will return when it has either a
			 * control msg or a data msg.
			 * We have no use for control msgs, but must grab them
			 * and then throw them away so we know what is going
			 * on.
			 */
			auio.uio_resid = len = 100000000; /* Anything Big */
			/* no need to set up uio_vmspace */
			do {
				rcvflg = 0;
				error = (*so->so_receive)(so, NULL,
				    &auio, mp, &control, &rcvflg);
				if (control)
					m_freem(control);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
				}
			} while (error == EWOULDBLOCK ||
			    (!error && *mp == NULL && control));
			if ((rcvflg & MSG_EOR) == 0)
				printf("Egad!!\n");
			if (!error && *mp == NULL)
				error = EPIPE;
			len -= auio.uio_resid;
		}
errout:
		if (error && error != EINTR && error != ERESTART) {
			m_freem(*mp);
			*mp = NULL;
			if (error != EPIPE)
				log(LOG_INFO,
				    "receive error %d from nfs server %s\n",
				    error,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			error = nfs_sndlock(rep->r_nmp, rep);
			if (!error)
				error = nfs_reconnect(rep);
			if (!error)
				goto tryagain;
			else
				nfs_sndunlock(rep->r_nmp);
		}
	} else {
		if ((so = rep->r_nmp->nm_so) == NULL)
			return (EACCES);
		if (so->so_state & SS_ISCONNECTED)
			getnam = NULL;
		else
			getnam = aname;
		auio.uio_resid = len = 1000000;
		/* no need to set up uio_vmspace */
		do {
			rcvflg = 0;
			error = (*so->so_receive)(so, getnam, &auio, mp,
			    NULL, &rcvflg);
			if (error == EWOULDBLOCK &&
			    (rep->r_flags & R_SOFTTERM))
				return (EINTR);
		} while (error == EWOULDBLOCK);
		len -= auio.uio_resid;
		if (!error && *mp == NULL)
			error = EPIPE;
	}
	if (error) {
		m_freem(*mp);
		*mp = NULL;
	}
	return (error);
}

/*
 * Implement receipt of reply on a socket.
 * We must search through the list of received datagrams matching them
 * with outstanding requests using the xid, until ours is found.
 */
/* ARGSUSED */
static int
nfs_reply(struct nfsreq *myrep, struct lwp *lwp)
{
	struct nfsreq *rep;
	struct nfsmount *nmp = myrep->r_nmp;
	int32_t t1;
	struct mbuf *mrep, *nam, *md;
	u_int32_t rxid, *tl;
	char *dpos, *cp2;
	int error, s;

	/*
	 * Loop around until we get our own reply
	 */
	for (;;) {
		/*
		 * Lock against other receivers so that I don't get stuck in
		 * sbwait() after someone else has received my reply for me.
		 * Also necessary for connection based protocols to avoid
		 * race conditions during a reconnect.
		 */
		error = nfs_rcvlock(nmp, myrep);
		if (error == EALREADY)
			return (0);
		if (error)
			return (error);
		/*
		 * Get the next Rpc reply off the socket
		 */

		mutex_enter(&nmp->nm_lock);
		nmp->nm_waiters++;
		mutex_exit(&nmp->nm_lock);

		error = nfs_receive(myrep, &nam, &mrep, lwp);

		mutex_enter(&nmp->nm_lock);
		nmp->nm_waiters--;
		cv_signal(&nmp->nm_disconcv);
		mutex_exit(&nmp->nm_lock);

		if (error) {
			nfs_rcvunlock(nmp);

			if (nmp->nm_iflag & NFSMNT_DISMNT) {
				/*
				 * Oops, we're going away now..
				 */
				return error;
			}
			/*
			 * Ignore routing errors on connectionless protocols??
			 */
			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
				nmp->nm_so->so_error = 0;
#ifdef DEBUG
				if (ratecheck(&nfs_reply_last_err_time,
				    &nfs_err_interval))
					printf("%s: ignoring error %d\n",
					    __func__, error);
#endif
				continue;
			}
			return (error);
		}
		if (nam)
			m_freem(nam);

		/*
		 * Get the xid and check that it is an rpc reply
		 */
		md = mrep;
		dpos = mtod(md, void *);
		nfsm_dissect(tl, u_int32_t *, 2*NFSX_UNSIGNED);
		rxid = *tl++;
		if (*tl != rpc_reply) {
			nfsstats.rpcinvalid++;
			m_freem(mrep);
nfsmout:
			nfs_rcvunlock(nmp);
			continue;
		}

		/*
		 * Loop through the request list to match up the reply
		 * Iff no match, just drop the datagram
		 */
		s = splsoftnet();
		TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
			if (rep->r_mrep != NULL || rxid != rep->r_xid)
				continue;

			/* Found it.. */
			rep->r_mrep = mrep;
			rep->r_md = md;
			rep->r_dpos = dpos;
			if (nfsrtton) {
				struct rttl *rt;
				int proct = nfs_proct[rep->r_procnum];

				rt = &nfsrtt.rttl[nfsrtt.pos];
				rt->proc = rep->r_procnum;
				rt->rto = NFS_RTO(nmp, proct);
				rt->sent = nmp->nm_sent;
				rt->cwnd = nmp->nm_cwnd;
				rt->srtt = nmp->nm_srtt[proct - 1];
				rt->sdrtt = nmp->nm_sdrtt[proct - 1];
				rt->fsid = nmp->nm_mountp->mnt_stat.f_fsidx;
				getmicrotime(&rt->tstamp);
				if (rep->r_flags & R_TIMING)
					rt->rtt = rep->r_rtt;
				else
					rt->rtt = 1000000;
				nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
			}
			/*
			 * Update congestion window.
			 * Do the additive increase of
			 * one rpc/rtt.
			 */
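			/*
			 * The increment below is roughly
			 * NFS_CWNDSCALE * NFS_CWNDSCALE / nm_cwnd (rounded),
			 * so the window grows by about one request's worth
			 * (NFS_CWNDSCALE) per window of replies received.
			 */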
			if (nmp->nm_cwnd <= nmp->nm_sent) {
				nmp->nm_cwnd +=
				    (NFS_CWNDSCALE * NFS_CWNDSCALE +
				    (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
				if (nmp->nm_cwnd > NFS_MAXCWND)
					nmp->nm_cwnd = NFS_MAXCWND;
			}
			rep->r_flags &= ~R_SENT;
			nmp->nm_sent -= NFS_CWNDSCALE;
			/*
			 * Update rtt using a gain of 0.125 on the mean
			 * and a gain of 0.25 on the deviation.
			 */
			if (rep->r_flags & R_TIMING) {
				/*
				 * Since the timer resolution of
				 * NFS_HZ is so coarse, it can often
				 * result in r_rtt == 0. Since
				 * r_rtt == N means that the actual
				 * rtt is between N+dt and N+2-dt ticks,
				 * add 1.
				 */
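				/*
				 * NFS_SRTT() is kept scaled by 8 and
				 * NFS_SDRTT() by 4, so subtracting the
				 * shifted-down values and adding the error
				 * back gives the 1/8 and 1/4 gains above
				 * in fixed point.
				 */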
				t1 = rep->r_rtt + 1;
				t1 -= (NFS_SRTT(rep) >> 3);
				NFS_SRTT(rep) += t1;
				if (t1 < 0)
					t1 = -t1;
				t1 -= (NFS_SDRTT(rep) >> 2);
				NFS_SDRTT(rep) += t1;
			}
			nmp->nm_timeouts = 0;
			break;
		}
		splx(s);
		nfs_rcvunlock(nmp);
		/*
		 * If not matched to a request, drop it.
		 * If it's mine, get out.
		 */
		if (rep == 0) {
			nfsstats.rpcunexpected++;
			m_freem(mrep);
		} else if (rep == myrep) {
			if (rep->r_mrep == NULL)
				panic("nfsreply nil");
			return (0);
		}
	}
}

/*
 * nfs_request - goes something like this
 *	- fill in request struct
 *	- links it into list
 *	- calls nfs_send() for first transmit
 *	- calls nfs_receive() to get reply
 *	- break down rpc header and return with nfs reply pointed to
 *	  by mrep or error
 * nb: always frees up mreq mbuf list
 */
int
nfs_request(struct nfsnode *np, struct mbuf *mrest, int procnum,
    struct lwp *lwp, kauth_cred_t cred, struct mbuf **mrp, struct mbuf **mdp,
    char **dposp, int *rexmitp)
{
	struct mbuf *m, *mrep;
	struct nfsreq *rep;
	u_int32_t *tl;
	int i;
	struct nfsmount *nmp = VFSTONFS(np->n_vnode->v_mount);
	struct mbuf *md, *mheadend;
	char nickv[RPCX_NICKVERF];
	time_t waituntil;
	char *dpos, *cp2;
	int t1, s, error = 0, mrest_len, auth_len, auth_type;
	int trylater_delay = NFS_TRYLATERDEL, failed_auth = 0;
	int verf_len, verf_type;
	u_int32_t xid;
	char *auth_str, *verf_str;
	NFSKERBKEY_T key;		/* save session key */
	kauth_cred_t acred;
	struct mbuf *mrest_backup = NULL;
	kauth_cred_t origcred = NULL;	/* XXX: gcc */
	bool retry_cred = true;
	bool use_opencred = (np->n_flag & NUSEOPENCRED) != 0;

	if (rexmitp != NULL)
		*rexmitp = 0;

	acred = kauth_cred_alloc();

tryagain_cred:
	KASSERT(cred != NULL);
	rep = kmem_alloc(sizeof(*rep), KM_SLEEP);
	rep->r_nmp = nmp;
	KASSERT(lwp == NULL || lwp == curlwp);
	rep->r_lwp = lwp;
	rep->r_procnum = procnum;
	i = 0;
	m = mrest;
	while (m) {
		i += m->m_len;
		m = m->m_next;
	}
	mrest_len = i;

	/*
	 * Get the RPC header with authorization.
	 */
kerbauth:
	verf_str = auth_str = NULL;
	if (nmp->nm_flag & NFSMNT_KERB) {
		verf_str = nickv;
		verf_len = sizeof (nickv);
		auth_type = RPCAUTH_KERB4;
		memset((void *)key, 0, sizeof (key));
		if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
		    &auth_len, verf_str, verf_len)) {
			error = nfs_getauth(nmp, rep, cred, &auth_str,
			    &auth_len, verf_str, &verf_len, key);
			if (error) {
				kmem_free(rep, sizeof(*rep));
				m_freem(mrest);
				KASSERT(kauth_cred_getrefcnt(acred) == 1);
				kauth_cred_free(acred);
				return (error);
			}
		}
		retry_cred = false;
	} else {
		/* AUTH_UNIX */
		uid_t uid;
		gid_t gid;

		/*
		 * On most unix filesystems, permission checks are
		 * done when the file is open(2)'ed.
		 * I.e. once a file has been successfully opened,
		 * subsequent i/o operations never fail with EACCES.
		 * We try to follow those semantics as far as possible.
		 *
		 * Note that we expect the nfs server to always grant
		 * access to the file's owner.
		 */
		origcred = cred;
		switch (procnum) {
		case NFSPROC_READ:
		case NFSPROC_WRITE:
		case NFSPROC_COMMIT:
			uid = np->n_vattr->va_uid;
			gid = np->n_vattr->va_gid;
			if (kauth_cred_geteuid(cred) == uid &&
			    kauth_cred_getegid(cred) == gid) {
				retry_cred = false;
				break;
			}
			if (use_opencred)
				break;
			kauth_cred_setuid(acred, uid);
			kauth_cred_seteuid(acred, uid);
			kauth_cred_setsvuid(acred, uid);
			kauth_cred_setgid(acred, gid);
			kauth_cred_setegid(acred, gid);
			kauth_cred_setsvgid(acred, gid);
			cred = acred;
			break;
		default:
			retry_cred = false;
			break;
		}
		/*
		 * Back up the mbuf chain in case we need it later to retry.
		 *
		 * XXX maybe we can keep a direct reference to
		 * mrest without doing m_copym, but it's ...ugly.
		 */
		if (retry_cred)
			mrest_backup = m_copym(mrest, 0, M_COPYALL, M_WAIT);
		auth_type = RPCAUTH_UNIX;
		/* XXX elad - ngroups */
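		/*
		 * auth_len accounts for the five fixed 32-bit words of an
		 * AUTH_UNIX credential (stamp, zero-length machine name,
		 * uid, gid, group count) plus one word per supplemental
		 * group, capped at nm_numgrps.
		 */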
		auth_len = (((kauth_cred_ngroups(cred) > nmp->nm_numgrps) ?
		    nmp->nm_numgrps : kauth_cred_ngroups(cred)) << 2) +
		    5 * NFSX_UNSIGNED;
	}
	m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
	    auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
	if (auth_str)
		free(auth_str, M_TEMP);

	/*
	 * For stream protocols, insert a Sun RPC Record Mark.
	 */
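	/*
	 * The mark is one big-endian 32-bit word prepended to the request:
	 * 0x80000000 flags it as the last (and here only) fragment of the
	 * record, and the low 31 bits carry the length of the RPC message
	 * that follows.
	 */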
	if (nmp->nm_sotype == SOCK_STREAM) {
		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
		*mtod(m, u_int32_t *) = htonl(0x80000000 |
		    (m->m_pkthdr.len - NFSX_UNSIGNED));
	}
	rep->r_mreq = m;
	rep->r_xid = xid;
tryagain:
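	/*
	 * Soft mounts give up after nm_retry transmissions; hard mounts
	 * get a retry count past the clip limit so the request keeps
	 * being retransmitted until it is answered or interrupted.
	 */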
	if (nmp->nm_flag & NFSMNT_SOFT)
		rep->r_retry = nmp->nm_retry;
	else
		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	rep->r_rtt = rep->r_rexmit = 0;
	if (nfs_proct[procnum] > 0)
		rep->r_flags = R_TIMING;
	else
		rep->r_flags = 0;
	rep->r_mrep = NULL;

	/*
	 * Do the client side RPC.
	 */
	nfsstats.rpcrequests++;
	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 */
	s = splsoftnet();
	TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
	nfs_timer_start();

	/*
	 * If backing off another request or avoiding congestion, don't
	 * send this one now but let timer do it. If not timing a request,
	 * do it now.
	 */
	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
	    (nmp->nm_flag & NFSMNT_DUMBTIMR) || nmp->nm_sent < nmp->nm_cwnd)) {
		splx(s);
		if (nmp->nm_soflags & PR_CONNREQUIRED)
			error = nfs_sndlock(nmp, rep);
		if (!error) {
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep, lwp);
			if (nmp->nm_soflags & PR_CONNREQUIRED)
				nfs_sndunlock(nmp);
		}
		s = splsoftnet();
		if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
			if ((rep->r_flags & R_SENT) == 0) {
				nmp->nm_sent += NFS_CWNDSCALE;
				rep->r_flags |= R_SENT;
			}
		}
		splx(s);
	} else {
		splx(s);
		rep->r_rtt = -1;
	}

	/*
	 * Wait for the reply from our send or the timer's.
	 */
	if (!error || error == EPIPE || error == EWOULDBLOCK)
		error = nfs_reply(rep, lwp);

	/*
	 * RPC done, unlink the request.
	 */
	s = splsoftnet();
	TAILQ_REMOVE(&nfs_reqq, rep, r_chain);

	/*
	 * Decrement the outstanding request count.
	 */
	if (rep->r_flags & R_SENT) {
		rep->r_flags &= ~R_SENT;	/* paranoia */
		nmp->nm_sent -= NFS_CWNDSCALE;
	}
	splx(s);

	if (rexmitp != NULL) {
		int rexmit;

		if (nmp->nm_sotype != SOCK_DGRAM)
			rexmit = (rep->r_flags & R_REXMITTED) != 0;
		else
			rexmit = rep->r_rexmit;
		*rexmitp = rexmit;
	}

	/*
	 * If there was a successful reply and a tprintf msg,
	 * tprintf a response.
	 */
	if (!error && (rep->r_flags & R_TPRINTFMSG))
		nfs_msg(rep->r_lwp, nmp->nm_mountp->mnt_stat.f_mntfromname,
		    "is alive again");
	mrep = rep->r_mrep;
	md = rep->r_md;
	dpos = rep->r_dpos;
	if (error)
		goto nfsmout;

	/*
	 * break down the rpc header and check if ok
	 */
	nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
	if (*tl++ == rpc_msgdenied) {
		if (*tl == rpc_mismatch)
			error = EOPNOTSUPP;
		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
			if (!failed_auth) {
				failed_auth++;
				mheadend->m_next = NULL;
				m_freem(mrep);
				m_freem(rep->r_mreq);
				goto kerbauth;
			} else
				error = EAUTH;
		} else
			error = EACCES;
		m_freem(mrep);
		goto nfsmout;
	}

	/*
	 * Grab any Kerberos verifier, otherwise just throw it away.
	 */
	verf_type = fxdr_unsigned(int, *tl++);
	i = fxdr_unsigned(int32_t, *tl);
	if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
		error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
		if (error)
			goto nfsmout;
	} else if (i > 0)
		nfsm_adv(nfsm_rndup(i));
	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
	/* 0 == ok */
	if (*tl == 0) {
		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
		if (*tl != 0) {
			error = fxdr_unsigned(int, *tl);
			switch (error) {
			case NFSERR_PERM:
				error = EPERM;
				break;

			case NFSERR_NOENT:
				error = ENOENT;
				break;

			case NFSERR_IO:
				error = EIO;
				break;

			case NFSERR_NXIO:
				error = ENXIO;
				break;

			case NFSERR_ACCES:
				error = EACCES;
				if (!retry_cred)
					break;
				m_freem(mrep);
				m_freem(rep->r_mreq);
				kmem_free(rep, sizeof(*rep));
				use_opencred = !use_opencred;
				if (mrest_backup == NULL) {
					/* m_copym failure */
					KASSERT(
					    kauth_cred_getrefcnt(acred) == 1);
					kauth_cred_free(acred);
					return ENOMEM;
				}
				mrest = mrest_backup;
				mrest_backup = NULL;
				cred = origcred;
				error = 0;
				retry_cred = false;
				goto tryagain_cred;

			case NFSERR_EXIST:
				error = EEXIST;
				break;

			case NFSERR_XDEV:
				error = EXDEV;
				break;

			case NFSERR_NODEV:
				error = ENODEV;
				break;

			case NFSERR_NOTDIR:
				error = ENOTDIR;
				break;

			case NFSERR_ISDIR:
				error = EISDIR;
				break;

			case NFSERR_INVAL:
				error = EINVAL;
				break;

			case NFSERR_FBIG:
				error = EFBIG;
				break;

			case NFSERR_NOSPC:
				error = ENOSPC;
				break;

			case NFSERR_ROFS:
				error = EROFS;
				break;

			case NFSERR_MLINK:
				error = EMLINK;
				break;

			case NFSERR_TIMEDOUT:
				error = ETIMEDOUT;
				break;

			case NFSERR_NAMETOL:
				error = ENAMETOOLONG;
				break;

			case NFSERR_NOTEMPTY:
				error = ENOTEMPTY;
				break;

			case NFSERR_DQUOT:
				error = EDQUOT;
				break;

			case NFSERR_STALE:
				/*
				 * If the File Handle was stale, invalidate the
				 * lookup cache, just in case.
				 */
				error = ESTALE;
				cache_purge(NFSTOV(np));
				break;

			case NFSERR_REMOTE:
				error = EREMOTE;
				break;

			case NFSERR_WFLUSH:
			case NFSERR_BADHANDLE:
			case NFSERR_NOT_SYNC:
			case NFSERR_BAD_COOKIE:
				error = EINVAL;
				break;

			case NFSERR_NOTSUPP:
				error = ENOTSUP;
				break;

			case NFSERR_TOOSMALL:
			case NFSERR_SERVERFAULT:
			case NFSERR_BADTYPE:
				error = EINVAL;
				break;

			case NFSERR_TRYLATER:
				if ((nmp->nm_flag & NFSMNT_NFSV3) == 0)
					break;
				m_freem(mrep);
				error = 0;
				waituntil = time_second + trylater_delay;
				while (time_second < waituntil) {
					kpause("nfstrylater", false, hz, NULL);
				}
				trylater_delay *= NFS_TRYLATERDELMUL;
				if (trylater_delay > NFS_TRYLATERDELMAX)
					trylater_delay = NFS_TRYLATERDELMAX;
				/*
				 * RFC1813:
				 * The client should wait and then try
				 * the request with a new RPC transaction ID.
				 */
				nfs_renewxid(rep);
				goto tryagain;

			default:
#ifdef DIAGNOSTIC
				printf("Invalid rpc error code %d\n", error);
#endif
				error = EINVAL;
				break;
			}

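			/*
			 * For v3, hand the reply back anyway and OR in
			 * NFSERR_RETERR so the caller knows mrep is still
			 * valid and can parse any post-op attributes before
			 * acting on the error.
			 */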
			if (nmp->nm_flag & NFSMNT_NFSV3) {
				*mrp = mrep;
				*mdp = md;
				*dposp = dpos;
				error |= NFSERR_RETERR;
			} else
				m_freem(mrep);
			goto nfsmout;
		}

		/*
		 * note which credential worked to minimize number of retries.
		 */
		if (use_opencred)
			np->n_flag |= NUSEOPENCRED;
		else
			np->n_flag &= ~NUSEOPENCRED;

		*mrp = mrep;
		*mdp = md;
		*dposp = dpos;

		KASSERT(error == 0);
		goto nfsmout;
	}
	m_freem(mrep);
	error = EPROTONOSUPPORT;
nfsmout:
	KASSERT(kauth_cred_getrefcnt(acred) == 1);
	kauth_cred_free(acred);
	m_freem(rep->r_mreq);
	kmem_free(rep, sizeof(*rep));
	m_freem(mrest_backup);
	return (error);
}

/*
 * Lock a socket against others.
 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 * and also to avoid race conditions between the processes with nfs requests
 * in progress when a reconnect is necessary.
 */
static int
nfs_sndlock(struct nfsmount *nmp, struct nfsreq *rep)
{
	struct lwp *l;
	int timeo = 0;
	bool catch_p = false;
	int error = 0;

	if (nmp->nm_flag & NFSMNT_SOFT)
		timeo = nmp->nm_retry * nmp->nm_timeo;

	if (nmp->nm_iflag & NFSMNT_DISMNTFORCE)
		timeo = hz;
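	/*
	 * A timeo of 0 makes the waits below untimed; on a soft mount
	 * each wait is bounded to nm_retry * nm_timeo ticks (after which
	 * the EWOULDBLOCK below becomes EIO), and a forced unmount in
	 * progress limits it to one second so the loop re-evaluates its
	 * exit conditions promptly.
	 */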

	if (rep) {
		l = rep->r_lwp;
		if (rep->r_nmp->nm_flag & NFSMNT_INT)
			catch_p = true;
	} else
		l = NULL;
	mutex_enter(&nmp->nm_lock);
	while ((nmp->nm_iflag & NFSMNT_SNDLOCK) != 0) {
		if (rep && nfs_sigintr(rep->r_nmp, rep, l)) {
			error = EINTR;
			goto quit;
		}
		if (catch_p) {
			error = cv_timedwait_sig(&nmp->nm_sndcv,
			    &nmp->nm_lock, timeo);
		} else {
			error = cv_timedwait(&nmp->nm_sndcv,
			    &nmp->nm_lock, timeo);
		}

		if (error) {
			if ((error == EWOULDBLOCK) &&
			    (nmp->nm_flag & NFSMNT_SOFT)) {
				error = EIO;
				goto quit;
			}
			error = 0;
		}
		if (catch_p) {
			catch_p = false;
			timeo = 2 * hz;
		}
	}
	nmp->nm_iflag |= NFSMNT_SNDLOCK;
quit:
	mutex_exit(&nmp->nm_lock);
	return error;
}

/*
 * Unlock the stream socket for others.
 */
static void
nfs_sndunlock(struct nfsmount *nmp)
{

	mutex_enter(&nmp->nm_lock);
	if ((nmp->nm_iflag & NFSMNT_SNDLOCK) == 0)
		panic("nfs sndunlock");
	nmp->nm_iflag &= ~NFSMNT_SNDLOCK;
	cv_signal(&nmp->nm_sndcv);
	mutex_exit(&nmp->nm_lock);
}