1 | /* $NetBSD: nfs_iod.c,v 1.7 2015/07/15 03:28:55 manu Exp $ */ |
2 | |
3 | /* |
4 | * Copyright (c) 1989, 1993 |
5 | * The Regents of the University of California. All rights reserved. |
6 | * |
7 | * This code is derived from software contributed to Berkeley by |
8 | * Rick Macklem at The University of Guelph. |
9 | * |
10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions |
12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. |
18 | * 3. Neither the name of the University nor the names of its contributors |
19 | * may be used to endorse or promote products derived from this software |
20 | * without specific prior written permission. |
21 | * |
22 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
23 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
24 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
25 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
26 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
27 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
28 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
29 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
30 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
31 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
32 | * SUCH DAMAGE. |
33 | * |
34 | * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95 |
35 | */ |
36 | |
37 | #include <sys/cdefs.h> |
38 | __KERNEL_RCSID(0, "$NetBSD: nfs_iod.c,v 1.7 2015/07/15 03:28:55 manu Exp $" ); |
39 | |
40 | #include <sys/param.h> |
41 | #include <sys/systm.h> |
42 | #include <sys/kernel.h> |
43 | #include <sys/file.h> |
44 | #include <sys/stat.h> |
45 | #include <sys/vnode.h> |
46 | #include <sys/mount.h> |
47 | #include <sys/proc.h> |
48 | #include <sys/uio.h> |
49 | #include <sys/malloc.h> |
50 | #include <sys/kmem.h> |
51 | #include <sys/buf.h> |
52 | #include <sys/mbuf.h> |
53 | #include <sys/socket.h> |
54 | #include <sys/socketvar.h> |
55 | #include <sys/signalvar.h> |
56 | #include <sys/domain.h> |
57 | #include <sys/protosw.h> |
58 | #include <sys/namei.h> |
59 | #include <sys/syslog.h> |
60 | #include <sys/filedesc.h> |
61 | #include <sys/kthread.h> |
62 | #include <sys/kauth.h> |
63 | #include <sys/syscallargs.h> |
64 | |
65 | #include <netinet/in.h> |
66 | #include <netinet/tcp.h> |
67 | #include <nfs/xdr_subs.h> |
68 | #include <nfs/rpcv2.h> |
69 | #include <nfs/nfsproto.h> |
70 | #include <nfs/nfs.h> |
71 | #include <nfs/nfsm_subs.h> |
72 | #include <nfs/nfsrvcache.h> |
73 | #include <nfs/nfsmount.h> |
74 | #include <nfs/nfsnode.h> |
75 | #include <nfs/nfsrtt.h> |
76 | #include <nfs/nfs_var.h> |
77 | |
/* Upper bound on nm_numuids; consulted by nfs_savenickauth(). */
extern int nuidhash_max;

/*
 * locking order:
 *	nfs_iodlist_lock -> nid_lock -> nm_lock
 */
/* Held around every access to the two iod lists below. */
kmutex_t nfs_iodlist_lock;
/* Each iod thread puts itself here at the top of its service loop. */
struct nfs_iodlist nfs_iodlist_idle;
/*
 * Every iod thread links itself here when it starts;
 * nfs_set_niothreads() unlinks an entry when it tells that iod to exit.
 */
struct nfs_iodlist nfs_iodlist_all;
/* Requested number of iod threads, as clamped by nfs_set_niothreads(). */
int nfs_niothreads = -1;		/* == "0, and has never been set" */
/*
 * When non-zero, an iod stops draining a mount's queue after one buffer
 * if more than one iod is accounted to that mount (see nfssvc_iod()).
 */
int nfs_defect = 0;
89 | |
/*
 * Asynchronous I/O threads for client nfs.
 * They do read-ahead and write-behind operations on the block I/O cache.
 * Never returns unless it fails or gets killed.
 *
 * Body of one iod kernel thread, created by nfs_set_niothreads().
 * The thread allocates its own struct nfs_iod, links it on
 * nfs_iodlist_all and then loops forever:
 *   1. put itself on nfs_iodlist_idle;
 *   2. sleep on nid_cv until nid_mount has been set, or nid_exiting is
 *      set with no mount pending;
 *   3. drain the mount's nm_bufq, handing each buffer to nfs_doio().
 * On exit it unlinks itself from the idle list, destroys its cv and
 * mutex, frees the nfs_iod and calls kthread_exit(0).
 *
 * arg is not used.
 */

static void
nfssvc_iod(void *arg)
{
	struct buf *bp;
	struct nfs_iod *myiod;
	struct nfsmount *nmp;

	/* Set up this thread's private state and publish it. */
	myiod = kmem_alloc(sizeof(*myiod), KM_SLEEP);
	mutex_init(&myiod->nid_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&myiod->nid_cv, "nfsiod");
	myiod->nid_exiting = false;
	myiod->nid_mount = NULL;
	mutex_enter(&nfs_iodlist_lock);
	LIST_INSERT_HEAD(&nfs_iodlist_all, myiod, nid_all);
	mutex_exit(&nfs_iodlist_lock);

	for (;;) {
		/*
		 * Advertise ourselves as idle.
		 * NOTE(review): nothing in this function removes us from the
		 * idle list on the normal path; presumably whoever sets
		 * nid_mount does so -- confirm against nfs_asyncio().
		 */
		mutex_enter(&nfs_iodlist_lock);
		LIST_INSERT_HEAD(&nfs_iodlist_idle, myiod, nid_idle);
		mutex_exit(&nfs_iodlist_lock);

		/* Wait, under nid_lock, for a mount to serve or for exit. */
		mutex_enter(&myiod->nid_lock);
		while (/*CONSTCOND*/ true) {
			nmp = myiod->nid_mount;
			if (nmp) {
				/* Claim the assignment and go do the work. */
				myiod->nid_mount = NULL;
				break;
			}
			if (__predict_false(myiod->nid_exiting)) {
				/*
				 * drop nid_lock to preserve locking order.
				 */
				mutex_exit(&myiod->nid_lock);
				mutex_enter(&nfs_iodlist_lock);
				mutex_enter(&myiod->nid_lock);
				/*
				 * recheck nid_mount because nfs_asyncio can
				 * pick us in the meantime as we are still on
				 * nfs_iodlist_lock.
				 */
				if (myiod->nid_mount != NULL) {
					mutex_exit(&nfs_iodlist_lock);
					continue;
				}
				LIST_REMOVE(myiod, nid_idle);
				mutex_exit(&nfs_iodlist_lock);
				/* Leaves the loop with nid_lock still held. */
				goto quit;
			}
			cv_wait(&myiod->nid_cv, &myiod->nid_lock);
		}
		mutex_exit(&myiod->nid_lock);

		mutex_enter(&nmp->nm_lock);
		while ((bp = TAILQ_FIRST(&nmp->nm_bufq)) != NULL) {
			/* Take one off the front of the list */
			TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
			nmp->nm_bufqlen--;
			/*
			 * Wake nm_aiocv sleepers once the queue is shorter
			 * than twice the iod count (presumably producers
			 * throttled on queue length -- confirm).
			 */
			if (nmp->nm_bufqlen < 2 * nmp->nm_bufqiods) {
				cv_broadcast(&nmp->nm_aiocv);
			}
			/* nm_lock is dropped while the I/O itself is done. */
			mutex_exit(&nmp->nm_lock);
			KERNEL_LOCK(1, curlwp);
			(void)nfs_doio(bp);
			KERNEL_UNLOCK_LAST(curlwp);
			mutex_enter(&nmp->nm_lock);
			/*
			 * If there are more than one iod on this mount,
			 * then defect so that the iods can be shared out
			 * fairly between the mounts
			 */
			if (nfs_defect && nmp->nm_bufqiods > 1) {
				break;
			}
		}
		/* This iod is no longer counted against the mount. */
		KASSERT(nmp->nm_bufqiods > 0);
		nmp->nm_bufqiods--;
		mutex_exit(&nmp->nm_lock);
	}
quit:
	/*
	 * Reached only via the goto above: nid_lock is held, we are off the
	 * idle list, and nfs_set_niothreads() already removed us from
	 * nfs_iodlist_all before setting nid_exiting.
	 */
	KASSERT(myiod->nid_mount == NULL);
	mutex_exit(&myiod->nid_lock);

	cv_destroy(&myiod->nid_cv);
	mutex_destroy(&myiod->nid_lock);
	kmem_free(myiod, sizeof(*myiod));

	kthread_exit(0);
}
184 | |
185 | void |
186 | nfs_iodinit(void) |
187 | { |
188 | |
189 | mutex_init(&nfs_iodlist_lock, MUTEX_DEFAULT, IPL_NONE); |
190 | LIST_INIT(&nfs_iodlist_all); |
191 | LIST_INIT(&nfs_iodlist_idle); |
192 | } |
193 | |
194 | void |
195 | nfs_iodfini(void) |
196 | { |
197 | int error __diagused; |
198 | |
199 | error = nfs_set_niothreads(0); |
200 | KASSERT(error == 0); |
201 | mutex_destroy(&nfs_iodlist_lock); |
202 | } |
203 | |
204 | int |
205 | nfs_iodbusy(struct nfsmount *nmp) |
206 | { |
207 | struct nfs_iod *iod; |
208 | int ret = 0; |
209 | |
210 | mutex_enter(&nfs_iodlist_lock); |
211 | LIST_FOREACH(iod, &nfs_iodlist_all, nid_all) { |
212 | if (iod->nid_mount == nmp) |
213 | ret++; |
214 | } |
215 | mutex_exit(&nfs_iodlist_lock); |
216 | |
217 | return ret; |
218 | } |
219 | |
220 | int |
221 | nfs_set_niothreads(int newval) |
222 | { |
223 | struct nfs_iod *nid; |
224 | int error = 0; |
225 | int hold_count; |
226 | |
227 | KERNEL_UNLOCK_ALL(curlwp, &hold_count); |
228 | |
229 | mutex_enter(&nfs_iodlist_lock); |
230 | /* clamp to sane range */ |
231 | nfs_niothreads = max(0, min(newval, NFS_MAXASYNCDAEMON)); |
232 | |
233 | while (nfs_numasync != nfs_niothreads && error == 0) { |
234 | while (nfs_numasync < nfs_niothreads) { |
235 | |
236 | /* |
237 | * kthread_create can wait for pagedaemon and |
238 | * pagedaemon can wait for nfsiod which needs to acquire |
239 | * nfs_iodlist_lock. |
240 | */ |
241 | |
242 | mutex_exit(&nfs_iodlist_lock); |
243 | error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, |
244 | nfssvc_iod, NULL, NULL, "nfsio" ); |
245 | mutex_enter(&nfs_iodlist_lock); |
246 | if (error) { |
247 | /* give up */ |
248 | nfs_niothreads = nfs_numasync; |
249 | break; |
250 | } |
251 | nfs_numasync++; |
252 | } |
253 | while (nfs_numasync > nfs_niothreads) { |
254 | nid = LIST_FIRST(&nfs_iodlist_all); |
255 | if (nid == NULL) { |
256 | /* iod has not started yet. */ |
257 | kpause("nfsiorm" , false, hz, &nfs_iodlist_lock); |
258 | continue; |
259 | } |
260 | LIST_REMOVE(nid, nid_all); |
261 | mutex_enter(&nid->nid_lock); |
262 | KASSERT(!nid->nid_exiting); |
263 | nid->nid_exiting = true; |
264 | cv_signal(&nid->nid_cv); |
265 | mutex_exit(&nid->nid_lock); |
266 | nfs_numasync--; |
267 | } |
268 | } |
269 | mutex_exit(&nfs_iodlist_lock); |
270 | |
271 | KERNEL_LOCK(hold_count, curlwp); |
272 | return error; |
273 | } |
274 | |
/*
 * Get an authorization string for the uid by having the mount_nfs sitting
 * on this mount point porpoise out of the kernel and do it.
 *
 * The exchange is driven by flag bits in nmp->nm_iflag and by
 * tsleep()/wakeup() on addresses of nfsmount fields:
 *   - wait for NFSMNT_WAITAUTH, sleeping on &nm_authtype;
 *   - publish the request in nm_authstr, nm_authlen, nm_verfstr,
 *     nm_verflen and nm_authuid, then wakeup(&nm_authstr);
 *   - wait for NFSMNT_HASAUTH, sleeping on &nm_authlen.
 *
 * nmp       mount the authenticator is wanted for
 * rep       request passed, with rep->r_lwp, to nfs_sigintr()
 * cred      its effective uid is stored in nm_authuid
 * auth_str  out: buffer of RPCAUTH_MAXSIZ bytes from malloc(M_TEMP);
 *           freed here on error, but *auth_str is not reset, so the
 *           caller must not use it when an error is returned
 * auth_len  out: set from nm_authlen on success
 * verf_str  verifier buffer, handed over through nm_verfstr
 * verf_len  in: stored in nm_verflen; out: set from nm_verflen on success
 * key       out: copy of nm_key on success
 *
 * Returns 0 on success, an error from nfs_sigintr(), or EAUTH when
 * NFSMNT_AUTHERR was set.
 */
int
nfs_getauth(struct nfsmount *nmp, struct nfsreq *rep, kauth_cred_t cred, char **auth_str, int *auth_len, char *verf_str, int *verf_len, NFSKERBKEY_T key)
	/* key:		 return session key */
{
	int error = 0;

	/*
	 * Wait until NFSMNT_WAITAUTH is set.  NFSMNT_WANTAUTH records that
	 * somebody is waiting, so that the code at the end of this function
	 * knows to issue a wakeup.  The sleep is bounded to 2 * hz so that
	 * nfs_sigintr() is consulted periodically.
	 */
	while ((nmp->nm_iflag & NFSMNT_WAITAUTH) == 0) {
		nmp->nm_iflag |= NFSMNT_WANTAUTH;
		(void) tsleep((void *)&nmp->nm_authtype, PSOCK,
			"nfsauth1", 2 * hz);
		error = nfs_sigintr(nmp, rep, rep->r_lwp);
		if (error) {
			nmp->nm_iflag &= ~NFSMNT_WANTAUTH;
			return (error);
		}
	}
	/* Take ownership of the exchange and publish the request. */
	nmp->nm_iflag &= ~(NFSMNT_WAITAUTH | NFSMNT_WANTAUTH);
	nmp->nm_authstr = *auth_str = (char *)malloc(RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK);
	nmp->nm_authlen = RPCAUTH_MAXSIZ;
	nmp->nm_verfstr = verf_str;
	nmp->nm_verflen = *verf_len;
	nmp->nm_authuid = kauth_cred_geteuid(cred);
	wakeup((void *)&nmp->nm_authstr);

	/*
	 * And wait for mount_nfs to do its stuff.
	 */
	while ((nmp->nm_iflag & NFSMNT_HASAUTH) == 0 && error == 0) {
		(void) tsleep((void *)&nmp->nm_authlen, PSOCK,
			"nfsauth2", 2 * hz);
		error = nfs_sigintr(nmp, rep, rep->r_lwp);
	}
	/* A reported authentication failure overrides any other status. */
	if (nmp->nm_iflag & NFSMNT_AUTHERR) {
		nmp->nm_iflag &= ~NFSMNT_AUTHERR;
		error = EAUTH;
	}
	if (error)
		free((void *)*auth_str, M_TEMP);
	else {
		*auth_len = nmp->nm_authlen;
		*verf_len = nmp->nm_verflen;
		memcpy(key, nmp->nm_key, sizeof (NFSKERBKEY_T));
	}
	/* Return to the waiting state and let the next waiter in, if any. */
	nmp->nm_iflag &= ~NFSMNT_HASAUTH;
	nmp->nm_iflag |= NFSMNT_WAITAUTH;
	if (nmp->nm_iflag & NFSMNT_WANTAUTH) {
		nmp->nm_iflag &= ~NFSMNT_WANTAUTH;
		wakeup((void *)&nmp->nm_authtype);
	}
	return (error);
}
330 | |
331 | /* |
332 | * Get a nickname authenticator and verifier. |
333 | */ |
334 | int |
335 | nfs_getnickauth(struct nfsmount *nmp, kauth_cred_t cred, char **auth_str, |
336 | int *auth_len, char *verf_str, int verf_len) |
337 | { |
338 | #ifdef NFSKERB |
339 | struct timeval ktvin; |
340 | #endif |
341 | struct timeval ktvout, tv; |
342 | struct nfsuid *nuidp; |
343 | u_int32_t *nickp, *verfp; |
344 | |
345 | memset(&ktvout, 0, sizeof ktvout); /* XXX gcc */ |
346 | |
347 | #ifdef DIAGNOSTIC |
348 | if (verf_len < (4 * NFSX_UNSIGNED)) |
349 | panic("nfs_getnickauth verf too small" ); |
350 | #endif |
351 | LIST_FOREACH(nuidp, NMUIDHASH(nmp, kauth_cred_geteuid(cred)), nu_hash) { |
352 | if (kauth_cred_geteuid(nuidp->nu_cr) == kauth_cred_geteuid(cred)) |
353 | break; |
354 | } |
355 | if (!nuidp || nuidp->nu_expire < time_second) |
356 | return (EACCES); |
357 | |
358 | /* |
359 | * Move to the end of the lru list (end of lru == most recently used). |
360 | */ |
361 | TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru); |
362 | TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, nu_lru); |
363 | |
364 | nickp = (u_int32_t *)malloc(2 * NFSX_UNSIGNED, M_TEMP, M_WAITOK); |
365 | *nickp++ = txdr_unsigned(RPCAKN_NICKNAME); |
366 | *nickp = txdr_unsigned(nuidp->nu_nickname); |
367 | *auth_str = (char *)nickp; |
368 | *auth_len = 2 * NFSX_UNSIGNED; |
369 | |
370 | /* |
371 | * Now we must encrypt the verifier and package it up. |
372 | */ |
373 | verfp = (u_int32_t *)verf_str; |
374 | *verfp++ = txdr_unsigned(RPCAKN_NICKNAME); |
375 | getmicrotime(&tv); |
376 | if (tv.tv_sec > nuidp->nu_timestamp.tv_sec || |
377 | (tv.tv_sec == nuidp->nu_timestamp.tv_sec && |
378 | tv.tv_usec > nuidp->nu_timestamp.tv_usec)) |
379 | nuidp->nu_timestamp = tv; |
380 | else |
381 | nuidp->nu_timestamp.tv_usec++; |
382 | #ifdef NFSKERB |
383 | ktvin.tv_sec = txdr_unsigned(nuidp->nu_timestamp.tv_sec); |
384 | ktvin.tv_usec = txdr_unsigned(nuidp->nu_timestamp.tv_usec); |
385 | |
386 | /* |
387 | * Now encrypt the timestamp verifier in ecb mode using the session |
388 | * key. |
389 | */ |
390 | XXX |
391 | #endif |
392 | |
393 | *verfp++ = ktvout.tv_sec; |
394 | *verfp++ = ktvout.tv_usec; |
395 | *verfp = 0; |
396 | return (0); |
397 | } |
398 | |
/*
 * Save the current nickname in a hash list entry on the mount point.
 *
 * When len is exactly 3 * NFSX_UNSIGNED, three words are taken from the
 * reply: two timestamp words and the nickname.  If the timestamp is
 * within NFS_KERBCLOCKSKEW seconds of time_second, an nfsuid entry is
 * filled in for cred's effective uid and linked on both the LRU list
 * and the uid hash of nmp.  For any other len, the (rounded-up) data is
 * skipped.
 *
 * nmp    mount that owns the nickname cache
 * cred   credential whose effective uid keys the entry
 * len    length in bytes of the verifier data in the reply
 * key    session key copied into the entry
 * mdp    in/out: current mbuf of the reply
 * dposp  in/out: current parse position within that mbuf
 * mrep   head of the reply chain (presumably consumed by the nfsm_*
 *        macros on failure -- confirm against nfsm_subs.h)
 *
 * Returns the value of error: 0 unless one of the nfsm_* macros sets it.
 *
 * NOTE(review): the locals md, dpos, t1, cp2 and error and the label
 * nfsmout have no other visible use; presumably the nfsm_dissect() and
 * nfsm_adv() macros reference them by name -- do not rename.
 */
int
nfs_savenickauth(struct nfsmount *nmp, kauth_cred_t cred, int len, NFSKERBKEY_T key, struct mbuf **mdp, char **dposp, struct mbuf *mrep)
{
	struct nfsuid *nuidp;
	u_int32_t *tl;
	int32_t t1;
	struct mbuf *md = *mdp;
	struct timeval ktvin, ktvout;
	u_int32_t nick;
	char *dpos = *dposp, *cp2;
	int deltasec, error = 0;

	memset(&ktvout, 0, sizeof ktvout);	 /* XXX gcc */

	if (len == (3 * NFSX_UNSIGNED)) {
		nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
		ktvin.tv_sec = *tl++;
		ktvin.tv_usec = *tl++;
		nick = fxdr_unsigned(u_int32_t, *tl);

		/*
		 * Decrypt the timestamp in ecb mode.
		 */
#ifdef NFSKERB
		XXX
#else
		/* No decryption here: ktvin is only touched, ktvout stays 0. */
		(void)ktvin.tv_sec;
#endif
		ktvout.tv_sec = fxdr_unsigned(long, ktvout.tv_sec);
		ktvout.tv_usec = fxdr_unsigned(long, ktvout.tv_usec);
		/* Absolute difference between the timestamp and now. */
		deltasec = time_second - ktvout.tv_sec;
		if (deltasec < 0)
			deltasec = -deltasec;
		/*
		 * If ok, add it to the hash list for the mount point.
		 */
		if (deltasec <= NFS_KERBCLOCKSKEW) {
			if (nmp->nm_numuids < nuidhash_max) {
				/*
				 * Room for a new entry.
				 * NOTE(review): nothing here initialises
				 * nuidp->nu_cr before kauth_cred_seteuid()
				 * uses it below -- confirm.
				 */
				nmp->nm_numuids++;
				nuidp = kmem_alloc(sizeof(*nuidp), KM_SLEEP);
			} else {
				/* Cache full: recycle the head of the LRU list. */
				nuidp = TAILQ_FIRST(&nmp->nm_uidlruhead);
				LIST_REMOVE(nuidp, nu_hash);
				TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp,
					nu_lru);
			}
			nuidp->nu_flag = 0;
			kauth_cred_seteuid(nuidp->nu_cr, kauth_cred_geteuid(cred));
			nuidp->nu_expire = time_second + NFS_KERBTTL;
			nuidp->nu_timestamp = ktvout;
			nuidp->nu_nickname = nick;
			memcpy(nuidp->nu_key, key, sizeof (NFSKERBKEY_T));
			TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp,
				nu_lru);
			LIST_INSERT_HEAD(NMUIDHASH(nmp, kauth_cred_geteuid(cred)),
				nuidp, nu_hash);
		}
	} else
		nfsm_adv(nfsm_rndup(len));
nfsmout:
	/* Hand the (possibly advanced) parse position back to the caller. */
	*mdp = md;
	*dposp = dpos;
	return (error);
}
466 | |