1 | /* $NetBSD: vfs_vnode.c,v 1.59 2016/11/03 11:04:21 hannken Exp $ */ |
2 | |
3 | /*- |
4 | * Copyright (c) 1997-2011 The NetBSD Foundation, Inc. |
5 | * All rights reserved. |
6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, |
9 | * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. |
10 | * |
11 | * Redistribution and use in source and binary forms, with or without |
12 | * modification, are permitted provided that the following conditions |
13 | * are met: |
14 | * 1. Redistributions of source code must retain the above copyright |
15 | * notice, this list of conditions and the following disclaimer. |
16 | * 2. Redistributions in binary form must reproduce the above copyright |
17 | * notice, this list of conditions and the following disclaimer in the |
18 | * documentation and/or other materials provided with the distribution. |
19 | * |
20 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
22 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
23 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
24 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
30 | * POSSIBILITY OF SUCH DAMAGE. |
31 | */ |
32 | |
33 | /* |
34 | * Copyright (c) 1989, 1993 |
35 | * The Regents of the University of California. All rights reserved. |
36 | * (c) UNIX System Laboratories, Inc. |
37 | * All or some portions of this file are derived from material licensed |
38 | * to the University of California by American Telephone and Telegraph |
39 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with |
40 | * the permission of UNIX System Laboratories, Inc. |
41 | * |
42 | * Redistribution and use in source and binary forms, with or without |
43 | * modification, are permitted provided that the following conditions |
44 | * are met: |
45 | * 1. Redistributions of source code must retain the above copyright |
46 | * notice, this list of conditions and the following disclaimer. |
47 | * 2. Redistributions in binary form must reproduce the above copyright |
48 | * notice, this list of conditions and the following disclaimer in the |
49 | * documentation and/or other materials provided with the distribution. |
50 | * 3. Neither the name of the University nor the names of its contributors |
51 | * may be used to endorse or promote products derived from this software |
52 | * without specific prior written permission. |
53 | * |
54 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
55 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
56 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
57 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
58 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
59 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
60 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
61 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
62 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
63 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
64 | * SUCH DAMAGE. |
65 | * |
66 | * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 |
67 | */ |
68 | |
69 | /* |
70 | * The vnode cache subsystem. |
71 | * |
72 | * Life-cycle |
73 | * |
74 | * Normally, there are two points where new vnodes are created: |
75 | * VOP_CREATE(9) and VOP_LOOKUP(9). The life-cycle of a vnode |
76 | * starts in one of the following ways: |
77 | * |
78 | * - Allocation, via vcache_get(9) or vcache_new(9). |
79 | * - Reclamation of inactive vnode, via vget(9). |
80 | * |
81 | * Recycle from a free list, via getnewvnode(9) -> getcleanvnode(9) |
82 | * was another, traditional way. Currently, only the draining thread |
83 | * recycles the vnodes. This behaviour might be revisited. |
84 | * |
85 | * The life-cycle ends when the last reference is dropped, usually |
86 | * in VOP_REMOVE(9). In such case, VOP_INACTIVE(9) is called to inform |
87 | * the file system that vnode is inactive. Via this call, file system |
88 | * indicates whether vnode can be recycled (usually, it checks its own |
89 | * references, e.g. count of links, whether the file was removed). |
90 | * |
91 | * Depending on indication, vnode can be put into a free list (cache), |
92 | * or cleaned via vcache_reclaim, which calls VOP_RECLAIM(9) to |
93 | * disassociate underlying file system from the vnode, and finally |
94 | * destroyed. |
95 | * |
96 | * Vnode state |
97 | * |
98 | * Vnode is always in one of six states: |
99 | * - MARKER This is a marker vnode to help list traversal. It |
100 | * will never change its state. |
101 | * - LOADING Vnode is associating underlying file system and not |
102 | * yet ready to use. |
103 | * - ACTIVE Vnode has associated underlying file system and is |
104 | * ready to use. |
105 | * - BLOCKED Vnode is active but cannot get new references. |
106 | * - RECLAIMING Vnode is disassociating from the underlying file |
107 | * system. |
108 | * - RECLAIMED Vnode has disassociated from underlying file system |
109 | * and is dead. |
110 | * |
111 | * Valid state changes are: |
112 | * LOADING -> ACTIVE |
113 | * Vnode has been initialised in vcache_get() or |
114 | * vcache_new() and is ready to use. |
115 | * ACTIVE -> RECLAIMING |
116 | * Vnode starts disassociation from underlying file |
117 | * system in vcache_reclaim(). |
118 | * RECLAIMING -> RECLAIMED |
119 | * Vnode finished disassociation from underlying file |
120 | * system in vcache_reclaim(). |
121 | * ACTIVE -> BLOCKED |
122 | * Either vcache_rekey*() is changing the vnode key or |
123 | * vrelel() is about to call VOP_INACTIVE(). |
124 | * BLOCKED -> ACTIVE |
125 | * The block condition is over. |
126 | * LOADING -> RECLAIMED |
127 | * Either vcache_get() or vcache_new() failed to |
128 | * associate the underlying file system or vcache_rekey*() |
129 | * drops a vnode used as placeholder. |
130 | * |
131 | * Of these states LOADING, BLOCKED and RECLAIMING are intermediate |
132 | * and it is possible to wait for state change. |
133 | * |
134 | * State is protected with v_interlock with one exception: |
135 | * to change from LOADING both v_interlock and vcache.lock must be held |
136 | * so it is possible to check "state == LOADING" without holding |
137 | * v_interlock. See vcache_get() for details. |
138 | * |
139 | * Reference counting |
140 | * |
141 | * Vnode is considered active, if reference count (vnode_t::v_usecount) |
142 | * is non-zero. It is maintained using: vref(9) and vrele(9), as well |
143 | * as vput(9), routines. Common points holding references are e.g. |
144 | * file openings, current working directory, mount points, etc. |
145 | * |
146 | * Note on v_usecount and its locking |
147 | * |
 *	At nearly all points where it is known that v_usecount could be
 *	zero, the vnode_t::v_interlock will be held.  To change v_usecount
 *	away from zero, the interlock must be held.  To change from a
 *	non-zero value to zero, again the interlock must be held.
152 | * |
153 | * Changing the usecount from a non-zero value to a non-zero value can |
154 | * safely be done using atomic operations, without the interlock held. |
155 | * |
156 | */ |
157 | |
158 | #include <sys/cdefs.h> |
159 | __KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.59 2016/11/03 11:04:21 hannken Exp $" ); |
160 | |
161 | #include <sys/param.h> |
162 | #include <sys/kernel.h> |
163 | |
164 | #include <sys/atomic.h> |
165 | #include <sys/buf.h> |
166 | #include <sys/conf.h> |
167 | #include <sys/device.h> |
168 | #include <sys/hash.h> |
169 | #include <sys/kauth.h> |
170 | #include <sys/kmem.h> |
171 | #include <sys/kthread.h> |
172 | #include <sys/module.h> |
173 | #include <sys/mount.h> |
174 | #include <sys/namei.h> |
175 | #include <sys/syscallargs.h> |
176 | #include <sys/sysctl.h> |
177 | #include <sys/systm.h> |
178 | #include <sys/vnode_impl.h> |
179 | #include <sys/wapbl.h> |
180 | #include <sys/fstrans.h> |
181 | |
182 | #include <uvm/uvm.h> |
183 | #include <uvm/uvm_readahead.h> |
184 | |
/* Flags to vrelel. */
#define	VRELEL_ASYNC_RELE	0x0001	/* Always defer to vrele thread. */

/* Vnode count; vdrain_thread() compares it against desiredvnodes. */
u_int			numvnodes		__cacheline_aligned;

/*
 * There are two free lists: one is for vnodes which have no buffer/page
 * references and one for those which do (i.e. v_holdcnt is non-zero).
 * Vnode recycling mechanism first attempts to look into the former list.
 *
 * Both lists are manipulated with vnode_free_list_lock held.  vdrain_cv
 * is the condition variable vdrain_thread() sleeps on, with
 * vnode_free_list_lock as its interlock.
 */
static kmutex_t		vnode_free_list_lock	__cacheline_aligned;
static vnodelst_t	vnode_free_list		__cacheline_aligned;
static vnodelst_t	vnode_hold_list		__cacheline_aligned;
static kcondvar_t	vdrain_cv		__cacheline_aligned;

/*
 * Deferred release.  vrele_list, vrele_pending and vrele_gen are
 * accessed with vrele_lock held.
 *
 *	vrele_list	vnodes whose last reference was donated by vrelel()
 *	vrele_cv	wakes vrele_thread() and waiters in vrele_flush()
 *	vrele_lwp	lwp of the "vrele" kthread, filled in by
 *			kthread_create() in vfs_vnode_sysinit()
 *	vrele_pending	incremented when vrelel() queues a vnode,
 *			decremented when vrele_thread() starts on one
 *	vrele_gen	incremented by vrele_thread() whenever it finds
 *			vrele_list empty; vrele_flush() waits for a change
 */
static vnodelst_t	vrele_list		__cacheline_aligned;
static kmutex_t		vrele_lock		__cacheline_aligned;
static kcondvar_t	vrele_cv		__cacheline_aligned;
static lwp_t *		vrele_lwp		__cacheline_aligned;
static int		vrele_pending		__cacheline_aligned;
static int		vrele_gen		__cacheline_aligned;

/*
 * Vnode cache state.
 *
 *	lock		must be held, in addition to v_interlock, to move
 *			a vnode out of VS_LOADING
 *	cv		broadcast whenever a vnode leaves VS_LOADING
 *	hashmask,
 *	hashtab		hash table of vnode_impl; its users lie outside the
 *			part of the file documented here
 *	pool		pool cache that vnalloc_marker()/vnfree_marker()
 *			allocate from and return to
 */
SLIST_HEAD(hashhead, vnode_impl);
static struct {
	kmutex_t	lock;
	kcondvar_t	cv;
	u_long		hashmask;
	struct hashhead	*hashtab;
	pool_cache_t	pool;
}			vcache			__cacheline_aligned;

/* Forward declarations of file-local routines. */
static int		cleanvnode(void);
static vnode_impl_t	*vcache_alloc(void);
static void		vcache_free(vnode_impl_t *);
static void		vcache_init(void);
static void		vcache_reinit(void);
static void		vcache_reclaim(vnode_t *);
static void		vrelel(vnode_t *, int);
static void		vdrain_thread(void *);
static void		vrele_thread(void *);
static void		vnpanic(vnode_t *, const char *, ...)
    __printflike(2, 3);

/*
 * Routines having to do with the management of the vnode table.
 *
 * Externally defined "dead" file system objects: dead_rootmount is
 * assigned in vfs_vnode_sysinit() from dead_vfsops, and vrelel() compares
 * v_op against dead_vnodeop_p.
 */
extern struct mount	*dead_rootmount;
extern int		(**dead_vnodeop_p)(void *);
extern struct vfsops	dead_vfsops;
233 | /* Vnode state operations and diagnostics. */ |
234 | |
235 | #if defined(DIAGNOSTIC) |
236 | |
/*
 * DIAGNOSTIC variants of the state accessors.  Each one forwards to a
 * checking function and passes the call site (__func__, __LINE__) so
 * that a failed check can report where it was invoked from.
 */
#define	VSTATE_GET(vp) \
	vstate_assert_get((vp), __func__, __LINE__)
#define	VSTATE_CHANGE(vp, from, to) \
	vstate_assert_change((vp), (from), (to), __func__, __LINE__)
#define	VSTATE_WAIT_STABLE(vp) \
	vstate_assert_wait_stable((vp), __func__, __LINE__)
#define	VSTATE_ASSERT(vp, state) \
	vstate_assert((vp), (state), __func__, __LINE__)
245 | |
246 | static void |
247 | vstate_assert(vnode_t *vp, enum vnode_state state, const char *func, int line) |
248 | { |
249 | vnode_impl_t *node = VNODE_TO_VIMPL(vp); |
250 | |
251 | KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d" , func, line); |
252 | |
253 | if (__predict_true(node->vi_state == state)) |
254 | return; |
255 | vnpanic(vp, "state is %s, expected %s at %s:%d" , |
256 | vstate_name(node->vi_state), vstate_name(state), func, line); |
257 | } |
258 | |
259 | static enum vnode_state |
260 | vstate_assert_get(vnode_t *vp, const char *func, int line) |
261 | { |
262 | vnode_impl_t *node = VNODE_TO_VIMPL(vp); |
263 | |
264 | KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d" , func, line); |
265 | if (node->vi_state == VS_MARKER) |
266 | vnpanic(vp, "state is %s at %s:%d" , |
267 | vstate_name(node->vi_state), func, line); |
268 | |
269 | return node->vi_state; |
270 | } |
271 | |
272 | static void |
273 | vstate_assert_wait_stable(vnode_t *vp, const char *func, int line) |
274 | { |
275 | vnode_impl_t *node = VNODE_TO_VIMPL(vp); |
276 | |
277 | KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d" , func, line); |
278 | if (node->vi_state == VS_MARKER) |
279 | vnpanic(vp, "state is %s at %s:%d" , |
280 | vstate_name(node->vi_state), func, line); |
281 | |
282 | while (node->vi_state != VS_ACTIVE && node->vi_state != VS_RECLAIMED) |
283 | cv_wait(&vp->v_cv, vp->v_interlock); |
284 | |
285 | if (node->vi_state == VS_MARKER) |
286 | vnpanic(vp, "state is %s at %s:%d" , |
287 | vstate_name(node->vi_state), func, line); |
288 | } |
289 | |
290 | static void |
291 | vstate_assert_change(vnode_t *vp, enum vnode_state from, enum vnode_state to, |
292 | const char *func, int line) |
293 | { |
294 | vnode_impl_t *node = VNODE_TO_VIMPL(vp); |
295 | |
296 | KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d" , func, line); |
297 | if (from == VS_LOADING) |
298 | KASSERTMSG(mutex_owned(&vcache.lock), "at %s:%d" , func, line); |
299 | |
300 | if (from == VS_MARKER) |
301 | vnpanic(vp, "from is %s at %s:%d" , |
302 | vstate_name(from), func, line); |
303 | if (to == VS_MARKER) |
304 | vnpanic(vp, "to is %s at %s:%d" , |
305 | vstate_name(to), func, line); |
306 | if (node->vi_state != from) |
307 | vnpanic(vp, "from is %s, expected %s at %s:%d\n" , |
308 | vstate_name(node->vi_state), vstate_name(from), func, line); |
309 | |
310 | node->vi_state = to; |
311 | if (from == VS_LOADING) |
312 | cv_broadcast(&vcache.cv); |
313 | if (to == VS_ACTIVE || to == VS_RECLAIMED) |
314 | cv_broadcast(&vp->v_cv); |
315 | } |
316 | |
317 | #else /* defined(DIAGNOSTIC) */ |
318 | |
/*
 * Non-DIAGNOSTIC variants of the state accessors: no call-site tracking
 * and no consistency checks.  VSTATE_ASSERT() expands to nothing.
 */
#define	VSTATE_GET(vp) \
	(VNODE_TO_VIMPL((vp))->vi_state)
#define	VSTATE_CHANGE(vp, from, to) \
	vstate_change((vp), (from), (to))
#define	VSTATE_WAIT_STABLE(vp) \
	vstate_wait_stable((vp))
#define	VSTATE_ASSERT(vp, state)
326 | |
327 | static void |
328 | vstate_wait_stable(vnode_t *vp) |
329 | { |
330 | vnode_impl_t *node = VNODE_TO_VIMPL(vp); |
331 | |
332 | while (node->vi_state != VS_ACTIVE && node->vi_state != VS_RECLAIMED) |
333 | cv_wait(&vp->v_cv, vp->v_interlock); |
334 | } |
335 | |
336 | static void |
337 | vstate_change(vnode_t *vp, enum vnode_state from, enum vnode_state to) |
338 | { |
339 | vnode_impl_t *node = VNODE_TO_VIMPL(vp); |
340 | |
341 | node->vi_state = to; |
342 | if (from == VS_LOADING) |
343 | cv_broadcast(&vcache.cv); |
344 | if (to == VS_ACTIVE || to == VS_RECLAIMED) |
345 | cv_broadcast(&vp->v_cv); |
346 | } |
347 | |
348 | #endif /* defined(DIAGNOSTIC) */ |
349 | |
/*
 * Initialise the vnode subsystem: allocate the dead root mount, set up
 * the free/hold/vrele lists with their locks and condition variables,
 * initialise the vnode cache and start the "vdrain" and "vrele" kernel
 * threads.
 */
void
vfs_vnode_sysinit(void)
{
	/* Only consumed by KASSERTMSG(), hence __diagused. */
	int error __diagused;

	/* Mount structure backed by dead_vfsops. */
	dead_rootmount = vfs_mountalloc(&dead_vfsops, NULL);
	KASSERT(dead_rootmount != NULL);
	dead_rootmount->mnt_iflag = IMNT_MPSAFE;

	mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE);
	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&vnode_hold_list);
	TAILQ_INIT(&vrele_list);

	vcache_init();

	mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&vdrain_cv, "vdrain");
	cv_init(&vrele_cv, "vrele");

	/* Both helper threads are created MP-safe at PRI_VM. */
	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vdrain_thread,
	    NULL, NULL, "vdrain");
	KASSERTMSG((error == 0), "kthread_create(vdrain) failed: %d", error);
	/* The vrele thread's lwp is recorded so vrelel() can recognise it. */
	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
	    NULL, &vrele_lwp, "vrele");
	KASSERTMSG((error == 0), "kthread_create(vrele) failed: %d", error);
}
376 | |
377 | /* |
378 | * Allocate a new marker vnode. |
379 | */ |
380 | vnode_t * |
381 | vnalloc_marker(struct mount *mp) |
382 | { |
383 | vnode_impl_t *node; |
384 | vnode_t *vp; |
385 | |
386 | node = pool_cache_get(vcache.pool, PR_WAITOK); |
387 | memset(node, 0, sizeof(*node)); |
388 | vp = VIMPL_TO_VNODE(node); |
389 | uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0); |
390 | vp->v_mount = mp; |
391 | vp->v_type = VBAD; |
392 | node->vi_state = VS_MARKER; |
393 | |
394 | return vp; |
395 | } |
396 | |
397 | /* |
398 | * Free a marker vnode. |
399 | */ |
400 | void |
401 | vnfree_marker(vnode_t *vp) |
402 | { |
403 | vnode_impl_t *node; |
404 | |
405 | node = VNODE_TO_VIMPL(vp); |
406 | KASSERT(node->vi_state == VS_MARKER); |
407 | uvm_obj_destroy(&vp->v_uobj, true); |
408 | pool_cache_put(vcache.pool, node); |
409 | } |
410 | |
411 | /* |
412 | * Test a vnode for being a marker vnode. |
413 | */ |
414 | bool |
415 | vnis_marker(vnode_t *vp) |
416 | { |
417 | |
418 | return (VNODE_TO_VIMPL(vp)->vi_state == VS_MARKER); |
419 | } |
420 | |
/*
 * cleanvnode: grab a vnode from freelist, clean and free it.
 *
 * => Must be called with vnode_free_list_lock held.
 * => Releases vnode_free_list_lock on every return path.
 * => Returns 0 after reclaiming one vnode, or EBUSY if no vnode on
 *    either list could be locked without blocking.
 */
static int
cleanvnode(void)
{
	vnode_t *vp;
	vnodelst_t *listhd;
	struct mount *mp;

	KASSERT(mutex_owned(&vnode_free_list_lock));

	/* Prefer vnodes without holds; fall back to the hold list. */
	listhd = &vnode_free_list;
try_nextlist:
	TAILQ_FOREACH(vp, listhd, v_freelist) {
		/*
		 * It's safe to test v_usecount without holding the
		 * interlock here: a vnode is taken off its list before
		 * v_usecount leaves zero (see vget()), so referenced
		 * vnodes should never appear on the lists.
		 */
		KASSERT(vp->v_usecount == 0);
		KASSERT(vp->v_freelisthd == listhd);

		/*
		 * The free list lock is held, so only try-lock style
		 * acquisitions are used; on any failure undo what was
		 * taken and move on to the next candidate.
		 */
		if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0)
			continue;
		if (!mutex_tryenter(vp->v_interlock)) {
			VOP_UNLOCK(vp);
			continue;
		}
		mp = vp->v_mount;
		if (fstrans_start_nowait(mp, FSTRANS_SHARED) != 0) {
			mutex_exit(vp->v_interlock);
			VOP_UNLOCK(vp);
			continue;
		}
		/* Got the vnode lock, the interlock and fstrans. */
		break;
	}

	/* vp is NULL when the loop ran off the end of the list. */
	if (vp == NULL) {
		if (listhd == &vnode_free_list) {
			listhd = &vnode_hold_list;
			goto try_nextlist;
		}
		mutex_exit(&vnode_free_list_lock);
		return EBUSY;
	}

	/* Remove it from the freelist. */
	TAILQ_REMOVE(listhd, vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);

	KASSERT(vp->v_usecount == 0);

	/*
	 * The vnode is still associated with a file system, so we must
	 * clean it out before freeing it.  We need to add a reference
	 * before doing this.
	 */
	vp->v_usecount = 1;
	vcache_reclaim(vp);
	/*
	 * Drop the temporary reference.  vrelel() asserts that the
	 * interlock is held, so vcache_reclaim() is expected to return
	 * with it held.
	 */
	vrelel(vp, 0);
	/* mp was sampled before the reclaim; end the transaction on it. */
	fstrans_done(mp);

	return 0;
}
490 | |
491 | /* |
492 | * Helper thread to keep the number of vnodes below desiredvnodes. |
493 | */ |
494 | static void |
495 | vdrain_thread(void *cookie) |
496 | { |
497 | int error; |
498 | |
499 | mutex_enter(&vnode_free_list_lock); |
500 | |
501 | for (;;) { |
502 | cv_timedwait(&vdrain_cv, &vnode_free_list_lock, hz); |
503 | while (numvnodes > desiredvnodes) { |
504 | error = cleanvnode(); |
505 | if (error) |
506 | kpause("vndsbusy" , false, hz, NULL); |
507 | mutex_enter(&vnode_free_list_lock); |
508 | if (error) |
509 | break; |
510 | } |
511 | } |
512 | } |
513 | |
514 | /* |
515 | * Remove a vnode from its freelist. |
516 | */ |
517 | void |
518 | vremfree(vnode_t *vp) |
519 | { |
520 | |
521 | KASSERT(mutex_owned(vp->v_interlock)); |
522 | KASSERT(vp->v_usecount == 0); |
523 | |
524 | /* |
525 | * Note that the reference count must not change until |
526 | * the vnode is removed. |
527 | */ |
528 | mutex_enter(&vnode_free_list_lock); |
529 | if (vp->v_holdcnt > 0) { |
530 | KASSERT(vp->v_freelisthd == &vnode_hold_list); |
531 | } else { |
532 | KASSERT(vp->v_freelisthd == &vnode_free_list); |
533 | } |
534 | TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); |
535 | vp->v_freelisthd = NULL; |
536 | mutex_exit(&vnode_free_list_lock); |
537 | } |
538 | |
539 | /* |
540 | * vget: get a particular vnode from the free list, increment its reference |
541 | * count and return it. |
542 | * |
543 | * => Must be called with v_interlock held. |
544 | * |
545 | * If state is VS_RECLAIMING, the vnode may be eliminated in vcache_reclaim(). |
546 | * In that case, we cannot grab the vnode, so the process is awakened when |
547 | * the transition is completed, and an error returned to indicate that the |
548 | * vnode is no longer usable. |
549 | * |
550 | * If state is VS_LOADING or VS_BLOCKED, wait until the vnode enters a |
551 | * stable state (VS_ACTIVE or VS_RECLAIMED). |
552 | */ |
553 | int |
554 | vget(vnode_t *vp, int flags, bool waitok) |
555 | { |
556 | |
557 | KASSERT(mutex_owned(vp->v_interlock)); |
558 | KASSERT((flags & ~LK_NOWAIT) == 0); |
559 | KASSERT(waitok == ((flags & LK_NOWAIT) == 0)); |
560 | |
561 | /* |
562 | * Before adding a reference, we must remove the vnode |
563 | * from its freelist. |
564 | */ |
565 | if (vp->v_usecount == 0) { |
566 | vremfree(vp); |
567 | vp->v_usecount = 1; |
568 | } else { |
569 | atomic_inc_uint(&vp->v_usecount); |
570 | } |
571 | |
572 | /* |
573 | * If the vnode is in the process of changing state we wait |
574 | * for the change to complete and take care not to return |
575 | * a clean vnode. |
576 | */ |
577 | if (! ISSET(flags, LK_NOWAIT)) |
578 | VSTATE_WAIT_STABLE(vp); |
579 | if (VSTATE_GET(vp) == VS_RECLAIMED) { |
580 | vrelel(vp, 0); |
581 | return ENOENT; |
582 | } else if (VSTATE_GET(vp) != VS_ACTIVE) { |
583 | KASSERT(ISSET(flags, LK_NOWAIT)); |
584 | vrelel(vp, 0); |
585 | return EBUSY; |
586 | } |
587 | |
588 | /* |
589 | * Ok, we got it in good shape. |
590 | */ |
591 | VSTATE_ASSERT(vp, VS_ACTIVE); |
592 | mutex_exit(vp->v_interlock); |
593 | |
594 | return 0; |
595 | } |
596 | |
/*
 * vput: unlock and release the reference.
 *
 * Equivalent to VOP_UNLOCK() followed by vrele(); the vnode lock is
 * dropped first, then the reference.
 */
void
vput(vnode_t *vp)
{

	VOP_UNLOCK(vp);
	vrele(vp);
}
607 | |
608 | /* |
609 | * Try to drop reference on a vnode. Abort if we are releasing the |
610 | * last reference. Note: this _must_ succeed if not the last reference. |
611 | */ |
612 | static inline bool |
613 | vtryrele(vnode_t *vp) |
614 | { |
615 | u_int use, next; |
616 | |
617 | for (use = vp->v_usecount;; use = next) { |
618 | if (use == 1) { |
619 | return false; |
620 | } |
621 | KASSERT(use > 1); |
622 | next = atomic_cas_uint(&vp->v_usecount, use, use - 1); |
623 | if (__predict_true(next == use)) { |
624 | return true; |
625 | } |
626 | } |
627 | } |
628 | |
/*
 * Vnode release.  If reference count drops to zero, call inactive
 * routine and either return to freelist or free to the pool.
 *
 * => Must be called with v_interlock held and the vnode on no free
 *    list; every return path releases the interlock.
 * => "flags" may contain VRELEL_ASYNC_RELE to force the last-reference
 *    processing to be handed to vrele_thread().
 */
static void
vrelel(vnode_t *vp, int flags)
{
	bool recycle, defer;
	int error;

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(vp->v_freelisthd == NULL);

	/* A vnode using the dead vnode operations must be reclaimed. */
	if (__predict_false(vp->v_op == dead_vnodeop_p &&
	    VSTATE_GET(vp) != VS_RECLAIMED)) {
		vnpanic(vp, "dead but not clean");
	}

	/*
	 * If not the last reference, just drop the reference count
	 * and unlock.
	 */
	if (vtryrele(vp)) {
		mutex_exit(vp->v_interlock);
		return;
	}
	/* From here on we hold what looked like the last reference. */
	if (vp->v_usecount <= 0 || vp->v_writecount != 0) {
		vnpanic(vp, "%s: bad ref count", __func__);
	}

#ifdef DIAGNOSTIC
	if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
	    vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
		vprint("vrelel: missing VOP_CLOSE()", vp);
	}
#endif

	/*
	 * If not clean, deactivate the vnode, but preserve
	 * our reference across the call to VOP_INACTIVE().
	 */
	if (VSTATE_GET(vp) != VS_RECLAIMED) {
		recycle = false;

		/*
		 * XXX This ugly block can be largely eliminated if
		 * locking is pushed down into the file systems.
		 *
		 * Defer vnode release to vrele_thread if caller
		 * requests it explicitly or is the pagedaemon.
		 */
		if ((curlwp == uvm.pagedaemon_lwp) ||
		    (flags & VRELEL_ASYNC_RELE) != 0) {
			defer = true;
		} else if (curlwp == vrele_lwp) {
			/*
			 * We have to try harder.
			 *
			 * The vrele thread cannot defer to itself, so it
			 * takes the vnode lock without LK_NOWAIT.  The
			 * interlock is dropped around vn_lock().
			 */
			mutex_exit(vp->v_interlock);
			error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			KASSERTMSG((error == 0), "vn_lock failed: %d", error);
			mutex_enter(vp->v_interlock);
			defer = false;
		} else {
			/* If we can't acquire the lock, then defer. */
			mutex_exit(vp->v_interlock);
			error = vn_lock(vp,
			    LK_EXCLUSIVE | LK_RETRY | LK_NOWAIT);
			defer = (error != 0);
			mutex_enter(vp->v_interlock);
		}

		KASSERT(mutex_owned(vp->v_interlock));
		KASSERT(! (curlwp == vrele_lwp && defer));

		if (defer) {
			/*
			 * Defer reclaim to the kthread; it's not safe to
			 * clean it here.  We donate it our last reference.
			 *
			 * The kthread is only signalled once more than
			 * desiredvnodes / 256 releases are pending.
			 */
			mutex_enter(&vrele_lock);
			TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist);
			if (++vrele_pending > (desiredvnodes >> 8))
				cv_signal(&vrele_cv);
			mutex_exit(&vrele_lock);
			mutex_exit(vp->v_interlock);
			return;
		}

		/*
		 * If the node got another reference while we
		 * released the interlock, don't try to inactivate it yet.
		 */
		if (__predict_false(vtryrele(vp))) {
			VOP_UNLOCK(vp);
			mutex_exit(vp->v_interlock);
			return;
		}
		/* Block new references for the duration of VOP_INACTIVE(). */
		VSTATE_CHANGE(vp, VS_ACTIVE, VS_BLOCKED);
		mutex_exit(vp->v_interlock);

		/*
		 * The vnode must not gain another reference while being
		 * deactivated.  If VOP_INACTIVE() indicates that
		 * the described file has been deleted, then recycle
		 * the vnode.
		 *
		 * Note that VOP_INACTIVE() will drop the vnode lock.
		 */
		VOP_INACTIVE(vp, &recycle);
		if (recycle) {
			/* vcache_reclaim() below will drop the lock. */
			if (vn_lock(vp, LK_EXCLUSIVE) != 0)
				recycle = false;
		}
		mutex_enter(vp->v_interlock);
		VSTATE_CHANGE(vp, VS_BLOCKED, VS_ACTIVE);
		if (!recycle) {
			/*
			 * Not recycling: if the count is no longer 1,
			 * drop our reference and leave the vnode alone.
			 */
			if (vtryrele(vp)) {
				mutex_exit(vp->v_interlock);
				return;
			}
		}

		/* Take care of space accounting. */
		if (vp->v_iflag & VI_EXECMAP) {
			/* Move the object's pages from execpages to filepages. */
			atomic_add_int(&uvmexp.execpages,
			    -vp->v_uobj.uo_npages);
			atomic_add_int(&uvmexp.filepages,
			    vp->v_uobj.uo_npages);
		}
		vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP);
		vp->v_vflag &= ~VV_MAPPED;

		/*
		 * Recycle the vnode if the file is now unused (unlinked),
		 * otherwise just free it.
		 */
		if (recycle) {
			VSTATE_ASSERT(vp, VS_ACTIVE);
			vcache_reclaim(vp);
		}
		KASSERT(vp->v_usecount > 0);
	}

	/* Finally give up our own reference. */
	if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
		/* Gained another reference while being reclaimed. */
		mutex_exit(vp->v_interlock);
		return;
	}

	if (VSTATE_GET(vp) == VS_RECLAIMED) {
		/*
		 * It's clean so destroy it.  It isn't referenced
		 * anywhere since it has been reclaimed.
		 */
		KASSERT(vp->v_holdcnt == 0);
		KASSERT(vp->v_writecount == 0);
		mutex_exit(vp->v_interlock);
		vfs_insmntque(vp, NULL);
		if (vp->v_type == VBLK || vp->v_type == VCHR) {
			spec_node_destroy(vp);
		}
		vcache_free(VNODE_TO_VIMPL(vp));
	} else {
		/*
		 * Otherwise, put it back onto the freelist.  It
		 * can't be destroyed while still associated with
		 * a file system.
		 *
		 * Vnodes with holds go to the hold list, all others
		 * to the free list.
		 */
		mutex_enter(&vnode_free_list_lock);
		if (vp->v_holdcnt > 0) {
			vp->v_freelisthd = &vnode_hold_list;
		} else {
			vp->v_freelisthd = &vnode_free_list;
		}
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
		mutex_exit(vp->v_interlock);
	}
}
810 | |
811 | void |
812 | vrele(vnode_t *vp) |
813 | { |
814 | |
815 | if (vtryrele(vp)) { |
816 | return; |
817 | } |
818 | mutex_enter(vp->v_interlock); |
819 | vrelel(vp, 0); |
820 | } |
821 | |
822 | /* |
823 | * Asynchronous vnode release, vnode is released in different context. |
824 | */ |
825 | void |
826 | vrele_async(vnode_t *vp) |
827 | { |
828 | |
829 | if (vtryrele(vp)) { |
830 | return; |
831 | } |
832 | mutex_enter(vp->v_interlock); |
833 | vrelel(vp, VRELEL_ASYNC_RELE); |
834 | } |
835 | |
/*
 * vrele_thread: kernel thread that performs the releases queued on
 * vrele_list by vrelel().
 *
 * Vnodes whose mount refuses fstrans_start_nowait() are parked on a
 * private skip list and put back on vrele_list after the next wait,
 * once vrele_list has been found empty.
 */
static void
vrele_thread(void *cookie)
{
	vnodelst_t skip_list;
	vnode_t *vp;
	struct mount *mp;

	TAILQ_INIT(&skip_list);

	mutex_enter(&vrele_lock);
	for (;;) {
		while (TAILQ_EMPTY(&vrele_list)) {
			/*
			 * Out of work: bump the generation and wake any
			 * vrele_flush() waiters, then sleep for up to hz
			 * ticks before retrying the skipped vnodes.
			 */
			vrele_gen++;
			cv_broadcast(&vrele_cv);
			cv_timedwait(&vrele_cv, &vrele_lock, hz);
			TAILQ_CONCAT(&vrele_list, &skip_list, v_freelist);
		}
		vp = TAILQ_FIRST(&vrele_list);
		mp = vp->v_mount;
		TAILQ_REMOVE(&vrele_list, vp, v_freelist);
		if (fstrans_start_nowait(mp, FSTRANS_LAZY) != 0) {
			/* Still counted in vrele_pending; retried later. */
			TAILQ_INSERT_TAIL(&skip_list, vp, v_freelist);
			continue;
		}
		vrele_pending--;
		mutex_exit(&vrele_lock);

		/*
		 * Drop the reference that was donated with the vnode.
		 * vrelel() takes care of the case where this is no
		 * longer the last reference; it is entered with the
		 * interlock held and releases it.
		 */
		mutex_enter(vp->v_interlock);
		vrelel(vp, 0);
		fstrans_done(mp);
		mutex_enter(&vrele_lock);
	}
}
873 | |
874 | void |
875 | vrele_flush(void) |
876 | { |
877 | int gen; |
878 | |
879 | mutex_enter(&vrele_lock); |
880 | gen = vrele_gen; |
881 | while (vrele_pending && gen == vrele_gen) { |
882 | cv_broadcast(&vrele_cv); |
883 | cv_wait(&vrele_cv, &vrele_lock); |
884 | } |
885 | mutex_exit(&vrele_lock); |
886 | } |
887 | |
/*
 * Vnode reference, where a reference is already held by some other
 * object (for example, a file structure).
 *
 * Because the count is already non-zero, the increment is done
 * atomically without taking v_interlock.
 */
void
vref(vnode_t *vp)
{

	KASSERT(vp->v_usecount != 0);

	atomic_inc_uint(&vp->v_usecount);
}
900 | |
901 | /* |
902 | * Page or buffer structure gets a reference. |
903 | * Called with v_interlock held. |
904 | */ |
905 | void |
906 | vholdl(vnode_t *vp) |
907 | { |
908 | |
909 | KASSERT(mutex_owned(vp->v_interlock)); |
910 | |
911 | if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) { |
912 | mutex_enter(&vnode_free_list_lock); |
913 | KASSERT(vp->v_freelisthd == &vnode_free_list); |
914 | TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); |
915 | vp->v_freelisthd = &vnode_hold_list; |
916 | TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); |
917 | mutex_exit(&vnode_free_list_lock); |
918 | } |
919 | } |
920 | |
921 | /* |
922 | * Page or buffer structure frees a reference. |
923 | * Called with v_interlock held. |
924 | */ |
925 | void |
926 | holdrelel(vnode_t *vp) |
927 | { |
928 | |
929 | KASSERT(mutex_owned(vp->v_interlock)); |
930 | |
931 | if (vp->v_holdcnt <= 0) { |
932 | vnpanic(vp, "%s: holdcnt vp %p" , __func__, vp); |
933 | } |
934 | |
935 | vp->v_holdcnt--; |
936 | if (vp->v_holdcnt == 0 && vp->v_usecount == 0) { |
937 | mutex_enter(&vnode_free_list_lock); |
938 | KASSERT(vp->v_freelisthd == &vnode_hold_list); |
939 | TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); |
940 | vp->v_freelisthd = &vnode_free_list; |
941 | TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); |
942 | mutex_exit(&vnode_free_list_lock); |
943 | } |
944 | } |
945 | |
946 | /* |
947 | * Recycle an unused vnode if caller holds the last reference. |
948 | */ |
949 | bool |
950 | vrecycle(vnode_t *vp) |
951 | { |
952 | |
953 | if (vn_lock(vp, LK_EXCLUSIVE) != 0) |
954 | return false; |
955 | |
956 | mutex_enter(vp->v_interlock); |
957 | |
958 | if (vp->v_usecount != 1) { |
959 | mutex_exit(vp->v_interlock); |
960 | VOP_UNLOCK(vp); |
961 | return false; |
962 | } |
963 | vcache_reclaim(vp); |
964 | vrelel(vp, 0); |
965 | return true; |
966 | } |
967 | |
968 | /* |
969 | * Eliminate all activity associated with the requested vnode |
970 | * and with all vnodes aliased to the requested vnode. |
971 | */ |
972 | void |
973 | vrevoke(vnode_t *vp) |
974 | { |
975 | vnode_t *vq; |
976 | enum vtype type; |
977 | dev_t dev; |
978 | |
979 | KASSERT(vp->v_usecount > 0); |
980 | |
981 | mutex_enter(vp->v_interlock); |
982 | VSTATE_WAIT_STABLE(vp); |
983 | if (VSTATE_GET(vp) == VS_RECLAIMED) { |
984 | mutex_exit(vp->v_interlock); |
985 | return; |
986 | } else if (vp->v_type != VBLK && vp->v_type != VCHR) { |
987 | atomic_inc_uint(&vp->v_usecount); |
988 | mutex_exit(vp->v_interlock); |
989 | vgone(vp); |
990 | return; |
991 | } else { |
992 | dev = vp->v_rdev; |
993 | type = vp->v_type; |
994 | mutex_exit(vp->v_interlock); |
995 | } |
996 | |
997 | while (spec_node_lookup_by_dev(type, dev, &vq) == 0) { |
998 | vgone(vq); |
999 | } |
1000 | } |
1001 | |
1002 | /* |
1003 | * Eliminate all activity associated with a vnode in preparation for |
1004 | * reuse. Drops a reference from the vnode. |
1005 | */ |
1006 | void |
1007 | vgone(vnode_t *vp) |
1008 | { |
1009 | |
1010 | if (vn_lock(vp, LK_EXCLUSIVE) != 0) { |
1011 | VSTATE_ASSERT(vp, VS_RECLAIMED); |
1012 | vrele(vp); |
1013 | } |
1014 | |
1015 | mutex_enter(vp->v_interlock); |
1016 | vcache_reclaim(vp); |
1017 | vrelel(vp, 0); |
1018 | } |
1019 | |
1020 | static inline uint32_t |
1021 | vcache_hash(const struct vcache_key *key) |
1022 | { |
1023 | uint32_t hash = HASH32_BUF_INIT; |
1024 | |
1025 | hash = hash32_buf(&key->vk_mount, sizeof(struct mount *), hash); |
1026 | hash = hash32_buf(key->vk_key, key->vk_key_len, hash); |
1027 | return hash; |
1028 | } |
1029 | |
1030 | static void |
1031 | vcache_init(void) |
1032 | { |
1033 | |
1034 | vcache.pool = pool_cache_init(sizeof(vnode_impl_t), 0, 0, 0, |
1035 | "vcachepl" , NULL, IPL_NONE, NULL, NULL, NULL); |
1036 | KASSERT(vcache.pool != NULL); |
1037 | mutex_init(&vcache.lock, MUTEX_DEFAULT, IPL_NONE); |
1038 | cv_init(&vcache.cv, "vcache" ); |
1039 | vcache.hashtab = hashinit(desiredvnodes, HASH_SLIST, true, |
1040 | &vcache.hashmask); |
1041 | } |
1042 | |
1043 | static void |
1044 | vcache_reinit(void) |
1045 | { |
1046 | int i; |
1047 | uint32_t hash; |
1048 | u_long oldmask, newmask; |
1049 | struct hashhead *oldtab, *newtab; |
1050 | vnode_impl_t *node; |
1051 | |
1052 | newtab = hashinit(desiredvnodes, HASH_SLIST, true, &newmask); |
1053 | mutex_enter(&vcache.lock); |
1054 | oldtab = vcache.hashtab; |
1055 | oldmask = vcache.hashmask; |
1056 | vcache.hashtab = newtab; |
1057 | vcache.hashmask = newmask; |
1058 | for (i = 0; i <= oldmask; i++) { |
1059 | while ((node = SLIST_FIRST(&oldtab[i])) != NULL) { |
1060 | SLIST_REMOVE(&oldtab[i], node, vnode_impl, vi_hash); |
1061 | hash = vcache_hash(&node->vi_key); |
1062 | SLIST_INSERT_HEAD(&newtab[hash & vcache.hashmask], |
1063 | node, vi_hash); |
1064 | } |
1065 | } |
1066 | mutex_exit(&vcache.lock); |
1067 | hashdone(oldtab, HASH_SLIST, oldmask); |
1068 | } |
1069 | |
1070 | static inline vnode_impl_t * |
1071 | vcache_hash_lookup(const struct vcache_key *key, uint32_t hash) |
1072 | { |
1073 | struct hashhead *hashp; |
1074 | vnode_impl_t *node; |
1075 | |
1076 | KASSERT(mutex_owned(&vcache.lock)); |
1077 | |
1078 | hashp = &vcache.hashtab[hash & vcache.hashmask]; |
1079 | SLIST_FOREACH(node, hashp, vi_hash) { |
1080 | if (key->vk_mount != node->vi_key.vk_mount) |
1081 | continue; |
1082 | if (key->vk_key_len != node->vi_key.vk_key_len) |
1083 | continue; |
1084 | if (memcmp(key->vk_key, node->vi_key.vk_key, key->vk_key_len)) |
1085 | continue; |
1086 | return node; |
1087 | } |
1088 | return NULL; |
1089 | } |
1090 | |
1091 | /* |
1092 | * Allocate a new, uninitialized vcache node. |
1093 | */ |
1094 | static vnode_impl_t * |
1095 | vcache_alloc(void) |
1096 | { |
1097 | vnode_impl_t *node; |
1098 | vnode_t *vp; |
1099 | |
1100 | node = pool_cache_get(vcache.pool, PR_WAITOK); |
1101 | memset(node, 0, sizeof(*node)); |
1102 | |
1103 | /* SLIST_INIT(&node->vi_hash); */ |
1104 | |
1105 | vp = VIMPL_TO_VNODE(node); |
1106 | uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0); |
1107 | cv_init(&vp->v_cv, "vnode" ); |
1108 | /* LIST_INIT(&vp->v_nclist); */ |
1109 | /* LIST_INIT(&vp->v_dnclist); */ |
1110 | |
1111 | mutex_enter(&vnode_free_list_lock); |
1112 | numvnodes++; |
1113 | if (numvnodes > desiredvnodes + desiredvnodes / 10) |
1114 | cv_signal(&vdrain_cv); |
1115 | mutex_exit(&vnode_free_list_lock); |
1116 | |
1117 | rw_init(&vp->v_lock); |
1118 | vp->v_usecount = 1; |
1119 | vp->v_type = VNON; |
1120 | vp->v_size = vp->v_writesize = VSIZENOTSET; |
1121 | |
1122 | node->vi_state = VS_LOADING; |
1123 | |
1124 | return node; |
1125 | } |
1126 | |
1127 | /* |
1128 | * Free an unused, unreferenced vcache node. |
1129 | */ |
1130 | static void |
1131 | vcache_free(vnode_impl_t *node) |
1132 | { |
1133 | vnode_t *vp; |
1134 | |
1135 | vp = VIMPL_TO_VNODE(node); |
1136 | |
1137 | KASSERT(vp->v_usecount == 0); |
1138 | |
1139 | rw_destroy(&vp->v_lock); |
1140 | mutex_enter(&vnode_free_list_lock); |
1141 | numvnodes--; |
1142 | mutex_exit(&vnode_free_list_lock); |
1143 | |
1144 | uvm_obj_destroy(&vp->v_uobj, true); |
1145 | cv_destroy(&vp->v_cv); |
1146 | pool_cache_put(vcache.pool, node); |
1147 | } |
1148 | |
/*
 * Get a vnode / fs node pair by key and return it referenced through vpp.
 *
 * mp, key and key_len together form the cache key.  Returns 0 with the
 * vnode stored in *vpp, or an error number with *vpp left NULL.
 *
 * On a cache hit a reference is taken with vget(); if vget() reports
 * ENOENT the lookup is retried.  On a miss a new node in state
 * VS_LOADING is inserted into the hash table and VFS_LOADVNODE() is
 * called to load it; if another thread inserted the same key first,
 * the new node is discarded and the lookup is retried.
 */
int
vcache_get(struct mount *mp, const void *key, size_t key_len,
    struct vnode **vpp)
{
	int error;
	uint32_t hash;
	const void *new_key;
	struct vnode *vp;
	struct vcache_key vcache_key;
	vnode_impl_t *node, *new_node;

	new_key = NULL;
	*vpp = NULL;

	/* Build the lookup key; vk_key still points at the caller's buffer. */
	vcache_key.vk_mount = mp;
	vcache_key.vk_key = key;
	vcache_key.vk_key_len = key_len;
	hash = vcache_hash(&vcache_key);

again:
	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&vcache_key, hash);

	/* If found, take a reference or retry. */
	if (__predict_true(node != NULL)) {
		/*
		 * If the vnode is loading we cannot take the v_interlock
		 * here as it might change during load (see uvm_obj_setlock()).
		 * As changing state from VS_LOADING requires both vcache.lock
		 * and v_interlock it is safe to test with vcache.lock held.
		 *
		 * Wait for vnodes changing state from VS_LOADING and retry.
		 */
		if (__predict_false(node->vi_state == VS_LOADING)) {
			cv_wait(&vcache.cv, &vcache.lock);
			mutex_exit(&vcache.lock);
			goto again;
		}
		vp = VIMPL_TO_VNODE(node);
		/*
		 * Take the interlock before dropping vcache.lock; vget() is
		 * entered with the interlock held and it is not released
		 * here afterwards.
		 */
		mutex_enter(vp->v_interlock);
		mutex_exit(&vcache.lock);
		error = vget(vp, 0, true /* wait */);
		if (error == ENOENT)
			goto again;
		if (error == 0)
			*vpp = vp;
		KASSERT((error != 0) == (*vpp == NULL));
		return error;
	}
	mutex_exit(&vcache.lock);

	/* Allocate and initialize a new vcache / vnode pair. */
	error = vfs_busy(mp, NULL);
	if (error)
		return error;
	new_node = vcache_alloc();
	new_node->vi_key = vcache_key;
	vp = VIMPL_TO_VNODE(new_node);
	/*
	 * vcache.lock was dropped above, so look the key up again before
	 * inserting the new node.
	 */
	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&vcache_key, hash);
	if (node == NULL) {
		SLIST_INSERT_HEAD(&vcache.hashtab[hash & vcache.hashmask],
		    new_node, vi_hash);
		node = new_node;
	}

	/* If another thread beat us inserting this node, retry. */
	if (node != new_node) {
		/* Mark the unused node reclaimed and drop its only reference. */
		mutex_enter(vp->v_interlock);
		VSTATE_CHANGE(vp, VS_LOADING, VS_RECLAIMED);
		mutex_exit(&vcache.lock);
		vrelel(vp, 0);
		vfs_unbusy(mp, false, NULL);
		goto again;
	}
	mutex_exit(&vcache.lock);

	/* Load the fs node.  Exclusive as new_node is VS_LOADING. */
	error = VFS_LOADVNODE(mp, vp, key, key_len, &new_key);
	if (error) {
		/* Load failed: unhash the node, mark it reclaimed, release it. */
		mutex_enter(&vcache.lock);
		SLIST_REMOVE(&vcache.hashtab[hash & vcache.hashmask],
		    new_node, vnode_impl, vi_hash);
		mutex_enter(vp->v_interlock);
		VSTATE_CHANGE(vp, VS_LOADING, VS_RECLAIMED);
		mutex_exit(&vcache.lock);
		vrelel(vp, 0);
		vfs_unbusy(mp, false, NULL);
		KASSERT(*vpp == NULL);
		return error;
	}
	/* The file system must hand back a key with the same contents. */
	KASSERT(new_key != NULL);
	KASSERT(memcmp(key, new_key, key_len) == 0);
	KASSERT(vp->v_op != NULL);
	vfs_insmntque(vp, mp);
	if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
		vp->v_vflag |= VV_MPSAFE;
	/*
	 * NOTE(review): the failure paths pass false here and success
	 * passes true; presumably true keeps a mount reference on behalf
	 * of the new vnode -- confirm against vfs_unbusy().
	 */
	vfs_unbusy(mp, true, NULL);

	/* Finished loading, finalize node. */
	mutex_enter(&vcache.lock);
	/*
	 * Stop referring to the caller's key buffer; from here on the
	 * node uses the key pointer returned by VFS_LOADVNODE().
	 */
	new_node->vi_key.vk_key = new_key;
	mutex_enter(vp->v_interlock);
	VSTATE_CHANGE(vp, VS_LOADING, VS_ACTIVE);
	mutex_exit(vp->v_interlock);
	mutex_exit(&vcache.lock);
	*vpp = vp;
	return 0;
}
1261 | |
/*
 * Create a new vnode / fs node pair and return it referenced through vpp.
 *
 * dvp, vap and cred are passed through to VFS_NEWVNODE(), which also
 * fills in the new node's key and key length.  Returns 0 with the vnode
 * stored in *vpp, or an error number with *vpp left NULL.
 */
int
vcache_new(struct mount *mp, struct vnode *dvp, struct vattr *vap,
    kauth_cred_t cred, struct vnode **vpp)
{
	int error;
	uint32_t hash;
	struct vnode *ovp, *vp;
	vnode_impl_t *new_node;
	vnode_impl_t *old_node __diagused;

	*vpp = NULL;

	/* Allocate and initialize a new vcache / vnode pair. */
	error = vfs_busy(mp, NULL);
	if (error)
		return error;
	new_node = vcache_alloc();
	new_node->vi_key.vk_mount = mp;
	vp = VIMPL_TO_VNODE(new_node);

	/* Create and load the fs node. */
	error = VFS_NEWVNODE(mp, dvp, vp, vap, cred,
	    &new_node->vi_key.vk_key_len, &new_node->vi_key.vk_key);
	if (error) {
		/*
		 * The node was never inserted into the hash table, so it
		 * only needs to be marked reclaimed and released.
		 */
		mutex_enter(&vcache.lock);
		mutex_enter(vp->v_interlock);
		VSTATE_CHANGE(vp, VS_LOADING, VS_RECLAIMED);
		mutex_exit(&vcache.lock);
		vrelel(vp, 0);
		vfs_unbusy(mp, false, NULL);
		KASSERT(*vpp == NULL);
		return error;
	}
	KASSERT(new_node->vi_key.vk_key != NULL);
	KASSERT(vp->v_op != NULL);
	/* The key is only known now, so the hash is computed here. */
	hash = vcache_hash(&new_node->vi_key);

	/* Wait for previous instance to be reclaimed, then insert new node. */
	mutex_enter(&vcache.lock);
	while ((old_node = vcache_hash_lookup(&new_node->vi_key, hash))) {
		ovp = VIMPL_TO_VNODE(old_node);
		mutex_enter(ovp->v_interlock);
		mutex_exit(&vcache.lock);
		/*
		 * A node still cached under this key is expected to be on
		 * its way out: vget() must fail with ENOENT, after which
		 * the lookup is repeated.
		 */
		error = vget(ovp, 0, true /* wait */);
		KASSERT(error == ENOENT);
		mutex_enter(&vcache.lock);
	}
	SLIST_INSERT_HEAD(&vcache.hashtab[hash & vcache.hashmask],
	    new_node, vi_hash);
	mutex_exit(&vcache.lock);
	vfs_insmntque(vp, mp);
	if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
		vp->v_vflag |= VV_MPSAFE;
	/*
	 * NOTE(review): the failure path passes false here and success
	 * passes true; presumably true keeps a mount reference on behalf
	 * of the new vnode -- confirm against vfs_unbusy().
	 */
	vfs_unbusy(mp, true, NULL);

	/* Finished loading, finalize node. */
	mutex_enter(&vcache.lock);
	mutex_enter(vp->v_interlock);
	VSTATE_CHANGE(vp, VS_LOADING, VS_ACTIVE);
	mutex_exit(&vcache.lock);
	mutex_exit(vp->v_interlock);
	*vpp = vp;
	return 0;
}
1329 | |
/*
 * Prepare key change: lock old and new cache node.
 * Return an error if the new node already exists.
 *
 * vp is the vnode currently cached under (mp, old_key, old_key_len).
 * A freshly allocated node in state VS_LOADING is inserted under
 * (mp, new_key, new_key_len) as a placeholder and vp is moved from
 * VS_ACTIVE to VS_BLOCKED.  Returns 0 on success, or EEXIST if a node
 * with the new key is already cached.
 */
int
vcache_rekey_enter(struct mount *mp, struct vnode *vp,
    const void *old_key, size_t old_key_len,
    const void *new_key, size_t new_key_len)
{
	uint32_t old_hash, new_hash;
	struct vcache_key old_vcache_key, new_vcache_key;
	vnode_impl_t *node, *new_node;
	struct vnode *tvp;

	old_vcache_key.vk_mount = mp;
	old_vcache_key.vk_key = old_key;
	old_vcache_key.vk_key_len = old_key_len;
	old_hash = vcache_hash(&old_vcache_key);

	new_vcache_key.vk_mount = mp;
	new_vcache_key.vk_key = new_key;
	new_vcache_key.vk_key_len = new_key_len;
	new_hash = vcache_hash(&new_vcache_key);

	/* The placeholder is allocated before vcache.lock is taken. */
	new_node = vcache_alloc();
	new_node->vi_key = new_vcache_key;
	tvp = VIMPL_TO_VNODE(new_node);

	/* Insert locked new node used as placeholder. */
	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&new_vcache_key, new_hash);
	if (node != NULL) {
		/* New key already in use: discard the unused placeholder. */
		mutex_enter(tvp->v_interlock);
		VSTATE_CHANGE(tvp, VS_LOADING, VS_RECLAIMED);
		mutex_exit(&vcache.lock);
		vrelel(tvp, 0);
		return EEXIST;
	}
	SLIST_INSERT_HEAD(&vcache.hashtab[new_hash & vcache.hashmask],
	    new_node, vi_hash);

	/* Lock old node. */
	node = vcache_hash_lookup(&old_vcache_key, old_hash);
	KASSERT(node != NULL);
	KASSERT(VIMPL_TO_VNODE(node) == vp);
	mutex_enter(vp->v_interlock);
	VSTATE_CHANGE(vp, VS_ACTIVE, VS_BLOCKED);
	/*
	 * The lookup matched on key contents; from here on the node's key
	 * pointer refers to the caller-supplied old_key buffer.
	 */
	node->vi_key = old_vcache_key;
	mutex_exit(vp->v_interlock);
	mutex_exit(&vcache.lock);
	return 0;
}
1382 | |
/*
 * Key change complete: remove old node and unlock new node.
 *
 * Takes the same arguments as vcache_rekey_enter().  vp, asserted to be
 * VS_BLOCKED, is given the new key, moved to the new hash list if the
 * hash changed, and returned to VS_ACTIVE.  The VS_LOADING placeholder
 * found under the new key is removed from the table and released.
 */
void
vcache_rekey_exit(struct mount *mp, struct vnode *vp,
    const void *old_key, size_t old_key_len,
    const void *new_key, size_t new_key_len)
{
	uint32_t old_hash, new_hash;
	struct vcache_key old_vcache_key, new_vcache_key;
	vnode_impl_t *old_node, *new_node;
	struct vnode *tvp;

	old_vcache_key.vk_mount = mp;
	old_vcache_key.vk_key = old_key;
	old_vcache_key.vk_key_len = old_key_len;
	old_hash = vcache_hash(&old_vcache_key);

	new_vcache_key.vk_mount = mp;
	new_vcache_key.vk_key = new_key;
	new_vcache_key.vk_key_len = new_key_len;
	new_hash = vcache_hash(&new_vcache_key);

	mutex_enter(&vcache.lock);

	/* Lookup old and new node. */
	old_node = vcache_hash_lookup(&old_vcache_key, old_hash);
	KASSERT(old_node != NULL);
	KASSERT(VIMPL_TO_VNODE(old_node) == vp);
	mutex_enter(vp->v_interlock);
	VSTATE_ASSERT(vp, VS_BLOCKED);

	/* tvp is the placeholder vnode; its interlock is taken as well. */
	new_node = vcache_hash_lookup(&new_vcache_key, new_hash);
	KASSERT(new_node != NULL);
	KASSERT(new_node->vi_key.vk_key_len == new_key_len);
	tvp = VIMPL_TO_VNODE(new_node);
	mutex_enter(tvp->v_interlock);
	VSTATE_ASSERT(VIMPL_TO_VNODE(new_node), VS_LOADING);

	/* Rekey old node and put it onto its new hashlist. */
	old_node->vi_key = new_vcache_key;
	if (old_hash != new_hash) {
		SLIST_REMOVE(&vcache.hashtab[old_hash & vcache.hashmask],
		    old_node, vnode_impl, vi_hash);
		SLIST_INSERT_HEAD(&vcache.hashtab[new_hash & vcache.hashmask],
		    old_node, vi_hash);
	}
	VSTATE_CHANGE(vp, VS_BLOCKED, VS_ACTIVE);
	mutex_exit(vp->v_interlock);

	/* Remove new node used as placeholder. */
	SLIST_REMOVE(&vcache.hashtab[new_hash & vcache.hashmask],
	    new_node, vnode_impl, vi_hash);
	VSTATE_CHANGE(tvp, VS_LOADING, VS_RECLAIMED);
	mutex_exit(&vcache.lock);
	/* Called with tvp's interlock still held. */
	vrelel(tvp, 0);
}
1440 | |
/*
 * Disassociate the underlying file system from a vnode.
 *
 * Must be called with vnode locked and will return unlocked.
 * Must be called with the interlock held, and will return with it held.
 *
 * The vnode goes from VS_ACTIVE through VS_RECLAIMING to VS_RECLAIMED.
 * Along the way its buffers are invalidated, the file system's reclaim
 * operation is run, the vnode is moved to dead_rootmount, removed from
 * the vnode cache hash table and switched to dead_vnodeop_p.
 */
static void
vcache_reclaim(vnode_t *vp)
{
	lwp_t *l = curlwp;
	vnode_impl_t *node = VNODE_TO_VIMPL(vp);
	uint32_t hash;
	uint8_t temp_buf[64], *temp_key;
	size_t temp_key_len;
	bool recycle, active;
	int error;

	KASSERT((vp->v_vflag & VV_LOCKSWORK) == 0 ||
	    VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(vp->v_usecount != 0);

	/* More than one reference means someone else still uses the vnode. */
	active = (vp->v_usecount > 1);
	temp_key_len = node->vi_key.vk_key_len;
	/*
	 * Prevent the vnode from being recycled or brought into use
	 * while we clean it out.
	 */
	VSTATE_CHANGE(vp, VS_ACTIVE, VS_RECLAIMING);
	/* Move this object's pages from the exec to the file page count. */
	if (vp->v_iflag & VI_EXECMAP) {
		atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
		atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
	}
	vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
	mutex_exit(vp->v_interlock);

	/*
	 * Replace the vnode key with a temporary copy.
	 * Keys of up to sizeof(temp_buf) bytes use the on-stack buffer,
	 * longer ones a heap buffer that is freed further down.
	 * NOTE(review): presumably needed because the original key storage
	 * does not survive VOP_RECLAIM() -- confirm.
	 */
	if (node->vi_key.vk_key_len > sizeof(temp_buf)) {
		temp_key = kmem_alloc(temp_key_len, KM_SLEEP);
	} else {
		temp_key = temp_buf;
	}
	mutex_enter(&vcache.lock);
	memcpy(temp_key, node->vi_key.vk_key, temp_key_len);
	node->vi_key.vk_key = temp_key;
	mutex_exit(&vcache.lock);

	/*
	 * Clean out any cached data associated with the vnode.
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
	if (error != 0) {
		/*
		 * The V_SAVE attempt failed: discard the WAPBL state if the
		 * vnode has any, then retry without V_SAVE.
		 */
		if (wapbl_vphaswapbl(vp))
			WAPBL_DISCARD(wapbl_vptomp(vp));
		error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
	}
	KASSERTMSG((error == 0), "vinvalbuf failed: %d" , error);
	KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
	if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {
		spec_node_revoke(vp);
	}
	if (active) {
		/* The value VOP_INACTIVE() stores in recycle is not used here. */
		VOP_INACTIVE(vp, &recycle);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VS_RECLAIMED, then call the new lock operation.
		 */
		VOP_UNLOCK(vp);
	}

	/* Disassociate the underlying file system from the vnode. */
	if (VOP_RECLAIM(vp)) {
		vnpanic(vp, "%s: cannot reclaim" , __func__);
	}

	KASSERT(vp->v_data == NULL);
	KASSERT(vp->v_uobj.uo_npages == 0);

	/* Free the read-ahead context of a regular file, if it has one. */
	if (vp->v_type == VREG && vp->v_ractx != NULL) {
		uvm_ra_freectx(vp->v_ractx);
		vp->v_ractx = NULL;
	}

	/* Purge name cache. */
	cache_purge(vp);

	/* Move to dead mount. */
	vp->v_vflag &= ~VV_ROOT;
	atomic_inc_uint(&dead_rootmount->mnt_refcnt);
	vfs_insmntque(vp, dead_rootmount);

	/* Remove from vnode cache. */
	hash = vcache_hash(&node->vi_key);
	mutex_enter(&vcache.lock);
	KASSERT(node == vcache_hash_lookup(&node->vi_key, hash));
	SLIST_REMOVE(&vcache.hashtab[hash & vcache.hashmask],
	    node, vnode_impl, vi_hash);
	mutex_exit(&vcache.lock);
	/* Only a heap-allocated temporary key needs freeing. */
	if (temp_key != temp_buf)
		kmem_free(temp_key, temp_key_len);

	/* Done with purge, notify sleepers of the grim news. */
	mutex_enter(vp->v_interlock);
	vp->v_op = dead_vnodeop_p;
	vp->v_vflag |= VV_LOCKSWORK;
	VSTATE_CHANGE(vp, VS_RECLAIMING, VS_RECLAIMED);
	vp->v_tag = VT_NON;
	KNOTE(&vp->v_klist, NOTE_REVOKE);

	KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
}
1556 | |
1557 | /* |
1558 | * Update outstanding I/O count and do wakeup if requested. |
1559 | */ |
1560 | void |
1561 | vwakeup(struct buf *bp) |
1562 | { |
1563 | vnode_t *vp; |
1564 | |
1565 | if ((vp = bp->b_vp) == NULL) |
1566 | return; |
1567 | |
1568 | KASSERT(bp->b_objlock == vp->v_interlock); |
1569 | KASSERT(mutex_owned(bp->b_objlock)); |
1570 | |
1571 | if (--vp->v_numoutput < 0) |
1572 | vnpanic(vp, "%s: neg numoutput, vp %p" , __func__, vp); |
1573 | if (vp->v_numoutput == 0) |
1574 | cv_broadcast(&vp->v_cv); |
1575 | } |
1576 | |
1577 | /* |
1578 | * Test a vnode for being or becoming dead. Returns one of: |
1579 | * EBUSY: vnode is becoming dead, with "flags == VDEAD_NOWAIT" only. |
1580 | * ENOENT: vnode is dead. |
1581 | * 0: otherwise. |
1582 | * |
1583 | * Whenever this function returns a non-zero value all future |
1584 | * calls will also return a non-zero value. |
1585 | */ |
1586 | int |
1587 | vdead_check(struct vnode *vp, int flags) |
1588 | { |
1589 | |
1590 | KASSERT(mutex_owned(vp->v_interlock)); |
1591 | |
1592 | if (! ISSET(flags, VDEAD_NOWAIT)) |
1593 | VSTATE_WAIT_STABLE(vp); |
1594 | |
1595 | if (VSTATE_GET(vp) == VS_RECLAIMING) { |
1596 | KASSERT(ISSET(flags, VDEAD_NOWAIT)); |
1597 | return EBUSY; |
1598 | } else if (VSTATE_GET(vp) == VS_RECLAIMED) { |
1599 | return ENOENT; |
1600 | } |
1601 | |
1602 | return 0; |
1603 | } |
1604 | |
1605 | int |
1606 | vfs_drainvnodes(long target) |
1607 | { |
1608 | int error; |
1609 | |
1610 | mutex_enter(&vnode_free_list_lock); |
1611 | |
1612 | while (numvnodes > target) { |
1613 | error = cleanvnode(); |
1614 | if (error != 0) |
1615 | return error; |
1616 | mutex_enter(&vnode_free_list_lock); |
1617 | } |
1618 | |
1619 | mutex_exit(&vnode_free_list_lock); |
1620 | |
1621 | vcache_reinit(); |
1622 | |
1623 | return 0; |
1624 | } |
1625 | |
1626 | void |
1627 | vnpanic(vnode_t *vp, const char *fmt, ...) |
1628 | { |
1629 | va_list ap; |
1630 | |
1631 | #ifdef DIAGNOSTIC |
1632 | vprint(NULL, vp); |
1633 | #endif |
1634 | va_start(ap, fmt); |
1635 | vpanic(fmt, ap); |
1636 | va_end(ap); |
1637 | } |
1638 | |