1 | /* $NetBSD: vfs_init.c,v 1.48 2015/05/06 15:57:08 hannken Exp $ */ |
2 | |
3 | /*- |
4 | * Copyright (c) 1998, 2000, 2008 The NetBSD Foundation, Inc. |
5 | * All rights reserved. |
6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, |
9 | * NASA Ames Research Center. |
10 | * |
11 | * Redistribution and use in source and binary forms, with or without |
12 | * modification, are permitted provided that the following conditions |
13 | * are met: |
14 | * 1. Redistributions of source code must retain the above copyright |
15 | * notice, this list of conditions and the following disclaimer. |
16 | * 2. Redistributions in binary form must reproduce the above copyright |
17 | * notice, this list of conditions and the following disclaimer in the |
18 | * documentation and/or other materials provided with the distribution. |
19 | * |
20 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
22 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
23 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
24 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
30 | * POSSIBILITY OF SUCH DAMAGE. |
31 | */ |
32 | |
33 | /* |
34 | * Copyright (c) 1989, 1993 |
35 | * The Regents of the University of California. All rights reserved. |
36 | * |
37 | * This code is derived from software contributed |
38 | * to Berkeley by John Heidemann of the UCLA Ficus project. |
39 | * |
40 | * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project |
41 | * |
42 | * Redistribution and use in source and binary forms, with or without |
43 | * modification, are permitted provided that the following conditions |
44 | * are met: |
45 | * 1. Redistributions of source code must retain the above copyright |
46 | * notice, this list of conditions and the following disclaimer. |
47 | * 2. Redistributions in binary form must reproduce the above copyright |
48 | * notice, this list of conditions and the following disclaimer in the |
49 | * documentation and/or other materials provided with the distribution. |
50 | * 3. Neither the name of the University nor the names of its contributors |
51 | * may be used to endorse or promote products derived from this software |
52 | * without specific prior written permission. |
53 | * |
54 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
55 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
56 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
57 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
58 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
59 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
60 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
61 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
62 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
63 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
64 | * SUCH DAMAGE. |
65 | * |
66 | * @(#)vfs_init.c 8.5 (Berkeley) 5/11/95 |
67 | */ |
68 | |
69 | #include <sys/cdefs.h> |
70 | __KERNEL_RCSID(0, "$NetBSD: vfs_init.c,v 1.48 2015/05/06 15:57:08 hannken Exp $" ); |
71 | |
72 | #include <sys/param.h> |
73 | #include <sys/mount.h> |
74 | #include <sys/time.h> |
75 | #include <sys/vnode.h> |
76 | #include <sys/stat.h> |
77 | #include <sys/namei.h> |
78 | #include <sys/ucred.h> |
79 | #include <sys/buf.h> |
80 | #include <sys/errno.h> |
81 | #include <sys/kmem.h> |
82 | #include <sys/systm.h> |
83 | #include <sys/module.h> |
84 | #include <sys/dirhash.h> |
85 | #include <sys/sysctl.h> |
86 | #include <sys/kauth.h> |
87 | |
/*
 * Debug tracing hook.  DODEBUG(A) currently expands to nothing, so any
 * statement wrapped in it is compiled out.  Change the "#if 0" below to
 * "#if 1" to make DODEBUG(A) expand to its argument instead.
 */
#if 0
#define	DODEBUG(A)	A
#else
#define	DODEBUG(A)
#endif
96 | |
97 | /* |
98 | * The global list of vnode operations. |
99 | */ |
100 | extern const struct vnodeop_desc * const vfs_op_descs[]; |
101 | |
102 | /* |
103 | * These vnodeopv_descs are listed here because they are not |
104 | * associated with any particular file system, and thus cannot |
105 | * be initialized by vfs_attach(). |
106 | */ |
107 | extern const struct vnodeopv_desc dead_vnodeop_opv_desc; |
108 | extern const struct vnodeopv_desc fifo_vnodeop_opv_desc; |
109 | extern const struct vnodeopv_desc spec_vnodeop_opv_desc; |
110 | |
111 | const struct vnodeopv_desc * const vfs_special_vnodeopv_descs[] = { |
112 | &dead_vnodeop_opv_desc, |
113 | &fifo_vnodeop_opv_desc, |
114 | &spec_vnodeop_opv_desc, |
115 | NULL, |
116 | }; |
117 | |
118 | struct vfs_list_head vfs_list = /* vfs list */ |
119 | LIST_HEAD_INITIALIZER(vfs_list); |
120 | |
121 | static kauth_listener_t mount_listener; |
122 | |
/*
 * Type of one slot in a vnode operations vector: a pointer to a function
 * taking a single opaque argument and returning an int.
 *
 * Historical note: this code doesn't work if the definition is
 * **vnodeop_defns with cc.  The problem is that the compiler sometimes
 * puts in an extra level of indirection for arrays.  It's an interesting
 * "feature" of C.
 */
typedef int (*PFI)(void *);
130 | |
/*
 * Generic "default" vnode operation.
 *
 * Ignores its argument and unconditionally reports that the operation
 * is not supported.
 *
 * Returns: EOPNOTSUPP, always.
 */
/*ARGSUSED*/
int
vn_default_error(void *v)
{
	(void)v;

	return EOPNOTSUPP;
}
142 | |
143 | static struct sysctllog *vfs_sysctllog; |
144 | |
145 | /* |
146 | * Top level filesystem related information gathering. |
147 | */ |
148 | static void |
149 | sysctl_vfs_setup(void) |
150 | { |
151 | extern int vfs_magiclinks; |
152 | |
153 | sysctl_createv(&vfs_sysctllog, 0, NULL, NULL, |
154 | CTLFLAG_PERMANENT, |
155 | CTLTYPE_NODE, "generic" , |
156 | SYSCTL_DESCR("Non-specific vfs related information" ), |
157 | NULL, 0, NULL, 0, |
158 | CTL_VFS, VFS_GENERIC, CTL_EOL); |
159 | sysctl_createv(&vfs_sysctllog, 0, NULL, NULL, |
160 | CTLFLAG_PERMANENT, |
161 | CTLTYPE_STRING, "fstypes" , |
162 | SYSCTL_DESCR("List of file systems present" ), |
163 | sysctl_vfs_generic_fstypes, 0, NULL, 0, |
164 | CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL); |
165 | sysctl_createv(&vfs_sysctllog, 0, NULL, NULL, |
166 | CTLFLAG_PERMANENT|CTLFLAG_READWRITE, |
167 | CTLTYPE_INT, "magiclinks" , |
168 | SYSCTL_DESCR("Whether \"magic\" symlinks are expanded" ), |
169 | NULL, 0, &vfs_magiclinks, 0, |
170 | CTL_VFS, VFS_GENERIC, VFS_MAGICLINKS, CTL_EOL); |
171 | } |
172 | |
173 | |
/*
 * vfs_init.c
 *
 * Allocate and fill in operations vectors.
 *
 * An undocumented feature of this approach to defining operations is that
 * there can be multiple entries in vfs_opv_descs for the same operations
 * vector.  This allows third parties to extend the set of operations
 * supported by another layer in a binary compatible way.  For example,
 * assume that NFS needed to be modified to support Ficus.  NFS has an entry
 * (probably nfs_vnodeop_decls) declaring all the operations NFS supports by
 * default.  Ficus could add another entry (ficus_nfs_vnodeop_decl_extensions)
 * listing those new operations Ficus adds to NFS, all without modifying the
 * NFS code.  (Of course, the OTW NFS protocol still needs to be munged, but
 * that is a(whole)nother story.)  This is a feature.
 */
190 | |
191 | /* |
192 | * Init the vector, if it needs it. |
193 | * Also handle backwards compatibility. |
194 | */ |
195 | static void |
196 | vfs_opv_init_explicit(const struct vnodeopv_desc *vfs_opv_desc) |
197 | { |
198 | int (**opv_desc_vector)(void *); |
199 | const struct vnodeopv_entry_desc *opve_descp; |
200 | |
201 | opv_desc_vector = *(vfs_opv_desc->opv_desc_vector_p); |
202 | |
203 | for (opve_descp = vfs_opv_desc->opv_desc_ops; |
204 | opve_descp->opve_op; |
205 | opve_descp++) { |
206 | /* |
207 | * Sanity check: is this operation listed |
208 | * in the list of operations? We check this |
209 | * by seeing if its offset is zero. Since |
210 | * the default routine should always be listed |
211 | * first, it should be the only one with a zero |
212 | * offset. Any other operation with a zero |
213 | * offset is probably not listed in |
214 | * vfs_op_descs, and so is probably an error. |
215 | * |
216 | * A panic here means the layer programmer |
217 | * has committed the all-too common bug |
218 | * of adding a new operation to the layer's |
219 | * list of vnode operations but |
220 | * not adding the operation to the system-wide |
221 | * list of supported operations. |
222 | */ |
223 | if (opve_descp->opve_op->vdesc_offset == 0 && |
224 | opve_descp->opve_op->vdesc_offset != VOFFSET(vop_default)) { |
225 | printf("operation %s not listed in %s.\n" , |
226 | opve_descp->opve_op->vdesc_name, "vfs_op_descs" ); |
227 | panic ("vfs_opv_init: bad operation" ); |
228 | } |
229 | |
230 | /* |
231 | * Fill in this entry. |
232 | */ |
233 | opv_desc_vector[opve_descp->opve_op->vdesc_offset] = |
234 | opve_descp->opve_impl; |
235 | } |
236 | } |
237 | |
238 | static void |
239 | vfs_opv_init_default(const struct vnodeopv_desc *vfs_opv_desc) |
240 | { |
241 | int j; |
242 | int (**opv_desc_vector)(void *); |
243 | |
244 | opv_desc_vector = *(vfs_opv_desc->opv_desc_vector_p); |
245 | |
246 | /* |
247 | * Force every operations vector to have a default routine. |
248 | */ |
249 | if (opv_desc_vector[VOFFSET(vop_default)] == NULL) |
250 | panic("vfs_opv_init: operation vector without default routine." ); |
251 | |
252 | for (j = 0; j < VNODE_OPS_COUNT; j++) |
253 | if (opv_desc_vector[j] == NULL) |
254 | opv_desc_vector[j] = |
255 | opv_desc_vector[VOFFSET(vop_default)]; |
256 | } |
257 | |
258 | void |
259 | vfs_opv_init(const struct vnodeopv_desc * const *vopvdpp) |
260 | { |
261 | int (**opv_desc_vector)(void *); |
262 | int i; |
263 | |
264 | /* |
265 | * Allocate the vectors. |
266 | */ |
267 | for (i = 0; vopvdpp[i] != NULL; i++) { |
268 | opv_desc_vector = |
269 | kmem_alloc(VNODE_OPS_COUNT * sizeof(PFI), KM_SLEEP); |
270 | memset(opv_desc_vector, 0, VNODE_OPS_COUNT * sizeof(PFI)); |
271 | *(vopvdpp[i]->opv_desc_vector_p) = opv_desc_vector; |
272 | DODEBUG(printf("vector at %p allocated\n" , |
273 | opv_desc_vector_p)); |
274 | } |
275 | |
276 | /* |
277 | * ...and fill them in. |
278 | */ |
279 | for (i = 0; vopvdpp[i] != NULL; i++) |
280 | vfs_opv_init_explicit(vopvdpp[i]); |
281 | |
282 | /* |
283 | * Finally, go back and replace unfilled routines |
284 | * with their default. |
285 | */ |
286 | for (i = 0; vopvdpp[i] != NULL; i++) |
287 | vfs_opv_init_default(vopvdpp[i]); |
288 | } |
289 | |
290 | void |
291 | vfs_opv_free(const struct vnodeopv_desc * const *vopvdpp) |
292 | { |
293 | int i; |
294 | |
295 | /* |
296 | * Free the vectors allocated in vfs_opv_init(). |
297 | */ |
298 | for (i = 0; vopvdpp[i] != NULL; i++) { |
299 | kmem_free(*(vopvdpp[i]->opv_desc_vector_p), |
300 | VNODE_OPS_COUNT * sizeof(PFI)); |
301 | *(vopvdpp[i]->opv_desc_vector_p) = NULL; |
302 | } |
303 | } |
304 | |
#ifdef DEBUG
/*
 * Consistency check of the global vfs_op_descs[] table (DEBUG kernels
 * only).  Panics if any descriptor's vdesc_offset differs from its index
 * in the table, or if the number of entries before the terminating NULL
 * is not VNODE_OPS_COUNT.
 */
static void
vfs_op_check(void)
{
	int idx = 0;

	DODEBUG(printf("Vnode_interface_init.\n"));

	/* Each descriptor must sit at the index its offset claims. */
	while (vfs_op_descs[idx] != NULL) {
		if (vfs_op_descs[idx]->vdesc_offset != idx)
			panic("vfs_op_check: vfs_op_desc[] offset mismatch");
		idx++;
	}

	/* idx now holds the number of descriptors in the table. */
	if (idx != VNODE_OPS_COUNT)
		panic("vfs_op_check: vnode ops count mismatch (%d != %d)",
		    idx, VNODE_OPS_COUNT);

	DODEBUG(printf("vfs_opv_numops=%d\n", VNODE_OPS_COUNT));
}
#endif /* DEBUG */
329 | |
330 | /* |
331 | * Common routine to check if an unprivileged mount is allowed. |
332 | * |
333 | * We export just this part (i.e., without the access control) so that if a |
334 | * secmodel wants to implement finer grained user mounts it can do so without |
335 | * copying too much code. More elaborate policies (i.e., specific users allowed |
336 | * to also create devices and/or introduce set-id binaries, or export |
337 | * file-systems) will require a different implementation. |
338 | * |
339 | * This routine is intended to be called from listener context, and as such |
340 | * does not take credentials as an argument. |
341 | */ |
342 | int |
343 | usermount_common_policy(struct mount *mp, u_long flags) |
344 | { |
345 | |
346 | /* No exporting if unprivileged. */ |
347 | if (flags & MNT_EXPORTED) |
348 | return EPERM; |
349 | |
350 | /* Must have 'nosuid' and 'nodev'. */ |
351 | if ((flags & MNT_NODEV) == 0 || (flags & MNT_NOSUID) == 0) |
352 | return EPERM; |
353 | |
354 | /* Retain 'noexec'. */ |
355 | if ((mp->mnt_flag & MNT_NOEXEC) && (flags & MNT_NOEXEC) == 0) |
356 | return EPERM; |
357 | |
358 | return 0; |
359 | } |
360 | |
361 | static int |
362 | mount_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, |
363 | void *arg0, void *arg1, void *arg2, void *arg3) |
364 | { |
365 | int result; |
366 | enum kauth_system_req req; |
367 | |
368 | result = KAUTH_RESULT_DEFER; |
369 | req = (enum kauth_system_req)arg0; |
370 | |
371 | if (action != KAUTH_SYSTEM_MOUNT) |
372 | return result; |
373 | |
374 | if (req == KAUTH_REQ_SYSTEM_MOUNT_GET) |
375 | result = KAUTH_RESULT_ALLOW; |
376 | else if (req == KAUTH_REQ_SYSTEM_MOUNT_DEVICE) { |
377 | vnode_t *devvp = arg2; |
378 | mode_t access_mode = (mode_t)(unsigned long)arg3; |
379 | int error; |
380 | |
381 | error = VOP_ACCESS(devvp, access_mode, cred); |
382 | if (!error) |
383 | result = KAUTH_RESULT_ALLOW; |
384 | } |
385 | |
386 | return result; |
387 | } |
388 | |
389 | /* |
390 | * Initialize the vnode structures and initialize each file system type. |
391 | */ |
392 | void |
393 | vfsinit(void) |
394 | { |
395 | |
396 | /* |
397 | * Attach sysctl nodes |
398 | */ |
399 | sysctl_vfs_setup(); |
400 | |
401 | /* |
402 | * Initialize the namei pathname buffer pool and cache. |
403 | */ |
404 | pnbuf_cache = pool_cache_init(MAXPATHLEN, 0, 0, 0, "pnbufpl" , |
405 | NULL, IPL_NONE, NULL, NULL, NULL); |
406 | KASSERT(pnbuf_cache != NULL); |
407 | |
408 | /* |
409 | * Initialize the vnode table |
410 | */ |
411 | vntblinit(); |
412 | |
413 | /* |
414 | * Initialize the vnode name cache |
415 | */ |
416 | nchinit(); |
417 | |
418 | #ifdef DEBUG |
419 | /* |
420 | * Check the list of vnode operations. |
421 | */ |
422 | vfs_op_check(); |
423 | #endif |
424 | |
425 | /* |
426 | * Initialize the special vnode operations. |
427 | */ |
428 | vfs_opv_init(vfs_special_vnodeopv_descs); |
429 | |
430 | /* |
431 | * Initialise generic dirhash. |
432 | */ |
433 | dirhash_init(); |
434 | |
435 | /* |
436 | * Initialise VFS hooks. |
437 | */ |
438 | vfs_hooks_init(); |
439 | |
440 | mount_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM, |
441 | mount_listener_cb, NULL); |
442 | |
443 | /* |
444 | * Establish each file system which was statically |
445 | * included in the kernel. |
446 | */ |
447 | module_init_class(MODULE_CLASS_VFS); |
448 | } |
449 | |
450 | /* |
451 | * Drop a reference to a file system type. |
452 | */ |
453 | void |
454 | vfs_delref(struct vfsops *vfs) |
455 | { |
456 | |
457 | mutex_enter(&vfs_list_lock); |
458 | vfs->vfs_refcount--; |
459 | mutex_exit(&vfs_list_lock); |
460 | } |
461 | |
462 | /* |
463 | * Establish a file system and initialize it. |
464 | */ |
465 | int |
466 | vfs_attach(struct vfsops *vfs) |
467 | { |
468 | struct vfsops *v; |
469 | int error = 0; |
470 | |
471 | mutex_enter(&vfs_list_lock); |
472 | |
473 | /* |
474 | * Make sure this file system doesn't already exist. |
475 | */ |
476 | LIST_FOREACH(v, &vfs_list, vfs_list) { |
477 | if (strcmp(vfs->vfs_name, v->vfs_name) == 0) { |
478 | error = EEXIST; |
479 | goto out; |
480 | } |
481 | } |
482 | |
483 | /* |
484 | * Initialize the vnode operations for this file system. |
485 | */ |
486 | vfs_opv_init(vfs->vfs_opv_descs); |
487 | |
488 | /* |
489 | * Now initialize the file system itself. |
490 | */ |
491 | (*vfs->vfs_init)(); |
492 | |
493 | /* |
494 | * ...and link it into the kernel's list. |
495 | */ |
496 | LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list); |
497 | |
498 | /* |
499 | * Sanity: make sure the reference count is 0. |
500 | */ |
501 | vfs->vfs_refcount = 0; |
502 | out: |
503 | mutex_exit(&vfs_list_lock); |
504 | return (error); |
505 | } |
506 | |
507 | /* |
508 | * Remove a file system from the kernel. |
509 | */ |
510 | int |
511 | vfs_detach(struct vfsops *vfs) |
512 | { |
513 | struct vfsops *v; |
514 | int error = 0; |
515 | |
516 | mutex_enter(&vfs_list_lock); |
517 | |
518 | /* |
519 | * Make sure no one is using the filesystem. |
520 | */ |
521 | if (vfs->vfs_refcount != 0) { |
522 | error = EBUSY; |
523 | goto out; |
524 | } |
525 | |
526 | /* |
527 | * ...and remove it from the kernel's list. |
528 | */ |
529 | LIST_FOREACH(v, &vfs_list, vfs_list) { |
530 | if (v == vfs) { |
531 | LIST_REMOVE(v, vfs_list); |
532 | break; |
533 | } |
534 | } |
535 | |
536 | if (v == NULL) { |
537 | error = ESRCH; |
538 | goto out; |
539 | } |
540 | |
541 | /* |
542 | * Now run the file system-specific cleanups. |
543 | */ |
544 | (*vfs->vfs_done)(); |
545 | |
546 | /* |
547 | * Free the vnode operations vector. |
548 | */ |
549 | vfs_opv_free(vfs->vfs_opv_descs); |
550 | out: |
551 | mutex_exit(&vfs_list_lock); |
552 | return (error); |
553 | } |
554 | |
555 | void |
556 | vfs_reinit(void) |
557 | { |
558 | struct vfsops *vfs; |
559 | |
560 | mutex_enter(&vfs_list_lock); |
561 | LIST_FOREACH(vfs, &vfs_list, vfs_list) { |
562 | if (vfs->vfs_reinit) { |
563 | vfs->vfs_refcount++; |
564 | mutex_exit(&vfs_list_lock); |
565 | (*vfs->vfs_reinit)(); |
566 | mutex_enter(&vfs_list_lock); |
567 | vfs->vfs_refcount--; |
568 | } |
569 | } |
570 | mutex_exit(&vfs_list_lock); |
571 | } |
572 | |