1/* $NetBSD: lfs_vnops.c,v 1.304 2016/07/13 16:26:26 maya Exp $ */
2
3/*-
4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant@hhhh.org>.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31/*
32 * Copyright (c) 1986, 1989, 1991, 1993, 1995
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * @(#)lfs_vnops.c 8.13 (Berkeley) 6/10/95
60 */
61
62/* from NetBSD: ufs_vnops.c,v 1.232 2016/05/19 18:32:03 riastradh Exp */
63/*-
64 * Copyright (c) 2008 The NetBSD Foundation, Inc.
65 * All rights reserved.
66 *
67 * This code is derived from software contributed to The NetBSD Foundation
68 * by Wasabi Systems, Inc.
69 *
70 * Redistribution and use in source and binary forms, with or without
71 * modification, are permitted provided that the following conditions
72 * are met:
73 * 1. Redistributions of source code must retain the above copyright
74 * notice, this list of conditions and the following disclaimer.
75 * 2. Redistributions in binary form must reproduce the above copyright
76 * notice, this list of conditions and the following disclaimer in the
77 * documentation and/or other materials provided with the distribution.
78 *
79 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
80 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
81 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
82 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
83 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
84 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
85 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
86 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
87 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
88 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
89 * POSSIBILITY OF SUCH DAMAGE.
90 */
91/*
92 * Copyright (c) 1982, 1986, 1989, 1993, 1995
93 * The Regents of the University of California. All rights reserved.
94 * (c) UNIX System Laboratories, Inc.
95 * All or some portions of this file are derived from material licensed
96 * to the University of California by American Telephone and Telegraph
97 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
98 * the permission of UNIX System Laboratories, Inc.
99 *
100 * Redistribution and use in source and binary forms, with or without
101 * modification, are permitted provided that the following conditions
102 * are met:
103 * 1. Redistributions of source code must retain the above copyright
104 * notice, this list of conditions and the following disclaimer.
105 * 2. Redistributions in binary form must reproduce the above copyright
106 * notice, this list of conditions and the following disclaimer in the
107 * documentation and/or other materials provided with the distribution.
108 * 3. Neither the name of the University nor the names of its contributors
109 * may be used to endorse or promote products derived from this software
110 * without specific prior written permission.
111 *
112 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
113 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
114 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
115 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
116 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
117 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
118 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
119 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
120 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
121 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
122 * SUCH DAMAGE.
123 *
124 * @(#)ufs_vnops.c 8.28 (Berkeley) 7/31/95
125 */
126
127#include <sys/cdefs.h>
128__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.304 2016/07/13 16:26:26 maya Exp $");
129
130#ifdef _KERNEL_OPT
131#include "opt_compat_netbsd.h"
132#include "opt_uvm_page_trkown.h"
133#endif
134
135#include <sys/param.h>
136#include <sys/systm.h>
137#include <sys/namei.h>
138#include <sys/resourcevar.h>
139#include <sys/kernel.h>
140#include <sys/file.h>
141#include <sys/stat.h>
142#include <sys/buf.h>
143#include <sys/proc.h>
144#include <sys/mount.h>
145#include <sys/vnode.h>
146#include <sys/pool.h>
147#include <sys/signalvar.h>
148#include <sys/kauth.h>
149#include <sys/syslog.h>
150#include <sys/fstrans.h>
151
152#include <miscfs/fifofs/fifo.h>
153#include <miscfs/genfs/genfs.h>
154#include <miscfs/specfs/specdev.h>
155
156#include <ufs/lfs/ulfs_inode.h>
157#include <ufs/lfs/ulfsmount.h>
158#include <ufs/lfs/ulfs_bswap.h>
159#include <ufs/lfs/ulfs_extern.h>
160
161#include <uvm/uvm.h>
162#include <uvm/uvm_pmap.h>
163#include <uvm/uvm_stat.h>
164#include <uvm/uvm_pager.h>
165
166#include <ufs/lfs/lfs.h>
167#include <ufs/lfs/lfs_accessors.h>
168#include <ufs/lfs/lfs_kernel.h>
169#include <ufs/lfs/lfs_extern.h>
170
171extern pid_t lfs_writer_daemon;
172int lfs_ignore_lazy_sync = 1;
173
174static int lfs_openextattr(void *v);
175static int lfs_closeextattr(void *v);
176static int lfs_getextattr(void *v);
177static int lfs_setextattr(void *v);
178static int lfs_listextattr(void *v);
179static int lfs_deleteextattr(void *v);
180
181static int lfs_makeinode(struct vattr *vap, struct vnode *,
182 const struct ulfs_lookup_results *,
183 struct vnode **, struct componentname *);
184
185/* Global vfs data structures for lfs. */
186int (**lfs_vnodeop_p)(void *);
187const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = {
188 { &vop_default_desc, vn_default_error },
189 { &vop_lookup_desc, ulfs_lookup }, /* lookup */
190 { &vop_create_desc, lfs_create }, /* create */
191 { &vop_whiteout_desc, ulfs_whiteout }, /* whiteout */
192 { &vop_mknod_desc, lfs_mknod }, /* mknod */
193 { &vop_open_desc, ulfs_open }, /* open */
194 { &vop_close_desc, lfs_close }, /* close */
195 { &vop_access_desc, ulfs_access }, /* access */
196 { &vop_getattr_desc, lfs_getattr }, /* getattr */
197 { &vop_setattr_desc, lfs_setattr }, /* setattr */
198 { &vop_read_desc, lfs_read }, /* read */
199 { &vop_write_desc, lfs_write }, /* write */
200 { &vop_fallocate_desc, genfs_eopnotsupp }, /* fallocate */
201 { &vop_fdiscard_desc, genfs_eopnotsupp }, /* fdiscard */
202 { &vop_ioctl_desc, ulfs_ioctl }, /* ioctl */
203 { &vop_fcntl_desc, lfs_fcntl }, /* fcntl */
204 { &vop_poll_desc, ulfs_poll }, /* poll */
205 { &vop_kqfilter_desc, genfs_kqfilter }, /* kqfilter */
206 { &vop_revoke_desc, ulfs_revoke }, /* revoke */
207 { &vop_mmap_desc, lfs_mmap }, /* mmap */
208 { &vop_fsync_desc, lfs_fsync }, /* fsync */
209 { &vop_seek_desc, ulfs_seek }, /* seek */
210 { &vop_remove_desc, lfs_remove }, /* remove */
211 { &vop_link_desc, lfs_link }, /* link */
212 { &vop_rename_desc, lfs_rename }, /* rename */
213 { &vop_mkdir_desc, lfs_mkdir }, /* mkdir */
214 { &vop_rmdir_desc, lfs_rmdir }, /* rmdir */
215 { &vop_symlink_desc, lfs_symlink }, /* symlink */
216 { &vop_readdir_desc, ulfs_readdir }, /* readdir */
217 { &vop_readlink_desc, ulfs_readlink }, /* readlink */
218 { &vop_abortop_desc, ulfs_abortop }, /* abortop */
219 { &vop_inactive_desc, lfs_inactive }, /* inactive */
220 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */
221 { &vop_lock_desc, ulfs_lock }, /* lock */
222 { &vop_unlock_desc, ulfs_unlock }, /* unlock */
223 { &vop_bmap_desc, ulfs_bmap }, /* bmap */
224 { &vop_strategy_desc, lfs_strategy }, /* strategy */
225 { &vop_print_desc, ulfs_print }, /* print */
226 { &vop_islocked_desc, ulfs_islocked }, /* islocked */
227 { &vop_pathconf_desc, ulfs_pathconf }, /* pathconf */
228 { &vop_advlock_desc, ulfs_advlock }, /* advlock */
229 { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */
230 { &vop_getpages_desc, lfs_getpages }, /* getpages */
231 { &vop_putpages_desc, lfs_putpages }, /* putpages */
232 { &vop_openextattr_desc, lfs_openextattr }, /* openextattr */
233 { &vop_closeextattr_desc, lfs_closeextattr }, /* closeextattr */
234 { &vop_getextattr_desc, lfs_getextattr }, /* getextattr */
235 { &vop_setextattr_desc, lfs_setextattr }, /* setextattr */
236 { &vop_listextattr_desc, lfs_listextattr }, /* listextattr */
237 { &vop_deleteextattr_desc, lfs_deleteextattr }, /* deleteextattr */
238 { NULL, NULL }
239};
240const struct vnodeopv_desc lfs_vnodeop_opv_desc =
241 { &lfs_vnodeop_p, lfs_vnodeop_entries };
242
243int (**lfs_specop_p)(void *);
244const struct vnodeopv_entry_desc lfs_specop_entries[] = {
245 { &vop_default_desc, vn_default_error },
246 { &vop_lookup_desc, spec_lookup }, /* lookup */
247 { &vop_create_desc, spec_create }, /* create */
248 { &vop_mknod_desc, spec_mknod }, /* mknod */
249 { &vop_open_desc, spec_open }, /* open */
250 { &vop_close_desc, lfsspec_close }, /* close */
251 { &vop_access_desc, ulfs_access }, /* access */
252 { &vop_getattr_desc, lfs_getattr }, /* getattr */
253 { &vop_setattr_desc, lfs_setattr }, /* setattr */
254 { &vop_read_desc, ulfsspec_read }, /* read */
255 { &vop_write_desc, ulfsspec_write }, /* write */
256 { &vop_fallocate_desc, spec_fallocate }, /* fallocate */
257 { &vop_fdiscard_desc, spec_fdiscard }, /* fdiscard */
258 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
259 { &vop_fcntl_desc, ulfs_fcntl }, /* fcntl */
260 { &vop_poll_desc, spec_poll }, /* poll */
261 { &vop_kqfilter_desc, spec_kqfilter }, /* kqfilter */
262 { &vop_revoke_desc, spec_revoke }, /* revoke */
263 { &vop_mmap_desc, spec_mmap }, /* mmap */
264 { &vop_fsync_desc, spec_fsync }, /* fsync */
265 { &vop_seek_desc, spec_seek }, /* seek */
266 { &vop_remove_desc, spec_remove }, /* remove */
267 { &vop_link_desc, spec_link }, /* link */
268 { &vop_rename_desc, spec_rename }, /* rename */
269 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */
270 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */
271 { &vop_symlink_desc, spec_symlink }, /* symlink */
272 { &vop_readdir_desc, spec_readdir }, /* readdir */
273 { &vop_readlink_desc, spec_readlink }, /* readlink */
274 { &vop_abortop_desc, spec_abortop }, /* abortop */
275 { &vop_inactive_desc, lfs_inactive }, /* inactive */
276 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */
277 { &vop_lock_desc, ulfs_lock }, /* lock */
278 { &vop_unlock_desc, ulfs_unlock }, /* unlock */
279 { &vop_bmap_desc, spec_bmap }, /* bmap */
280 { &vop_strategy_desc, spec_strategy }, /* strategy */
281 { &vop_print_desc, ulfs_print }, /* print */
282 { &vop_islocked_desc, ulfs_islocked }, /* islocked */
283 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */
284 { &vop_advlock_desc, spec_advlock }, /* advlock */
285 { &vop_bwrite_desc, vn_bwrite }, /* bwrite */
286 { &vop_getpages_desc, spec_getpages }, /* getpages */
287 { &vop_putpages_desc, spec_putpages }, /* putpages */
288 { &vop_openextattr_desc, lfs_openextattr }, /* openextattr */
289 { &vop_closeextattr_desc, lfs_closeextattr }, /* closeextattr */
290 { &vop_getextattr_desc, lfs_getextattr }, /* getextattr */
291 { &vop_setextattr_desc, lfs_setextattr }, /* setextattr */
292 { &vop_listextattr_desc, lfs_listextattr }, /* listextattr */
293 { &vop_deleteextattr_desc, lfs_deleteextattr }, /* deleteextattr */
294 { NULL, NULL }
295};
296const struct vnodeopv_desc lfs_specop_opv_desc =
297 { &lfs_specop_p, lfs_specop_entries };
298
299int (**lfs_fifoop_p)(void *);
300const struct vnodeopv_entry_desc lfs_fifoop_entries[] = {
301 { &vop_default_desc, vn_default_error },
302 { &vop_lookup_desc, vn_fifo_bypass }, /* lookup */
303 { &vop_create_desc, vn_fifo_bypass }, /* create */
304 { &vop_mknod_desc, vn_fifo_bypass }, /* mknod */
305 { &vop_open_desc, vn_fifo_bypass }, /* open */
306 { &vop_close_desc, lfsfifo_close }, /* close */
307 { &vop_access_desc, ulfs_access }, /* access */
308 { &vop_getattr_desc, lfs_getattr }, /* getattr */
309 { &vop_setattr_desc, lfs_setattr }, /* setattr */
310 { &vop_read_desc, ulfsfifo_read }, /* read */
311 { &vop_write_desc, ulfsfifo_write }, /* write */
312 { &vop_fallocate_desc, vn_fifo_bypass }, /* fallocate */
313 { &vop_fdiscard_desc, vn_fifo_bypass }, /* fdiscard */
314 { &vop_ioctl_desc, vn_fifo_bypass }, /* ioctl */
315 { &vop_fcntl_desc, ulfs_fcntl }, /* fcntl */
316 { &vop_poll_desc, vn_fifo_bypass }, /* poll */
317 { &vop_kqfilter_desc, vn_fifo_bypass }, /* kqfilter */
318 { &vop_revoke_desc, vn_fifo_bypass }, /* revoke */
319 { &vop_mmap_desc, vn_fifo_bypass }, /* mmap */
320 { &vop_fsync_desc, vn_fifo_bypass }, /* fsync */
321 { &vop_seek_desc, vn_fifo_bypass }, /* seek */
322 { &vop_remove_desc, vn_fifo_bypass }, /* remove */
323 { &vop_link_desc, vn_fifo_bypass }, /* link */
324 { &vop_rename_desc, vn_fifo_bypass }, /* rename */
325 { &vop_mkdir_desc, vn_fifo_bypass }, /* mkdir */
326 { &vop_rmdir_desc, vn_fifo_bypass }, /* rmdir */
327 { &vop_symlink_desc, vn_fifo_bypass }, /* symlink */
328 { &vop_readdir_desc, vn_fifo_bypass }, /* readdir */
329 { &vop_readlink_desc, vn_fifo_bypass }, /* readlink */
330 { &vop_abortop_desc, vn_fifo_bypass }, /* abortop */
331 { &vop_inactive_desc, lfs_inactive }, /* inactive */
332 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */
333 { &vop_lock_desc, ulfs_lock }, /* lock */
334 { &vop_unlock_desc, ulfs_unlock }, /* unlock */
335 { &vop_bmap_desc, vn_fifo_bypass }, /* bmap */
336 { &vop_strategy_desc, vn_fifo_bypass }, /* strategy */
337 { &vop_print_desc, ulfs_print }, /* print */
338 { &vop_islocked_desc, ulfs_islocked }, /* islocked */
339 { &vop_pathconf_desc, vn_fifo_bypass }, /* pathconf */
340 { &vop_advlock_desc, vn_fifo_bypass }, /* advlock */
341 { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */
342 { &vop_putpages_desc, vn_fifo_bypass }, /* putpages */
343 { &vop_openextattr_desc, lfs_openextattr }, /* openextattr */
344 { &vop_closeextattr_desc, lfs_closeextattr }, /* closeextattr */
345 { &vop_getextattr_desc, lfs_getextattr }, /* getextattr */
346 { &vop_setextattr_desc, lfs_setextattr }, /* setextattr */
347 { &vop_listextattr_desc, lfs_listextattr }, /* listextattr */
348 { &vop_deleteextattr_desc, lfs_deleteextattr }, /* deleteextattr */
349 { NULL, NULL }
350};
351const struct vnodeopv_desc lfs_fifoop_opv_desc =
352 { &lfs_fifoop_p, lfs_fifoop_entries };
353
354#define LFS_READWRITE
355#include <ufs/lfs/ulfs_readwrite.c>
356#undef LFS_READWRITE
357
358/*
359 * Allocate a new inode.
360 */
361static int
362lfs_makeinode(struct vattr *vap, struct vnode *dvp,
363 const struct ulfs_lookup_results *ulr,
364 struct vnode **vpp, struct componentname *cnp)
365{
366 struct inode *ip;
367 struct vnode *tvp;
368 int error;
369
370 error = vcache_new(dvp->v_mount, dvp, vap, cnp->cn_cred, &tvp);
371 if (error)
372 return error;
373 error = vn_lock(tvp, LK_EXCLUSIVE);
374 if (error) {
375 vrele(tvp);
376 return error;
377 }
378 lfs_mark_vnode(tvp);
379 *vpp = tvp;
380 ip = VTOI(tvp);
381 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
382 ip->i_nlink = 1;
383 DIP_ASSIGN(ip, nlink, 1);
384
385 /* Authorize setting SGID if needed. */
386 if (ip->i_mode & ISGID) {
387 error = kauth_authorize_vnode(cnp->cn_cred, KAUTH_VNODE_WRITE_SECURITY,
388 tvp, NULL, genfs_can_chmod(tvp->v_type, cnp->cn_cred, ip->i_uid,
389 ip->i_gid, MAKEIMODE(vap->va_type, vap->va_mode)));
390 if (error) {
391 ip->i_mode &= ~ISGID;
392 DIP_ASSIGN(ip, mode, ip->i_mode);
393 }
394 }
395
396 if (cnp->cn_flags & ISWHITEOUT) {
397 ip->i_flags |= UF_OPAQUE;
398 DIP_ASSIGN(ip, flags, ip->i_flags);
399 }
400
401 /*
402 * Make sure inode goes to disk before directory entry.
403 */
404 if ((error = lfs_update(tvp, NULL, NULL, UPDATE_DIROP)) != 0)
405 goto bad;
406 error = ulfs_direnter(dvp, ulr, tvp,
407 cnp, ip->i_number, LFS_IFTODT(ip->i_mode), NULL);
408 if (error)
409 goto bad;
410 *vpp = tvp;
411 return (0);
412
413 bad:
414 /*
415 * Write error occurred trying to update the inode
416 * or the directory so must deallocate the inode.
417 */
418 ip->i_nlink = 0;
419 DIP_ASSIGN(ip, nlink, 0);
420 ip->i_flag |= IN_CHANGE;
421 /* If IN_ADIROP, account for it */
422 lfs_unmark_vnode(tvp);
423 vput(tvp);
424 return (error);
425}
426
427/*
428 * Synch an open file.
429 */
430/* ARGSUSED */
431int
432lfs_fsync(void *v)
433{
434 struct vop_fsync_args /* {
435 struct vnode *a_vp;
436 kauth_cred_t a_cred;
437 int a_flags;
438 off_t offlo;
439 off_t offhi;
440 } */ *ap = v;
441 struct vnode *vp = ap->a_vp;
442 int error, wait;
443 struct inode *ip = VTOI(vp);
444 struct lfs *fs = ip->i_lfs;
445
446 /* If we're mounted read-only, don't try to sync. */
447 if (fs->lfs_ronly)
448 return 0;
449
450 /* If a removed vnode is being cleaned, no need to sync here. */
451 if ((ap->a_flags & FSYNC_RECLAIM) != 0 && ip->i_mode == 0)
452 return 0;
453
454 /*
455 * Trickle sync simply adds this vnode to the pager list, as if
456 * the pagedaemon had requested a pageout.
457 */
458 if (ap->a_flags & FSYNC_LAZY) {
459 if (lfs_ignore_lazy_sync == 0) {
460 mutex_enter(&lfs_lock);
461 if (!(ip->i_flags & IN_PAGING)) {
462 ip->i_flags |= IN_PAGING;
463 TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip,
464 i_lfs_pchain);
465 }
466 wakeup(&lfs_writer_daemon);
467 mutex_exit(&lfs_lock);
468 }
469 return 0;
470 }
471
472 /*
473 * If a vnode is bring cleaned, flush it out before we try to
474 * reuse it. This prevents the cleaner from writing files twice
475 * in the same partial segment, causing an accounting underflow.
476 */
477 if (ap->a_flags & FSYNC_RECLAIM && ip->i_flags & IN_CLEANING) {
478 lfs_vflush(vp);
479 }
480
481 wait = (ap->a_flags & FSYNC_WAIT);
482 do {
483 mutex_enter(vp->v_interlock);
484 error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo),
485 round_page(ap->a_offhi),
486 PGO_CLEANIT | (wait ? PGO_SYNCIO : 0));
487 if (error == EAGAIN) {
488 mutex_enter(&lfs_lock);
489 mtsleep(&fs->lfs_availsleep, PCATCH | PUSER,
490 "lfs_fsync", hz / 100 + 1, &lfs_lock);
491 mutex_exit(&lfs_lock);
492 }
493 } while (error == EAGAIN);
494 if (error)
495 return error;
496
497 if ((ap->a_flags & FSYNC_DATAONLY) == 0)
498 error = lfs_update(vp, NULL, NULL, wait ? UPDATE_WAIT : 0);
499
500 if (error == 0 && ap->a_flags & FSYNC_CACHE) {
501 int l = 0;
502 error = VOP_IOCTL(ip->i_devvp, DIOCCACHESYNC, &l, FWRITE,
503 curlwp->l_cred);
504 }
505 if (wait && !VPISEMPTY(vp))
506 LFS_SET_UINO(ip, IN_MODIFIED);
507
508 return error;
509}
510
511/*
512 * Take IN_ADIROP off, then call ulfs_inactive.
513 */
514int
515lfs_inactive(void *v)
516{
517 struct vop_inactive_args /* {
518 struct vnode *a_vp;
519 } */ *ap = v;
520
521 lfs_unmark_vnode(ap->a_vp);
522
523 /*
524 * The Ifile is only ever inactivated on unmount.
525 * Streamline this process by not giving it more dirty blocks.
526 */
527 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM) {
528 mutex_enter(&lfs_lock);
529 LFS_CLR_UINO(VTOI(ap->a_vp), IN_ALLMOD);
530 mutex_exit(&lfs_lock);
531 VOP_UNLOCK(ap->a_vp);
532 return 0;
533 }
534
535#ifdef DEBUG
536 /*
537 * This might happen on unmount.
538 * XXX If it happens at any other time, it should be a panic.
539 */
540 if (ap->a_vp->v_uflag & VU_DIROP) {
541 struct inode *ip = VTOI(ap->a_vp);
542 printf("lfs_inactive: inactivating VU_DIROP? ino = %d\n", (int)ip->i_number);
543 }
544#endif /* DIAGNOSTIC */
545
546 return ulfs_inactive(v);
547}
548
549int
550lfs_set_dirop(struct vnode *dvp, struct vnode *vp)
551{
552 struct lfs *fs;
553 int error;
554
555 KASSERT(VOP_ISLOCKED(dvp));
556 KASSERT(vp == NULL || VOP_ISLOCKED(vp));
557
558 fs = VTOI(dvp)->i_lfs;
559
560 ASSERT_NO_SEGLOCK(fs);
561 /*
562 * LFS_NRESERVE calculates direct and indirect blocks as well
563 * as an inode block; an overestimate in most cases.
564 */
565 if ((error = lfs_reserve(fs, dvp, vp, LFS_NRESERVE(fs))) != 0)
566 return (error);
567
568 restart:
569 mutex_enter(&lfs_lock);
570 if (fs->lfs_dirops == 0) {
571 mutex_exit(&lfs_lock);
572 lfs_check(dvp, LFS_UNUSED_LBN, 0);
573 mutex_enter(&lfs_lock);
574 }
575 while (fs->lfs_writer) {
576 error = mtsleep(&fs->lfs_dirops, (PRIBIO + 1) | PCATCH,
577 "lfs_sdirop", 0, &lfs_lock);
578 if (error == EINTR) {
579 mutex_exit(&lfs_lock);
580 goto unreserve;
581 }
582 }
583 if (lfs_dirvcount > LFS_MAX_DIROP && fs->lfs_dirops == 0) {
584 wakeup(&lfs_writer_daemon);
585 mutex_exit(&lfs_lock);
586 preempt();
587 goto restart;
588 }
589
590 if (lfs_dirvcount > LFS_MAX_DIROP) {
591 DLOG((DLOG_DIROP, "lfs_set_dirop: sleeping with dirops=%d, "
592 "dirvcount=%d\n", fs->lfs_dirops, lfs_dirvcount));
593 if ((error = mtsleep(&lfs_dirvcount,
594 PCATCH | PUSER | PNORELOCK, "lfs_maxdirop", 0,
595 &lfs_lock)) != 0) {
596 mutex_exit(&lfs_lock);
597 goto unreserve;
598 }
599 mutex_exit(&lfs_lock);
600 goto restart;
601 }
602
603 ++fs->lfs_dirops;
604 /* fs->lfs_doifile = 1; */ /* XXX why? --ks */
605 mutex_exit(&lfs_lock);
606
607 /* Hold a reference so SET_ENDOP will be happy */
608 vref(dvp);
609 if (vp) {
610 vref(vp);
611 MARK_VNODE(vp);
612 }
613
614 MARK_VNODE(dvp);
615 return 0;
616
617 unreserve:
618 lfs_reserve(fs, dvp, vp, -LFS_NRESERVE(fs));
619 return error;
620}
621
622/*
623 * Opposite of lfs_set_dirop... mostly. For now at least must call
624 * UNMARK_VNODE(dvp) explicitly first. (XXX: clean that up)
625 */
626void
627lfs_unset_dirop(struct lfs *fs, struct vnode *dvp, const char *str)
628{
629 mutex_enter(&lfs_lock);
630 --fs->lfs_dirops;
631 if (!fs->lfs_dirops) {
632 if (fs->lfs_nadirop) {
633 panic("lfs_unset_dirop: %s: no dirops but "
634 " nadirop=%d", str,
635 fs->lfs_nadirop);
636 }
637 wakeup(&fs->lfs_writer);
638 mutex_exit(&lfs_lock);
639 lfs_check(dvp, LFS_UNUSED_LBN, 0);
640 } else {
641 mutex_exit(&lfs_lock);
642 }
643 lfs_reserve(fs, dvp, NULL, -LFS_NRESERVE(fs));
644}
645
646void
647lfs_mark_vnode(struct vnode *vp)
648{
649 struct inode *ip = VTOI(vp);
650 struct lfs *fs = ip->i_lfs;
651
652 mutex_enter(&lfs_lock);
653 if (!(ip->i_flag & IN_ADIROP)) {
654 if (!(vp->v_uflag & VU_DIROP)) {
655 mutex_exit(&lfs_lock);
656 vref(vp);
657 mutex_enter(&lfs_lock);
658 ++lfs_dirvcount;
659 ++fs->lfs_dirvcount;
660 TAILQ_INSERT_TAIL(&fs->lfs_dchainhd, ip, i_lfs_dchain);
661 vp->v_uflag |= VU_DIROP;
662 }
663 ++fs->lfs_nadirop;
664 ip->i_flag &= ~IN_CDIROP;
665 ip->i_flag |= IN_ADIROP;
666 } else
667 KASSERT(vp->v_uflag & VU_DIROP);
668 mutex_exit(&lfs_lock);
669}
670
671void
672lfs_unmark_vnode(struct vnode *vp)
673{
674 struct inode *ip = VTOI(vp);
675
676 mutex_enter(&lfs_lock);
677 if (ip && (ip->i_flag & IN_ADIROP)) {
678 KASSERT(vp->v_uflag & VU_DIROP);
679 --ip->i_lfs->lfs_nadirop;
680 ip->i_flag &= ~IN_ADIROP;
681 }
682 mutex_exit(&lfs_lock);
683}
684
685int
686lfs_symlink(void *v)
687{
688 struct vop_symlink_v3_args /* {
689 struct vnode *a_dvp;
690 struct vnode **a_vpp;
691 struct componentname *a_cnp;
692 struct vattr *a_vap;
693 char *a_target;
694 } */ *ap = v;
695 struct lfs *fs;
696 struct vnode *dvp, **vpp;
697 struct inode *ip;
698 struct ulfs_lookup_results *ulr;
699 ssize_t len; /* XXX should be size_t */
700 int error;
701
702 dvp = ap->a_dvp;
703 vpp = ap->a_vpp;
704
705 KASSERT(vpp != NULL);
706 KASSERT(*vpp == NULL);
707 KASSERT(ap->a_vap->va_type == VLNK);
708
709 /* XXX should handle this material another way */
710 ulr = &VTOI(ap->a_dvp)->i_crap;
711 ULFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp));
712
713 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
714 ASSERT_NO_SEGLOCK(fs);
715 if (fs->lfs_ronly) {
716 return EROFS;
717 }
718
719 error = lfs_set_dirop(dvp, NULL);
720 if (error)
721 return error;
722
723 fstrans_start(dvp->v_mount, FSTRANS_SHARED);
724 error = lfs_makeinode(ap->a_vap, dvp, ulr, vpp, ap->a_cnp);
725 if (error) {
726 goto out;
727 }
728
729 VN_KNOTE(ap->a_dvp, NOTE_WRITE);
730 ip = VTOI(*vpp);
731
732 /*
733 * This test is off by one. um_maxsymlinklen contains the
734 * number of bytes available, and we aren't storing a \0, so
735 * the test should properly be <=. However, it cannot be
736 * changed as this would break compatibility with existing fs
737 * images -- see the way ulfs_readlink() works.
738 */
739 len = strlen(ap->a_target);
740 if (len < ip->i_lfs->um_maxsymlinklen) {
741 memcpy((char *)SHORTLINK(ip), ap->a_target, len);
742 ip->i_size = len;
743 DIP_ASSIGN(ip, size, len);
744 uvm_vnp_setsize(*vpp, ip->i_size);
745 ip->i_flag |= IN_CHANGE | IN_UPDATE;
746 if ((*vpp)->v_mount->mnt_flag & MNT_RELATIME)
747 ip->i_flag |= IN_ACCESS;
748 } else {
749 error = ulfs_bufio(UIO_WRITE, *vpp, ap->a_target, len, (off_t)0,
750 IO_NODELOCKED | IO_JOURNALLOCKED, ap->a_cnp->cn_cred, NULL,
751 NULL);
752 }
753
754 VOP_UNLOCK(*vpp);
755 if (error)
756 vrele(*vpp);
757
758out:
759 fstrans_done(dvp->v_mount);
760
761 UNMARK_VNODE(dvp);
762 /* XXX: is it even possible for the symlink to get MARK'd? */
763 UNMARK_VNODE(*vpp);
764 if (error) {
765 *vpp = NULL;
766 }
767 lfs_unset_dirop(fs, dvp, "symlink");
768
769 vrele(dvp);
770 return (error);
771}
772
773int
774lfs_mknod(void *v)
775{
776 struct vop_mknod_v3_args /* {
777 struct vnode *a_dvp;
778 struct vnode **a_vpp;
779 struct componentname *a_cnp;
780 struct vattr *a_vap;
781 } */ *ap = v;
782 struct lfs *fs;
783 struct vnode *dvp, **vpp;
784 struct vattr *vap;
785 struct inode *ip;
786 int error;
787 ino_t ino;
788 struct ulfs_lookup_results *ulr;
789
790 dvp = ap->a_dvp;
791 vpp = ap->a_vpp;
792 vap = ap->a_vap;
793
794 KASSERT(vpp != NULL);
795 KASSERT(*vpp == NULL);
796
797 /* XXX should handle this material another way */
798 ulr = &VTOI(dvp)->i_crap;
799 ULFS_CHECK_CRAPCOUNTER(VTOI(dvp));
800
801 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
802 ASSERT_NO_SEGLOCK(fs);
803 if (fs->lfs_ronly) {
804 return EROFS;
805 }
806
807 error = lfs_set_dirop(dvp, NULL);
808 if (error)
809 return error;
810
811 fstrans_start(dvp->v_mount, FSTRANS_SHARED);
812 error = lfs_makeinode(vap, dvp, ulr, vpp, ap->a_cnp);
813
814 /* Either way we're done with the dirop at this point */
815 UNMARK_VNODE(dvp);
816 UNMARK_VNODE(*vpp);
817 lfs_unset_dirop(fs, dvp, "mknod");
818
819 if (error) {
820 fstrans_done(dvp->v_mount);
821 vrele(dvp);
822 *vpp = NULL;
823 return (error);
824 }
825
826 VN_KNOTE(dvp, NOTE_WRITE);
827 ip = VTOI(*vpp);
828 ino = ip->i_number;
829 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
830
831 /*
832 * Call fsync to write the vnode so that we don't have to deal with
833 * flushing it when it's marked VU_DIROP or reclaiming.
834 *
835 * XXX KS - If we can't flush we also can't call vgone(), so must
836 * return. But, that leaves this vnode in limbo, also not good.
837 * Can this ever happen (barring hardware failure)?
838 */
839 if ((error = VOP_FSYNC(*vpp, NOCRED, FSYNC_WAIT, 0, 0)) != 0) {
840 panic("lfs_mknod: couldn't fsync (ino %llu)",
841 (unsigned long long)ino);
842 /* return (error); */
843 }
844
845 fstrans_done(dvp->v_mount);
846 vrele(dvp);
847 KASSERT(error == 0);
848 VOP_UNLOCK(*vpp);
849 return (0);
850}
851
852/*
853 * Create a regular file
854 */
855int
856lfs_create(void *v)
857{
858 struct vop_create_v3_args /* {
859 struct vnode *a_dvp;
860 struct vnode **a_vpp;
861 struct componentname *a_cnp;
862 struct vattr *a_vap;
863 } */ *ap = v;
864 struct lfs *fs;
865 struct vnode *dvp, **vpp;
866 struct vattr *vap;
867 struct ulfs_lookup_results *ulr;
868 int error;
869
870 dvp = ap->a_dvp;
871 vpp = ap->a_vpp;
872 vap = ap->a_vap;
873
874 KASSERT(vpp != NULL);
875 KASSERT(*vpp == NULL);
876
877 /* XXX should handle this material another way */
878 ulr = &VTOI(dvp)->i_crap;
879 ULFS_CHECK_CRAPCOUNTER(VTOI(dvp));
880
881 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
882 ASSERT_NO_SEGLOCK(fs);
883 if (fs->lfs_ronly) {
884 return EROFS;
885 }
886
887 error = lfs_set_dirop(dvp, NULL);
888 if (error)
889 return error;
890
891 fstrans_start(dvp->v_mount, FSTRANS_SHARED);
892 error = lfs_makeinode(vap, dvp, ulr, vpp, ap->a_cnp);
893 if (error) {
894 fstrans_done(dvp->v_mount);
895 goto out;
896 }
897 fstrans_done(dvp->v_mount);
898 VN_KNOTE(dvp, NOTE_WRITE);
899 VOP_UNLOCK(*vpp);
900
901out:
902
903 UNMARK_VNODE(dvp);
904 UNMARK_VNODE(*vpp);
905 if (error) {
906 *vpp = NULL;
907 }
908 lfs_unset_dirop(fs, dvp, "create");
909
910 vrele(dvp);
911 return (error);
912}
913
914int
915lfs_mkdir(void *v)
916{
917 struct vop_mkdir_v3_args /* {
918 struct vnode *a_dvp;
919 struct vnode **a_vpp;
920 struct componentname *a_cnp;
921 struct vattr *a_vap;
922 } */ *ap = v;
923 struct lfs *fs;
924 struct vnode *dvp, *tvp, **vpp;
925 struct inode *dp, *ip;
926 struct componentname *cnp;
927 struct vattr *vap;
928 struct ulfs_lookup_results *ulr;
929 struct buf *bp;
930 LFS_DIRHEADER *dirp;
931 int dirblksiz;
932 int error;
933
934 dvp = ap->a_dvp;
935 tvp = NULL;
936 vpp = ap->a_vpp;
937 cnp = ap->a_cnp;
938 vap = ap->a_vap;
939
940 dp = VTOI(dvp);
941 ip = NULL;
942
943 KASSERT(vap->va_type == VDIR);
944 KASSERT(vpp != NULL);
945 KASSERT(*vpp == NULL);
946
947 /* XXX should handle this material another way */
948 ulr = &dp->i_crap;
949 ULFS_CHECK_CRAPCOUNTER(dp);
950
951 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
952 ASSERT_NO_SEGLOCK(fs);
953 if (fs->lfs_ronly) {
954 return EROFS;
955 }
956 dirblksiz = fs->um_dirblksiz;
957 /* XXX dholland 20150911 I believe this to be true, but... */
958 //KASSERT(dirblksiz == LFS_DIRBLKSIZ);
959
960 error = lfs_set_dirop(dvp, NULL);
961 if (error)
962 return error;
963
964 fstrans_start(dvp->v_mount, FSTRANS_SHARED);
965
966 if ((nlink_t)dp->i_nlink >= LINK_MAX) {
967 error = EMLINK;
968 goto out;
969 }
970
971 /*
972 * Must simulate part of lfs_makeinode here to acquire the inode,
973 * but not have it entered in the parent directory. The entry is
974 * made later after writing "." and ".." entries.
975 */
976 error = vcache_new(dvp->v_mount, dvp, vap, cnp->cn_cred, ap->a_vpp);
977 if (error)
978 goto out;
979
980 error = vn_lock(*ap->a_vpp, LK_EXCLUSIVE);
981 if (error) {
982 vrele(*ap->a_vpp);
983 *ap->a_vpp = NULL;
984 goto out;
985 }
986
987 tvp = *ap->a_vpp;
988 lfs_mark_vnode(tvp);
989 ip = VTOI(tvp);
990 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
991 ip->i_nlink = 2;
992 DIP_ASSIGN(ip, nlink, 2);
993 if (cnp->cn_flags & ISWHITEOUT) {
994 ip->i_flags |= UF_OPAQUE;
995 DIP_ASSIGN(ip, flags, ip->i_flags);
996 }
997
998 /*
999 * Bump link count in parent directory to reflect work done below.
1000 */
1001 dp->i_nlink++;
1002 DIP_ASSIGN(dp, nlink, dp->i_nlink);
1003 dp->i_flag |= IN_CHANGE;
1004 if ((error = lfs_update(dvp, NULL, NULL, UPDATE_DIROP)) != 0)
1005 goto bad;
1006
1007 /*
1008 * Initialize directory with "." and "..". This used to use a
1009 * static template but that adds moving parts for very little
1010 * benefit.
1011 */
1012 if ((error = lfs_balloc(tvp, (off_t)0, dirblksiz, cnp->cn_cred,
1013 B_CLRBUF, &bp)) != 0)
1014 goto bad;
1015 ip->i_size = dirblksiz;
1016 DIP_ASSIGN(ip, size, dirblksiz);
1017 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
1018 uvm_vnp_setsize(tvp, ip->i_size);
1019 dirp = bp->b_data;
1020
1021 /* . */
1022 lfs_dir_setino(fs, dirp, ip->i_number);
1023 lfs_dir_setreclen(fs, dirp, LFS_DIRECTSIZ(fs, 1));
1024 lfs_dir_settype(fs, dirp, LFS_DT_DIR);
1025 lfs_dir_setnamlen(fs, dirp, 1);
1026 lfs_copydirname(fs, lfs_dir_nameptr(fs, dirp), ".", 1,
1027 LFS_DIRECTSIZ(fs, 1));
1028 dirp = LFS_NEXTDIR(fs, dirp);
1029 /* .. */
1030 lfs_dir_setino(fs, dirp, dp->i_number);
1031 lfs_dir_setreclen(fs, dirp, dirblksiz - LFS_DIRECTSIZ(fs, 1));
1032 lfs_dir_settype(fs, dirp, LFS_DT_DIR);
1033 lfs_dir_setnamlen(fs, dirp, 2);
1034 lfs_copydirname(fs, lfs_dir_nameptr(fs, dirp), "..", 2,
1035 dirblksiz - LFS_DIRECTSIZ(fs, 1));
1036
1037 /*
1038 * Directory set up; now install its entry in the parent directory.
1039 */
1040 if ((error = VOP_BWRITE(bp->b_vp, bp)) != 0)
1041 goto bad;
1042 if ((error = lfs_update(tvp, NULL, NULL, UPDATE_DIROP)) != 0) {
1043 goto bad;
1044 }
1045 error = ulfs_direnter(dvp, ulr, tvp,
1046 cnp, ip->i_number, LFS_IFTODT(ip->i_mode), bp);
1047 bad:
1048 if (error == 0) {
1049 VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
1050 VOP_UNLOCK(tvp);
1051 } else {
1052 dp->i_nlink--;
1053 DIP_ASSIGN(dp, nlink, dp->i_nlink);
1054 dp->i_flag |= IN_CHANGE;
1055 /*
1056 * No need to do an explicit lfs_truncate here, vrele will
1057 * do this for us because we set the link count to 0.
1058 */
1059 ip->i_nlink = 0;
1060 DIP_ASSIGN(ip, nlink, 0);
1061 ip->i_flag |= IN_CHANGE;
1062 /* If IN_ADIROP, account for it */
1063 lfs_unmark_vnode(tvp);
1064 vput(tvp);
1065 }
1066
1067out:
1068 fstrans_done(dvp->v_mount);
1069
1070 UNMARK_VNODE(dvp);
1071 UNMARK_VNODE(*vpp);
1072 if (error) {
1073 *vpp = NULL;
1074 }
1075 lfs_unset_dirop(fs, dvp, "mkdir");
1076
1077 vrele(dvp);
1078 return (error);
1079}
1080
1081int
1082lfs_remove(void *v)
1083{
1084 struct vop_remove_args /* {
1085 struct vnode *a_dvp;
1086 struct vnode *a_vp;
1087 struct componentname *a_cnp;
1088 } */ *ap = v;
1089 struct vnode *dvp, *vp;
1090 struct inode *ip;
1091 int error;
1092
1093 dvp = ap->a_dvp;
1094 vp = ap->a_vp;
1095 ip = VTOI(vp);
1096 if ((error = lfs_set_dirop(dvp, vp)) != 0) {
1097 if (dvp == vp)
1098 vrele(vp);
1099 else
1100 vput(vp);
1101 vput(dvp);
1102 return error;
1103 }
1104 error = ulfs_remove(ap);
1105 if (ip->i_nlink == 0)
1106 lfs_orphan(ip->i_lfs, ip->i_number);
1107
1108 UNMARK_VNODE(dvp);
1109 if (ap->a_vp) {
1110 UNMARK_VNODE(ap->a_vp);
1111 }
1112 lfs_unset_dirop(ip->i_lfs, dvp, "remove");
1113 vrele(dvp);
1114 if (ap->a_vp) {
1115 vrele(ap->a_vp);
1116 }
1117
1118 return (error);
1119}
1120
1121int
1122lfs_rmdir(void *v)
1123{
1124 struct vop_rmdir_args /* {
1125 struct vnodeop_desc *a_desc;
1126 struct vnode *a_dvp;
1127 struct vnode *a_vp;
1128 struct componentname *a_cnp;
1129 } */ *ap = v;
1130 struct vnode *vp;
1131 struct inode *ip;
1132 int error;
1133
1134 vp = ap->a_vp;
1135 ip = VTOI(vp);
1136 if ((error = lfs_set_dirop(ap->a_dvp, ap->a_vp)) != 0) {
1137 if (ap->a_dvp == vp)
1138 vrele(ap->a_dvp);
1139 else
1140 vput(ap->a_dvp);
1141 vput(vp);
1142 return error;
1143 }
1144 error = ulfs_rmdir(ap);
1145 if (ip->i_nlink == 0)
1146 lfs_orphan(ip->i_lfs, ip->i_number);
1147
1148 UNMARK_VNODE(ap->a_dvp);
1149 if (ap->a_vp) {
1150 UNMARK_VNODE(ap->a_vp);
1151 }
1152 lfs_unset_dirop(ip->i_lfs, ap->a_dvp, "rmdir");
1153 vrele(ap->a_dvp);
1154 if (ap->a_vp) {
1155 vrele(ap->a_vp);
1156 }
1157
1158 return (error);
1159}
1160
1161int
1162lfs_link(void *v)
1163{
1164 struct vop_link_v2_args /* {
1165 struct vnode *a_dvp;
1166 struct vnode *a_vp;
1167 struct componentname *a_cnp;
1168 } */ *ap = v;
1169 struct lfs *fs;
1170 struct vnode *dvp;
1171 int error;
1172
1173 dvp = ap->a_dvp;
1174
1175 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
1176 ASSERT_NO_SEGLOCK(fs);
1177 if (fs->lfs_ronly) {
1178 return EROFS;
1179 }
1180
1181 error = lfs_set_dirop(dvp, NULL);
1182 if (error) {
1183 return error;
1184 }
1185
1186 error = ulfs_link(ap);
1187
1188 UNMARK_VNODE(dvp);
1189 lfs_unset_dirop(fs, dvp, "link");
1190 vrele(dvp);
1191
1192 return (error);
1193}
1194
1195/* XXX hack to avoid calling ITIMES in getattr */
1196int
1197lfs_getattr(void *v)
1198{
1199 struct vop_getattr_args /* {
1200 struct vnode *a_vp;
1201 struct vattr *a_vap;
1202 kauth_cred_t a_cred;
1203 } */ *ap = v;
1204 struct vnode *vp = ap->a_vp;
1205 struct inode *ip = VTOI(vp);
1206 struct vattr *vap = ap->a_vap;
1207 struct lfs *fs = ip->i_lfs;
1208
1209 fstrans_start(vp->v_mount, FSTRANS_SHARED);
1210 /*
1211 * Copy from inode table
1212 */
1213 vap->va_fsid = ip->i_dev;
1214 vap->va_fileid = ip->i_number;
1215 vap->va_mode = ip->i_mode & ~LFS_IFMT;
1216 vap->va_nlink = ip->i_nlink;
1217 vap->va_uid = ip->i_uid;
1218 vap->va_gid = ip->i_gid;
1219 switch (vp->v_type) {
1220 case VBLK:
1221 case VCHR:
1222 vap->va_rdev = (dev_t)lfs_dino_getrdev(fs, ip->i_din);
1223 break;
1224 default:
1225 vap->va_rdev = NODEV;
1226 break;
1227 }
1228 vap->va_size = vp->v_size;
1229 vap->va_atime.tv_sec = lfs_dino_getatime(fs, ip->i_din);
1230 vap->va_atime.tv_nsec = lfs_dino_getatimensec(fs, ip->i_din);
1231 vap->va_mtime.tv_sec = lfs_dino_getmtime(fs, ip->i_din);
1232 vap->va_mtime.tv_nsec = lfs_dino_getmtimensec(fs, ip->i_din);
1233 vap->va_ctime.tv_sec = lfs_dino_getctime(fs, ip->i_din);
1234 vap->va_ctime.tv_nsec = lfs_dino_getctimensec(fs, ip->i_din);
1235 vap->va_flags = ip->i_flags;
1236 vap->va_gen = ip->i_gen;
1237 /* this doesn't belong here */
1238 if (vp->v_type == VBLK)
1239 vap->va_blocksize = BLKDEV_IOSIZE;
1240 else if (vp->v_type == VCHR)
1241 vap->va_blocksize = MAXBSIZE;
1242 else
1243 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
1244 vap->va_bytes = lfs_fsbtob(fs, ip->i_lfs_effnblks);
1245 vap->va_type = vp->v_type;
1246 vap->va_filerev = ip->i_modrev;
1247 fstrans_done(vp->v_mount);
1248 return (0);
1249}
1250
1251/*
1252 * Check to make sure the inode blocks won't choke the buffer
1253 * cache, then call ulfs_setattr as usual.
1254 */
1255int
1256lfs_setattr(void *v)
1257{
1258 struct vop_setattr_args /* {
1259 struct vnode *a_vp;
1260 struct vattr *a_vap;
1261 kauth_cred_t a_cred;
1262 } */ *ap = v;
1263 struct vnode *vp = ap->a_vp;
1264
1265 lfs_check(vp, LFS_UNUSED_LBN, 0);
1266 return ulfs_setattr(v);
1267}
1268
1269/*
1270 * Release the block we hold on lfs_newseg wrapping. Called on file close,
1271 * or explicitly from LFCNWRAPGO. Called with the interlock held.
1272 */
1273static int
1274lfs_wrapgo(struct lfs *fs, struct inode *ip, int waitfor)
1275{
1276 if (fs->lfs_stoplwp != curlwp)
1277 return EBUSY;
1278
1279 fs->lfs_stoplwp = NULL;
1280 cv_signal(&fs->lfs_stopcv);
1281
1282 KASSERT(fs->lfs_nowrap > 0);
1283 if (fs->lfs_nowrap <= 0) {
1284 return 0;
1285 }
1286
1287 if (--fs->lfs_nowrap == 0) {
1288 log(LOG_NOTICE, "%s: re-enabled log wrap\n",
1289 lfs_sb_getfsmnt(fs));
1290 wakeup(&fs->lfs_wrappass);
1291 lfs_wakeup_cleaner(fs);
1292 }
1293 if (waitfor) {
1294 mtsleep(&fs->lfs_nextsegsleep, PCATCH | PUSER, "segment",
1295 0, &lfs_lock);
1296 }
1297
1298 return 0;
1299}
1300
1301/*
1302 * Close called.
1303 *
1304 * Update the times on the inode.
1305 */
1306/* ARGSUSED */
1307int
1308lfs_close(void *v)
1309{
1310 struct vop_close_args /* {
1311 struct vnode *a_vp;
1312 int a_fflag;
1313 kauth_cred_t a_cred;
1314 } */ *ap = v;
1315 struct vnode *vp = ap->a_vp;
1316 struct inode *ip = VTOI(vp);
1317 struct lfs *fs = ip->i_lfs;
1318
1319 if ((ip->i_number == ULFS_ROOTINO || ip->i_number == LFS_IFILE_INUM) &&
1320 fs->lfs_stoplwp == curlwp) {
1321 mutex_enter(&lfs_lock);
1322 log(LOG_NOTICE, "lfs_close: releasing log wrap control\n");
1323 lfs_wrapgo(fs, ip, 0);
1324 mutex_exit(&lfs_lock);
1325 }
1326
1327 if (vp == ip->i_lfs->lfs_ivnode &&
1328 vp->v_mount->mnt_iflag & IMNT_UNMOUNT)
1329 return 0;
1330
1331 fstrans_start(vp->v_mount, FSTRANS_SHARED);
1332 if (vp->v_usecount > 1 && vp != ip->i_lfs->lfs_ivnode) {
1333 LFS_ITIMES(ip, NULL, NULL, NULL);
1334 }
1335 fstrans_done(vp->v_mount);
1336 return (0);
1337}
1338
1339/*
1340 * Close wrapper for special devices.
1341 *
1342 * Update the times on the inode then do device close.
1343 */
1344int
1345lfsspec_close(void *v)
1346{
1347 struct vop_close_args /* {
1348 struct vnode *a_vp;
1349 int a_fflag;
1350 kauth_cred_t a_cred;
1351 } */ *ap = v;
1352 struct vnode *vp;
1353 struct inode *ip;
1354
1355 vp = ap->a_vp;
1356 ip = VTOI(vp);
1357 if (vp->v_usecount > 1) {
1358 LFS_ITIMES(ip, NULL, NULL, NULL);
1359 }
1360 return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap));
1361}
1362
1363/*
1364 * Close wrapper for fifo's.
1365 *
1366 * Update the times on the inode then do device close.
1367 */
1368int
1369lfsfifo_close(void *v)
1370{
1371 struct vop_close_args /* {
1372 struct vnode *a_vp;
1373 int a_fflag;
1374 kauth_cred_ a_cred;
1375 } */ *ap = v;
1376 struct vnode *vp;
1377 struct inode *ip;
1378
1379 vp = ap->a_vp;
1380 ip = VTOI(vp);
1381 if (ap->a_vp->v_usecount > 1) {
1382 LFS_ITIMES(ip, NULL, NULL, NULL);
1383 }
1384 return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
1385}
1386
1387/*
1388 * Reclaim an inode so that it can be used for other purposes.
1389 */
1390
1391int
1392lfs_reclaim(void *v)
1393{
1394 struct vop_reclaim_args /* {
1395 struct vnode *a_vp;
1396 } */ *ap = v;
1397 struct vnode *vp = ap->a_vp;
1398 struct inode *ip = VTOI(vp);
1399 struct lfs *fs = ip->i_lfs;
1400 int error;
1401
1402 /*
1403 * The inode must be freed and updated before being removed
1404 * from its hash chain. Other threads trying to gain a hold
1405 * or lock on the inode will be stalled.
1406 */
1407 if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1408 lfs_vfree(vp, ip->i_number, ip->i_omode);
1409
1410 mutex_enter(&lfs_lock);
1411 LFS_CLR_UINO(ip, IN_ALLMOD);
1412 mutex_exit(&lfs_lock);
1413 if ((error = ulfs_reclaim(vp)))
1414 return (error);
1415
1416 /*
1417 * Take us off the paging and/or dirop queues if we were on them.
1418 * We shouldn't be on them.
1419 */
1420 mutex_enter(&lfs_lock);
1421 if (ip->i_flags & IN_PAGING) {
1422 log(LOG_WARNING, "%s: reclaimed vnode is IN_PAGING\n",
1423 lfs_sb_getfsmnt(fs));
1424 ip->i_flags &= ~IN_PAGING;
1425 TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
1426 }
1427 if (vp->v_uflag & VU_DIROP) {
1428 panic("reclaimed vnode is VU_DIROP");
1429 vp->v_uflag &= ~VU_DIROP;
1430 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
1431 }
1432 mutex_exit(&lfs_lock);
1433
1434 pool_put(&lfs_dinode_pool, ip->i_din);
1435 lfs_deregister_all(vp);
1436 pool_put(&lfs_inoext_pool, ip->inode_ext.lfs);
1437 ip->inode_ext.lfs = NULL;
1438 genfs_node_destroy(vp);
1439 pool_put(&lfs_inode_pool, vp->v_data);
1440 vp->v_data = NULL;
1441 return (0);
1442}
1443
/*
 * Read a block from a storage device.
 *
 * Calculate the logical to physical mapping if not done already,
 * then call the device strategy routine.
 *
 * In order to avoid reading blocks that are in the process of being
 * written by the cleaner---and hence are not mutexed by the normal
 * buffer cache / page cache mechanisms---check for collisions before
 * reading.
 *
 * We inline ulfs_strategy to make sure that the VOP_BMAP occurs *before*
 * the active cleaner test.
 *
 * Returns the VOP_BMAP error (after completing the buffer with that
 * error), 0 for a block that is not on disk, or otherwise the result
 * of VOP_STRATEGY on the underlying device vnode.
 *
 * XXX This code assumes that lfs_markv makes synchronous checkpoints.
 */
int
lfs_strategy(void *v)
{
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct buf *bp;
	struct lfs *fs;
	struct vnode *vp;
	struct inode *ip;
	daddr_t tbn;
	/* Upper bound on "clean2" timed sleeps before we give up waiting. */
#define MAXLOOP 25
	int i, sn, error, slept, loopcount;

	bp = ap->a_bp;
	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_lfs;

	/* lfs uses its strategy routine only for read */
	KASSERT(bp->b_flags & B_READ);

	if (vp->v_type == VBLK || vp->v_type == VCHR)
		panic("lfs_strategy: spec");
	KASSERT(bp->b_bcount != 0);
	/* b_blkno == b_lblkno is treated as "not yet mapped". */
	if (bp->b_blkno == bp->b_lblkno) {
		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno,
		    NULL);
		if (error) {
			/* Complete the buffer with the error; nothing was read. */
			bp->b_error = error;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			return (error);
		}
		if ((long)bp->b_blkno == -1) /* no valid data */
			clrbuf(bp);
	}
	if ((long)bp->b_blkno < 0) { /* block is not on disk */
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/*
	 * Wait until the block no longer falls inside an interval being
	 * cleaned.  "slept" starts at 1 so the loop body runs at least once
	 * when the seglock is held; each pass that had to sleep rescans
	 * the interval list from the start.
	 */
	slept = 1;
	loopcount = 0;
	mutex_enter(&lfs_lock);
	while (slept && fs->lfs_seglock) {
		mutex_exit(&lfs_lock);
		/*
		 * Look through list of intervals.
		 * There will only be intervals to look through
		 * if the cleaner holds the seglock.
		 * Since the cleaner is synchronous, we can trust
		 * the list of intervals to be current.
		 */
		tbn = LFS_DBTOFSB(fs, bp->b_blkno);
		sn = lfs_dtosn(fs, tbn);
		slept = 0;
		for (i = 0; i < fs->lfs_cleanind; i++) {
			/* Same segment as, and not below, a cleaning interval? */
			if (sn == lfs_dtosn(fs, fs->lfs_cleanint[i]) &&
			    tbn >= fs->lfs_cleanint[i]) {
				DLOG((DLOG_CLEAN,
				      "lfs_strategy: ino %d lbn %" PRId64
				      " ind %d sn %d fsb %" PRIx64
				      " given sn %d fsb %" PRIx64 "\n",
				      ip->i_number, bp->b_lblkno, i,
				      lfs_dtosn(fs, fs->lfs_cleanint[i]),
				      fs->lfs_cleanint[i], sn, tbn));
				DLOG((DLOG_CLEAN,
				      "lfs_strategy: sleeping on ino %d lbn %"
				      PRId64 "\n", ip->i_number, bp->b_lblkno));
				mutex_enter(&lfs_lock);
				if (LFS_SEGLOCK_HELD(fs) && fs->lfs_iocount) {
					/*
					 * Cleaner can't wait for itself.
					 * Instead, wait for the blocks
					 * to be written to disk.
					 * XXX we need pribio in the test
					 * XXX here.
					 *
					 * The sleep is timed (hz/10 + 1) and
					 * counted so the MAXLOOP check below
					 * can bound it.  PNORELOCK: lfs_lock
					 * is retaken after the for loop, not
					 * by mtsleep.
					 */
					mtsleep(&fs->lfs_iocount,
						(PRIBIO + 1) | PNORELOCK,
						"clean2", hz/10 + 1,
						&lfs_lock);
					slept = 1;
					++loopcount;
					break;
				} else if (fs->lfs_seglock) {
					/*
					 * Someone else holds the seglock:
					 * sleep, with no timeout, on
					 * lfs_seglock.  PNORELOCK as above.
					 */
					mtsleep(&fs->lfs_seglock,
						(PRIBIO + 1) | PNORELOCK,
						"clean1", 0,
						&lfs_lock);
					slept = 1;
					break;
				}
				/* Seglock went away meanwhile; keep scanning. */
				mutex_exit(&lfs_lock);
			}
		}
		/* Every path out of the for loop arrives here unlocked. */
		mutex_enter(&lfs_lock);
		if (loopcount > MAXLOOP) {
			printf("lfs_strategy: breaking out of clean2 loop\n");
			break;
		}
	}
	mutex_exit(&lfs_lock);

	/* Hand the (now mapped) buffer to the underlying device vnode. */
	vp = ip->i_devvp;
	return VOP_STRATEGY(vp, bp);
}
1570
/*
 * Inline lfs_segwrite/lfs_writevnodes, but just for dirops.
 * Technically this is a checkpoint (the on-disk state is valid)
 * even though we are leaving out all the file data.
 *
 * Returns EROFS on a read-only filesystem and 0 if the dirop chain is
 * empty.  Otherwise returns the value left in "error" by the walk
 * below: EAGAIN if a write failed while the segment was flagged
 * SEGM_SINGLE, else the result of the last lfs_writeinode() call.
 */
int
lfs_flush_dirops(struct lfs *fs)
{
	struct inode *ip, *nip;
	struct vnode *vp;
	extern int lfs_dostats; /* XXX this does not belong here */
	struct segment *sp;
	SEGSUM *ssp;
	int flags = 0;
	int error = 0;

	ASSERT_MAYBE_SEGLOCK(fs);
	KASSERT(fs->lfs_nadirop == 0);

	if (fs->lfs_ronly)
		return EROFS;

	/* Nothing to do if no vnodes are queued on the dirop chain. */
	mutex_enter(&lfs_lock);
	if (TAILQ_FIRST(&fs->lfs_dchainhd) == NULL) {
		mutex_exit(&lfs_lock);
		return 0;
	} else
		mutex_exit(&lfs_lock);

	if (lfs_dostats)
		++lfs_stats.flush_invoked;

	lfs_imtime(fs);
	lfs_seglock(fs, flags);
	sp = fs->lfs_sp;

	/*
	 * lfs_writevnodes, optimized to get dirops out of the way.
	 * Only write dirops, and don't flush files' pages, only
	 * blocks from the directories.
	 *
	 * We don't need to vref these files because they are
	 * dirops and so hold an extra reference until the
	 * segunlock clears them of that status.
	 *
	 * We don't need to check for IN_ADIROP because we know that
	 * no dirops are active.
	 *
	 */
	/*
	 * Locking: lfs_lock is held at the top of every iteration (for
	 * the list traversal) and dropped while an inode is processed.
	 * Every "continue" and "break" retakes it first, so the loop
	 * always exits with lfs_lock held.
	 */
	mutex_enter(&lfs_lock);
	for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) {
		/* Fetch the successor before dropping the lock. */
		nip = TAILQ_NEXT(ip, i_lfs_dchain);
		mutex_exit(&lfs_lock);
		vp = ITOV(ip);
		mutex_enter(vp->v_interlock);

		KASSERT((ip->i_flag & IN_ADIROP) == 0);
		KASSERT(vp->v_uflag & VU_DIROP);
		KASSERT(vdead_check(vp, VDEAD_NOWAIT) == 0);

		/*
		 * All writes to directories come from dirops; all
		 * writes to files' direct blocks go through the page
		 * cache, which we're not touching.  Reads to files
		 * and/or directories will not be affected by writing
		 * directory blocks inodes and file inodes.  So we don't
		 * really need to lock.
		 */
		/* Skip vnodes that vdead_check() reports as dead. */
		if (vdead_check(vp, VDEAD_NOWAIT) != 0) {
			mutex_exit(vp->v_interlock);
			mutex_enter(&lfs_lock);
			continue;
		}
		mutex_exit(vp->v_interlock);
		/* XXX see below
		 * waslocked = VOP_ISLOCKED(vp);
		 */
		/*
		 * Write blocks for anything that is not a regular file and
		 * is either marked modified or still has buffers/pages.
		 */
		if (vp->v_type != VREG &&
		    ((ip->i_flag & IN_ALLMOD) || !VPISEMPTY(vp))) {
			error = lfs_writefile(fs, sp, vp);
			/* Blocks remain but no flag says so: mark modified. */
			if (!VPISEMPTY(vp) && !WRITEINPROG(vp) &&
			    !(ip->i_flag & IN_ALLMOD)) {
			    	mutex_enter(&lfs_lock);
				LFS_SET_UINO(ip, IN_MODIFIED);
			    	mutex_exit(&lfs_lock);
			}
			if (error && (sp->seg_flags & SEGM_SINGLE)) {
				mutex_enter(&lfs_lock);
				error = EAGAIN;
				break;
			}
		}
		KDASSERT(ip->i_number != LFS_IFILE_INUM);
		/* Note: this overwrites any error from lfs_writefile above. */
		error = lfs_writeinode(fs, sp, ip);
		mutex_enter(&lfs_lock);
		if (error && (sp->seg_flags & SEGM_SINGLE)) {
			error = EAGAIN;
			break;
		}

		/*
		 * We might need to update these inodes again,
		 * for example, if they have data blocks to write.
		 * Make sure that after this flush, they are still
		 * marked IN_MODIFIED so that we don't forget to
		 * write them.
		 */
		/* XXX only for non-directories? --KS */
		LFS_SET_UINO(ip, IN_MODIFIED);
	}
	mutex_exit(&lfs_lock);
	/* We've written all the dirops there are */
	/* Clear SS_CONT in the segment summary, then write the segment. */
	ssp = (SEGSUM *)sp->segsum;
	lfs_ss_setflags(fs, ssp, lfs_ss_getflags(fs, ssp) & ~(SS_CONT));
	lfs_finalize_fs_seguse(fs);
	(void) lfs_writeseg(fs, sp);
	lfs_segunlock(fs);

	return error;
}
1691
/*
 * Flush all vnodes for which the pagedaemon has requested pageouts.
 * Skip over any files that are marked VU_DIROP (since lfs_flush_dirop()
 * has just run, this would be an error).  If we have to skip a vnode
 * for any reason, just skip it; if we have to wait for the cleaner,
 * abort.  The writer daemon will call us again later.
 *
 * Returns EROFS on a read-only filesystem, any error from
 * lfs_flush_dirops(), and 0 otherwise (errors from writing individual
 * files are not propagated).
 */
int
lfs_flush_pchain(struct lfs *fs)
{
	struct inode *ip, *nip;
	struct vnode *vp;
	extern int lfs_dostats;
	struct segment *sp;
	int error, error2;

	ASSERT_NO_SEGLOCK(fs);

	if (fs->lfs_ronly)
		return EROFS;

	/* Nothing to do if the paging chain is empty. */
	mutex_enter(&lfs_lock);
	if (TAILQ_FIRST(&fs->lfs_pchainhd) == NULL) {
		mutex_exit(&lfs_lock);
		return 0;
	} else
		mutex_exit(&lfs_lock);

	/* Get dirops out of the way */
	if ((error = lfs_flush_dirops(fs)) != 0)
		return error;

	if (lfs_dostats)
		++lfs_stats.flush_invoked;

	/*
	 * Inline lfs_segwrite/lfs_writevnodes, but just for pageouts.
	 */
	lfs_imtime(fs);
	lfs_seglock(fs, 0);
	sp = fs->lfs_sp;

	/*
	 * lfs_writevnodes, optimized to clear pageout requests.
	 * Only write non-dirop files that are in the pageout queue.
	 * We're very conservative about what we write; we want to be
	 * fast and async.
	 */
	/*
	 * Locking: lfs_lock is held at the top of every iteration and
	 * whenever we jump back to "top"; it is dropped while a vnode is
	 * being fetched, locked and written.
	 */
	mutex_enter(&lfs_lock);
    top:
	for (ip = TAILQ_FIRST(&fs->lfs_pchainhd); ip != NULL; ip = nip) {
		/* Capture what vcache_get() needs before dropping lfs_lock. */
		struct mount *mp = ITOV(ip)->v_mount;
		ino_t ino = ip->i_number;

		nip = TAILQ_NEXT(ip, i_lfs_pchain);

		/* No longer flagged as paging: rescan from the head. */
		if (!(ip->i_flags & IN_PAGING))
			goto top;

		mutex_exit(&lfs_lock);
		/* Take our own reference; skip the vnode if we can't. */
		if (vcache_get(mp, &ino, sizeof(ino), &vp) != 0) {
			mutex_enter(&lfs_lock);
			continue;
		};
		/* Never block on the vnode lock here; just skip if busy. */
		if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
			vrele(vp);
			mutex_enter(&lfs_lock);
			continue;
		}
		ip = VTOI(vp);
		mutex_enter(&lfs_lock);
		/*
		 * Recheck under lfs_lock: only regular, non-dirop files
		 * that are still flagged IN_PAGING get written.  Otherwise
		 * drop the vnode and rescan from the head.
		 */
		if ((vp->v_uflag & VU_DIROP) != 0 || vp->v_type != VREG ||
		    !(ip->i_flags & IN_PAGING)) {
			mutex_exit(&lfs_lock);
			vput(vp);
			mutex_enter(&lfs_lock);
			goto top;
		}
		mutex_exit(&lfs_lock);

		error = lfs_writefile(fs, sp, vp);
		/* Blocks remain but no flag says so: mark modified. */
		if (!VPISEMPTY(vp) && !WRITEINPROG(vp) &&
		    !(ip->i_flag & IN_ALLMOD)) {
		    	mutex_enter(&lfs_lock);
			LFS_SET_UINO(ip, IN_MODIFIED);
		    	mutex_exit(&lfs_lock);
		}
		KDASSERT(ip->i_number != LFS_IFILE_INUM);
		error2 = lfs_writeinode(fs, sp, ip);

		VOP_UNLOCK(vp);
		vrele(vp);

		/* EAGAIN from either write: flush the segment and stop. */
		if (error == EAGAIN || error2 == EAGAIN) {
			lfs_writeseg(fs, sp);
			mutex_enter(&lfs_lock);
			break;
		}
		mutex_enter(&lfs_lock);
	}
	mutex_exit(&lfs_lock);
	(void) lfs_writeseg(fs, sp);
	lfs_segunlock(fs);

	return 0;
}
1798
1799/*
1800 * Conversion for compat.
1801 */
1802static void
1803block_info_from_70(BLOCK_INFO *bi, const BLOCK_INFO_70 *bi70)
1804{
1805 bi->bi_inode = bi70->bi_inode;
1806 bi->bi_lbn = bi70->bi_lbn;
1807 bi->bi_daddr = bi70->bi_daddr;
1808 bi->bi_segcreate = bi70->bi_segcreate;
1809 bi->bi_version = bi70->bi_version;
1810 bi->bi_bp = bi70->bi_bp;
1811 bi->bi_size = bi70->bi_size;
1812}
1813
1814static void
1815block_info_to_70(BLOCK_INFO_70 *bi70, const BLOCK_INFO *bi)
1816{
1817 bi70->bi_inode = bi->bi_inode;
1818 bi70->bi_lbn = bi->bi_lbn;
1819 bi70->bi_daddr = bi->bi_daddr;
1820 bi70->bi_segcreate = bi->bi_segcreate;
1821 bi70->bi_version = bi->bi_version;
1822 bi70->bi_bp = bi->bi_bp;
1823 bi70->bi_size = bi->bi_size;
1824}
1825
1826/*
1827 * Provide a fcntl interface to sys_lfs_{segwait,bmapv,markv}.
1828 */
1829int
1830lfs_fcntl(void *v)
1831{
1832 struct vop_fcntl_args /* {
1833 struct vnode *a_vp;
1834 u_int a_command;
1835 void * a_data;
1836 int a_fflag;
1837 kauth_cred_t a_cred;
1838 } */ *ap = v;
1839 struct timeval tv;
1840 struct timeval *tvp;
1841 BLOCK_INFO *blkiov;
1842 BLOCK_INFO_70 *blkiov70;
1843 CLEANERINFO *cip;
1844 SEGUSE *sup;
1845 int blkcnt, i, error;
1846 size_t fh_size;
1847 struct lfs_fcntl_markv blkvp;
1848 struct lfs_fcntl_markv_70 blkvp70;
1849 struct lwp *l;
1850 fsid_t *fsidp;
1851 struct lfs *fs;
1852 struct buf *bp;
1853 fhandle_t *fhp;
1854 daddr_t off;
1855 int oclean;
1856
1857 /* Only respect LFS fcntls on fs root or Ifile */
1858 if (VTOI(ap->a_vp)->i_number != ULFS_ROOTINO &&
1859 VTOI(ap->a_vp)->i_number != LFS_IFILE_INUM) {
1860 return ulfs_fcntl(v);
1861 }
1862
1863 /* Avoid locking a draining lock */
1864 if (ap->a_vp->v_mount->mnt_iflag & IMNT_UNMOUNT) {
1865 return ESHUTDOWN;
1866 }
1867
1868 /* LFS control and monitoring fcntls are available only to root */
1869 l = curlwp;
1870 if (((ap->a_command & 0xff00) >> 8) == 'L' &&
1871 (error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS,
1872 KAUTH_REQ_SYSTEM_LFS_FCNTL, NULL, NULL, NULL)) != 0)
1873 return (error);
1874
1875 fs = VTOI(ap->a_vp)->i_lfs;
1876 fsidp = &ap->a_vp->v_mount->mnt_stat.f_fsidx;
1877
1878 error = 0;
1879 switch ((int)ap->a_command) {
1880 case LFCNSEGWAITALL_COMPAT_50:
1881 case LFCNSEGWAITALL_COMPAT:
1882 fsidp = NULL;
1883 /* FALLTHROUGH */
1884 case LFCNSEGWAIT_COMPAT_50:
1885 case LFCNSEGWAIT_COMPAT:
1886 {
1887 struct timeval50 *tvp50
1888 = (struct timeval50 *)ap->a_data;
1889 timeval50_to_timeval(tvp50, &tv);
1890 tvp = &tv;
1891 }
1892 goto segwait_common;
1893 case LFCNSEGWAITALL:
1894 fsidp = NULL;
1895 /* FALLTHROUGH */
1896 case LFCNSEGWAIT:
1897 tvp = (struct timeval *)ap->a_data;
1898segwait_common:
1899 mutex_enter(&lfs_lock);
1900 ++fs->lfs_sleepers;
1901 mutex_exit(&lfs_lock);
1902
1903 error = lfs_segwait(fsidp, tvp);
1904
1905 mutex_enter(&lfs_lock);
1906 if (--fs->lfs_sleepers == 0)
1907 wakeup(&fs->lfs_sleepers);
1908 mutex_exit(&lfs_lock);
1909 return error;
1910
1911 case LFCNBMAPV_COMPAT_70:
1912 case LFCNMARKV_COMPAT_70:
1913 blkvp70 = *(struct lfs_fcntl_markv_70 *)ap->a_data;
1914
1915 blkcnt = blkvp70.blkcnt;
1916 if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
1917 return (EINVAL);
1918 blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
1919 blkiov70 = lfs_malloc(fs, sizeof(BLOCK_INFO_70), LFS_NB_BLKIOV);
1920 for (i = 0; i < blkcnt; i++) {
1921 error = copyin(&blkvp70.blkiov[i], blkiov70,
1922 sizeof(*blkiov70));
1923 if (error) {
1924 lfs_free(fs, blkiov70, LFS_NB_BLKIOV);
1925 lfs_free(fs, blkiov, LFS_NB_BLKIOV);
1926 return error;
1927 }
1928 block_info_from_70(&blkiov[i], blkiov70);
1929 }
1930
1931 mutex_enter(&lfs_lock);
1932 ++fs->lfs_sleepers;
1933 mutex_exit(&lfs_lock);
1934 if (ap->a_command == LFCNBMAPV)
1935 error = lfs_bmapv(l, fsidp, blkiov, blkcnt);
1936 else /* LFCNMARKV */
1937 error = lfs_markv(l, fsidp, blkiov, blkcnt);
1938 if (error == 0) {
1939 for (i = 0; i < blkcnt; i++) {
1940 block_info_to_70(blkiov70, &blkiov[i]);
1941 error = copyout(blkiov70, &blkvp70.blkiov[i],
1942 sizeof(*blkiov70));
1943 if (error) {
1944 break;
1945 }
1946 }
1947 }
1948 mutex_enter(&lfs_lock);
1949 if (--fs->lfs_sleepers == 0)
1950 wakeup(&fs->lfs_sleepers);
1951 mutex_exit(&lfs_lock);
1952 lfs_free(fs, blkiov, LFS_NB_BLKIOV);
1953 return error;
1954
1955 case LFCNBMAPV:
1956 case LFCNMARKV:
1957 blkvp = *(struct lfs_fcntl_markv *)ap->a_data;
1958
1959 blkcnt = blkvp.blkcnt;
1960 if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
1961 return (EINVAL);
1962 blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
1963 if ((error = copyin(blkvp.blkiov, blkiov,
1964 blkcnt * sizeof(BLOCK_INFO))) != 0) {
1965 lfs_free(fs, blkiov, LFS_NB_BLKIOV);
1966 return error;
1967 }
1968
1969 mutex_enter(&lfs_lock);
1970 ++fs->lfs_sleepers;
1971 mutex_exit(&lfs_lock);
1972 if (ap->a_command == LFCNBMAPV)
1973 error = lfs_bmapv(l, fsidp, blkiov, blkcnt);
1974 else /* LFCNMARKV */
1975 error = lfs_markv(l, fsidp, blkiov, blkcnt);
1976 if (error == 0)
1977 error = copyout(blkiov, blkvp.blkiov,
1978 blkcnt * sizeof(BLOCK_INFO));
1979 mutex_enter(&lfs_lock);
1980 if (--fs->lfs_sleepers == 0)
1981 wakeup(&fs->lfs_sleepers);
1982 mutex_exit(&lfs_lock);
1983 lfs_free(fs, blkiov, LFS_NB_BLKIOV);
1984 return error;
1985
1986 case LFCNRECLAIM:
1987 /*
1988 * Flush dirops and write Ifile, allowing empty segments
1989 * to be immediately reclaimed.
1990 */
1991 lfs_writer_enter(fs, "pndirop");
1992 off = lfs_sb_getoffset(fs);
1993 lfs_seglock(fs, SEGM_FORCE_CKP | SEGM_CKP);
1994 lfs_flush_dirops(fs);
1995 LFS_CLEANERINFO(cip, fs, bp);
1996 oclean = lfs_ci_getclean(fs, cip);
1997 LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);
1998 lfs_segwrite(ap->a_vp->v_mount, SEGM_FORCE_CKP);
1999 fs->lfs_sp->seg_flags |= SEGM_PROT;
2000 lfs_segunlock(fs);
2001 lfs_writer_leave(fs);
2002
2003#ifdef DEBUG
2004 LFS_CLEANERINFO(cip, fs, bp);
2005 DLOG((DLOG_CLEAN, "lfs_fcntl: reclaim wrote %" PRId64
2006 " blocks, cleaned %" PRId32 " segments (activesb %d)\n",
2007 lfs_sb_getoffset(fs) - off,
2008 lfs_ci_getclean(fs, cip) - oclean,
2009 fs->lfs_activesb));
2010 LFS_SYNC_CLEANERINFO(cip, fs, bp, 0);
2011#else
2012 __USE(oclean);
2013 __USE(off);
2014#endif
2015
2016 return 0;
2017
2018 case LFCNIFILEFH_COMPAT:
2019 /* Return the filehandle of the Ifile */
2020 if ((error = kauth_authorize_system(l->l_cred,
2021 KAUTH_SYSTEM_FILEHANDLE, 0, NULL, NULL, NULL)) != 0)
2022 return (error);
2023 fhp = (struct fhandle *)ap->a_data;
2024 fhp->fh_fsid = *fsidp;
2025 fh_size = 16; /* former VFS_MAXFIDSIZ */
2026 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size);
2027
2028 case LFCNIFILEFH_COMPAT2:
2029 case LFCNIFILEFH:
2030 /* Return the filehandle of the Ifile */
2031 fhp = (struct fhandle *)ap->a_data;
2032 fhp->fh_fsid = *fsidp;
2033 fh_size = sizeof(struct lfs_fhandle) -
2034 offsetof(fhandle_t, fh_fid);
2035 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size);
2036
2037 case LFCNREWIND:
2038 /* Move lfs_offset to the lowest-numbered segment */
2039 return lfs_rewind(fs, *(int *)ap->a_data);
2040
2041 case LFCNINVAL:
2042 /* Mark a segment SEGUSE_INVAL */
2043 LFS_SEGENTRY(sup, fs, *(int *)ap->a_data, bp);
2044 if (sup->su_nbytes > 0) {
2045 brelse(bp, 0);
2046 lfs_unset_inval_all(fs);
2047 return EBUSY;
2048 }
2049 sup->su_flags |= SEGUSE_INVAL;
2050 VOP_BWRITE(bp->b_vp, bp);
2051 return 0;
2052
2053 case LFCNRESIZE:
2054 /* Resize the filesystem */
2055 return lfs_resize_fs(fs, *(int *)ap->a_data);
2056
2057 case LFCNWRAPSTOP:
2058 case LFCNWRAPSTOP_COMPAT:
2059 /*
2060 * Hold lfs_newseg at segment 0; if requested, sleep until
2061 * the filesystem wraps around. To support external agents
2062 * (dump, fsck-based regression test) that need to look at
2063 * a snapshot of the filesystem, without necessarily
2064 * requiring that all fs activity stops.
2065 */
2066 if (fs->lfs_stoplwp == curlwp)
2067 return EALREADY;
2068
2069 mutex_enter(&lfs_lock);
2070 while (fs->lfs_stoplwp != NULL)
2071 cv_wait(&fs->lfs_stopcv, &lfs_lock);
2072 fs->lfs_stoplwp = curlwp;
2073 if (fs->lfs_nowrap == 0)
2074 log(LOG_NOTICE, "%s: disabled log wrap\n",
2075 lfs_sb_getfsmnt(fs));
2076 ++fs->lfs_nowrap;
2077 if (*(int *)ap->a_data == 1
2078 || ap->a_command == LFCNWRAPSTOP_COMPAT) {
2079 log(LOG_NOTICE, "LFCNSTOPWRAP waiting for log wrap\n");
2080 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER,
2081 "segwrap", 0, &lfs_lock);
2082 log(LOG_NOTICE, "LFCNSTOPWRAP done waiting\n");
2083 if (error) {
2084 lfs_wrapgo(fs, VTOI(ap->a_vp), 0);
2085 }
2086 }
2087 mutex_exit(&lfs_lock);
2088 return 0;
2089
2090 case LFCNWRAPGO:
2091 case LFCNWRAPGO_COMPAT:
2092 /*
2093 * Having done its work, the agent wakes up the writer.
2094 * If the argument is 1, it sleeps until a new segment
2095 * is selected.
2096 */
2097 mutex_enter(&lfs_lock);
2098 error = lfs_wrapgo(fs, VTOI(ap->a_vp),
2099 ap->a_command == LFCNWRAPGO_COMPAT ? 1 :
2100 *((int *)ap->a_data));
2101 mutex_exit(&lfs_lock);
2102 return error;
2103
2104 case LFCNWRAPPASS:
2105 if ((VTOI(ap->a_vp)->i_lfs_iflags & LFSI_WRAPWAIT))
2106 return EALREADY;
2107 mutex_enter(&lfs_lock);
2108 if (fs->lfs_stoplwp != curlwp) {
2109 mutex_exit(&lfs_lock);
2110 return EALREADY;
2111 }
2112 if (fs->lfs_nowrap == 0) {
2113 mutex_exit(&lfs_lock);
2114 return EBUSY;
2115 }
2116 fs->lfs_wrappass = 1;
2117 wakeup(&fs->lfs_wrappass);
2118 /* Wait for the log to wrap, if asked */
2119 if (*(int *)ap->a_data) {
2120 vref(ap->a_vp);
2121 VTOI(ap->a_vp)->i_lfs_iflags |= LFSI_WRAPWAIT;
2122 log(LOG_NOTICE, "LFCNPASS waiting for log wrap\n");
2123 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER,
2124 "segwrap", 0, &lfs_lock);
2125 log(LOG_NOTICE, "LFCNPASS done waiting\n");
2126 VTOI(ap->a_vp)->i_lfs_iflags &= ~LFSI_WRAPWAIT;
2127 vrele(ap->a_vp);
2128 }
2129 mutex_exit(&lfs_lock);
2130 return error;
2131
2132 case LFCNWRAPSTATUS:
2133 mutex_enter(&lfs_lock);
2134 *(int *)ap->a_data = fs->lfs_wrapstatus;
2135 mutex_exit(&lfs_lock);
2136 return 0;
2137
2138 default:
2139 return ulfs_fcntl(v);
2140 }
2141 return 0;
2142}
2143
2144/*
2145 * Return the last logical file offset that should be written for this file
2146 * if we're doing a write that ends at "size". If writing, we need to know
2147 * about sizes on disk, i.e. fragments if there are any; if reading, we need
2148 * to know about entire blocks.
2149 */
2150void
2151lfs_gop_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
2152{
2153 struct inode *ip = VTOI(vp);
2154 struct lfs *fs = ip->i_lfs;
2155 daddr_t olbn, nlbn;
2156
2157 olbn = lfs_lblkno(fs, ip->i_size);
2158 nlbn = lfs_lblkno(fs, size);
2159 if (!(flags & GOP_SIZE_MEM) && nlbn < ULFS_NDADDR && olbn <= nlbn) {
2160 *eobp = lfs_fragroundup(fs, size);
2161 } else {
2162 *eobp = lfs_blkroundup(fs, size);
2163 }
2164}
2165
#ifdef DEBUG
void lfs_dump_vop(void *);

/*
 * Debugging helper, only built under DEBUG.  Interprets "v" as a putpages
 * argument block, prints the vnode it names (when DDB is also configured)
 * and then dumps that vnode's dinode via lfs_dump_dinode().
 */
void
lfs_dump_vop(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *dip = VTOI(vp);
	struct lfs *lfsp = dip->i_lfs;

#ifdef DDB
	/* The vnode printer is only used when the kernel debugger is built. */
	vfs_vnode_print(vp, 0, printf);
#endif
	lfs_dump_dinode(lfsp, dip->i_din);
}
#endif
2188
2189int
2190lfs_mmap(void *v)
2191{
2192 struct vop_mmap_args /* {
2193 const struct vnodeop_desc *a_desc;
2194 struct vnode *a_vp;
2195 vm_prot_t a_prot;
2196 kauth_cred_t a_cred;
2197 } */ *ap = v;
2198
2199 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM)
2200 return EOPNOTSUPP;
2201 return ulfs_mmap(v);
2202}
2203
2204static int
2205lfs_openextattr(void *v)
2206{
2207 struct vop_openextattr_args /* {
2208 struct vnode *a_vp;
2209 kauth_cred_t a_cred;
2210 struct proc *a_p;
2211 } */ *ap = v;
2212 struct inode *ip = VTOI(ap->a_vp);
2213 struct ulfsmount *ump = ip->i_ump;
2214 //struct lfs *fs = ip->i_lfs;
2215
2216 /* Not supported for ULFS1 file systems. */
2217 if (ump->um_fstype == ULFS1)
2218 return (EOPNOTSUPP);
2219
2220 /* XXX Not implemented for ULFS2 file systems. */
2221 return (EOPNOTSUPP);
2222}
2223
2224static int
2225lfs_closeextattr(void *v)
2226{
2227 struct vop_closeextattr_args /* {
2228 struct vnode *a_vp;
2229 int a_commit;
2230 kauth_cred_t a_cred;
2231 struct proc *a_p;
2232 } */ *ap = v;
2233 struct inode *ip = VTOI(ap->a_vp);
2234 struct ulfsmount *ump = ip->i_ump;
2235 //struct lfs *fs = ip->i_lfs;
2236
2237 /* Not supported for ULFS1 file systems. */
2238 if (ump->um_fstype == ULFS1)
2239 return (EOPNOTSUPP);
2240
2241 /* XXX Not implemented for ULFS2 file systems. */
2242 return (EOPNOTSUPP);
2243}
2244
2245static int
2246lfs_getextattr(void *v)
2247{
2248 struct vop_getextattr_args /* {
2249 struct vnode *a_vp;
2250 int a_attrnamespace;
2251 const char *a_name;
2252 struct uio *a_uio;
2253 size_t *a_size;
2254 kauth_cred_t a_cred;
2255 struct proc *a_p;
2256 } */ *ap = v;
2257 struct vnode *vp = ap->a_vp;
2258 struct inode *ip = VTOI(vp);
2259 struct ulfsmount *ump = ip->i_ump;
2260 //struct lfs *fs = ip->i_lfs;
2261 int error;
2262
2263 if (ump->um_fstype == ULFS1) {
2264#ifdef LFS_EXTATTR
2265 fstrans_start(vp->v_mount, FSTRANS_SHARED);
2266 error = ulfs_getextattr(ap);
2267 fstrans_done(vp->v_mount);
2268#else
2269 error = EOPNOTSUPP;
2270#endif
2271 return error;
2272 }
2273
2274 /* XXX Not implemented for ULFS2 file systems. */
2275 return (EOPNOTSUPP);
2276}
2277
2278static int
2279lfs_setextattr(void *v)
2280{
2281 struct vop_setextattr_args /* {
2282 struct vnode *a_vp;
2283 int a_attrnamespace;
2284 const char *a_name;
2285 struct uio *a_uio;
2286 kauth_cred_t a_cred;
2287 struct proc *a_p;
2288 } */ *ap = v;
2289 struct vnode *vp = ap->a_vp;
2290 struct inode *ip = VTOI(vp);
2291 struct ulfsmount *ump = ip->i_ump;
2292 //struct lfs *fs = ip->i_lfs;
2293 int error;
2294
2295 if (ump->um_fstype == ULFS1) {
2296#ifdef LFS_EXTATTR
2297 fstrans_start(vp->v_mount, FSTRANS_SHARED);
2298 error = ulfs_setextattr(ap);
2299 fstrans_done(vp->v_mount);
2300#else
2301 error = EOPNOTSUPP;
2302#endif
2303 return error;
2304 }
2305
2306 /* XXX Not implemented for ULFS2 file systems. */
2307 return (EOPNOTSUPP);
2308}
2309
2310static int
2311lfs_listextattr(void *v)
2312{
2313 struct vop_listextattr_args /* {
2314 struct vnode *a_vp;
2315 int a_attrnamespace;
2316 struct uio *a_uio;
2317 size_t *a_size;
2318 kauth_cred_t a_cred;
2319 struct proc *a_p;
2320 } */ *ap = v;
2321 struct vnode *vp = ap->a_vp;
2322 struct inode *ip = VTOI(vp);
2323 struct ulfsmount *ump = ip->i_ump;
2324 //struct lfs *fs = ip->i_lfs;
2325 int error;
2326
2327 if (ump->um_fstype == ULFS1) {
2328#ifdef LFS_EXTATTR
2329 fstrans_start(vp->v_mount, FSTRANS_SHARED);
2330 error = ulfs_listextattr(ap);
2331 fstrans_done(vp->v_mount);
2332#else
2333 error = EOPNOTSUPP;
2334#endif
2335 return error;
2336 }
2337
2338 /* XXX Not implemented for ULFS2 file systems. */
2339 return (EOPNOTSUPP);
2340}
2341
2342static int
2343lfs_deleteextattr(void *v)
2344{
2345 struct vop_deleteextattr_args /* {
2346 struct vnode *a_vp;
2347 int a_attrnamespace;
2348 kauth_cred_t a_cred;
2349 struct proc *a_p;
2350 } */ *ap = v;
2351 struct vnode *vp = ap->a_vp;
2352 struct inode *ip = VTOI(vp);
2353 struct ulfsmount *ump = ip->i_ump;
2354 //struct fs *fs = ip->i_lfs;
2355 int error;
2356
2357 if (ump->um_fstype == ULFS1) {
2358#ifdef LFS_EXTATTR
2359 fstrans_start(vp->v_mount, FSTRANS_SHARED);
2360 error = ulfs_deleteextattr(ap);
2361 fstrans_done(vp->v_mount);
2362#else
2363 error = EOPNOTSUPP;
2364#endif
2365 return error;
2366 }
2367
2368 /* XXX Not implemented for ULFS2 file systems. */
2369 return (EOPNOTSUPP);
2370}
2371
2372