1 | /* $NetBSD: ufs_readwrite.c,v 1.120 2015/04/12 22:48:38 riastradh Exp $ */ |
2 | |
3 | /*- |
4 | * Copyright (c) 1993 |
5 | * The Regents of the University of California. All rights reserved. |
6 | * |
7 | * Redistribution and use in source and binary forms, with or without |
8 | * modification, are permitted provided that the following conditions |
9 | * are met: |
10 | * 1. Redistributions of source code must retain the above copyright |
11 | * notice, this list of conditions and the following disclaimer. |
12 | * 2. Redistributions in binary form must reproduce the above copyright |
13 | * notice, this list of conditions and the following disclaimer in the |
14 | * documentation and/or other materials provided with the distribution. |
15 | * 3. Neither the name of the University nor the names of its contributors |
16 | * may be used to endorse or promote products derived from this software |
17 | * without specific prior written permission. |
18 | * |
19 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
29 | * SUCH DAMAGE. |
30 | * |
31 | * @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95 |
32 | */ |
33 | |
34 | #include <sys/cdefs.h> |
35 | __KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.120 2015/04/12 22:48:38 riastradh Exp $" ); |
36 | |
/*
 * Flavour selection.  The generic names used by the code in this file
 * (FS, I_FS, READ, WRITE, BUFRD, BUFWR and the ufs_* block helpers) are
 * mapped onto their FFS or LFS counterparts depending on whether
 * LFS_READWRITE is defined.
 */
#ifndef LFS_READWRITE

/* File system and inode member types. */
#define	FS			struct fs
#define	I_FS			i_fs

/* Entry point names and their printable forms. */
#define	READ			ffs_read
#define	READ_S			"ffs_read"
#define	WRITE			ffs_write
#define	WRITE_S			"ffs_write"
#define	BUFRD			ffs_bufrd
#define	BUFWR			ffs_bufwr

/* Block arithmetic helpers. */
#define	ufs_blkoff		ffs_blkoff
#define	ufs_blksize		ffs_blksize
#define	ufs_lblkno		ffs_lblkno
#define	ufs_lblktosize		ffs_lblktosize
#define	ufs_blkroundup		ffs_blkroundup

#else /* LFS_READWRITE */

/* File system and inode member types. */
#define	FS			struct lfs
#define	I_FS			i_lfs

/* Entry point names and their printable forms. */
#define	READ			lfs_read
#define	READ_S			"lfs_read"
#define	WRITE			lfs_write
#define	WRITE_S			"lfs_write"
#define	BUFRD			lfs_bufrd
#define	BUFWR			lfs_bufwr

/* Superblock member aliases. */
#define	fs_bsize		lfs_bsize
#define	fs_bmask		lfs_bmask

/*
 * The WAPBL hooks are stubbed out here: BEGIN evaluates to 0 and
 * END/UPDATE expand to empty statements.
 */
#define	UFS_WAPBL_BEGIN(mp)	0
#define	UFS_WAPBL_END(mp)	do { } while (0)
#define	UFS_WAPBL_UPDATE(vp, access, modify, flags)	do { } while (0)

/* Block arithmetic helpers. */
#define	ufs_blkoff		lfs_blkoff
#define	ufs_blksize		lfs_blksize
#define	ufs_lblkno		lfs_lblkno
#define	ufs_lblktosize		lfs_lblktosize
#define	ufs_blkroundup		lfs_blkroundup

#endif /* !LFS_READWRITE */
71 | |
72 | static int ufs_post_read_update(struct vnode *, int, int); |
73 | static int ufs_post_write_update(struct vnode *, struct uio *, int, |
74 | kauth_cred_t, off_t, int, int, int); |
75 | |
76 | /* |
77 | * Vnode op for reading. |
78 | */ |
79 | /* ARGSUSED */ |
80 | int |
81 | READ(void *v) |
82 | { |
83 | struct vop_read_args /* { |
84 | struct vnode *a_vp; |
85 | struct uio *a_uio; |
86 | int a_ioflag; |
87 | kauth_cred_t a_cred; |
88 | } */ *ap = v; |
89 | struct vnode *vp; |
90 | struct inode *ip; |
91 | struct uio *uio; |
92 | struct ufsmount *ump; |
93 | vsize_t bytelen; |
94 | int error, ioflag, advice; |
95 | |
96 | vp = ap->a_vp; |
97 | ip = VTOI(vp); |
98 | ump = ip->i_ump; |
99 | uio = ap->a_uio; |
100 | ioflag = ap->a_ioflag; |
101 | error = 0; |
102 | |
103 | KASSERT(uio->uio_rw == UIO_READ); |
104 | KASSERT(vp->v_type == VREG || vp->v_type == VDIR); |
105 | |
106 | /* XXX Eliminate me by refusing directory reads from userland. */ |
107 | if (vp->v_type == VDIR) |
108 | return BUFRD(vp, uio, ioflag, ap->a_cred); |
109 | #ifdef LFS_READWRITE |
110 | /* XXX Eliminate me by using ufs_bufio in lfs. */ |
111 | if (vp->v_type == VREG && ip->i_number == LFS_IFILE_INUM) |
112 | return BUFRD(vp, uio, ioflag, ap->a_cred); |
113 | #endif |
114 | if ((u_int64_t)uio->uio_offset > ump->um_maxfilesize) |
115 | return (EFBIG); |
116 | if (uio->uio_resid == 0) |
117 | return (0); |
118 | |
119 | #ifndef LFS_READWRITE |
120 | if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) == SF_SNAPSHOT) |
121 | return ffs_snapshot_read(vp, uio, ioflag); |
122 | #endif /* !LFS_READWRITE */ |
123 | |
124 | fstrans_start(vp->v_mount, FSTRANS_SHARED); |
125 | |
126 | if (uio->uio_offset >= ip->i_size) |
127 | goto out; |
128 | |
129 | KASSERT(vp->v_type == VREG); |
130 | advice = IO_ADV_DECODE(ap->a_ioflag); |
131 | while (uio->uio_resid > 0) { |
132 | if (ioflag & IO_DIRECT) { |
133 | genfs_directio(vp, uio, ioflag); |
134 | } |
135 | bytelen = MIN(ip->i_size - uio->uio_offset, uio->uio_resid); |
136 | if (bytelen == 0) |
137 | break; |
138 | error = ubc_uiomove(&vp->v_uobj, uio, bytelen, advice, |
139 | UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp)); |
140 | if (error) |
141 | break; |
142 | } |
143 | |
144 | out: |
145 | error = ufs_post_read_update(vp, ap->a_ioflag, error); |
146 | fstrans_done(vp->v_mount); |
147 | return (error); |
148 | } |
149 | |
150 | /* |
151 | * UFS op for reading via the buffer cache |
152 | */ |
153 | int |
154 | BUFRD(struct vnode *vp, struct uio *uio, int ioflag, kauth_cred_t cred) |
155 | { |
156 | struct inode *ip; |
157 | struct ufsmount *ump; |
158 | FS *fs; |
159 | struct buf *bp; |
160 | daddr_t lbn, nextlbn; |
161 | off_t bytesinfile; |
162 | long size, xfersize, blkoffset; |
163 | int error; |
164 | |
165 | KASSERT(VOP_ISLOCKED(vp)); |
166 | KASSERT(vp->v_type == VDIR || vp->v_type == VLNK); |
167 | KASSERT(uio->uio_rw == UIO_READ); |
168 | |
169 | ip = VTOI(vp); |
170 | ump = ip->i_ump; |
171 | fs = ip->I_FS; |
172 | error = 0; |
173 | |
174 | KASSERT(vp->v_type != VLNK || ip->i_size >= ump->um_maxsymlinklen); |
175 | KASSERT(vp->v_type != VLNK || ump->um_maxsymlinklen != 0 || |
176 | DIP(ip, blocks) == 0); |
177 | |
178 | if (uio->uio_offset > ump->um_maxfilesize) |
179 | return EFBIG; |
180 | if (uio->uio_resid == 0) |
181 | return 0; |
182 | |
183 | #ifndef LFS_READWRITE |
184 | KASSERT(!ISSET(ip->i_flags, (SF_SNAPSHOT | SF_SNAPINVAL))); |
185 | #endif |
186 | |
187 | fstrans_start(vp->v_mount, FSTRANS_SHARED); |
188 | |
189 | if (uio->uio_offset >= ip->i_size) |
190 | goto out; |
191 | |
192 | for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { |
193 | bytesinfile = ip->i_size - uio->uio_offset; |
194 | if (bytesinfile <= 0) |
195 | break; |
196 | lbn = ufs_lblkno(fs, uio->uio_offset); |
197 | nextlbn = lbn + 1; |
198 | size = ufs_blksize(fs, ip, lbn); |
199 | blkoffset = ufs_blkoff(fs, uio->uio_offset); |
200 | xfersize = MIN(MIN(fs->fs_bsize - blkoffset, uio->uio_resid), |
201 | bytesinfile); |
202 | |
203 | if (ufs_lblktosize(fs, nextlbn) >= ip->i_size) |
204 | error = bread(vp, lbn, size, 0, &bp); |
205 | else { |
206 | int nextsize = ufs_blksize(fs, ip, nextlbn); |
207 | error = breadn(vp, lbn, |
208 | size, &nextlbn, &nextsize, 1, 0, &bp); |
209 | } |
210 | if (error) |
211 | break; |
212 | |
213 | /* |
214 | * We should only get non-zero b_resid when an I/O error |
215 | * has occurred, which should cause us to break above. |
216 | * However, if the short read did not cause an error, |
217 | * then we want to ensure that we do not uiomove bad |
218 | * or uninitialized data. |
219 | */ |
220 | size -= bp->b_resid; |
221 | if (size < xfersize) { |
222 | if (size == 0) |
223 | break; |
224 | xfersize = size; |
225 | } |
226 | error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio); |
227 | if (error) |
228 | break; |
229 | brelse(bp, 0); |
230 | } |
231 | if (bp != NULL) |
232 | brelse(bp, 0); |
233 | |
234 | out: |
235 | error = ufs_post_read_update(vp, ioflag, error); |
236 | fstrans_done(vp->v_mount); |
237 | return (error); |
238 | } |
239 | |
240 | static int |
241 | ufs_post_read_update(struct vnode *vp, int ioflag, int oerror) |
242 | { |
243 | struct inode *ip = VTOI(vp); |
244 | int error = oerror; |
245 | |
246 | if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) { |
247 | ip->i_flag |= IN_ACCESS; |
248 | if ((ioflag & IO_SYNC) == IO_SYNC) { |
249 | error = UFS_WAPBL_BEGIN(vp->v_mount); |
250 | if (error) |
251 | goto out; |
252 | error = UFS_UPDATE(vp, NULL, NULL, UPDATE_WAIT); |
253 | UFS_WAPBL_END(vp->v_mount); |
254 | } |
255 | } |
256 | |
257 | out: |
258 | /* Read error overrides any inode update error. */ |
259 | if (oerror) |
260 | error = oerror; |
261 | return error; |
262 | } |
263 | |
264 | /* |
265 | * Vnode op for writing. |
266 | */ |
267 | int |
268 | WRITE(void *v) |
269 | { |
270 | struct vop_write_args /* { |
271 | struct vnode *a_vp; |
272 | struct uio *a_uio; |
273 | int a_ioflag; |
274 | kauth_cred_t a_cred; |
275 | } */ *ap = v; |
276 | struct vnode *vp; |
277 | struct uio *uio; |
278 | struct inode *ip; |
279 | FS *fs; |
280 | kauth_cred_t cred; |
281 | off_t osize, origoff, oldoff, preallocoff, endallocoff, nsize; |
282 | int blkoffset, error, flags, ioflag, resid; |
283 | int aflag; |
284 | int extended=0; |
285 | vsize_t bytelen; |
286 | bool async; |
287 | struct ufsmount *ump; |
288 | |
289 | cred = ap->a_cred; |
290 | ioflag = ap->a_ioflag; |
291 | uio = ap->a_uio; |
292 | vp = ap->a_vp; |
293 | ip = VTOI(vp); |
294 | ump = ip->i_ump; |
295 | |
296 | KASSERT(vp->v_size == ip->i_size); |
297 | KASSERT(uio->uio_rw == UIO_WRITE); |
298 | KASSERT(vp->v_type == VREG); |
299 | KASSERT(!ISSET(ioflag, IO_JOURNALLOCKED)); |
300 | UFS_WAPBL_JUNLOCK_ASSERT(vp->v_mount); |
301 | |
302 | if (ioflag & IO_APPEND) |
303 | uio->uio_offset = ip->i_size; |
304 | if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) |
305 | return (EPERM); |
306 | |
307 | fs = ip->I_FS; |
308 | if (uio->uio_offset < 0 || |
309 | (u_int64_t)uio->uio_offset + uio->uio_resid > ump->um_maxfilesize) |
310 | return (EFBIG); |
311 | #ifdef LFS_READWRITE |
312 | /* Disallow writes to the Ifile, even if noschg flag is removed */ |
313 | /* XXX can this go away when the Ifile is no longer in the namespace? */ |
314 | if (vp == fs->lfs_ivnode) |
315 | return (EPERM); |
316 | #endif |
317 | if (uio->uio_resid == 0) |
318 | return (0); |
319 | |
320 | fstrans_start(vp->v_mount, FSTRANS_SHARED); |
321 | |
322 | flags = ioflag & IO_SYNC ? B_SYNC : 0; |
323 | async = vp->v_mount->mnt_flag & MNT_ASYNC; |
324 | origoff = uio->uio_offset; |
325 | resid = uio->uio_resid; |
326 | osize = ip->i_size; |
327 | error = 0; |
328 | |
329 | KASSERT(vp->v_type == VREG); |
330 | |
331 | /* |
332 | * XXX The entire write operation must occur in a single WAPBL |
333 | * transaction because it may allocate disk blocks, if |
334 | * appending or filling holes, which is allowed to happen only |
335 | * if the write fully succeeds. |
336 | * |
337 | * If ubc_uiomove fails in the middle with EFAULT, we can clean |
338 | * up at the end with UFS_TRUNCATE. But if the power fails in |
339 | * the middle, there would be nobody to deallocate the blocks, |
340 | * without an fsck to globally analyze the file system. |
341 | * |
342 | * If the increasingly inaccurately named WAPBL were augmented |
343 | * with rollback records for block allocations, then we could |
344 | * split this into multiple transactions and commit the |
345 | * allocations in the last one. |
346 | * |
347 | * But WAPBL doesn't have that notion now, so we'll have to |
348 | * live with gigantic transactions and WAPBL tentacles in |
349 | * genfs_getpages/putpages to cope with the possibility that |
350 | * the transaction may or may not be locked on entry to the |
351 | * page cache. |
352 | * |
353 | * And even if we added that notion to WAPBL, it wouldn't help |
354 | * us get rid of the tentacles in genfs_getpages/putpages |
355 | * because we'd have to interoperate with old implementations |
356 | * that assume they can replay the log without fsck. |
357 | */ |
358 | error = UFS_WAPBL_BEGIN(vp->v_mount); |
359 | if (error) { |
360 | fstrans_done(vp->v_mount); |
361 | return error; |
362 | } |
363 | |
364 | #ifdef LFS_READWRITE |
365 | async = true; |
366 | lfs_availwait(fs, btofsb(fs, uio->uio_resid)); |
367 | lfs_check(vp, LFS_UNUSED_LBN, 0); |
368 | #endif /* !LFS_READWRITE */ |
369 | |
370 | preallocoff = round_page(ufs_blkroundup(fs, MAX(osize, uio->uio_offset))); |
371 | aflag = ioflag & IO_SYNC ? B_SYNC : 0; |
372 | nsize = MAX(osize, uio->uio_offset + uio->uio_resid); |
373 | endallocoff = nsize - ufs_blkoff(fs, nsize); |
374 | |
375 | /* |
376 | * if we're increasing the file size, deal with expanding |
377 | * the fragment if there is one. |
378 | */ |
379 | |
380 | if (nsize > osize && ufs_lblkno(fs, osize) < UFS_NDADDR && |
381 | ufs_lblkno(fs, osize) != ufs_lblkno(fs, nsize) && |
382 | ufs_blkroundup(fs, osize) != osize) { |
383 | off_t eob; |
384 | |
385 | eob = ufs_blkroundup(fs, osize); |
386 | uvm_vnp_setwritesize(vp, eob); |
387 | error = ufs_balloc_range(vp, osize, eob - osize, cred, aflag); |
388 | if (error) |
389 | goto out; |
390 | if (flags & B_SYNC) { |
391 | mutex_enter(vp->v_interlock); |
392 | VOP_PUTPAGES(vp, trunc_page(osize & fs->fs_bmask), |
393 | round_page(eob), |
394 | PGO_CLEANIT | PGO_SYNCIO | PGO_JOURNALLOCKED); |
395 | } |
396 | } |
397 | |
398 | while (uio->uio_resid > 0) { |
399 | int ubc_flags = UBC_WRITE; |
400 | bool overwrite; /* if we're overwrite a whole block */ |
401 | off_t newoff; |
402 | |
403 | if (ioflag & IO_DIRECT) { |
404 | genfs_directio(vp, uio, ioflag | IO_JOURNALLOCKED); |
405 | } |
406 | |
407 | oldoff = uio->uio_offset; |
408 | blkoffset = ufs_blkoff(fs, uio->uio_offset); |
409 | bytelen = MIN(fs->fs_bsize - blkoffset, uio->uio_resid); |
410 | if (bytelen == 0) { |
411 | break; |
412 | } |
413 | |
414 | /* |
415 | * if we're filling in a hole, allocate the blocks now and |
416 | * initialize the pages first. if we're extending the file, |
417 | * we can safely allocate blocks without initializing pages |
418 | * since the new blocks will be inaccessible until the write |
419 | * is complete. |
420 | */ |
421 | overwrite = uio->uio_offset >= preallocoff && |
422 | uio->uio_offset < endallocoff; |
423 | if (!overwrite && (vp->v_vflag & VV_MAPPED) == 0 && |
424 | ufs_blkoff(fs, uio->uio_offset) == 0 && |
425 | (uio->uio_offset & PAGE_MASK) == 0) { |
426 | vsize_t len; |
427 | |
428 | len = trunc_page(bytelen); |
429 | len -= ufs_blkoff(fs, len); |
430 | if (len > 0) { |
431 | overwrite = true; |
432 | bytelen = len; |
433 | } |
434 | } |
435 | |
436 | newoff = oldoff + bytelen; |
437 | if (vp->v_size < newoff) { |
438 | uvm_vnp_setwritesize(vp, newoff); |
439 | } |
440 | |
441 | if (!overwrite) { |
442 | error = ufs_balloc_range(vp, uio->uio_offset, bytelen, |
443 | cred, aflag); |
444 | if (error) |
445 | break; |
446 | } else { |
447 | genfs_node_wrlock(vp); |
448 | error = GOP_ALLOC(vp, uio->uio_offset, bytelen, |
449 | aflag, cred); |
450 | genfs_node_unlock(vp); |
451 | if (error) |
452 | break; |
453 | ubc_flags |= UBC_FAULTBUSY; |
454 | } |
455 | |
456 | /* |
457 | * copy the data. |
458 | */ |
459 | |
460 | error = ubc_uiomove(&vp->v_uobj, uio, bytelen, |
461 | IO_ADV_DECODE(ioflag), ubc_flags | UBC_UNMAP_FLAG(vp)); |
462 | |
463 | /* |
464 | * update UVM's notion of the size now that we've |
465 | * copied the data into the vnode's pages. |
466 | * |
467 | * we should update the size even when uiomove failed. |
468 | */ |
469 | |
470 | if (vp->v_size < newoff) { |
471 | uvm_vnp_setsize(vp, newoff); |
472 | extended = 1; |
473 | } |
474 | |
475 | if (error) |
476 | break; |
477 | |
478 | /* |
479 | * flush what we just wrote if necessary. |
480 | * XXXUBC simplistic async flushing. |
481 | */ |
482 | |
483 | #ifndef LFS_READWRITE |
484 | if (!async && oldoff >> 16 != uio->uio_offset >> 16) { |
485 | mutex_enter(vp->v_interlock); |
486 | error = VOP_PUTPAGES(vp, (oldoff >> 16) << 16, |
487 | (uio->uio_offset >> 16) << 16, |
488 | PGO_CLEANIT | PGO_JOURNALLOCKED | PGO_LAZY); |
489 | if (error) |
490 | break; |
491 | } |
492 | #endif |
493 | } |
494 | if (error == 0 && ioflag & IO_SYNC) { |
495 | mutex_enter(vp->v_interlock); |
496 | error = VOP_PUTPAGES(vp, trunc_page(origoff & fs->fs_bmask), |
497 | round_page(ufs_blkroundup(fs, uio->uio_offset)), |
498 | PGO_CLEANIT | PGO_SYNCIO | PGO_JOURNALLOCKED); |
499 | } |
500 | |
501 | out: |
502 | error = ufs_post_write_update(vp, uio, ioflag, cred, osize, resid, |
503 | extended, error); |
504 | UFS_WAPBL_END(vp->v_mount); |
505 | fstrans_done(vp->v_mount); |
506 | |
507 | return (error); |
508 | } |
509 | |
510 | /* |
511 | * UFS op for writing via the buffer cache |
512 | */ |
513 | int |
514 | BUFWR(struct vnode *vp, struct uio *uio, int ioflag, kauth_cred_t cred) |
515 | { |
516 | struct inode *ip; |
517 | struct ufsmount *ump; |
518 | FS *fs; |
519 | int flags; |
520 | struct buf *bp; |
521 | off_t osize; |
522 | int resid, xfersize, size, blkoffset; |
523 | daddr_t lbn; |
524 | int extended=0; |
525 | int error; |
526 | #ifdef LFS_READWRITE |
527 | bool need_unreserve = false; |
528 | #endif |
529 | |
530 | KASSERT(ISSET(ioflag, IO_NODELOCKED)); |
531 | KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); |
532 | KASSERT(vp->v_type == VDIR || vp->v_type == VLNK); |
533 | KASSERT(vp->v_type != VDIR || ISSET(ioflag, IO_SYNC)); |
534 | KASSERT(uio->uio_rw == UIO_WRITE); |
535 | KASSERT(ISSET(ioflag, IO_JOURNALLOCKED)); |
536 | UFS_WAPBL_JLOCK_ASSERT(vp->v_mount); |
537 | |
538 | ip = VTOI(vp); |
539 | ump = ip->i_ump; |
540 | fs = ip->I_FS; |
541 | |
542 | KASSERT(vp->v_size == ip->i_size); |
543 | |
544 | if (uio->uio_offset < 0 || |
545 | uio->uio_resid > ump->um_maxfilesize || |
546 | uio->uio_offset > (ump->um_maxfilesize - uio->uio_resid)) |
547 | return EFBIG; |
548 | #ifdef LFS_READWRITE |
549 | KASSERT(vp != fs->lfs_ivnode); |
550 | #endif |
551 | if (uio->uio_resid == 0) |
552 | return 0; |
553 | |
554 | fstrans_start(vp->v_mount, FSTRANS_SHARED); |
555 | |
556 | flags = ioflag & IO_SYNC ? B_SYNC : 0; |
557 | resid = uio->uio_resid; |
558 | osize = ip->i_size; |
559 | error = 0; |
560 | |
561 | KASSERT(vp->v_type != VREG); |
562 | |
563 | #ifdef LFS_READWRITE |
564 | lfs_availwait(fs, btofsb(fs, uio->uio_resid)); |
565 | lfs_check(vp, LFS_UNUSED_LBN, 0); |
566 | #endif /* !LFS_READWRITE */ |
567 | |
568 | /* XXX Should never have pages cached here. */ |
569 | KASSERT(vp->v_uobj.uo_npages == 0); |
570 | while (uio->uio_resid > 0) { |
571 | lbn = ufs_lblkno(fs, uio->uio_offset); |
572 | blkoffset = ufs_blkoff(fs, uio->uio_offset); |
573 | xfersize = MIN(fs->fs_bsize - blkoffset, uio->uio_resid); |
574 | if (fs->fs_bsize > xfersize) |
575 | flags |= B_CLRBUF; |
576 | else |
577 | flags &= ~B_CLRBUF; |
578 | |
579 | #ifdef LFS_READWRITE |
580 | error = lfs_reserve(fs, vp, NULL, |
581 | btofsb(fs, (UFS_NIADDR + 1) << fs->lfs_bshift)); |
582 | if (error) |
583 | break; |
584 | need_unreserve = true; |
585 | #endif |
586 | error = UFS_BALLOC(vp, uio->uio_offset, xfersize, cred, flags, |
587 | &bp); |
588 | |
589 | if (error) |
590 | break; |
591 | if (uio->uio_offset + xfersize > ip->i_size) { |
592 | ip->i_size = uio->uio_offset + xfersize; |
593 | DIP_ASSIGN(ip, size, ip->i_size); |
594 | uvm_vnp_setsize(vp, ip->i_size); |
595 | extended = 1; |
596 | } |
597 | size = ufs_blksize(fs, ip, lbn) - bp->b_resid; |
598 | if (xfersize > size) |
599 | xfersize = size; |
600 | |
601 | error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio); |
602 | |
603 | /* |
604 | * if we didn't clear the block and the uiomove failed, |
605 | * the buf will now contain part of some other file, |
606 | * so we need to invalidate it. |
607 | */ |
608 | if (error && (flags & B_CLRBUF) == 0) { |
609 | brelse(bp, BC_INVAL); |
610 | break; |
611 | } |
612 | #ifdef LFS_READWRITE |
613 | (void)VOP_BWRITE(bp->b_vp, bp); |
614 | lfs_reserve(fs, vp, NULL, |
615 | -btofsb(fs, (UFS_NIADDR + 1) << fs->lfs_bshift)); |
616 | need_unreserve = false; |
617 | #else |
618 | if (ioflag & IO_SYNC) |
619 | (void)bwrite(bp); |
620 | else if (xfersize + blkoffset == fs->fs_bsize) |
621 | bawrite(bp); |
622 | else |
623 | bdwrite(bp); |
624 | #endif |
625 | if (error || xfersize == 0) |
626 | break; |
627 | } |
628 | #ifdef LFS_READWRITE |
629 | if (need_unreserve) { |
630 | lfs_reserve(fs, vp, NULL, |
631 | -btofsb(fs, (UFS_NIADDR + 1) << fs->lfs_bshift)); |
632 | } |
633 | #endif |
634 | |
635 | error = ufs_post_write_update(vp, uio, ioflag, cred, osize, resid, |
636 | extended, error); |
637 | fstrans_done(vp->v_mount); |
638 | |
639 | return (error); |
640 | } |
641 | |
642 | static int |
643 | ufs_post_write_update(struct vnode *vp, struct uio *uio, int ioflag, |
644 | kauth_cred_t cred, off_t osize, int resid, int extended, int oerror) |
645 | { |
646 | struct inode *ip = VTOI(vp); |
647 | int error = oerror; |
648 | |
649 | /* Trigger ctime and mtime updates, and atime if MNT_RELATIME. */ |
650 | ip->i_flag |= IN_CHANGE | IN_UPDATE; |
651 | if (vp->v_mount->mnt_flag & MNT_RELATIME) |
652 | ip->i_flag |= IN_ACCESS; |
653 | |
654 | /* |
655 | * If we successfully wrote any data and we are not the superuser, |
656 | * we clear the setuid and setgid bits as a precaution against |
657 | * tampering. |
658 | */ |
659 | if (resid > uio->uio_resid && cred) { |
660 | if (ip->i_mode & ISUID) { |
661 | if (kauth_authorize_vnode(cred, |
662 | KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) { |
663 | ip->i_mode &= ~ISUID; |
664 | DIP_ASSIGN(ip, mode, ip->i_mode); |
665 | } |
666 | } |
667 | |
668 | if (ip->i_mode & ISGID) { |
669 | if (kauth_authorize_vnode(cred, |
670 | KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) { |
671 | ip->i_mode &= ~ISGID; |
672 | DIP_ASSIGN(ip, mode, ip->i_mode); |
673 | } |
674 | } |
675 | } |
676 | |
677 | /* If we successfully wrote anything, notify kevent listeners. */ |
678 | if (resid > uio->uio_resid) |
679 | VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0)); |
680 | |
681 | /* |
682 | * Update the size on disk: truncate back to original size on |
683 | * error, or reflect the new size on success. |
684 | */ |
685 | if (error) { |
686 | (void) UFS_TRUNCATE(vp, osize, ioflag & IO_SYNC, cred); |
687 | uio->uio_offset -= resid - uio->uio_resid; |
688 | uio->uio_resid = resid; |
689 | } else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC) |
690 | error = UFS_UPDATE(vp, NULL, NULL, UPDATE_WAIT); |
691 | else |
692 | UFS_WAPBL_UPDATE(vp, NULL, NULL, 0); |
693 | |
694 | /* Make sure the vnode uvm size matches the inode file size. */ |
695 | KASSERT(vp->v_size == ip->i_size); |
696 | |
697 | /* Write error overrides any inode update error. */ |
698 | if (oerror) |
699 | error = oerror; |
700 | return error; |
701 | } |
702 | |