1/* $NetBSD: ffs_balloc.c,v 1.62 2016/09/25 11:45:39 jdolecek Exp $ */
2
3/*
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
5 * All rights reserved.
6 *
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11 * research program
12 *
13 * Copyright (c) 1982, 1986, 1989, 1993
14 * The Regents of the University of California. All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
41 */
42
43#include <sys/cdefs.h>
44__KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.62 2016/09/25 11:45:39 jdolecek Exp $");
45
46#if defined(_KERNEL_OPT)
47#include "opt_quota.h"
48#endif
49
50#include <sys/param.h>
51#include <sys/systm.h>
52#include <sys/buf.h>
53#include <sys/file.h>
54#include <sys/mount.h>
55#include <sys/vnode.h>
56#include <sys/kauth.h>
57#include <sys/fstrans.h>
58
59#include <ufs/ufs/quota.h>
60#include <ufs/ufs/ufsmount.h>
61#include <ufs/ufs/inode.h>
62#include <ufs/ufs/ufs_extern.h>
63#include <ufs/ufs/ufs_bswap.h>
64
65#include <ufs/ffs/fs.h>
66#include <ufs/ffs/ffs_extern.h>
67
68#include <uvm/uvm.h>
69
/*
 * Format-specific block allocators.  ffs_balloc() picks one of them
 * according to the superblock magic number.
 */
static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
    struct buf **);
static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
    struct buf **);
74
75/*
76 * Balloc defines the structure of file system storage
77 * by allocating the physical blocks on a device given
78 * the inode and the logical block number in a file.
79 */
80
81int
82ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
83 struct buf **bpp)
84{
85 int error;
86
87 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
88 error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
89 else
90 error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
91
92 if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
93 brelse(*bpp, 0);
94
95 return error;
96}
97
98static int
99ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
100 int flags, struct buf **bpp)
101{
102 daddr_t lbn, lastlbn;
103 struct buf *bp, *nbp;
104 struct inode *ip = VTOI(vp);
105 struct fs *fs = ip->i_fs;
106 struct ufsmount *ump = ip->i_ump;
107 struct indir indirs[UFS_NIADDR + 2];
108 daddr_t newb, pref, nb;
109 int32_t *bap; /* XXX ondisk32 */
110 int deallocated, osize, nsize, num, i, error;
111 int32_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
112 int32_t *allocib;
113 int unwindidx = -1;
114 const int needswap = UFS_FSNEEDSWAP(fs);
115 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
116
117 lbn = ffs_lblkno(fs, off);
118 size = ffs_blkoff(fs, off) + size;
119 if (size > fs->fs_bsize)
120 panic("ffs_balloc: blk too big");
121 if (bpp != NULL) {
122 *bpp = NULL;
123 }
124 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
125
126 if (lbn < 0)
127 return (EFBIG);
128
129 /*
130 * If the next write will extend the file into a new block,
131 * and the file is currently composed of a fragment
132 * this fragment has to be extended to be a full block.
133 */
134
135 lastlbn = ffs_lblkno(fs, ip->i_size);
136 if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
137 nb = lastlbn;
138 osize = ffs_blksize(fs, ip, nb);
139 if (osize < fs->fs_bsize && osize > 0) {
140 mutex_enter(&ump->um_lock);
141 error = ffs_realloccg(ip, nb,
142 ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
143 &ip->i_ffs1_db[0]),
144 osize, (int)fs->fs_bsize, cred, bpp, &newb);
145 if (error)
146 return (error);
147 ip->i_size = ffs_lblktosize(fs, nb + 1);
148 ip->i_ffs1_size = ip->i_size;
149 uvm_vnp_setsize(vp, ip->i_ffs1_size);
150 ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
151 ip->i_flag |= IN_CHANGE | IN_UPDATE;
152 if (bpp && *bpp) {
153 if (flags & B_SYNC)
154 bwrite(*bpp);
155 else
156 bawrite(*bpp);
157 }
158 }
159 }
160
161 /*
162 * The first UFS_NDADDR blocks are direct blocks
163 */
164
165 if (lbn < UFS_NDADDR) {
166 nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
167 if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) {
168
169 /*
170 * The block is an already-allocated direct block
171 * and the file already extends past this block,
172 * thus this must be a whole block.
173 * Just read the block (if requested).
174 */
175
176 if (bpp != NULL) {
177 error = bread(vp, lbn, fs->fs_bsize,
178 B_MODIFY, bpp);
179 if (error) {
180 return (error);
181 }
182 }
183 return (0);
184 }
185 if (nb != 0) {
186
187 /*
188 * Consider need to reallocate a fragment.
189 */
190
191 osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size));
192 nsize = ffs_fragroundup(fs, size);
193 if (nsize <= osize) {
194
195 /*
196 * The existing block is already
197 * at least as big as we want.
198 * Just read the block (if requested).
199 */
200
201 if (bpp != NULL) {
202 error = bread(vp, lbn, osize,
203 B_MODIFY, bpp);
204 if (error) {
205 return (error);
206 }
207 }
208 return 0;
209 } else {
210
211 /*
212 * The existing block is smaller than we want,
213 * grow it.
214 */
215 mutex_enter(&ump->um_lock);
216 error = ffs_realloccg(ip, lbn,
217 ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
218 &ip->i_ffs1_db[0]),
219 osize, nsize, cred, bpp, &newb);
220 if (error)
221 return (error);
222 }
223 } else {
224
225 /*
226 * the block was not previously allocated,
227 * allocate a new block or fragment.
228 */
229
230 if (ip->i_size < ffs_lblktosize(fs, lbn + 1))
231 nsize = ffs_fragroundup(fs, size);
232 else
233 nsize = fs->fs_bsize;
234 mutex_enter(&ump->um_lock);
235 error = ffs_alloc(ip, lbn,
236 ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
237 &ip->i_ffs1_db[0]),
238 nsize, flags, cred, &newb);
239 if (error)
240 return (error);
241 if (bpp != NULL) {
242 error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb),
243 nsize, (flags & B_CLRBUF) != 0, bpp);
244 if (error)
245 return error;
246 }
247 }
248 ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
249 ip->i_flag |= IN_CHANGE | IN_UPDATE;
250 return (0);
251 }
252
253 /*
254 * Determine the number of levels of indirection.
255 */
256
257 pref = 0;
258 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
259 return (error);
260
261 /*
262 * Fetch the first indirect block allocating if necessary.
263 */
264
265 --num;
266 nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
267 allocib = NULL;
268 allocblk = allociblk;
269 if (nb == 0) {
270 mutex_enter(&ump->um_lock);
271 pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
272 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
273 flags | B_METAONLY, cred, &newb);
274 if (error)
275 goto fail;
276 nb = newb;
277 *allocblk++ = nb;
278 error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb),
279 fs->fs_bsize, true, &bp);
280 if (error)
281 goto fail;
282 /*
283 * Write synchronously so that indirect blocks
284 * never point at garbage.
285 */
286 if ((error = bwrite(bp)) != 0)
287 goto fail;
288 unwindidx = 0;
289 allocib = &ip->i_ffs1_ib[indirs[0].in_off];
290 *allocib = ufs_rw32(nb, needswap);
291 ip->i_flag |= IN_CHANGE | IN_UPDATE;
292 }
293
294 /*
295 * Fetch through the indirect blocks, allocating as necessary.
296 */
297
298 for (i = 1;;) {
299 error = bread(vp,
300 indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp);
301 if (error) {
302 goto fail;
303 }
304 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
305 nb = ufs_rw32(bap[indirs[i].in_off], needswap);
306 if (i == num)
307 break;
308 i++;
309 if (nb != 0) {
310 brelse(bp, 0);
311 continue;
312 }
313 if (fscow_run(bp, true) != 0) {
314 brelse(bp, 0);
315 goto fail;
316 }
317 mutex_enter(&ump->um_lock);
318 /* Try to keep snapshot indirect blocks contiguous. */
319 if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
320 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i-1].in_off,
321 flags | B_METAONLY, &bap[0]);
322 if (pref == 0)
323 pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
324 NULL);
325 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
326 flags | B_METAONLY, cred, &newb);
327 if (error) {
328 brelse(bp, 0);
329 goto fail;
330 }
331 nb = newb;
332 *allocblk++ = nb;
333 error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb),
334 fs->fs_bsize, true, &nbp);
335 if (error) {
336 brelse(bp, 0);
337 goto fail;
338 }
339 /*
340 * Write synchronously so that indirect blocks
341 * never point at garbage.
342 */
343 if ((error = bwrite(nbp)) != 0) {
344 brelse(bp, 0);
345 goto fail;
346 }
347 if (unwindidx < 0)
348 unwindidx = i - 1;
349 bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
350
351 /*
352 * If required, write synchronously, otherwise use
353 * delayed write.
354 */
355
356 if (flags & B_SYNC) {
357 bwrite(bp);
358 } else {
359 bdwrite(bp);
360 }
361 }
362
363 if (flags & B_METAONLY) {
364 KASSERT(bpp != NULL);
365 *bpp = bp;
366 return (0);
367 }
368
369 /*
370 * Get the data block, allocating if necessary.
371 */
372
373 if (nb == 0) {
374 if (fscow_run(bp, true) != 0) {
375 brelse(bp, 0);
376 goto fail;
377 }
378 mutex_enter(&ump->um_lock);
379 pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
380 &bap[0]);
381 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
382 &newb);
383 if (error) {
384 brelse(bp, 0);
385 goto fail;
386 }
387 nb = newb;
388 *allocblk++ = nb;
389 if (bpp != NULL) {
390 error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
391 fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
392 if (error) {
393 brelse(bp, 0);
394 goto fail;
395 }
396 }
397 bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
398 if (allocib == NULL && unwindidx < 0) {
399 unwindidx = i - 1;
400 }
401
402 /*
403 * If required, write synchronously, otherwise use
404 * delayed write.
405 */
406
407 if (flags & B_SYNC) {
408 bwrite(bp);
409 } else {
410 bdwrite(bp);
411 }
412 return (0);
413 }
414 brelse(bp, 0);
415 if (bpp != NULL) {
416 if (flags & B_CLRBUF) {
417 error = bread(vp, lbn, (int)fs->fs_bsize,
418 B_MODIFY, &nbp);
419 if (error) {
420 goto fail;
421 }
422 } else {
423 error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
424 fs->fs_bsize, true, &nbp);
425 if (error)
426 goto fail;
427 }
428 *bpp = nbp;
429 }
430 return (0);
431
432fail:
433 /*
434 * If we have failed part way through block allocation, we
435 * have to deallocate any indirect blocks that we have allocated.
436 */
437
438 if (unwindidx >= 0) {
439
440 /*
441 * First write out any buffers we've created to resolve their
442 * softdeps. This must be done in reverse order of creation
443 * so that we resolve the dependencies in one pass.
444 * Write the cylinder group buffers for these buffers too.
445 */
446
447 for (i = num; i >= unwindidx; i--) {
448 if (i == 0) {
449 break;
450 }
451 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
452 fs->fs_bsize, false, &bp) != 0)
453 continue;
454 if (bp->b_oflags & BO_DELWRI) {
455 nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs,
456 FFS_DBTOFSB(fs, bp->b_blkno))));
457 bwrite(bp);
458 if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
459 fs->fs_cgsize, false, &bp) != 0)
460 continue;
461 if (bp->b_oflags & BO_DELWRI) {
462 bwrite(bp);
463 } else {
464 brelse(bp, BC_INVAL);
465 }
466 } else {
467 brelse(bp, BC_INVAL);
468 }
469 }
470
471 /*
472 * Undo the partial allocation.
473 */
474 if (unwindidx == 0) {
475 *allocib = 0;
476 ip->i_flag |= IN_CHANGE | IN_UPDATE;
477 } else {
478 int r;
479
480 r = bread(vp, indirs[unwindidx].in_lbn,
481 (int)fs->fs_bsize, 0, &bp);
482 if (r) {
483 panic("Could not unwind indirect block, error %d", r);
484 } else {
485 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
486 bap[indirs[unwindidx].in_off] = 0;
487 bwrite(bp);
488 }
489 }
490 for (i = unwindidx + 1; i <= num; i++) {
491 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
492 fs->fs_bsize, false, &bp) == 0)
493 brelse(bp, BC_INVAL);
494 }
495 }
496 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
497 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
498 deallocated += fs->fs_bsize;
499 }
500 if (deallocated) {
501#if defined(QUOTA) || defined(QUOTA2)
502 /*
503 * Restore user's disk quota because allocation failed.
504 */
505 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
506#endif
507 ip->i_ffs1_blocks -= btodb(deallocated);
508 ip->i_flag |= IN_CHANGE | IN_UPDATE;
509 }
510 return (error);
511}
512
513static int
514ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
515 int flags, struct buf **bpp)
516{
517 daddr_t lbn, lastlbn;
518 struct buf *bp, *nbp;
519 struct inode *ip = VTOI(vp);
520 struct fs *fs = ip->i_fs;
521 struct ufsmount *ump = ip->i_ump;
522 struct indir indirs[UFS_NIADDR + 2];
523 daddr_t newb, pref, nb;
524 int64_t *bap;
525 int deallocated, osize, nsize, num, i, error;
526 daddr_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
527 int64_t *allocib;
528 int unwindidx = -1;
529 const int needswap = UFS_FSNEEDSWAP(fs);
530 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
531
532 lbn = ffs_lblkno(fs, off);
533 size = ffs_blkoff(fs, off) + size;
534 if (size > fs->fs_bsize)
535 panic("ffs_balloc: blk too big");
536 if (bpp != NULL) {
537 *bpp = NULL;
538 }
539 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
540
541 if (lbn < 0)
542 return (EFBIG);
543
544#ifdef notyet
545 /*
546 * Check for allocating external data.
547 */
548 if (flags & IO_EXT) {
549 if (lbn >= UFS_NXADDR)
550 return (EFBIG);
551 /*
552 * If the next write will extend the data into a new block,
553 * and the data is currently composed of a fragment
554 * this fragment has to be extended to be a full block.
555 */
556 lastlbn = ffs_lblkno(fs, dp->di_extsize);
557 if (lastlbn < lbn) {
558 nb = lastlbn;
559 osize = ffs_sblksize(fs, dp->di_extsize, nb);
560 if (osize < fs->fs_bsize && osize > 0) {
561 mutex_enter(&ump->um_lock);
562 error = ffs_realloccg(ip, -1 - nb,
563 dp->di_extb[nb],
564 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
565 flags, &dp->di_extb[0]),
566 osize,
567 (int)fs->fs_bsize, cred, &bp);
568 if (error)
569 return (error);
570 dp->di_extsize = smalllblktosize(fs, nb + 1);
571 dp->di_extb[nb] = FFS_DBTOFSB(fs, bp->b_blkno);
572 bp->b_xflags |= BX_ALTDATA;
573 ip->i_flag |= IN_CHANGE | IN_UPDATE;
574 if (flags & IO_SYNC)
575 bwrite(bp);
576 else
577 bawrite(bp);
578 }
579 }
580 /*
581 * All blocks are direct blocks
582 */
583 if (flags & BA_METAONLY)
584 panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
585 nb = dp->di_extb[lbn];
586 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
587 error = bread(vp, -1 - lbn, fs->fs_bsize,
588 0, &bp);
589 if (error) {
590 return (error);
591 }
592 mutex_enter(&bp->b_interlock);
593 bp->b_blkno = FFS_FSBTODB(fs, nb);
594 bp->b_xflags |= BX_ALTDATA;
595 mutex_exit(&bp->b_interlock);
596 *bpp = bp;
597 return (0);
598 }
599 if (nb != 0) {
600 /*
601 * Consider need to reallocate a fragment.
602 */
603 osize = ffs_fragroundup(fs, ffs_blkoff(fs, dp->di_extsize));
604 nsize = ffs_fragroundup(fs, size);
605 if (nsize <= osize) {
606 error = bread(vp, -1 - lbn, osize,
607 0, &bp);
608 if (error) {
609 return (error);
610 }
611 mutex_enter(&bp->b_interlock);
612 bp->b_blkno = FFS_FSBTODB(fs, nb);
613 bp->b_xflags |= BX_ALTDATA;
614 mutex_exit(&bp->b_interlock);
615 } else {
616 mutex_enter(&ump->um_lock);
617 error = ffs_realloccg(ip, -1 - lbn,
618 dp->di_extb[lbn],
619 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
620 &dp->di_extb[0]),
621 osize, nsize, cred, &bp);
622 if (error)
623 return (error);
624 bp->b_xflags |= BX_ALTDATA;
625 }
626 } else {
627 if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
628 nsize = ffs_fragroundup(fs, size);
629 else
630 nsize = fs->fs_bsize;
631 mutex_enter(&ump->um_lock);
632 error = ffs_alloc(ip, lbn,
633 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
634 &dp->di_extb[0]),
635 nsize, flags, cred, &newb);
636 if (error)
637 return (error);
638 error = ffs_getblk(vp, -1 - lbn, FFS_FSBTODB(fs, newb),
639 nsize, (flags & B_CLRBUF) != 0, &bp);
640 if (error)
641 return error;
642 bp->b_xflags |= BX_ALTDATA;
643 }
644 dp->di_extb[lbn] = FFS_DBTOFSB(fs, bp->b_blkno);
645 ip->i_flag |= IN_CHANGE | IN_UPDATE;
646 *bpp = bp;
647 return (0);
648 }
649#endif
650 /*
651 * If the next write will extend the file into a new block,
652 * and the file is currently composed of a fragment
653 * this fragment has to be extended to be a full block.
654 */
655
656 lastlbn = ffs_lblkno(fs, ip->i_size);
657 if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
658 nb = lastlbn;
659 osize = ffs_blksize(fs, ip, nb);
660 if (osize < fs->fs_bsize && osize > 0) {
661 mutex_enter(&ump->um_lock);
662 error = ffs_realloccg(ip, nb,
663 ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
664 &ip->i_ffs2_db[0]),
665 osize, (int)fs->fs_bsize, cred, bpp, &newb);
666 if (error)
667 return (error);
668 ip->i_size = ffs_lblktosize(fs, nb + 1);
669 ip->i_ffs2_size = ip->i_size;
670 uvm_vnp_setsize(vp, ip->i_size);
671 ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
672 ip->i_flag |= IN_CHANGE | IN_UPDATE;
673 if (bpp) {
674 if (flags & B_SYNC)
675 bwrite(*bpp);
676 else
677 bawrite(*bpp);
678 }
679 }
680 }
681
682 /*
683 * The first UFS_NDADDR blocks are direct blocks
684 */
685
686 if (lbn < UFS_NDADDR) {
687 nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
688 if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) {
689
690 /*
691 * The block is an already-allocated direct block
692 * and the file already extends past this block,
693 * thus this must be a whole block.
694 * Just read the block (if requested).
695 */
696
697 if (bpp != NULL) {
698 error = bread(vp, lbn, fs->fs_bsize,
699 B_MODIFY, bpp);
700 if (error) {
701 return (error);
702 }
703 }
704 return (0);
705 }
706 if (nb != 0) {
707
708 /*
709 * Consider need to reallocate a fragment.
710 */
711
712 osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size));
713 nsize = ffs_fragroundup(fs, size);
714 if (nsize <= osize) {
715
716 /*
717 * The existing block is already
718 * at least as big as we want.
719 * Just read the block (if requested).
720 */
721
722 if (bpp != NULL) {
723 error = bread(vp, lbn, osize,
724 B_MODIFY, bpp);
725 if (error) {
726 return (error);
727 }
728 }
729 return 0;
730 } else {
731
732 /*
733 * The existing block is smaller than we want,
734 * grow it.
735 */
736 mutex_enter(&ump->um_lock);
737 error = ffs_realloccg(ip, lbn,
738 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
739 &ip->i_ffs2_db[0]),
740 osize, nsize, cred, bpp, &newb);
741 if (error)
742 return (error);
743 }
744 } else {
745
746 /*
747 * the block was not previously allocated,
748 * allocate a new block or fragment.
749 */
750
751 if (ip->i_size < ffs_lblktosize(fs, lbn + 1))
752 nsize = ffs_fragroundup(fs, size);
753 else
754 nsize = fs->fs_bsize;
755 mutex_enter(&ump->um_lock);
756 error = ffs_alloc(ip, lbn,
757 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
758 &ip->i_ffs2_db[0]),
759 nsize, flags, cred, &newb);
760 if (error)
761 return (error);
762 if (bpp != NULL) {
763 error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb),
764 nsize, (flags & B_CLRBUF) != 0, bpp);
765 if (error)
766 return error;
767 }
768 }
769 ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
770 ip->i_flag |= IN_CHANGE | IN_UPDATE;
771 return (0);
772 }
773
774 /*
775 * Determine the number of levels of indirection.
776 */
777
778 pref = 0;
779 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
780 return (error);
781
782 /*
783 * Fetch the first indirect block allocating if necessary.
784 */
785
786 --num;
787 nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
788 allocib = NULL;
789 allocblk = allociblk;
790 if (nb == 0) {
791 mutex_enter(&ump->um_lock);
792 pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
793 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
794 flags | B_METAONLY, cred, &newb);
795 if (error)
796 goto fail;
797 nb = newb;
798 *allocblk++ = nb;
799 error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb),
800 fs->fs_bsize, true, &bp);
801 if (error)
802 goto fail;
803 /*
804 * Write synchronously so that indirect blocks
805 * never point at garbage.
806 */
807 if ((error = bwrite(bp)) != 0)
808 goto fail;
809 unwindidx = 0;
810 allocib = &ip->i_ffs2_ib[indirs[0].in_off];
811 *allocib = ufs_rw64(nb, needswap);
812 ip->i_flag |= IN_CHANGE | IN_UPDATE;
813 }
814
815 /*
816 * Fetch through the indirect blocks, allocating as necessary.
817 */
818
819 for (i = 1;;) {
820 error = bread(vp,
821 indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp);
822 if (error) {
823 goto fail;
824 }
825 bap = (int64_t *)bp->b_data;
826 nb = ufs_rw64(bap[indirs[i].in_off], needswap);
827 if (i == num)
828 break;
829 i++;
830 if (nb != 0) {
831 brelse(bp, 0);
832 continue;
833 }
834 if (fscow_run(bp, true) != 0) {
835 brelse(bp, 0);
836 goto fail;
837 }
838 mutex_enter(&ump->um_lock);
839 /* Try to keep snapshot indirect blocks contiguous. */
840 if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
841 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i-1].in_off,
842 flags | B_METAONLY, &bap[0]);
843 if (pref == 0)
844 pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
845 NULL);
846 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
847 flags | B_METAONLY, cred, &newb);
848 if (error) {
849 brelse(bp, 0);
850 goto fail;
851 }
852 nb = newb;
853 *allocblk++ = nb;
854 error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb),
855 fs->fs_bsize, true, &nbp);
856 if (error) {
857 brelse(bp, 0);
858 goto fail;
859 }
860 /*
861 * Write synchronously so that indirect blocks
862 * never point at garbage.
863 */
864 if ((error = bwrite(nbp)) != 0) {
865 brelse(bp, 0);
866 goto fail;
867 }
868 if (unwindidx < 0)
869 unwindidx = i - 1;
870 bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
871
872 /*
873 * If required, write synchronously, otherwise use
874 * delayed write.
875 */
876
877 if (flags & B_SYNC) {
878 bwrite(bp);
879 } else {
880 bdwrite(bp);
881 }
882 }
883
884 if (flags & B_METAONLY) {
885 KASSERT(bpp != NULL);
886 *bpp = bp;
887 return (0);
888 }
889
890 /*
891 * Get the data block, allocating if necessary.
892 */
893
894 if (nb == 0) {
895 if (fscow_run(bp, true) != 0) {
896 brelse(bp, 0);
897 goto fail;
898 }
899 mutex_enter(&ump->um_lock);
900 pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
901 &bap[0]);
902 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
903 &newb);
904 if (error) {
905 brelse(bp, 0);
906 goto fail;
907 }
908 nb = newb;
909 *allocblk++ = nb;
910 if (bpp != NULL) {
911 error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
912 fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
913 if (error) {
914 brelse(bp, 0);
915 goto fail;
916 }
917 }
918 bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
919 if (allocib == NULL && unwindidx < 0) {
920 unwindidx = i - 1;
921 }
922
923 /*
924 * If required, write synchronously, otherwise use
925 * delayed write.
926 */
927
928 if (flags & B_SYNC) {
929 bwrite(bp);
930 } else {
931 bdwrite(bp);
932 }
933 return (0);
934 }
935 brelse(bp, 0);
936 if (bpp != NULL) {
937 if (flags & B_CLRBUF) {
938 error = bread(vp, lbn, (int)fs->fs_bsize,
939 B_MODIFY, &nbp);
940 if (error) {
941 goto fail;
942 }
943 } else {
944 error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
945 fs->fs_bsize, true, &nbp);
946 if (error)
947 goto fail;
948 }
949 *bpp = nbp;
950 }
951 return (0);
952
953fail:
954 /*
955 * If we have failed part way through block allocation, we
956 * have to deallocate any indirect blocks that we have allocated.
957 */
958
959 if (unwindidx >= 0) {
960
961 /*
962 * First write out any buffers we've created to resolve their
963 * softdeps. This must be done in reverse order of creation
964 * so that we resolve the dependencies in one pass.
965 * Write the cylinder group buffers for these buffers too.
966 */
967
968 for (i = num; i >= unwindidx; i--) {
969 if (i == 0) {
970 break;
971 }
972 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
973 fs->fs_bsize, false, &bp) != 0)
974 continue;
975 if (bp->b_oflags & BO_DELWRI) {
976 nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs,
977 FFS_DBTOFSB(fs, bp->b_blkno))));
978 bwrite(bp);
979 if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
980 fs->fs_cgsize, false, &bp) != 0)
981 continue;
982 if (bp->b_oflags & BO_DELWRI) {
983 bwrite(bp);
984 } else {
985 brelse(bp, BC_INVAL);
986 }
987 } else {
988 brelse(bp, BC_INVAL);
989 }
990 }
991
992 /*
993 * Now that any dependencies that we created have been
994 * resolved, we can undo the partial allocation.
995 */
996
997 if (unwindidx == 0) {
998 *allocib = 0;
999 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1000 } else {
1001 int r;
1002
1003 r = bread(vp, indirs[unwindidx].in_lbn,
1004 (int)fs->fs_bsize, 0, &bp);
1005 if (r) {
1006 panic("Could not unwind indirect block, error %d", r);
1007 } else {
1008 bap = (int64_t *)bp->b_data;
1009 bap[indirs[unwindidx].in_off] = 0;
1010 bwrite(bp);
1011 }
1012 }
1013 for (i = unwindidx + 1; i <= num; i++) {
1014 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
1015 fs->fs_bsize, false, &bp) == 0)
1016 brelse(bp, BC_INVAL);
1017 }
1018 }
1019 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
1020 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
1021 deallocated += fs->fs_bsize;
1022 }
1023 if (deallocated) {
1024#if defined(QUOTA) || defined(QUOTA2)
1025 /*
1026 * Restore user's disk quota because allocation failed.
1027 */
1028 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
1029#endif
1030 ip->i_ffs2_blocks -= btodb(deallocated);
1031 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1032 }
1033
1034 return (error);
1035}
1036