1 | /* $NetBSD: lfs_balloc.c,v 1.91 2016/08/07 02:42:32 dholland Exp $ */ |
2 | |
3 | /*- |
4 | * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. |
5 | * All rights reserved. |
6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Konrad E. Schroder <perseant@hhhh.org>. |
9 | * |
10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions |
12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. |
18 | * |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 | * POSSIBILITY OF SUCH DAMAGE. |
30 | */ |
31 | /* |
32 | * Copyright (c) 1989, 1991, 1993 |
33 | * The Regents of the University of California. All rights reserved. |
34 | * |
35 | * Redistribution and use in source and binary forms, with or without |
36 | * modification, are permitted provided that the following conditions |
37 | * are met: |
38 | * 1. Redistributions of source code must retain the above copyright |
39 | * notice, this list of conditions and the following disclaimer. |
40 | * 2. Redistributions in binary form must reproduce the above copyright |
41 | * notice, this list of conditions and the following disclaimer in the |
42 | * documentation and/or other materials provided with the distribution. |
43 | * 3. Neither the name of the University nor the names of its contributors |
44 | * may be used to endorse or promote products derived from this software |
45 | * without specific prior written permission. |
46 | * |
47 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
48 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
49 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
50 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
51 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
52 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
53 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
54 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
55 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
56 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
57 | * SUCH DAMAGE. |
58 | * |
59 | * @(#)lfs_balloc.c 8.4 (Berkeley) 5/8/95 |
60 | */ |
61 | |
62 | #include <sys/cdefs.h> |
63 | __KERNEL_RCSID(0, "$NetBSD: lfs_balloc.c,v 1.91 2016/08/07 02:42:32 dholland Exp $" ); |
64 | |
65 | #if defined(_KERNEL_OPT) |
66 | #include "opt_quota.h" |
67 | #endif |
68 | |
69 | #include <sys/param.h> |
70 | #include <sys/systm.h> |
71 | #include <sys/buf.h> |
72 | #include <sys/proc.h> |
73 | #include <sys/vnode.h> |
74 | #include <sys/mount.h> |
75 | #include <sys/resourcevar.h> |
76 | #include <sys/tree.h> |
77 | #include <sys/trace.h> |
78 | #include <sys/kauth.h> |
79 | |
80 | #include <miscfs/specfs/specdev.h> |
81 | |
82 | #include <ufs/lfs/ulfs_quotacommon.h> |
83 | #include <ufs/lfs/ulfs_inode.h> |
84 | #include <ufs/lfs/ulfsmount.h> |
85 | #include <ufs/lfs/ulfs_extern.h> |
86 | |
87 | #include <ufs/lfs/lfs.h> |
88 | #include <ufs/lfs/lfs_accessors.h> |
89 | #include <ufs/lfs/lfs_extern.h> |
90 | #include <ufs/lfs/lfs_kernel.h> |
91 | |
92 | #include <uvm/uvm.h> |
93 | |
/*
 * Forward declaration; lfs_fragextend() is defined after lfs_balloc(),
 * which calls it to grow a fragment-sized block.
 */
static int lfs_fragextend(struct vnode *, int, int, daddr_t, struct buf **,
    kauth_cred_t);

/*
 * Global counter that lfs_register_block() increments for every lbn it
 * newly records and lfs_do_deregister() decrements (never below zero).
 * Both updates are made while holding lfs_lock.
 */
u_int64_t locked_fakequeue_count;
98 | |
99 | /* |
100 | * Allocate a block, and do inode and filesystem block accounting for |
101 | * it and for any indirect blocks that may need to be created in order |
102 | * to handle this block. |
103 | * |
104 | * Blocks which have never been accounted for (i.e., which "do not |
105 | * exist") have disk address 0, which is translated by ulfs_bmap to |
106 | * the special value UNASSIGNED == -1, as in historical FFS-related |
107 | * code. |
108 | * |
109 | * Blocks which have been accounted for but which have not yet been |
110 | * written to disk are given the new special disk address UNWRITTEN == |
111 | * -2, so that they can be differentiated from completely new blocks. |
112 | * |
113 | * Note: it seems that bpp is passed as NULL for blocks that are file |
114 | * pages that will be handled by UVM and not the buffer cache. |
115 | * |
116 | * XXX: locking? |
117 | */ |
118 | /* VOP_BWRITE ULFS_NIADDR+2 times */ |
119 | int |
120 | lfs_balloc(struct vnode *vp, off_t startoffset, int iosize, kauth_cred_t cred, |
121 | int flags, struct buf **bpp) |
122 | { |
123 | int offset; |
124 | daddr_t daddr, idaddr; |
125 | struct buf *ibp, *bp; |
126 | struct inode *ip; |
127 | struct lfs *fs; |
128 | struct indir indirs[ULFS_NIADDR+2], *idp; |
129 | daddr_t lbn, lastblock; |
130 | int bcount; |
131 | int error, frags, i, nsize, osize, num; |
132 | |
133 | ip = VTOI(vp); |
134 | fs = ip->i_lfs; |
135 | |
136 | /* Declare to humans that we might have the seglock here */ |
137 | ASSERT_MAYBE_SEGLOCK(fs); |
138 | |
139 | |
140 | /* offset within block */ |
141 | offset = lfs_blkoff(fs, startoffset); |
142 | |
143 | /* This is usually but not always exactly the block size */ |
144 | KASSERT(iosize <= lfs_sb_getbsize(fs)); |
145 | |
146 | /* block number (within file) */ |
147 | lbn = lfs_lblkno(fs, startoffset); |
148 | |
149 | /* |
150 | * This checks for whether pending stuff needs to be flushed |
151 | * out and potentially waits. It's been disabled since UBC |
152 | * support was added to LFS in 2003. -- dholland 20160806 |
153 | */ |
154 | /* (void)lfs_check(vp, lbn, 0); */ |
155 | |
156 | |
157 | /* |
158 | * Three cases: it's a block beyond the end of file, it's a block in |
159 | * the file that may or may not have been assigned a disk address or |
160 | * we're writing an entire block. |
161 | * |
162 | * Note, if the daddr is UNWRITTEN, the block already exists in |
163 | * the cache (it was read or written earlier). If so, make sure |
164 | * we don't count it as a new block or zero out its contents. If |
165 | * it did not, make sure we allocate any necessary indirect |
166 | * blocks. |
167 | * |
168 | * If we are writing a block beyond the end of the file, we need to |
169 | * check if the old last block was a fragment. If it was, we need |
170 | * to rewrite it. |
171 | */ |
172 | |
173 | if (bpp) |
174 | *bpp = NULL; |
175 | |
176 | /* Last block number in file */ |
177 | lastblock = lfs_lblkno(fs, ip->i_size); |
178 | |
179 | if (lastblock < ULFS_NDADDR && lastblock < lbn) { |
180 | /* |
181 | * The file is small enough to have fragments, and we're |
182 | * allocating past EOF. |
183 | * |
184 | * If the last block was a fragment we need to rewrite it |
185 | * as a full block. |
186 | */ |
187 | osize = lfs_blksize(fs, ip, lastblock); |
188 | if (osize < lfs_sb_getbsize(fs) && osize > 0) { |
189 | if ((error = lfs_fragextend(vp, osize, lfs_sb_getbsize(fs), |
190 | lastblock, |
191 | (bpp ? &bp : NULL), cred))) |
192 | return (error); |
193 | /* Update the file size with what we just did (only) */ |
194 | ip->i_size = (lastblock + 1) * lfs_sb_getbsize(fs); |
195 | lfs_dino_setsize(fs, ip->i_din, ip->i_size); |
196 | uvm_vnp_setsize(vp, ip->i_size); |
197 | ip->i_flag |= IN_CHANGE | IN_UPDATE; |
198 | /* if we got a buffer for this, write it out now */ |
199 | if (bpp) |
200 | (void) VOP_BWRITE(bp->b_vp, bp); |
201 | } |
202 | } |
203 | |
204 | /* |
205 | * If the block we are writing is a direct block, it's the last |
206 | * block in the file, and offset + iosize is less than a full |
207 | * block, we can write one or more fragments. There are two cases: |
208 | * the block is brand new and we should allocate it the correct |
209 | * size or it already exists and contains some fragments and |
210 | * may need to extend it. |
211 | */ |
212 | if (lbn < ULFS_NDADDR && lfs_lblkno(fs, ip->i_size) <= lbn) { |
213 | osize = lfs_blksize(fs, ip, lbn); |
214 | nsize = lfs_fragroundup(fs, offset + iosize); |
215 | if (lfs_lblktosize(fs, lbn) >= ip->i_size) { |
216 | /* Brand new block or fragment */ |
217 | frags = lfs_numfrags(fs, nsize); |
218 | if (!ISSPACE(fs, frags, cred)) |
219 | return ENOSPC; |
220 | if (bpp) { |
221 | *bpp = bp = getblk(vp, lbn, nsize, 0, 0); |
222 | bp->b_blkno = UNWRITTEN; |
223 | if (flags & B_CLRBUF) |
224 | clrbuf(bp); |
225 | } |
226 | |
227 | /* |
228 | * Update the effective block count (this count |
229 | * includes blocks that don't have an on-disk |
230 | * presence or location yet) |
231 | */ |
232 | ip->i_lfs_effnblks += frags; |
233 | |
234 | /* account for the space we're taking */ |
235 | mutex_enter(&lfs_lock); |
236 | lfs_sb_subbfree(fs, frags); |
237 | mutex_exit(&lfs_lock); |
238 | |
239 | /* update the inode */ |
240 | lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN); |
241 | } else { |
242 | /* extending a block that already has fragments */ |
243 | |
244 | if (nsize <= osize) { |
245 | /* No need to extend */ |
246 | if (bpp && (error = bread(vp, lbn, osize, |
247 | 0, &bp))) |
248 | return error; |
249 | } else { |
250 | /* Extend existing block */ |
251 | if ((error = |
252 | lfs_fragextend(vp, osize, nsize, lbn, |
253 | (bpp ? &bp : NULL), cred))) |
254 | return error; |
255 | } |
256 | if (bpp) |
257 | *bpp = bp; |
258 | } |
259 | return 0; |
260 | } |
261 | |
262 | /* |
263 | * Look up what's already here. |
264 | */ |
265 | |
266 | error = ulfs_bmaparray(vp, lbn, &daddr, &indirs[0], &num, NULL, NULL); |
267 | if (error) |
268 | return (error); |
269 | |
270 | KASSERT(daddr <= LFS_MAX_DADDR(fs)); |
271 | |
272 | /* |
273 | * Do byte accounting all at once, so we can gracefully fail *before* |
274 | * we start assigning blocks. |
275 | */ |
276 | frags = fs->um_seqinc; |
277 | bcount = 0; /* number of frags we need */ |
278 | if (daddr == UNASSIGNED) { |
279 | /* no block yet, going to need a whole block */ |
280 | bcount = frags; |
281 | } |
282 | for (i = 1; i < num; ++i) { |
283 | if (!indirs[i].in_exists) { |
284 | /* need an indirect block at this level */ |
285 | bcount += frags; |
286 | } |
287 | } |
288 | if (ISSPACE(fs, bcount, cred)) { |
289 | /* update the superblock's free block count */ |
290 | mutex_enter(&lfs_lock); |
291 | lfs_sb_subbfree(fs, bcount); |
292 | mutex_exit(&lfs_lock); |
293 | /* update the file's effective block count */ |
294 | ip->i_lfs_effnblks += bcount; |
295 | } else { |
296 | /* whoops, no can do */ |
297 | return ENOSPC; |
298 | } |
299 | |
300 | if (daddr == UNASSIGNED) { |
301 | /* |
302 | * There is nothing here yet. |
303 | */ |
304 | |
305 | /* |
306 | * If there's no indirect block in the inode, change it |
307 | * to UNWRITTEN to indicate that it exists but doesn't |
308 | * have an on-disk address yet. |
309 | * |
310 | * (Question: where's the block data initialized?) |
311 | */ |
312 | if (num > 0 && lfs_dino_getib(fs, ip->i_din, indirs[0].in_off) == 0) { |
313 | lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN); |
314 | } |
315 | |
316 | /* |
317 | * If we need more layers of indirect blocks, create what |
318 | * we need. |
319 | */ |
320 | if (num > 1) { |
321 | /* |
322 | * The outermost indirect block address is the one |
323 | * in the inode, so fetch that. |
324 | */ |
325 | idaddr = lfs_dino_getib(fs, ip->i_din, indirs[0].in_off); |
326 | /* |
327 | * For each layer of indirection... |
328 | */ |
329 | for (i = 1; i < num; ++i) { |
330 | /* |
331 | * Get a buffer for the indirect block data. |
332 | * |
333 | * (XXX: the logic here seems twisted. What's |
334 | * wrong with testing in_exists first and then |
335 | * doing either bread or getblk to get a |
336 | * buffer?) |
337 | */ |
338 | ibp = getblk(vp, indirs[i].in_lbn, |
339 | lfs_sb_getbsize(fs), 0,0); |
340 | if (!indirs[i].in_exists) { |
341 | /* |
342 | * There isn't actually a block here, |
343 | * so clear the buffer data and mark |
344 | * the address of the block as |
345 | * UNWRITTEN. |
346 | */ |
347 | clrbuf(ibp); |
348 | ibp->b_blkno = UNWRITTEN; |
349 | } else if (!(ibp->b_oflags & (BO_DELWRI | BO_DONE))) { |
350 | /* |
351 | * Otherwise read it in. |
352 | */ |
353 | ibp->b_blkno = LFS_FSBTODB(fs, idaddr); |
354 | ibp->b_flags |= B_READ; |
355 | VOP_STRATEGY(vp, ibp); |
356 | biowait(ibp); |
357 | } |
358 | |
359 | /* |
360 | * Now this indirect block exists, but |
361 | * the next one down may not yet. If |
362 | * so, set it to UNWRITTEN. This keeps |
363 | * the accounting straight. |
364 | */ |
365 | if (lfs_iblock_get(fs, ibp->b_data, indirs[i].in_off) == 0) |
366 | lfs_iblock_set(fs, ibp->b_data, indirs[i].in_off, |
367 | UNWRITTEN); |
368 | |
369 | /* get the block for the next iteration */ |
370 | idaddr = lfs_iblock_get(fs, ibp->b_data, indirs[i].in_off); |
371 | #ifdef DEBUG |
372 | if (vp == fs->lfs_ivnode) { |
373 | LFS_ENTER_LOG("balloc" , __FILE__, |
374 | __LINE__, indirs[i].in_lbn, |
375 | ibp->b_flags, curproc->p_pid); |
376 | } |
377 | #endif |
378 | /* |
379 | * Write out the updated indirect block. Note |
380 | * that this writes it out even if we didn't |
381 | * modify it - ultimately because the final |
382 | * block didn't exist we'll need to write a |
383 | * new version of all the blocks that lead to |
384 | * it. Hopefully all that gets in before any |
385 | * actual disk I/O so we don't end up writing |
386 | * any of them twice... this is currently not |
387 | * very clear. |
388 | */ |
389 | if ((error = VOP_BWRITE(ibp->b_vp, ibp))) |
390 | return error; |
391 | } |
392 | } |
393 | } |
394 | |
395 | |
396 | /* |
397 | * Get the existing block from the cache, if requested. |
398 | */ |
399 | if (bpp) |
400 | *bpp = bp = getblk(vp, lbn, lfs_blksize(fs, ip, lbn), 0, 0); |
401 | |
402 | /* |
403 | * Do accounting on blocks that represent pages. |
404 | */ |
405 | if (!bpp) |
406 | lfs_register_block(vp, lbn); |
407 | |
408 | /* |
409 | * The block we are writing may be a brand new block |
410 | * in which case we need to do accounting. |
411 | * |
412 | * We can tell a truly new block because ulfs_bmaparray will say |
413 | * it is UNASSIGNED. Once we allocate it we will assign it the |
414 | * disk address UNWRITTEN. |
415 | */ |
416 | if (daddr == UNASSIGNED) { |
417 | if (bpp) { |
418 | if (flags & B_CLRBUF) |
419 | clrbuf(bp); |
420 | |
421 | /* Note the new address */ |
422 | bp->b_blkno = UNWRITTEN; |
423 | } |
424 | |
425 | switch (num) { |
426 | case 0: |
427 | /* direct block - update the inode */ |
428 | lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN); |
429 | break; |
430 | case 1: |
431 | /* |
432 | * using a single indirect block - update the inode |
433 | * |
434 | * XXX: is this right? We already set this block |
435 | * pointer above. I think we want to be writing *in* |
436 | * the single indirect block and this case shouldn't |
437 | * exist. (just case 0 and default) |
438 | * -- dholland 20160806 |
439 | */ |
440 | lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN); |
441 | break; |
442 | default: |
443 | /* |
444 | * using multiple indirect blocks - update the |
445 | * innermost one |
446 | */ |
447 | idp = &indirs[num - 1]; |
448 | if (bread(vp, idp->in_lbn, lfs_sb_getbsize(fs), |
449 | B_MODIFY, &ibp)) |
450 | panic("lfs_balloc: bread bno %lld" , |
451 | (long long)idp->in_lbn); |
452 | lfs_iblock_set(fs, ibp->b_data, idp->in_off, UNWRITTEN); |
453 | #ifdef DEBUG |
454 | if (vp == fs->lfs_ivnode) { |
455 | LFS_ENTER_LOG("balloc" , __FILE__, |
456 | __LINE__, idp->in_lbn, |
457 | ibp->b_flags, curproc->p_pid); |
458 | } |
459 | #endif |
460 | VOP_BWRITE(ibp->b_vp, ibp); |
461 | } |
462 | } else if (bpp && !(bp->b_oflags & (BO_DONE|BO_DELWRI))) { |
463 | /* |
464 | * Not a brand new block, also not in the cache; |
465 | * read it in from disk. |
466 | */ |
467 | if (iosize == lfs_sb_getbsize(fs)) |
468 | /* Optimization: I/O is unnecessary. */ |
469 | bp->b_blkno = daddr; |
470 | else { |
471 | /* |
472 | * We need to read the block to preserve the |
473 | * existing bytes. |
474 | */ |
475 | bp->b_blkno = daddr; |
476 | bp->b_flags |= B_READ; |
477 | VOP_STRATEGY(vp, bp); |
478 | return (biowait(bp)); |
479 | } |
480 | } |
481 | |
482 | return (0); |
483 | } |
484 | |
485 | /* |
486 | * Extend a file that uses fragments with more fragments. |
487 | * |
488 | * XXX: locking? |
489 | */ |
490 | /* VOP_BWRITE 1 time */ |
491 | static int |
492 | lfs_fragextend(struct vnode *vp, int osize, int nsize, daddr_t lbn, |
493 | struct buf **bpp, kauth_cred_t cred) |
494 | { |
495 | struct inode *ip; |
496 | struct lfs *fs; |
497 | long frags; |
498 | int error; |
499 | size_t obufsize; |
500 | |
501 | /* XXX move this to a header file */ |
502 | /* (XXX: except it's not clear what purpose it serves) */ |
503 | extern long locked_queue_bytes; |
504 | |
505 | ip = VTOI(vp); |
506 | fs = ip->i_lfs; |
507 | |
508 | /* |
509 | * XXX: is there some reason we know more about the seglock |
510 | * state here than at the top of lfs_balloc? |
511 | */ |
512 | ASSERT_NO_SEGLOCK(fs); |
513 | |
514 | /* number of frags we're adding */ |
515 | frags = (long)lfs_numfrags(fs, nsize - osize); |
516 | |
517 | error = 0; |
518 | |
519 | /* |
520 | * Get the seglock so we don't enlarge blocks while a segment |
521 | * is being written. If we're called with bpp==NULL, though, |
522 | * we are only pretending to change a buffer, so we don't have to |
523 | * lock. |
524 | * |
525 | * XXX: the above comment is lying, as fs->lfs_fraglock is not |
526 | * the segment lock. |
527 | */ |
528 | top: |
529 | if (bpp) { |
530 | rw_enter(&fs->lfs_fraglock, RW_READER); |
531 | LFS_DEBUG_COUNTLOCKED("frag" ); |
532 | } |
533 | |
534 | /* check if we actually have enough frags available */ |
535 | if (!ISSPACE(fs, frags, cred)) { |
536 | error = ENOSPC; |
537 | goto out; |
538 | } |
539 | |
540 | /* |
541 | * If we are not asked to actually return the block, all we need |
542 | * to do is allocate space for it. UBC will handle dirtying the |
543 | * appropriate things and making sure it all goes to disk. |
544 | * Don't bother to read in that case. |
545 | */ |
546 | if (bpp && (error = bread(vp, lbn, osize, 0, bpp))) { |
547 | goto out; |
548 | } |
549 | #if defined(LFS_QUOTA) || defined(LFS_QUOTA2) |
550 | if ((error = lfs_chkdq(ip, frags, cred, 0))) { |
551 | if (bpp) |
552 | brelse(*bpp, 0); |
553 | goto out; |
554 | } |
555 | #endif |
556 | /* |
557 | * Adjust accounting for lfs_avail. If there's not enough room, |
558 | * we will have to wait for the cleaner, which we can't do while |
559 | * holding a block busy or while holding the seglock. In that case, |
560 | * release both and start over after waiting. |
561 | */ |
562 | |
563 | if (bpp && ((*bpp)->b_oflags & BO_DELWRI)) { |
564 | if (!lfs_fits(fs, frags)) { |
565 | if (bpp) |
566 | brelse(*bpp, 0); |
567 | #if defined(LFS_QUOTA) || defined(LFS_QUOTA2) |
568 | lfs_chkdq(ip, -frags, cred, 0); |
569 | #endif |
570 | rw_exit(&fs->lfs_fraglock); |
571 | lfs_availwait(fs, frags); |
572 | goto top; |
573 | } |
574 | lfs_sb_subavail(fs, frags); |
575 | } |
576 | |
577 | /* decrease the free block count in the superblock */ |
578 | mutex_enter(&lfs_lock); |
579 | lfs_sb_subbfree(fs, frags); |
580 | mutex_exit(&lfs_lock); |
581 | /* increase the file's effective block count */ |
582 | ip->i_lfs_effnblks += frags; |
583 | /* mark the inode dirty */ |
584 | ip->i_flag |= IN_CHANGE | IN_UPDATE; |
585 | |
586 | if (bpp) { |
587 | obufsize = (*bpp)->b_bufsize; |
588 | allocbuf(*bpp, nsize, 1); |
589 | |
590 | /* Adjust locked-list accounting */ |
591 | if (((*bpp)->b_flags & B_LOCKED) != 0 && |
592 | (*bpp)->b_iodone == NULL) { |
593 | mutex_enter(&lfs_lock); |
594 | locked_queue_bytes += (*bpp)->b_bufsize - obufsize; |
595 | mutex_exit(&lfs_lock); |
596 | } |
597 | |
598 | /* zero the new space */ |
599 | memset((char *)((*bpp)->b_data) + osize, 0, (u_int)(nsize - osize)); |
600 | } |
601 | |
602 | out: |
603 | if (bpp) { |
604 | rw_exit(&fs->lfs_fraglock); |
605 | } |
606 | return (error); |
607 | } |
608 | |
609 | static inline int |
610 | lge(struct lbnentry *a, struct lbnentry *b) |
611 | { |
612 | return a->lbn - b->lbn; |
613 | } |
614 | |
615 | SPLAY_PROTOTYPE(lfs_splay, lbnentry, entry, lge); |
616 | |
617 | SPLAY_GENERATE(lfs_splay, lbnentry, entry, lge); |
618 | |
619 | /* |
620 | * Record this lbn as being "write pending". We used to have this information |
621 | * on the buffer headers, but since pages don't have buffer headers we |
622 | * record it here instead. |
623 | */ |
624 | void |
625 | lfs_register_block(struct vnode *vp, daddr_t lbn) |
626 | { |
627 | struct lfs *fs; |
628 | struct inode *ip; |
629 | struct lbnentry *lbp; |
630 | |
631 | ip = VTOI(vp); |
632 | |
633 | /* Don't count metadata */ |
634 | if (lbn < 0 || vp->v_type != VREG || ip->i_number == LFS_IFILE_INUM) |
635 | return; |
636 | |
637 | fs = ip->i_lfs; |
638 | |
639 | ASSERT_NO_SEGLOCK(fs); |
640 | |
641 | /* If no space, wait for the cleaner */ |
642 | lfs_availwait(fs, lfs_btofsb(fs, 1 << lfs_sb_getbshift(fs))); |
643 | |
644 | lbp = (struct lbnentry *)pool_get(&lfs_lbnentry_pool, PR_WAITOK); |
645 | lbp->lbn = lbn; |
646 | mutex_enter(&lfs_lock); |
647 | if (SPLAY_INSERT(lfs_splay, &ip->i_lfs_lbtree, lbp) != NULL) { |
648 | mutex_exit(&lfs_lock); |
649 | /* Already there */ |
650 | pool_put(&lfs_lbnentry_pool, lbp); |
651 | return; |
652 | } |
653 | |
654 | ++ip->i_lfs_nbtree; |
655 | fs->lfs_favail += lfs_btofsb(fs, (1 << lfs_sb_getbshift(fs))); |
656 | fs->lfs_pages += lfs_sb_getbsize(fs) >> PAGE_SHIFT; |
657 | ++locked_fakequeue_count; |
658 | lfs_subsys_pages += lfs_sb_getbsize(fs) >> PAGE_SHIFT; |
659 | mutex_exit(&lfs_lock); |
660 | } |
661 | |
662 | static void |
663 | lfs_do_deregister(struct lfs *fs, struct inode *ip, struct lbnentry *lbp) |
664 | { |
665 | ASSERT_MAYBE_SEGLOCK(fs); |
666 | |
667 | mutex_enter(&lfs_lock); |
668 | --ip->i_lfs_nbtree; |
669 | SPLAY_REMOVE(lfs_splay, &ip->i_lfs_lbtree, lbp); |
670 | if (fs->lfs_favail > lfs_btofsb(fs, (1 << lfs_sb_getbshift(fs)))) |
671 | fs->lfs_favail -= lfs_btofsb(fs, (1 << lfs_sb_getbshift(fs))); |
672 | fs->lfs_pages -= lfs_sb_getbsize(fs) >> PAGE_SHIFT; |
673 | if (locked_fakequeue_count > 0) |
674 | --locked_fakequeue_count; |
675 | lfs_subsys_pages -= lfs_sb_getbsize(fs) >> PAGE_SHIFT; |
676 | mutex_exit(&lfs_lock); |
677 | |
678 | pool_put(&lfs_lbnentry_pool, lbp); |
679 | } |
680 | |
681 | void |
682 | lfs_deregister_block(struct vnode *vp, daddr_t lbn) |
683 | { |
684 | struct lfs *fs; |
685 | struct inode *ip; |
686 | struct lbnentry *lbp; |
687 | struct lbnentry tmp; |
688 | |
689 | ip = VTOI(vp); |
690 | |
691 | /* Don't count metadata */ |
692 | if (lbn < 0 || vp->v_type != VREG || ip->i_number == LFS_IFILE_INUM) |
693 | return; |
694 | |
695 | fs = ip->i_lfs; |
696 | tmp.lbn = lbn; |
697 | lbp = SPLAY_FIND(lfs_splay, &ip->i_lfs_lbtree, &tmp); |
698 | if (lbp == NULL) |
699 | return; |
700 | |
701 | lfs_do_deregister(fs, ip, lbp); |
702 | } |
703 | |
704 | void |
705 | lfs_deregister_all(struct vnode *vp) |
706 | { |
707 | struct lbnentry *lbp, *nlbp; |
708 | struct lfs_splay *hd; |
709 | struct lfs *fs; |
710 | struct inode *ip; |
711 | |
712 | ip = VTOI(vp); |
713 | fs = ip->i_lfs; |
714 | hd = &ip->i_lfs_lbtree; |
715 | |
716 | for (lbp = SPLAY_MIN(lfs_splay, hd); lbp != NULL; lbp = nlbp) { |
717 | nlbp = SPLAY_NEXT(lfs_splay, hd, lbp); |
718 | lfs_do_deregister(fs, ip, lbp); |
719 | } |
720 | } |
721 | |