1/* $NetBSD: lfs_balloc.c,v 1.91 2016/08/07 02:42:32 dholland Exp $ */
2
3/*-
4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant@hhhh.org>.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31/*
32 * Copyright (c) 1989, 1991, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * @(#)lfs_balloc.c 8.4 (Berkeley) 5/8/95
60 */
61
62#include <sys/cdefs.h>
63__KERNEL_RCSID(0, "$NetBSD: lfs_balloc.c,v 1.91 2016/08/07 02:42:32 dholland Exp $");
64
65#if defined(_KERNEL_OPT)
66#include "opt_quota.h"
67#endif
68
69#include <sys/param.h>
70#include <sys/systm.h>
71#include <sys/buf.h>
72#include <sys/proc.h>
73#include <sys/vnode.h>
74#include <sys/mount.h>
75#include <sys/resourcevar.h>
76#include <sys/tree.h>
77#include <sys/trace.h>
78#include <sys/kauth.h>
79
80#include <miscfs/specfs/specdev.h>
81
82#include <ufs/lfs/ulfs_quotacommon.h>
83#include <ufs/lfs/ulfs_inode.h>
84#include <ufs/lfs/ulfsmount.h>
85#include <ufs/lfs/ulfs_extern.h>
86
87#include <ufs/lfs/lfs.h>
88#include <ufs/lfs/lfs_accessors.h>
89#include <ufs/lfs/lfs_extern.h>
90#include <ufs/lfs/lfs_kernel.h>
91
92#include <uvm/uvm.h>
93
94static int lfs_fragextend(struct vnode *, int, int, daddr_t, struct buf **,
95 kauth_cred_t);
96
97u_int64_t locked_fakequeue_count;
98
99/*
100 * Allocate a block, and do inode and filesystem block accounting for
101 * it and for any indirect blocks that may need to be created in order
102 * to handle this block.
103 *
104 * Blocks which have never been accounted for (i.e., which "do not
105 * exist") have disk address 0, which is translated by ulfs_bmap to
106 * the special value UNASSIGNED == -1, as in historical FFS-related
107 * code.
108 *
109 * Blocks which have been accounted for but which have not yet been
110 * written to disk are given the new special disk address UNWRITTEN ==
111 * -2, so that they can be differentiated from completely new blocks.
112 *
113 * Note: it seems that bpp is passed as NULL for blocks that are file
114 * pages that will be handled by UVM and not the buffer cache.
115 *
116 * XXX: locking?
117 */
118/* VOP_BWRITE ULFS_NIADDR+2 times */
119int
120lfs_balloc(struct vnode *vp, off_t startoffset, int iosize, kauth_cred_t cred,
121 int flags, struct buf **bpp)
122{
123 int offset;
124 daddr_t daddr, idaddr;
125 struct buf *ibp, *bp;
126 struct inode *ip;
127 struct lfs *fs;
128 struct indir indirs[ULFS_NIADDR+2], *idp;
129 daddr_t lbn, lastblock;
130 int bcount;
131 int error, frags, i, nsize, osize, num;
132
133 ip = VTOI(vp);
134 fs = ip->i_lfs;
135
136 /* Declare to humans that we might have the seglock here */
137 ASSERT_MAYBE_SEGLOCK(fs);
138
139
140 /* offset within block */
141 offset = lfs_blkoff(fs, startoffset);
142
143 /* This is usually but not always exactly the block size */
144 KASSERT(iosize <= lfs_sb_getbsize(fs));
145
146 /* block number (within file) */
147 lbn = lfs_lblkno(fs, startoffset);
148
149 /*
150 * This checks for whether pending stuff needs to be flushed
151 * out and potentially waits. It's been disabled since UBC
152 * support was added to LFS in 2003. -- dholland 20160806
153 */
154 /* (void)lfs_check(vp, lbn, 0); */
155
156
157 /*
158 * Three cases: it's a block beyond the end of file, it's a block in
159 * the file that may or may not have been assigned a disk address or
160 * we're writing an entire block.
161 *
162 * Note, if the daddr is UNWRITTEN, the block already exists in
163 * the cache (it was read or written earlier). If so, make sure
164 * we don't count it as a new block or zero out its contents. If
165 * it did not, make sure we allocate any necessary indirect
166 * blocks.
167 *
168 * If we are writing a block beyond the end of the file, we need to
169 * check if the old last block was a fragment. If it was, we need
170 * to rewrite it.
171 */
172
173 if (bpp)
174 *bpp = NULL;
175
176 /* Last block number in file */
177 lastblock = lfs_lblkno(fs, ip->i_size);
178
179 if (lastblock < ULFS_NDADDR && lastblock < lbn) {
180 /*
181 * The file is small enough to have fragments, and we're
182 * allocating past EOF.
183 *
184 * If the last block was a fragment we need to rewrite it
185 * as a full block.
186 */
187 osize = lfs_blksize(fs, ip, lastblock);
188 if (osize < lfs_sb_getbsize(fs) && osize > 0) {
189 if ((error = lfs_fragextend(vp, osize, lfs_sb_getbsize(fs),
190 lastblock,
191 (bpp ? &bp : NULL), cred)))
192 return (error);
193 /* Update the file size with what we just did (only) */
194 ip->i_size = (lastblock + 1) * lfs_sb_getbsize(fs);
195 lfs_dino_setsize(fs, ip->i_din, ip->i_size);
196 uvm_vnp_setsize(vp, ip->i_size);
197 ip->i_flag |= IN_CHANGE | IN_UPDATE;
198 /* if we got a buffer for this, write it out now */
199 if (bpp)
200 (void) VOP_BWRITE(bp->b_vp, bp);
201 }
202 }
203
204 /*
205 * If the block we are writing is a direct block, it's the last
206 * block in the file, and offset + iosize is less than a full
207 * block, we can write one or more fragments. There are two cases:
208 * the block is brand new and we should allocate it the correct
209 * size or it already exists and contains some fragments and
210 * may need to extend it.
211 */
212 if (lbn < ULFS_NDADDR && lfs_lblkno(fs, ip->i_size) <= lbn) {
213 osize = lfs_blksize(fs, ip, lbn);
214 nsize = lfs_fragroundup(fs, offset + iosize);
215 if (lfs_lblktosize(fs, lbn) >= ip->i_size) {
216 /* Brand new block or fragment */
217 frags = lfs_numfrags(fs, nsize);
218 if (!ISSPACE(fs, frags, cred))
219 return ENOSPC;
220 if (bpp) {
221 *bpp = bp = getblk(vp, lbn, nsize, 0, 0);
222 bp->b_blkno = UNWRITTEN;
223 if (flags & B_CLRBUF)
224 clrbuf(bp);
225 }
226
227 /*
228 * Update the effective block count (this count
229 * includes blocks that don't have an on-disk
230 * presence or location yet)
231 */
232 ip->i_lfs_effnblks += frags;
233
234 /* account for the space we're taking */
235 mutex_enter(&lfs_lock);
236 lfs_sb_subbfree(fs, frags);
237 mutex_exit(&lfs_lock);
238
239 /* update the inode */
240 lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN);
241 } else {
242 /* extending a block that already has fragments */
243
244 if (nsize <= osize) {
245 /* No need to extend */
246 if (bpp && (error = bread(vp, lbn, osize,
247 0, &bp)))
248 return error;
249 } else {
250 /* Extend existing block */
251 if ((error =
252 lfs_fragextend(vp, osize, nsize, lbn,
253 (bpp ? &bp : NULL), cred)))
254 return error;
255 }
256 if (bpp)
257 *bpp = bp;
258 }
259 return 0;
260 }
261
262 /*
263 * Look up what's already here.
264 */
265
266 error = ulfs_bmaparray(vp, lbn, &daddr, &indirs[0], &num, NULL, NULL);
267 if (error)
268 return (error);
269
270 KASSERT(daddr <= LFS_MAX_DADDR(fs));
271
272 /*
273 * Do byte accounting all at once, so we can gracefully fail *before*
274 * we start assigning blocks.
275 */
276 frags = fs->um_seqinc;
277 bcount = 0; /* number of frags we need */
278 if (daddr == UNASSIGNED) {
279 /* no block yet, going to need a whole block */
280 bcount = frags;
281 }
282 for (i = 1; i < num; ++i) {
283 if (!indirs[i].in_exists) {
284 /* need an indirect block at this level */
285 bcount += frags;
286 }
287 }
288 if (ISSPACE(fs, bcount, cred)) {
289 /* update the superblock's free block count */
290 mutex_enter(&lfs_lock);
291 lfs_sb_subbfree(fs, bcount);
292 mutex_exit(&lfs_lock);
293 /* update the file's effective block count */
294 ip->i_lfs_effnblks += bcount;
295 } else {
296 /* whoops, no can do */
297 return ENOSPC;
298 }
299
300 if (daddr == UNASSIGNED) {
301 /*
302 * There is nothing here yet.
303 */
304
305 /*
306 * If there's no indirect block in the inode, change it
307 * to UNWRITTEN to indicate that it exists but doesn't
308 * have an on-disk address yet.
309 *
310 * (Question: where's the block data initialized?)
311 */
312 if (num > 0 && lfs_dino_getib(fs, ip->i_din, indirs[0].in_off) == 0) {
313 lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN);
314 }
315
316 /*
317 * If we need more layers of indirect blocks, create what
318 * we need.
319 */
320 if (num > 1) {
321 /*
322 * The outermost indirect block address is the one
323 * in the inode, so fetch that.
324 */
325 idaddr = lfs_dino_getib(fs, ip->i_din, indirs[0].in_off);
326 /*
327 * For each layer of indirection...
328 */
329 for (i = 1; i < num; ++i) {
330 /*
331 * Get a buffer for the indirect block data.
332 *
333 * (XXX: the logic here seems twisted. What's
334 * wrong with testing in_exists first and then
335 * doing either bread or getblk to get a
336 * buffer?)
337 */
338 ibp = getblk(vp, indirs[i].in_lbn,
339 lfs_sb_getbsize(fs), 0,0);
340 if (!indirs[i].in_exists) {
341 /*
342 * There isn't actually a block here,
343 * so clear the buffer data and mark
344 * the address of the block as
345 * UNWRITTEN.
346 */
347 clrbuf(ibp);
348 ibp->b_blkno = UNWRITTEN;
349 } else if (!(ibp->b_oflags & (BO_DELWRI | BO_DONE))) {
350 /*
351 * Otherwise read it in.
352 */
353 ibp->b_blkno = LFS_FSBTODB(fs, idaddr);
354 ibp->b_flags |= B_READ;
355 VOP_STRATEGY(vp, ibp);
356 biowait(ibp);
357 }
358
359 /*
360 * Now this indirect block exists, but
361 * the next one down may not yet. If
362 * so, set it to UNWRITTEN. This keeps
363 * the accounting straight.
364 */
365 if (lfs_iblock_get(fs, ibp->b_data, indirs[i].in_off) == 0)
366 lfs_iblock_set(fs, ibp->b_data, indirs[i].in_off,
367 UNWRITTEN);
368
369 /* get the block for the next iteration */
370 idaddr = lfs_iblock_get(fs, ibp->b_data, indirs[i].in_off);
371#ifdef DEBUG
372 if (vp == fs->lfs_ivnode) {
373 LFS_ENTER_LOG("balloc", __FILE__,
374 __LINE__, indirs[i].in_lbn,
375 ibp->b_flags, curproc->p_pid);
376 }
377#endif
378 /*
379 * Write out the updated indirect block. Note
380 * that this writes it out even if we didn't
381 * modify it - ultimately because the final
382 * block didn't exist we'll need to write a
383 * new version of all the blocks that lead to
384 * it. Hopefully all that gets in before any
385 * actual disk I/O so we don't end up writing
386 * any of them twice... this is currently not
387 * very clear.
388 */
389 if ((error = VOP_BWRITE(ibp->b_vp, ibp)))
390 return error;
391 }
392 }
393 }
394
395
396 /*
397 * Get the existing block from the cache, if requested.
398 */
399 if (bpp)
400 *bpp = bp = getblk(vp, lbn, lfs_blksize(fs, ip, lbn), 0, 0);
401
402 /*
403 * Do accounting on blocks that represent pages.
404 */
405 if (!bpp)
406 lfs_register_block(vp, lbn);
407
408 /*
409 * The block we are writing may be a brand new block
410 * in which case we need to do accounting.
411 *
412 * We can tell a truly new block because ulfs_bmaparray will say
413 * it is UNASSIGNED. Once we allocate it we will assign it the
414 * disk address UNWRITTEN.
415 */
416 if (daddr == UNASSIGNED) {
417 if (bpp) {
418 if (flags & B_CLRBUF)
419 clrbuf(bp);
420
421 /* Note the new address */
422 bp->b_blkno = UNWRITTEN;
423 }
424
425 switch (num) {
426 case 0:
427 /* direct block - update the inode */
428 lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN);
429 break;
430 case 1:
431 /*
432 * using a single indirect block - update the inode
433 *
434 * XXX: is this right? We already set this block
435 * pointer above. I think we want to be writing *in*
436 * the single indirect block and this case shouldn't
437 * exist. (just case 0 and default)
438 * -- dholland 20160806
439 */
440 lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN);
441 break;
442 default:
443 /*
444 * using multiple indirect blocks - update the
445 * innermost one
446 */
447 idp = &indirs[num - 1];
448 if (bread(vp, idp->in_lbn, lfs_sb_getbsize(fs),
449 B_MODIFY, &ibp))
450 panic("lfs_balloc: bread bno %lld",
451 (long long)idp->in_lbn);
452 lfs_iblock_set(fs, ibp->b_data, idp->in_off, UNWRITTEN);
453#ifdef DEBUG
454 if (vp == fs->lfs_ivnode) {
455 LFS_ENTER_LOG("balloc", __FILE__,
456 __LINE__, idp->in_lbn,
457 ibp->b_flags, curproc->p_pid);
458 }
459#endif
460 VOP_BWRITE(ibp->b_vp, ibp);
461 }
462 } else if (bpp && !(bp->b_oflags & (BO_DONE|BO_DELWRI))) {
463 /*
464 * Not a brand new block, also not in the cache;
465 * read it in from disk.
466 */
467 if (iosize == lfs_sb_getbsize(fs))
468 /* Optimization: I/O is unnecessary. */
469 bp->b_blkno = daddr;
470 else {
471 /*
472 * We need to read the block to preserve the
473 * existing bytes.
474 */
475 bp->b_blkno = daddr;
476 bp->b_flags |= B_READ;
477 VOP_STRATEGY(vp, bp);
478 return (biowait(bp));
479 }
480 }
481
482 return (0);
483}
484
485/*
486 * Extend a file that uses fragments with more fragments.
487 *
488 * XXX: locking?
489 */
490/* VOP_BWRITE 1 time */
491static int
492lfs_fragextend(struct vnode *vp, int osize, int nsize, daddr_t lbn,
493 struct buf **bpp, kauth_cred_t cred)
494{
495 struct inode *ip;
496 struct lfs *fs;
497 long frags;
498 int error;
499 size_t obufsize;
500
501 /* XXX move this to a header file */
502 /* (XXX: except it's not clear what purpose it serves) */
503 extern long locked_queue_bytes;
504
505 ip = VTOI(vp);
506 fs = ip->i_lfs;
507
508 /*
509 * XXX: is there some reason we know more about the seglock
510 * state here than at the top of lfs_balloc?
511 */
512 ASSERT_NO_SEGLOCK(fs);
513
514 /* number of frags we're adding */
515 frags = (long)lfs_numfrags(fs, nsize - osize);
516
517 error = 0;
518
519 /*
520 * Get the seglock so we don't enlarge blocks while a segment
521 * is being written. If we're called with bpp==NULL, though,
522 * we are only pretending to change a buffer, so we don't have to
523 * lock.
524 *
525 * XXX: the above comment is lying, as fs->lfs_fraglock is not
526 * the segment lock.
527 */
528 top:
529 if (bpp) {
530 rw_enter(&fs->lfs_fraglock, RW_READER);
531 LFS_DEBUG_COUNTLOCKED("frag");
532 }
533
534 /* check if we actually have enough frags available */
535 if (!ISSPACE(fs, frags, cred)) {
536 error = ENOSPC;
537 goto out;
538 }
539
540 /*
541 * If we are not asked to actually return the block, all we need
542 * to do is allocate space for it. UBC will handle dirtying the
543 * appropriate things and making sure it all goes to disk.
544 * Don't bother to read in that case.
545 */
546 if (bpp && (error = bread(vp, lbn, osize, 0, bpp))) {
547 goto out;
548 }
549#if defined(LFS_QUOTA) || defined(LFS_QUOTA2)
550 if ((error = lfs_chkdq(ip, frags, cred, 0))) {
551 if (bpp)
552 brelse(*bpp, 0);
553 goto out;
554 }
555#endif
556 /*
557 * Adjust accounting for lfs_avail. If there's not enough room,
558 * we will have to wait for the cleaner, which we can't do while
559 * holding a block busy or while holding the seglock. In that case,
560 * release both and start over after waiting.
561 */
562
563 if (bpp && ((*bpp)->b_oflags & BO_DELWRI)) {
564 if (!lfs_fits(fs, frags)) {
565 if (bpp)
566 brelse(*bpp, 0);
567#if defined(LFS_QUOTA) || defined(LFS_QUOTA2)
568 lfs_chkdq(ip, -frags, cred, 0);
569#endif
570 rw_exit(&fs->lfs_fraglock);
571 lfs_availwait(fs, frags);
572 goto top;
573 }
574 lfs_sb_subavail(fs, frags);
575 }
576
577 /* decrease the free block count in the superblock */
578 mutex_enter(&lfs_lock);
579 lfs_sb_subbfree(fs, frags);
580 mutex_exit(&lfs_lock);
581 /* increase the file's effective block count */
582 ip->i_lfs_effnblks += frags;
583 /* mark the inode dirty */
584 ip->i_flag |= IN_CHANGE | IN_UPDATE;
585
586 if (bpp) {
587 obufsize = (*bpp)->b_bufsize;
588 allocbuf(*bpp, nsize, 1);
589
590 /* Adjust locked-list accounting */
591 if (((*bpp)->b_flags & B_LOCKED) != 0 &&
592 (*bpp)->b_iodone == NULL) {
593 mutex_enter(&lfs_lock);
594 locked_queue_bytes += (*bpp)->b_bufsize - obufsize;
595 mutex_exit(&lfs_lock);
596 }
597
598 /* zero the new space */
599 memset((char *)((*bpp)->b_data) + osize, 0, (u_int)(nsize - osize));
600 }
601
602 out:
603 if (bpp) {
604 rw_exit(&fs->lfs_fraglock);
605 }
606 return (error);
607}
608
609static inline int
610lge(struct lbnentry *a, struct lbnentry *b)
611{
612 return a->lbn - b->lbn;
613}
614
615SPLAY_PROTOTYPE(lfs_splay, lbnentry, entry, lge);
616
617SPLAY_GENERATE(lfs_splay, lbnentry, entry, lge);
618
619/*
620 * Record this lbn as being "write pending". We used to have this information
621 * on the buffer headers, but since pages don't have buffer headers we
622 * record it here instead.
623 */
624void
625lfs_register_block(struct vnode *vp, daddr_t lbn)
626{
627 struct lfs *fs;
628 struct inode *ip;
629 struct lbnentry *lbp;
630
631 ip = VTOI(vp);
632
633 /* Don't count metadata */
634 if (lbn < 0 || vp->v_type != VREG || ip->i_number == LFS_IFILE_INUM)
635 return;
636
637 fs = ip->i_lfs;
638
639 ASSERT_NO_SEGLOCK(fs);
640
641 /* If no space, wait for the cleaner */
642 lfs_availwait(fs, lfs_btofsb(fs, 1 << lfs_sb_getbshift(fs)));
643
644 lbp = (struct lbnentry *)pool_get(&lfs_lbnentry_pool, PR_WAITOK);
645 lbp->lbn = lbn;
646 mutex_enter(&lfs_lock);
647 if (SPLAY_INSERT(lfs_splay, &ip->i_lfs_lbtree, lbp) != NULL) {
648 mutex_exit(&lfs_lock);
649 /* Already there */
650 pool_put(&lfs_lbnentry_pool, lbp);
651 return;
652 }
653
654 ++ip->i_lfs_nbtree;
655 fs->lfs_favail += lfs_btofsb(fs, (1 << lfs_sb_getbshift(fs)));
656 fs->lfs_pages += lfs_sb_getbsize(fs) >> PAGE_SHIFT;
657 ++locked_fakequeue_count;
658 lfs_subsys_pages += lfs_sb_getbsize(fs) >> PAGE_SHIFT;
659 mutex_exit(&lfs_lock);
660}
661
662static void
663lfs_do_deregister(struct lfs *fs, struct inode *ip, struct lbnentry *lbp)
664{
665 ASSERT_MAYBE_SEGLOCK(fs);
666
667 mutex_enter(&lfs_lock);
668 --ip->i_lfs_nbtree;
669 SPLAY_REMOVE(lfs_splay, &ip->i_lfs_lbtree, lbp);
670 if (fs->lfs_favail > lfs_btofsb(fs, (1 << lfs_sb_getbshift(fs))))
671 fs->lfs_favail -= lfs_btofsb(fs, (1 << lfs_sb_getbshift(fs)));
672 fs->lfs_pages -= lfs_sb_getbsize(fs) >> PAGE_SHIFT;
673 if (locked_fakequeue_count > 0)
674 --locked_fakequeue_count;
675 lfs_subsys_pages -= lfs_sb_getbsize(fs) >> PAGE_SHIFT;
676 mutex_exit(&lfs_lock);
677
678 pool_put(&lfs_lbnentry_pool, lbp);
679}
680
681void
682lfs_deregister_block(struct vnode *vp, daddr_t lbn)
683{
684 struct lfs *fs;
685 struct inode *ip;
686 struct lbnentry *lbp;
687 struct lbnentry tmp;
688
689 ip = VTOI(vp);
690
691 /* Don't count metadata */
692 if (lbn < 0 || vp->v_type != VREG || ip->i_number == LFS_IFILE_INUM)
693 return;
694
695 fs = ip->i_lfs;
696 tmp.lbn = lbn;
697 lbp = SPLAY_FIND(lfs_splay, &ip->i_lfs_lbtree, &tmp);
698 if (lbp == NULL)
699 return;
700
701 lfs_do_deregister(fs, ip, lbp);
702}
703
704void
705lfs_deregister_all(struct vnode *vp)
706{
707 struct lbnentry *lbp, *nlbp;
708 struct lfs_splay *hd;
709 struct lfs *fs;
710 struct inode *ip;
711
712 ip = VTOI(vp);
713 fs = ip->i_lfs;
714 hd = &ip->i_lfs_lbtree;
715
716 for (lbp = SPLAY_MIN(lfs_splay, hd); lbp != NULL; lbp = nlbp) {
717 nlbp = SPLAY_NEXT(lfs_splay, hd, lbp);
718 lfs_do_deregister(fs, ip, lbp);
719 }
720}
721