1/* $NetBSD: lfs_alloc.c,v 1.133 2016/08/07 05:09:12 dholland Exp $ */
2
3/*-
4 * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant@hhhh.org>.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31/*
32 * Copyright (c) 1991, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * @(#)lfs_alloc.c 8.4 (Berkeley) 1/4/94
60 */
61
62#include <sys/cdefs.h>
63__KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.133 2016/08/07 05:09:12 dholland Exp $");
64
65#if defined(_KERNEL_OPT)
66#include "opt_quota.h"
67#endif
68
69#include <sys/param.h>
70#include <sys/systm.h>
71#include <sys/kernel.h>
72#include <sys/buf.h>
73#include <sys/lock.h>
74#include <sys/vnode.h>
75#include <sys/syslog.h>
76#include <sys/mount.h>
77#include <sys/malloc.h>
78#include <sys/pool.h>
79#include <sys/proc.h>
80#include <sys/kauth.h>
81
82#include <ufs/lfs/ulfs_quotacommon.h>
83#include <ufs/lfs/ulfs_inode.h>
84#include <ufs/lfs/ulfsmount.h>
85#include <ufs/lfs/ulfs_extern.h>
86
87#include <ufs/lfs/lfs.h>
88#include <ufs/lfs/lfs_accessors.h>
89#include <ufs/lfs/lfs_extern.h>
90#include <ufs/lfs/lfs_kernel.h>
91
/*
 * Constants and helpers for the in-memory inode free bitmap.
 *
 * Inode number I is tracked in word (I >> BMSHIFT) of
 * (F)->lfs_ino_bitmap, at bit position (I & BMMASK); each word therefore
 * covers 32 inodes.  A set bit means the inode is free.
 *
 * The single-bit masks are built from an unsigned constant (1U): bit
 * position 31 is a legal index, and shifting a signed 1 into the sign
 * bit is undefined behaviour.
 *
 * Note that the I argument is evaluated more than once by each macro,
 * so it must not have side effects.
 */
#define BMSHIFT 5	/* 2 ** 5 = 32 */
#define BMMASK  ((1 << BMSHIFT) - 1)

/* Mark inode I free in the bitmap of file system F. */
#define SET_BITMAP_FREE(F, I) do { \
	DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d set\n", (int)(I), \
	    (int)((I) >> BMSHIFT), (int)((I) & BMMASK))); \
	(F)->lfs_ino_bitmap[(I) >> BMSHIFT] |= (1U << ((I) & BMMASK)); \
} while (0)

/* Mark inode I in use (not free) in the bitmap of file system F. */
#define CLR_BITMAP_FREE(F, I) do { \
	DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d clr\n", (int)(I), \
	    (int)((I) >> BMSHIFT), (int)((I) & BMMASK))); \
	(F)->lfs_ino_bitmap[(I) >> BMSHIFT] &= ~(1U << ((I) & BMMASK)); \
} while (0)

/* Nonzero iff inode I is marked free in the bitmap of file system F. */
#define ISSET_BITMAP_FREE(F, I) \
	((F)->lfs_ino_bitmap[(I) >> BMSHIFT] & (1U << ((I) & BMMASK)))
108
109/*
110 * Add a new block to the Ifile, to accommodate future file creations.
111 * Called with the segment lock held.
112 */
113int
114lfs_extend_ifile(struct lfs *fs, kauth_cred_t cred)
115{
116 struct vnode *vp;
117 struct inode *ip;
118 IFILE64 *ifp64;
119 IFILE32 *ifp32;
120 IFILE_V1 *ifp_v1;
121 struct buf *bp, *cbp;
122 int error;
123 daddr_t i, blkno, xmax;
124 ino_t oldlast, maxino;
125 CLEANERINFO *cip;
126
127 ASSERT_SEGLOCK(fs);
128
129 /* XXX should check or assert that we aren't readonly. */
130
131 /*
132 * Get a block and extend the ifile inode. Leave the buffer for
133 * the block in bp.
134 */
135
136 vp = fs->lfs_ivnode;
137 ip = VTOI(vp);
138 blkno = lfs_lblkno(fs, ip->i_size);
139 if ((error = lfs_balloc(vp, ip->i_size, lfs_sb_getbsize(fs), cred, 0,
140 &bp)) != 0) {
141 return (error);
142 }
143 ip->i_size += lfs_sb_getbsize(fs);
144 lfs_dino_setsize(fs, ip->i_din, ip->i_size);
145 uvm_vnp_setsize(vp, ip->i_size);
146
147 /*
148 * Compute the new number of inodes, and reallocate the in-memory
149 * inode freemap.
150 */
151
152 maxino = ((ip->i_size >> lfs_sb_getbshift(fs)) - lfs_sb_getcleansz(fs) -
153 lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
154 fs->lfs_ino_bitmap = (lfs_bm_t *)
155 realloc(fs->lfs_ino_bitmap, ((maxino + BMMASK) >> BMSHIFT) *
156 sizeof(lfs_bm_t), M_SEGMENT, M_WAITOK);
157 KASSERT(fs->lfs_ino_bitmap != NULL);
158
159 /* first new inode number */
160 i = (blkno - lfs_sb_getsegtabsz(fs) - lfs_sb_getcleansz(fs)) *
161 lfs_sb_getifpb(fs);
162
163 /*
164 * We insert the new inodes at the head of the free list.
165 * Under normal circumstances, the free list is empty here,
166 * so we are also incidentally placing them at the end (which
167 * we must do if we are to keep them in order).
168 */
169 LFS_GET_HEADFREE(fs, cip, cbp, &oldlast);
170 LFS_PUT_HEADFREE(fs, cip, cbp, i);
171#ifdef DIAGNOSTIC
172 if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM)
173 panic("inode 0 allocated [2]");
174#endif /* DIAGNOSTIC */
175
176 /* inode number to stop at (XXX: why *x*max?) */
177 xmax = i + lfs_sb_getifpb(fs);
178
179 /*
180 * Initialize the ifile block.
181 *
182 * XXX: these loops should be restructured to use the accessor
183 * functions instead of using cutpaste polymorphism.
184 */
185
186 if (fs->lfs_is64) {
187 for (ifp64 = (IFILE64 *)bp->b_data; i < xmax; ++ifp64) {
188 SET_BITMAP_FREE(fs, i);
189 ifp64->if_version = 1;
190 ifp64->if_daddr = LFS_UNUSED_DADDR;
191 ifp64->if_nextfree = ++i;
192 }
193 ifp64--;
194 ifp64->if_nextfree = oldlast;
195 } else if (lfs_sb_getversion(fs) > 1) {
196 for (ifp32 = (IFILE32 *)bp->b_data; i < xmax; ++ifp32) {
197 SET_BITMAP_FREE(fs, i);
198 ifp32->if_version = 1;
199 ifp32->if_daddr = LFS_UNUSED_DADDR;
200 ifp32->if_nextfree = ++i;
201 }
202 ifp32--;
203 ifp32->if_nextfree = oldlast;
204 } else {
205 for (ifp_v1 = (IFILE_V1 *)bp->b_data; i < xmax; ++ifp_v1) {
206 SET_BITMAP_FREE(fs, i);
207 ifp_v1->if_version = 1;
208 ifp_v1->if_daddr = LFS_UNUSED_DADDR;
209 ifp_v1->if_nextfree = ++i;
210 }
211 ifp_v1--;
212 ifp_v1->if_nextfree = oldlast;
213 }
214 LFS_PUT_TAILFREE(fs, cip, cbp, xmax - 1);
215
216 /*
217 * Write out the new block.
218 */
219
220 (void) LFS_BWRITE_LOG(bp); /* Ifile */
221
222 return 0;
223}
224
225/*
226 * Allocate an inode for a new file.
227 *
228 * Takes the segment lock. Also (while holding it) takes lfs_lock
229 * to frob fs->lfs_fmod.
230 *
231 * XXX: the mode argument is unused; should just get rid of it.
232 */
233/* ARGSUSED */
234/* VOP_BWRITE 2i times */
235int
236lfs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
237 ino_t *ino, int *gen)
238{
239 struct lfs *fs;
240 struct buf *bp, *cbp;
241 IFILE *ifp;
242 int error;
243 CLEANERINFO *cip;
244
245 fs = VTOI(pvp)->i_lfs;
246 if (fs->lfs_ronly)
247 return EROFS;
248
249 ASSERT_NO_SEGLOCK(fs);
250
251 lfs_seglock(fs, SEGM_PROT);
252
253 /* Get the head of the freelist. */
254 LFS_GET_HEADFREE(fs, cip, cbp, ino);
255
256 /* paranoia */
257 KASSERT(*ino != LFS_UNUSED_INUM && *ino != LFS_IFILE_INUM);
258 DLOG((DLOG_ALLOC, "lfs_valloc: allocate inode %" PRId64 "\n",
259 *ino));
260
261 /* Update the in-memory inode freemap */
262 CLR_BITMAP_FREE(fs, *ino);
263
264 /*
265 * Fetch the ifile entry and make sure the inode is really
266 * free.
267 */
268 LFS_IENTRY(ifp, fs, *ino, bp);
269 if (lfs_if_getdaddr(fs, ifp) != LFS_UNUSED_DADDR)
270 panic("lfs_valloc: inuse inode %" PRId64 " on the free list",
271 *ino);
272
273 /* Update the inode freelist head in the superblock. */
274 LFS_PUT_HEADFREE(fs, cip, cbp, lfs_if_getnextfree(fs, ifp));
275 DLOG((DLOG_ALLOC, "lfs_valloc: headfree %" PRId64 " -> %ju\n",
276 *ino, (uintmax_t)lfs_if_getnextfree(fs, ifp)));
277
278 /*
279 * Retrieve the version number from the ifile entry. It was
280 * bumped by vfree, so don't bump it again.
281 */
282 *gen = lfs_if_getversion(fs, ifp);
283
284 /* Done with ifile entry */
285 brelse(bp, 0);
286
287 if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
288 /*
289 * No more inodes; extend the ifile so that the next
290 * lfs_valloc will succeed.
291 */
292 if ((error = lfs_extend_ifile(fs, cred)) != 0) {
293 /* restore the freelist */
294 LFS_PUT_HEADFREE(fs, cip, cbp, *ino);
295
296 /* unlock and return */
297 lfs_segunlock(fs);
298 return error;
299 }
300 }
301#ifdef DIAGNOSTIC
302 if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM)
303 panic("inode 0 allocated [3]");
304#endif /* DIAGNOSTIC */
305
306 /* Set superblock modified bit */
307 mutex_enter(&lfs_lock);
308 fs->lfs_fmod = 1;
309 mutex_exit(&lfs_lock);
310
311 /* increment file count */
312 lfs_sb_addnfiles(fs, 1);
313
314 /* done */
315 lfs_segunlock(fs);
316 return 0;
317}
318
319/*
320 * Allocate an inode for a new file, with given inode number and
321 * version.
322 *
323 * Called in the same context as lfs_valloc and therefore shares the
324 * same locking assumptions.
325 *
326 * XXX: WHICH MEANS IT OUGHT TO TAKE THE SEGLOCK WHILE FROBBING THIS
327 * XXX: STUFF. REALLY.
328 */
329int
330lfs_valloc_fixed(struct lfs *fs, ino_t ino, int vers)
331{
332 IFILE *ifp;
333 struct buf *bp, *cbp;
334 ino_t headino, thisino, oldnext;
335 CLEANERINFO *cip;
336
337 /* XXX: check for readonly */
338 /* XXX: assert no seglock */
339 /* XXX: should take seglock (as noted above) */
340
341 /*
342 * If the ifile is too short to contain this inum, extend it.
343 *
344 * XXX: lfs_extend_ifile should take a size instead of always
345 * doing just one block at time.
346 */
347 while (VTOI(fs->lfs_ivnode)->i_size <= (ino /
348 lfs_sb_getifpb(fs) + lfs_sb_getcleansz(fs) + lfs_sb_getsegtabsz(fs))
349 << lfs_sb_getbshift(fs)) {
350 lfs_extend_ifile(fs, NOCRED);
351 }
352
353 /*
354 * fetch the ifile entry; get the inode freelist next pointer,
355 * and set the version as directed.
356 */
357 LFS_IENTRY(ifp, fs, ino, bp);
358 oldnext = lfs_if_getnextfree(fs, ifp);
359 lfs_if_setversion(fs, ifp, vers);
360 brelse(bp, 0);
361
362 /* Get head of inode freelist */
363 LFS_GET_HEADFREE(fs, cip, cbp, &headino);
364 if (headino == ino) {
365 /* Easy case: the inode we wanted was at the head */
366 LFS_PUT_HEADFREE(fs, cip, cbp, oldnext);
367 } else {
368 ino_t nextfree;
369
370 /* Have to find the desired inode in the freelist... */
371
372 thisino = headino;
373 while (1) {
374 /* read this ifile entry */
375 LFS_IENTRY(ifp, fs, thisino, bp);
376 nextfree = lfs_if_getnextfree(fs, ifp);
377 /* stop if we find it or we hit the end */
378 if (nextfree == ino ||
379 nextfree == LFS_UNUSED_INUM)
380 break;
381 /* nope, keep going... */
382 thisino = nextfree;
383 brelse(bp, 0);
384 }
385 if (nextfree == LFS_UNUSED_INUM) {
386 /* hit the end -- this inode is not available */
387 brelse(bp, 0);
388 /* XXX release seglock (see above) */
389 return ENOENT;
390 }
391 /* found it; update the next pointer */
392 lfs_if_setnextfree(fs, ifp, oldnext);
393 /* write the ifile block */
394 LFS_BWRITE_LOG(bp);
395 }
396
397 /* done */
398 /* XXX release seglock (see above) */
399 return 0;
400}
401
#if 0
/*
 * Return the highest-numbered inode that is not marked free in the
 * in-memory bitmap, i.e. the highest allocated inode.  If the scan
 * reaches LFS_UNUSED_INUM without finding one, that value is returned.
 * This will be used to shrink the Ifile.
 */
static inline ino_t
lfs_last_alloc_ino(struct lfs *fs)
{
	ino_t cand, ninodes;

	/* number of inodes the ifile currently describes */
	ninodes = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
	    lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) *
	    lfs_sb_getifpb(fs);

	/* walk downwards past every inode that is still free */
	cand = ninodes - 1;
	while (cand > LFS_UNUSED_INUM && ISSET_BITMAP_FREE(fs, cand) != 0)
		--cand;

	return cand;
}
#endif
422
423/*
424 * Find the previous (next lowest numbered) free inode, if any.
425 * If there is none, return LFS_UNUSED_INUM.
426 *
427 * XXX: locking?
428 */
429static inline ino_t
430lfs_freelist_prev(struct lfs *fs, ino_t ino)
431{
432 ino_t tino, bound, bb, freehdbb;
433
434 if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
435 /* No free inodes at all */
436 return LFS_UNUSED_INUM;
437 }
438
439 /* Search our own word first */
440 bound = ino & ~BMMASK;
441 for (tino = ino - 1; tino >= bound && tino > LFS_UNUSED_INUM; tino--)
442 if (ISSET_BITMAP_FREE(fs, tino))
443 return tino;
444 /* If there are no lower words to search, just return */
445 if (ino >> BMSHIFT == 0)
446 return LFS_UNUSED_INUM;
447
448 /*
449 * Find a word with a free inode in it. We have to be a bit
450 * careful here since ino_t is unsigned.
451 */
452 freehdbb = (lfs_sb_getfreehd(fs) >> BMSHIFT);
453 for (bb = (ino >> BMSHIFT) - 1; bb >= freehdbb && bb > 0; --bb)
454 if (fs->lfs_ino_bitmap[bb])
455 break;
456 if (fs->lfs_ino_bitmap[bb] == 0)
457 return LFS_UNUSED_INUM;
458
459 /* Search the word we found */
460 for (tino = (bb << BMSHIFT) | BMMASK; tino >= (bb << BMSHIFT) &&
461 tino > LFS_UNUSED_INUM; tino--)
462 if (ISSET_BITMAP_FREE(fs, tino))
463 break;
464
465 /* Avoid returning reserved inode numbers */
466 if (tino <= LFS_IFILE_INUM)
467 tino = LFS_UNUSED_INUM;
468
469 return tino;
470}
471
472/*
473 * Free an inode.
474 *
475 * Takes lfs_seglock. Also (independently) takes vp->v_interlock.
476 */
477/* ARGUSED */
478/* VOP_BWRITE 2i times */
479int
480lfs_vfree(struct vnode *vp, ino_t ino, int mode)
481{
482 SEGUSE *sup;
483 CLEANERINFO *cip;
484 struct buf *cbp, *bp;
485 IFILE *ifp;
486 struct inode *ip;
487 struct lfs *fs;
488 daddr_t old_iaddr;
489 ino_t otail;
490
491 /* Get the inode number and file system. */
492 ip = VTOI(vp);
493 fs = ip->i_lfs;
494 ino = ip->i_number;
495
496 /* XXX: assert not readonly */
497
498 ASSERT_NO_SEGLOCK(fs);
499 DLOG((DLOG_ALLOC, "lfs_vfree: free ino %lld\n", (long long)ino));
500
501 /* Drain of pending writes */
502 mutex_enter(vp->v_interlock);
503 while (lfs_sb_getversion(fs) > 1 && WRITEINPROG(vp)) {
504 cv_wait(&vp->v_cv, vp->v_interlock);
505 }
506 mutex_exit(vp->v_interlock);
507
508 lfs_seglock(fs, SEGM_PROT);
509
510 /*
511 * If the inode was in a dirop, it isn't now.
512 *
513 * XXX: why are (v_uflag & VU_DIROP) and (ip->i_flag & IN_ADIROP)
514 * not updated together in one function? (and why do both exist,
515 * anyway?)
516 */
517 lfs_unmark_vnode(vp);
518
519 mutex_enter(&lfs_lock);
520 if (vp->v_uflag & VU_DIROP) {
521 vp->v_uflag &= ~VU_DIROP;
522 --lfs_dirvcount;
523 --fs->lfs_dirvcount;
524 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
525 wakeup(&fs->lfs_dirvcount);
526 wakeup(&lfs_dirvcount);
527 mutex_exit(&lfs_lock);
528 vrele(vp);
529
530 /*
531 * If this inode is not going to be written any more, any
532 * segment accounting left over from its truncation needs
533 * to occur at the end of the next dirops flush. Attach
534 * them to the fs-wide list for that purpose.
535 */
536 if (LIST_FIRST(&ip->i_lfs_segdhd) != NULL) {
537 struct segdelta *sd;
538
539 while((sd = LIST_FIRST(&ip->i_lfs_segdhd)) != NULL) {
540 LIST_REMOVE(sd, list);
541 LIST_INSERT_HEAD(&fs->lfs_segdhd, sd, list);
542 }
543 }
544 } else {
545 /*
546 * If it's not a dirop, we can finalize right away.
547 */
548 mutex_exit(&lfs_lock);
549 lfs_finalize_ino_seguse(fs, ip);
550 }
551
552 /* it is no longer an unwritten inode, so update the counts */
553 mutex_enter(&lfs_lock);
554 LFS_CLR_UINO(ip, IN_ACCESSED|IN_CLEANING|IN_MODIFIED);
555 mutex_exit(&lfs_lock);
556
557 /* Turn off all inode modification flags */
558 ip->i_flag &= ~IN_ALLMOD;
559
560 /* Mark it deleted */
561 ip->i_lfs_iflags |= LFSI_DELETED;
562
563 /* Mark it free in the in-memory inode freemap */
564 SET_BITMAP_FREE(fs, ino);
565
566 /*
567 * Set the ifile's inode entry to unused, increment its version number
568 * and link it onto the free chain.
569 */
570
571 /* fetch the ifile entry */
572 LFS_IENTRY(ifp, fs, ino, bp);
573
574 /* update the on-disk address (to "nowhere") */
575 old_iaddr = lfs_if_getdaddr(fs, ifp);
576 lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
577
578 /* bump the version */
579 lfs_if_setversion(fs, ifp, lfs_if_getversion(fs, ifp) + 1);
580
581 if (lfs_sb_getversion(fs) == 1) {
582 ino_t nextfree;
583
584 /* insert on freelist */
585 LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
586 lfs_if_setnextfree(fs, ifp, nextfree);
587 LFS_PUT_HEADFREE(fs, cip, cbp, ino);
588
589 /* write the ifile block */
590 (void) LFS_BWRITE_LOG(bp); /* Ifile */
591 } else {
592 ino_t tino, onf;
593
594 /*
595 * Clear the freelist next pointer and write the ifile
596 * block. XXX: why? I'm sure there must be a reason but
597 * it seems both silly and dangerous.
598 */
599 lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM);
600 (void) LFS_BWRITE_LOG(bp); /* Ifile */
601
602 /*
603 * Insert on freelist in order.
604 */
605
606 /* Find the next lower (by number) free inode */
607 tino = lfs_freelist_prev(fs, ino);
608
609 if (tino == LFS_UNUSED_INUM) {
610 ino_t nextfree;
611
612 /*
613 * There isn't one; put us on the freelist head.
614 */
615
616 /* reload the ifile block */
617 LFS_IENTRY(ifp, fs, ino, bp);
618 /* update the list */
619 LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
620 lfs_if_setnextfree(fs, ifp, nextfree);
621 LFS_PUT_HEADFREE(fs, cip, cbp, ino);
622 DLOG((DLOG_ALLOC, "lfs_vfree: headfree %lld -> %lld\n",
623 (long long)nextfree, (long long)ino));
624 /* write the ifile block */
625 LFS_BWRITE_LOG(bp); /* Ifile */
626
627 /* If the list was empty, set tail too */
628 LFS_GET_TAILFREE(fs, cip, cbp, &otail);
629 if (otail == LFS_UNUSED_INUM) {
630 LFS_PUT_TAILFREE(fs, cip, cbp, ino);
631 DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
632 "-> %lld\n", (long long)otail,
633 (long long)ino));
634 }
635 } else {
636 /*
637 * Insert this inode into the list after tino.
638 * We hold the segment lock so we don't have to
639 * worry about blocks being written out of order.
640 */
641
642 DLOG((DLOG_ALLOC, "lfs_vfree: insert ino %lld "
643 " after %lld\n", ino, tino));
644
645 /* load the previous inode's ifile block */
646 LFS_IENTRY(ifp, fs, tino, bp);
647 /* update the list pointer */
648 onf = lfs_if_getnextfree(fs, ifp);
649 lfs_if_setnextfree(fs, ifp, ino);
650 /* write the block */
651 LFS_BWRITE_LOG(bp); /* Ifile */
652
653 /* load this inode's ifile block */
654 LFS_IENTRY(ifp, fs, ino, bp);
655 /* update the list pointer */
656 lfs_if_setnextfree(fs, ifp, onf);
657 /* write the block */
658 LFS_BWRITE_LOG(bp); /* Ifile */
659
660 /* If we're last, put us on the tail */
661 if (onf == LFS_UNUSED_INUM) {
662 LFS_GET_TAILFREE(fs, cip, cbp, &otail);
663 LFS_PUT_TAILFREE(fs, cip, cbp, ino);
664 DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
665 "-> %lld\n", (long long)otail,
666 (long long)ino));
667 }
668 }
669 }
670#ifdef DIAGNOSTIC
671 /* XXX: shouldn't this check be further up *before* we trash the fs? */
672 if (ino == LFS_UNUSED_INUM) {
673 panic("inode 0 freed");
674 }
675#endif /* DIAGNOSTIC */
676
677 /*
678 * Update the segment summary for the segment where the on-disk
679 * copy used to be.
680 */
681 if (old_iaddr != LFS_UNUSED_DADDR) {
682 /* load it */
683 LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp);
684#ifdef DIAGNOSTIC
685 /* the number of bytes in the segment should not become < 0 */
686 if (sup->su_nbytes < DINOSIZE(fs)) {
687 printf("lfs_vfree: negative byte count"
688 " (segment %" PRIu32 " short by %d)\n",
689 lfs_dtosn(fs, old_iaddr),
690 (int)DINOSIZE(fs) -
691 sup->su_nbytes);
692 panic("lfs_vfree: negative byte count");
693 sup->su_nbytes = DINOSIZE(fs);
694 }
695#endif
696 /* update the number of bytes in the segment */
697 sup->su_nbytes -= DINOSIZE(fs);
698 /* write the segment entry */
699 LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp); /* Ifile */
700 }
701
702 /* Set superblock modified bit. */
703 mutex_enter(&lfs_lock);
704 fs->lfs_fmod = 1;
705 mutex_exit(&lfs_lock);
706
707 /* Decrement file count. */
708 lfs_sb_subnfiles(fs, 1);
709
710 lfs_segunlock(fs);
711
712 return (0);
713}
714
715/*
716 * Sort the freelist and set up the free-inode bitmap.
717 * To be called by lfs_mountfs().
718 *
719 * Takes the segmenet lock.
720 */
721void
722lfs_order_freelist(struct lfs *fs)
723{
724 CLEANERINFO *cip;
725 IFILE *ifp = NULL;
726 struct buf *bp;
727 ino_t ino, firstino, lastino, maxino;
728#ifdef notyet
729 struct vnode *vp;
730#endif
731
732 ASSERT_NO_SEGLOCK(fs);
733 lfs_seglock(fs, SEGM_PROT);
734
735 /* largest inode on fs */
736 maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
737 lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
738
739 /* allocate the in-memory inode freemap */
740 /* XXX: assert that fs->lfs_ino_bitmap is null here */
741 fs->lfs_ino_bitmap =
742 malloc(((maxino + BMMASK) >> BMSHIFT) * sizeof(lfs_bm_t),
743 M_SEGMENT, M_WAITOK | M_ZERO);
744 KASSERT(fs->lfs_ino_bitmap != NULL);
745
746 /*
747 * Scan the ifile.
748 */
749
750 firstino = lastino = LFS_UNUSED_INUM;
751 for (ino = 0; ino < maxino; ino++) {
752 /* Load this inode's ifile entry. */
753 if (ino % lfs_sb_getifpb(fs) == 0)
754 LFS_IENTRY(ifp, fs, ino, bp);
755 else
756 LFS_IENTRY_NEXT(ifp, fs);
757
758 /* Don't put zero or ifile on the free list */
759 if (ino == LFS_UNUSED_INUM || ino == LFS_IFILE_INUM)
760 continue;
761
762#ifdef notyet
763 /*
764 * Address orphaned files.
765 *
766 * The idea of this is to free inodes belonging to
767 * files that were unlinked but not reclaimed, I guess
768 * because if we're going to scan the whole ifile
769 * anyway it costs very little to do this. I don't
770 * immediately see any reason this should be disabled,
771 * but presumably it doesn't work... not sure what
772 * happens to such files currently. -- dholland 20160806
773 */
774 if (lfs_if_getnextfree(fs, ifp) == LFS_ORPHAN_NEXTFREE &&
775 VFS_VGET(fs->lfs_ivnode->v_mount, ino, &vp) == 0) {
776 unsigned segno;
777
778 /* get the segment the inode in on disk */
779 segno = lfs_dtosn(fs, lfs_if_getdaddr(fs, ifp));
780
781 /* truncate the inode */
782 lfs_truncate(vp, 0, 0, NOCRED);
783 vput(vp);
784
785 /* load the segment summary */
786 LFS_SEGENTRY(sup, fs, segno, bp);
787 /* update the number of bytes in the segment */
788 KASSERT(sup->su_nbytes >= DINOSIZE(fs));
789 sup->su_nbytes -= DINOSIZE(fs);
790 /* write the segment summary */
791 LFS_WRITESEGENTRY(sup, fs, segno, bp);
792
793 /* Drop the on-disk address */
794 lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
795 /* write the ifile entry */
796 LFS_BWRITE_LOG(bp);
797
798 /*
799 * and reload it (XXX: why? I guess
800 * LFS_BWRITE_LOG drops it...)
801 */
802 LFS_IENTRY(ifp, fs, ino, bp);
803
804 /* Fall through to next if block */
805 }
806#endif
807
808 if (lfs_if_getdaddr(fs, ifp) == LFS_UNUSED_DADDR) {
809
810 /*
811 * This inode is free. Put it on the free list.
812 */
813
814 if (firstino == LFS_UNUSED_INUM) {
815 /* XXX: assert lastino == LFS_UNUSED_INUM? */
816 /* remember the first free inode */
817 firstino = ino;
818 } else {
819 /* release this inode's ifile entry */
820 brelse(bp, 0);
821
822 /* XXX: assert lastino != LFS_UNUSED_INUM? */
823
824 /* load lastino's ifile entry */
825 LFS_IENTRY(ifp, fs, lastino, bp);
826 /* set the list pointer */
827 lfs_if_setnextfree(fs, ifp, ino);
828 /* write the block */
829 LFS_BWRITE_LOG(bp);
830
831 /* reload this inode's ifile entry */
832 LFS_IENTRY(ifp, fs, ino, bp);
833 }
834 /* remember the last free inode seen so far */
835 lastino = ino;
836
837 /* Mark this inode free in the in-memory freemap */
838 SET_BITMAP_FREE(fs, ino);
839 }
840
841 /* If moving to the next ifile block, release the buffer. */
842 if ((ino + 1) % lfs_sb_getifpb(fs) == 0)
843 brelse(bp, 0);
844 }
845
846 /* Write the freelist head and tail pointers */
847 /* XXX: do we need to mark the superblock dirty? */
848 LFS_PUT_HEADFREE(fs, cip, bp, firstino);
849 LFS_PUT_TAILFREE(fs, cip, bp, lastino);
850
851 /* done */
852 lfs_segunlock(fs);
853}
854
855/*
856 * Mark a file orphaned (unlinked but not yet reclaimed) by inode
857 * number. Do this with a magic freelist next pointer.
858 *
859 * XXX: howzabout some locking?
860 */
861void
862lfs_orphan(struct lfs *fs, ino_t ino)
863{
864 IFILE *ifp;
865 struct buf *bp;
866
867 LFS_IENTRY(ifp, fs, ino, bp);
868 lfs_if_setnextfree(fs, ifp, LFS_ORPHAN_NEXTFREE);
869 LFS_BWRITE_LOG(bp);
870}
871