1 | /* $NetBSD: lfs_alloc.c,v 1.133 2016/08/07 05:09:12 dholland Exp $ */ |
2 | |
3 | /*- |
4 | * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc. |
5 | * All rights reserved. |
6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Konrad E. Schroder <perseant@hhhh.org>. |
9 | * |
10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions |
12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. |
18 | * |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 | * POSSIBILITY OF SUCH DAMAGE. |
30 | */ |
31 | /* |
32 | * Copyright (c) 1991, 1993 |
33 | * The Regents of the University of California. All rights reserved. |
34 | * |
35 | * Redistribution and use in source and binary forms, with or without |
36 | * modification, are permitted provided that the following conditions |
37 | * are met: |
38 | * 1. Redistributions of source code must retain the above copyright |
39 | * notice, this list of conditions and the following disclaimer. |
40 | * 2. Redistributions in binary form must reproduce the above copyright |
41 | * notice, this list of conditions and the following disclaimer in the |
42 | * documentation and/or other materials provided with the distribution. |
43 | * 3. Neither the name of the University nor the names of its contributors |
44 | * may be used to endorse or promote products derived from this software |
45 | * without specific prior written permission. |
46 | * |
47 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
48 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
49 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
50 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
51 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
52 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
53 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
54 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
55 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
56 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
57 | * SUCH DAMAGE. |
58 | * |
59 | * @(#)lfs_alloc.c 8.4 (Berkeley) 1/4/94 |
60 | */ |
61 | |
62 | #include <sys/cdefs.h> |
63 | __KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.133 2016/08/07 05:09:12 dholland Exp $" ); |
64 | |
65 | #if defined(_KERNEL_OPT) |
66 | #include "opt_quota.h" |
67 | #endif |
68 | |
69 | #include <sys/param.h> |
70 | #include <sys/systm.h> |
71 | #include <sys/kernel.h> |
72 | #include <sys/buf.h> |
73 | #include <sys/lock.h> |
74 | #include <sys/vnode.h> |
75 | #include <sys/syslog.h> |
76 | #include <sys/mount.h> |
77 | #include <sys/malloc.h> |
78 | #include <sys/pool.h> |
79 | #include <sys/proc.h> |
80 | #include <sys/kauth.h> |
81 | |
82 | #include <ufs/lfs/ulfs_quotacommon.h> |
83 | #include <ufs/lfs/ulfs_inode.h> |
84 | #include <ufs/lfs/ulfsmount.h> |
85 | #include <ufs/lfs/ulfs_extern.h> |
86 | |
87 | #include <ufs/lfs/lfs.h> |
88 | #include <ufs/lfs/lfs_accessors.h> |
89 | #include <ufs/lfs/lfs_extern.h> |
90 | #include <ufs/lfs/lfs_kernel.h> |
91 | |
/*
 * Constants and helpers for the in-memory inode free bitmap
 * (fs->lfs_ino_bitmap).  Each inode number owns one bit; a set bit
 * means the inode is free.
 *
 * BMSHIFT turns an inode number into a word index (32 bits per word)
 * and BMMASK extracts the bit position inside that word.  BMMASK is
 * intentionally left as a signed int: lfs_freelist_prev() computes
 * "ino & ~BMMASK" with a wider ino_t and depends on ~BMMASK being
 * sign-extended.
 *
 * The single-bit masks are built from 1U, not 1: shifting a signed 1
 * into bit 31 (inode numbers with (I & BMMASK) == 31) is undefined
 * behavior in C.
 *
 * F is a pointer to the file system structure; I is the inode number.
 * Both arguments are evaluated more than once.
 */
#define BMSHIFT 5	/* 2 ** 5 = 32 */
#define BMMASK	((1 << BMSHIFT) - 1)

/* Mark inode I as free. */
#define SET_BITMAP_FREE(F, I) do { \
	DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d set\n", (int)(I), 	\
	     (int)((I) >> BMSHIFT), (int)((I) & BMMASK)));		\
	(F)->lfs_ino_bitmap[(I) >> BMSHIFT] |= (1U << ((I) & BMMASK));	\
} while (0)

/* Mark inode I as in use. */
#define CLR_BITMAP_FREE(F, I) do { \
	DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d clr\n", (int)(I), 	\
	     (int)((I) >> BMSHIFT), (int)((I) & BMMASK)));		\
	(F)->lfs_ino_bitmap[(I) >> BMSHIFT] &= ~(1U << ((I) & BMMASK));	\
} while(0)

/* Nonzero if inode I is marked free. */
#define ISSET_BITMAP_FREE(F, I) \
	((F)->lfs_ino_bitmap[(I) >> BMSHIFT] & (1U << ((I) & BMMASK)))
108 | |
109 | /* |
110 | * Add a new block to the Ifile, to accommodate future file creations. |
111 | * Called with the segment lock held. |
112 | */ |
113 | int |
114 | lfs_extend_ifile(struct lfs *fs, kauth_cred_t cred) |
115 | { |
116 | struct vnode *vp; |
117 | struct inode *ip; |
118 | IFILE64 *ifp64; |
119 | IFILE32 *ifp32; |
120 | IFILE_V1 *ifp_v1; |
121 | struct buf *bp, *cbp; |
122 | int error; |
123 | daddr_t i, blkno, xmax; |
124 | ino_t oldlast, maxino; |
125 | CLEANERINFO *cip; |
126 | |
127 | ASSERT_SEGLOCK(fs); |
128 | |
129 | /* XXX should check or assert that we aren't readonly. */ |
130 | |
131 | /* |
132 | * Get a block and extend the ifile inode. Leave the buffer for |
133 | * the block in bp. |
134 | */ |
135 | |
136 | vp = fs->lfs_ivnode; |
137 | ip = VTOI(vp); |
138 | blkno = lfs_lblkno(fs, ip->i_size); |
139 | if ((error = lfs_balloc(vp, ip->i_size, lfs_sb_getbsize(fs), cred, 0, |
140 | &bp)) != 0) { |
141 | return (error); |
142 | } |
143 | ip->i_size += lfs_sb_getbsize(fs); |
144 | lfs_dino_setsize(fs, ip->i_din, ip->i_size); |
145 | uvm_vnp_setsize(vp, ip->i_size); |
146 | |
147 | /* |
148 | * Compute the new number of inodes, and reallocate the in-memory |
149 | * inode freemap. |
150 | */ |
151 | |
152 | maxino = ((ip->i_size >> lfs_sb_getbshift(fs)) - lfs_sb_getcleansz(fs) - |
153 | lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs); |
154 | fs->lfs_ino_bitmap = (lfs_bm_t *) |
155 | realloc(fs->lfs_ino_bitmap, ((maxino + BMMASK) >> BMSHIFT) * |
156 | sizeof(lfs_bm_t), M_SEGMENT, M_WAITOK); |
157 | KASSERT(fs->lfs_ino_bitmap != NULL); |
158 | |
159 | /* first new inode number */ |
160 | i = (blkno - lfs_sb_getsegtabsz(fs) - lfs_sb_getcleansz(fs)) * |
161 | lfs_sb_getifpb(fs); |
162 | |
163 | /* |
164 | * We insert the new inodes at the head of the free list. |
165 | * Under normal circumstances, the free list is empty here, |
166 | * so we are also incidentally placing them at the end (which |
167 | * we must do if we are to keep them in order). |
168 | */ |
169 | LFS_GET_HEADFREE(fs, cip, cbp, &oldlast); |
170 | LFS_PUT_HEADFREE(fs, cip, cbp, i); |
171 | #ifdef DIAGNOSTIC |
172 | if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) |
173 | panic("inode 0 allocated [2]" ); |
174 | #endif /* DIAGNOSTIC */ |
175 | |
176 | /* inode number to stop at (XXX: why *x*max?) */ |
177 | xmax = i + lfs_sb_getifpb(fs); |
178 | |
179 | /* |
180 | * Initialize the ifile block. |
181 | * |
182 | * XXX: these loops should be restructured to use the accessor |
183 | * functions instead of using cutpaste polymorphism. |
184 | */ |
185 | |
186 | if (fs->lfs_is64) { |
187 | for (ifp64 = (IFILE64 *)bp->b_data; i < xmax; ++ifp64) { |
188 | SET_BITMAP_FREE(fs, i); |
189 | ifp64->if_version = 1; |
190 | ifp64->if_daddr = LFS_UNUSED_DADDR; |
191 | ifp64->if_nextfree = ++i; |
192 | } |
193 | ifp64--; |
194 | ifp64->if_nextfree = oldlast; |
195 | } else if (lfs_sb_getversion(fs) > 1) { |
196 | for (ifp32 = (IFILE32 *)bp->b_data; i < xmax; ++ifp32) { |
197 | SET_BITMAP_FREE(fs, i); |
198 | ifp32->if_version = 1; |
199 | ifp32->if_daddr = LFS_UNUSED_DADDR; |
200 | ifp32->if_nextfree = ++i; |
201 | } |
202 | ifp32--; |
203 | ifp32->if_nextfree = oldlast; |
204 | } else { |
205 | for (ifp_v1 = (IFILE_V1 *)bp->b_data; i < xmax; ++ifp_v1) { |
206 | SET_BITMAP_FREE(fs, i); |
207 | ifp_v1->if_version = 1; |
208 | ifp_v1->if_daddr = LFS_UNUSED_DADDR; |
209 | ifp_v1->if_nextfree = ++i; |
210 | } |
211 | ifp_v1--; |
212 | ifp_v1->if_nextfree = oldlast; |
213 | } |
214 | LFS_PUT_TAILFREE(fs, cip, cbp, xmax - 1); |
215 | |
216 | /* |
217 | * Write out the new block. |
218 | */ |
219 | |
220 | (void) LFS_BWRITE_LOG(bp); /* Ifile */ |
221 | |
222 | return 0; |
223 | } |
224 | |
225 | /* |
226 | * Allocate an inode for a new file. |
227 | * |
228 | * Takes the segment lock. Also (while holding it) takes lfs_lock |
229 | * to frob fs->lfs_fmod. |
230 | * |
231 | * XXX: the mode argument is unused; should just get rid of it. |
232 | */ |
233 | /* ARGSUSED */ |
234 | /* VOP_BWRITE 2i times */ |
235 | int |
236 | lfs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred, |
237 | ino_t *ino, int *gen) |
238 | { |
239 | struct lfs *fs; |
240 | struct buf *bp, *cbp; |
241 | IFILE *ifp; |
242 | int error; |
243 | CLEANERINFO *cip; |
244 | |
245 | fs = VTOI(pvp)->i_lfs; |
246 | if (fs->lfs_ronly) |
247 | return EROFS; |
248 | |
249 | ASSERT_NO_SEGLOCK(fs); |
250 | |
251 | lfs_seglock(fs, SEGM_PROT); |
252 | |
253 | /* Get the head of the freelist. */ |
254 | LFS_GET_HEADFREE(fs, cip, cbp, ino); |
255 | |
256 | /* paranoia */ |
257 | KASSERT(*ino != LFS_UNUSED_INUM && *ino != LFS_IFILE_INUM); |
258 | DLOG((DLOG_ALLOC, "lfs_valloc: allocate inode %" PRId64 "\n" , |
259 | *ino)); |
260 | |
261 | /* Update the in-memory inode freemap */ |
262 | CLR_BITMAP_FREE(fs, *ino); |
263 | |
264 | /* |
265 | * Fetch the ifile entry and make sure the inode is really |
266 | * free. |
267 | */ |
268 | LFS_IENTRY(ifp, fs, *ino, bp); |
269 | if (lfs_if_getdaddr(fs, ifp) != LFS_UNUSED_DADDR) |
270 | panic("lfs_valloc: inuse inode %" PRId64 " on the free list" , |
271 | *ino); |
272 | |
273 | /* Update the inode freelist head in the superblock. */ |
274 | LFS_PUT_HEADFREE(fs, cip, cbp, lfs_if_getnextfree(fs, ifp)); |
275 | DLOG((DLOG_ALLOC, "lfs_valloc: headfree %" PRId64 " -> %ju\n" , |
276 | *ino, (uintmax_t)lfs_if_getnextfree(fs, ifp))); |
277 | |
278 | /* |
279 | * Retrieve the version number from the ifile entry. It was |
280 | * bumped by vfree, so don't bump it again. |
281 | */ |
282 | *gen = lfs_if_getversion(fs, ifp); |
283 | |
284 | /* Done with ifile entry */ |
285 | brelse(bp, 0); |
286 | |
287 | if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) { |
288 | /* |
289 | * No more inodes; extend the ifile so that the next |
290 | * lfs_valloc will succeed. |
291 | */ |
292 | if ((error = lfs_extend_ifile(fs, cred)) != 0) { |
293 | /* restore the freelist */ |
294 | LFS_PUT_HEADFREE(fs, cip, cbp, *ino); |
295 | |
296 | /* unlock and return */ |
297 | lfs_segunlock(fs); |
298 | return error; |
299 | } |
300 | } |
301 | #ifdef DIAGNOSTIC |
302 | if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) |
303 | panic("inode 0 allocated [3]" ); |
304 | #endif /* DIAGNOSTIC */ |
305 | |
306 | /* Set superblock modified bit */ |
307 | mutex_enter(&lfs_lock); |
308 | fs->lfs_fmod = 1; |
309 | mutex_exit(&lfs_lock); |
310 | |
311 | /* increment file count */ |
312 | lfs_sb_addnfiles(fs, 1); |
313 | |
314 | /* done */ |
315 | lfs_segunlock(fs); |
316 | return 0; |
317 | } |
318 | |
319 | /* |
320 | * Allocate an inode for a new file, with given inode number and |
321 | * version. |
322 | * |
323 | * Called in the same context as lfs_valloc and therefore shares the |
324 | * same locking assumptions. |
325 | * |
326 | * XXX: WHICH MEANS IT OUGHT TO TAKE THE SEGLOCK WHILE FROBBING THIS |
327 | * XXX: STUFF. REALLY. |
328 | */ |
329 | int |
330 | lfs_valloc_fixed(struct lfs *fs, ino_t ino, int vers) |
331 | { |
332 | IFILE *ifp; |
333 | struct buf *bp, *cbp; |
334 | ino_t headino, thisino, oldnext; |
335 | CLEANERINFO *cip; |
336 | |
337 | /* XXX: check for readonly */ |
338 | /* XXX: assert no seglock */ |
339 | /* XXX: should take seglock (as noted above) */ |
340 | |
341 | /* |
342 | * If the ifile is too short to contain this inum, extend it. |
343 | * |
344 | * XXX: lfs_extend_ifile should take a size instead of always |
345 | * doing just one block at time. |
346 | */ |
347 | while (VTOI(fs->lfs_ivnode)->i_size <= (ino / |
348 | lfs_sb_getifpb(fs) + lfs_sb_getcleansz(fs) + lfs_sb_getsegtabsz(fs)) |
349 | << lfs_sb_getbshift(fs)) { |
350 | lfs_extend_ifile(fs, NOCRED); |
351 | } |
352 | |
353 | /* |
354 | * fetch the ifile entry; get the inode freelist next pointer, |
355 | * and set the version as directed. |
356 | */ |
357 | LFS_IENTRY(ifp, fs, ino, bp); |
358 | oldnext = lfs_if_getnextfree(fs, ifp); |
359 | lfs_if_setversion(fs, ifp, vers); |
360 | brelse(bp, 0); |
361 | |
362 | /* Get head of inode freelist */ |
363 | LFS_GET_HEADFREE(fs, cip, cbp, &headino); |
364 | if (headino == ino) { |
365 | /* Easy case: the inode we wanted was at the head */ |
366 | LFS_PUT_HEADFREE(fs, cip, cbp, oldnext); |
367 | } else { |
368 | ino_t nextfree; |
369 | |
370 | /* Have to find the desired inode in the freelist... */ |
371 | |
372 | thisino = headino; |
373 | while (1) { |
374 | /* read this ifile entry */ |
375 | LFS_IENTRY(ifp, fs, thisino, bp); |
376 | nextfree = lfs_if_getnextfree(fs, ifp); |
377 | /* stop if we find it or we hit the end */ |
378 | if (nextfree == ino || |
379 | nextfree == LFS_UNUSED_INUM) |
380 | break; |
381 | /* nope, keep going... */ |
382 | thisino = nextfree; |
383 | brelse(bp, 0); |
384 | } |
385 | if (nextfree == LFS_UNUSED_INUM) { |
386 | /* hit the end -- this inode is not available */ |
387 | brelse(bp, 0); |
388 | /* XXX release seglock (see above) */ |
389 | return ENOENT; |
390 | } |
391 | /* found it; update the next pointer */ |
392 | lfs_if_setnextfree(fs, ifp, oldnext); |
393 | /* write the ifile block */ |
394 | LFS_BWRITE_LOG(bp); |
395 | } |
396 | |
397 | /* done */ |
398 | /* XXX release seglock (see above) */ |
399 | return 0; |
400 | } |
401 | |
#if 0
/*
 * Return the highest inode number that is currently allocated, i.e.
 * whose bit in the free bitmap is clear.  Intended for shrinking the
 * Ifile.  Returns LFS_UNUSED_INUM if every inode above it is free.
 */
static inline ino_t
lfs_last_alloc_ino(struct lfs *fs)
{
	ino_t ino, maxino;

	/* number of inode slots the ifile currently holds */
	maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
	    lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) *
	    lfs_sb_getifpb(fs);

	/* walk down from the top, skipping inodes marked free */
	ino = maxino - 1;
	while (ino > LFS_UNUSED_INUM && ISSET_BITMAP_FREE(fs, ino) != 0)
		--ino;

	return ino;
}
#endif
422 | |
423 | /* |
424 | * Find the previous (next lowest numbered) free inode, if any. |
425 | * If there is none, return LFS_UNUSED_INUM. |
426 | * |
427 | * XXX: locking? |
428 | */ |
429 | static inline ino_t |
430 | lfs_freelist_prev(struct lfs *fs, ino_t ino) |
431 | { |
432 | ino_t tino, bound, bb, freehdbb; |
433 | |
434 | if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) { |
435 | /* No free inodes at all */ |
436 | return LFS_UNUSED_INUM; |
437 | } |
438 | |
439 | /* Search our own word first */ |
440 | bound = ino & ~BMMASK; |
441 | for (tino = ino - 1; tino >= bound && tino > LFS_UNUSED_INUM; tino--) |
442 | if (ISSET_BITMAP_FREE(fs, tino)) |
443 | return tino; |
444 | /* If there are no lower words to search, just return */ |
445 | if (ino >> BMSHIFT == 0) |
446 | return LFS_UNUSED_INUM; |
447 | |
448 | /* |
449 | * Find a word with a free inode in it. We have to be a bit |
450 | * careful here since ino_t is unsigned. |
451 | */ |
452 | freehdbb = (lfs_sb_getfreehd(fs) >> BMSHIFT); |
453 | for (bb = (ino >> BMSHIFT) - 1; bb >= freehdbb && bb > 0; --bb) |
454 | if (fs->lfs_ino_bitmap[bb]) |
455 | break; |
456 | if (fs->lfs_ino_bitmap[bb] == 0) |
457 | return LFS_UNUSED_INUM; |
458 | |
459 | /* Search the word we found */ |
460 | for (tino = (bb << BMSHIFT) | BMMASK; tino >= (bb << BMSHIFT) && |
461 | tino > LFS_UNUSED_INUM; tino--) |
462 | if (ISSET_BITMAP_FREE(fs, tino)) |
463 | break; |
464 | |
465 | /* Avoid returning reserved inode numbers */ |
466 | if (tino <= LFS_IFILE_INUM) |
467 | tino = LFS_UNUSED_INUM; |
468 | |
469 | return tino; |
470 | } |
471 | |
472 | /* |
473 | * Free an inode. |
474 | * |
475 | * Takes lfs_seglock. Also (independently) takes vp->v_interlock. |
476 | */ |
477 | /* ARGUSED */ |
478 | /* VOP_BWRITE 2i times */ |
479 | int |
480 | lfs_vfree(struct vnode *vp, ino_t ino, int mode) |
481 | { |
482 | SEGUSE *sup; |
483 | CLEANERINFO *cip; |
484 | struct buf *cbp, *bp; |
485 | IFILE *ifp; |
486 | struct inode *ip; |
487 | struct lfs *fs; |
488 | daddr_t old_iaddr; |
489 | ino_t otail; |
490 | |
491 | /* Get the inode number and file system. */ |
492 | ip = VTOI(vp); |
493 | fs = ip->i_lfs; |
494 | ino = ip->i_number; |
495 | |
496 | /* XXX: assert not readonly */ |
497 | |
498 | ASSERT_NO_SEGLOCK(fs); |
499 | DLOG((DLOG_ALLOC, "lfs_vfree: free ino %lld\n" , (long long)ino)); |
500 | |
501 | /* Drain of pending writes */ |
502 | mutex_enter(vp->v_interlock); |
503 | while (lfs_sb_getversion(fs) > 1 && WRITEINPROG(vp)) { |
504 | cv_wait(&vp->v_cv, vp->v_interlock); |
505 | } |
506 | mutex_exit(vp->v_interlock); |
507 | |
508 | lfs_seglock(fs, SEGM_PROT); |
509 | |
510 | /* |
511 | * If the inode was in a dirop, it isn't now. |
512 | * |
513 | * XXX: why are (v_uflag & VU_DIROP) and (ip->i_flag & IN_ADIROP) |
514 | * not updated together in one function? (and why do both exist, |
515 | * anyway?) |
516 | */ |
517 | lfs_unmark_vnode(vp); |
518 | |
519 | mutex_enter(&lfs_lock); |
520 | if (vp->v_uflag & VU_DIROP) { |
521 | vp->v_uflag &= ~VU_DIROP; |
522 | --lfs_dirvcount; |
523 | --fs->lfs_dirvcount; |
524 | TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); |
525 | wakeup(&fs->lfs_dirvcount); |
526 | wakeup(&lfs_dirvcount); |
527 | mutex_exit(&lfs_lock); |
528 | vrele(vp); |
529 | |
530 | /* |
531 | * If this inode is not going to be written any more, any |
532 | * segment accounting left over from its truncation needs |
533 | * to occur at the end of the next dirops flush. Attach |
534 | * them to the fs-wide list for that purpose. |
535 | */ |
536 | if (LIST_FIRST(&ip->i_lfs_segdhd) != NULL) { |
537 | struct segdelta *sd; |
538 | |
539 | while((sd = LIST_FIRST(&ip->i_lfs_segdhd)) != NULL) { |
540 | LIST_REMOVE(sd, list); |
541 | LIST_INSERT_HEAD(&fs->lfs_segdhd, sd, list); |
542 | } |
543 | } |
544 | } else { |
545 | /* |
546 | * If it's not a dirop, we can finalize right away. |
547 | */ |
548 | mutex_exit(&lfs_lock); |
549 | lfs_finalize_ino_seguse(fs, ip); |
550 | } |
551 | |
552 | /* it is no longer an unwritten inode, so update the counts */ |
553 | mutex_enter(&lfs_lock); |
554 | LFS_CLR_UINO(ip, IN_ACCESSED|IN_CLEANING|IN_MODIFIED); |
555 | mutex_exit(&lfs_lock); |
556 | |
557 | /* Turn off all inode modification flags */ |
558 | ip->i_flag &= ~IN_ALLMOD; |
559 | |
560 | /* Mark it deleted */ |
561 | ip->i_lfs_iflags |= LFSI_DELETED; |
562 | |
563 | /* Mark it free in the in-memory inode freemap */ |
564 | SET_BITMAP_FREE(fs, ino); |
565 | |
566 | /* |
567 | * Set the ifile's inode entry to unused, increment its version number |
568 | * and link it onto the free chain. |
569 | */ |
570 | |
571 | /* fetch the ifile entry */ |
572 | LFS_IENTRY(ifp, fs, ino, bp); |
573 | |
574 | /* update the on-disk address (to "nowhere") */ |
575 | old_iaddr = lfs_if_getdaddr(fs, ifp); |
576 | lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR); |
577 | |
578 | /* bump the version */ |
579 | lfs_if_setversion(fs, ifp, lfs_if_getversion(fs, ifp) + 1); |
580 | |
581 | if (lfs_sb_getversion(fs) == 1) { |
582 | ino_t nextfree; |
583 | |
584 | /* insert on freelist */ |
585 | LFS_GET_HEADFREE(fs, cip, cbp, &nextfree); |
586 | lfs_if_setnextfree(fs, ifp, nextfree); |
587 | LFS_PUT_HEADFREE(fs, cip, cbp, ino); |
588 | |
589 | /* write the ifile block */ |
590 | (void) LFS_BWRITE_LOG(bp); /* Ifile */ |
591 | } else { |
592 | ino_t tino, onf; |
593 | |
594 | /* |
595 | * Clear the freelist next pointer and write the ifile |
596 | * block. XXX: why? I'm sure there must be a reason but |
597 | * it seems both silly and dangerous. |
598 | */ |
599 | lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM); |
600 | (void) LFS_BWRITE_LOG(bp); /* Ifile */ |
601 | |
602 | /* |
603 | * Insert on freelist in order. |
604 | */ |
605 | |
606 | /* Find the next lower (by number) free inode */ |
607 | tino = lfs_freelist_prev(fs, ino); |
608 | |
609 | if (tino == LFS_UNUSED_INUM) { |
610 | ino_t nextfree; |
611 | |
612 | /* |
613 | * There isn't one; put us on the freelist head. |
614 | */ |
615 | |
616 | /* reload the ifile block */ |
617 | LFS_IENTRY(ifp, fs, ino, bp); |
618 | /* update the list */ |
619 | LFS_GET_HEADFREE(fs, cip, cbp, &nextfree); |
620 | lfs_if_setnextfree(fs, ifp, nextfree); |
621 | LFS_PUT_HEADFREE(fs, cip, cbp, ino); |
622 | DLOG((DLOG_ALLOC, "lfs_vfree: headfree %lld -> %lld\n" , |
623 | (long long)nextfree, (long long)ino)); |
624 | /* write the ifile block */ |
625 | LFS_BWRITE_LOG(bp); /* Ifile */ |
626 | |
627 | /* If the list was empty, set tail too */ |
628 | LFS_GET_TAILFREE(fs, cip, cbp, &otail); |
629 | if (otail == LFS_UNUSED_INUM) { |
630 | LFS_PUT_TAILFREE(fs, cip, cbp, ino); |
631 | DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld " |
632 | "-> %lld\n" , (long long)otail, |
633 | (long long)ino)); |
634 | } |
635 | } else { |
636 | /* |
637 | * Insert this inode into the list after tino. |
638 | * We hold the segment lock so we don't have to |
639 | * worry about blocks being written out of order. |
640 | */ |
641 | |
642 | DLOG((DLOG_ALLOC, "lfs_vfree: insert ino %lld " |
643 | " after %lld\n" , ino, tino)); |
644 | |
645 | /* load the previous inode's ifile block */ |
646 | LFS_IENTRY(ifp, fs, tino, bp); |
647 | /* update the list pointer */ |
648 | onf = lfs_if_getnextfree(fs, ifp); |
649 | lfs_if_setnextfree(fs, ifp, ino); |
650 | /* write the block */ |
651 | LFS_BWRITE_LOG(bp); /* Ifile */ |
652 | |
653 | /* load this inode's ifile block */ |
654 | LFS_IENTRY(ifp, fs, ino, bp); |
655 | /* update the list pointer */ |
656 | lfs_if_setnextfree(fs, ifp, onf); |
657 | /* write the block */ |
658 | LFS_BWRITE_LOG(bp); /* Ifile */ |
659 | |
660 | /* If we're last, put us on the tail */ |
661 | if (onf == LFS_UNUSED_INUM) { |
662 | LFS_GET_TAILFREE(fs, cip, cbp, &otail); |
663 | LFS_PUT_TAILFREE(fs, cip, cbp, ino); |
664 | DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld " |
665 | "-> %lld\n" , (long long)otail, |
666 | (long long)ino)); |
667 | } |
668 | } |
669 | } |
670 | #ifdef DIAGNOSTIC |
671 | /* XXX: shouldn't this check be further up *before* we trash the fs? */ |
672 | if (ino == LFS_UNUSED_INUM) { |
673 | panic("inode 0 freed" ); |
674 | } |
675 | #endif /* DIAGNOSTIC */ |
676 | |
677 | /* |
678 | * Update the segment summary for the segment where the on-disk |
679 | * copy used to be. |
680 | */ |
681 | if (old_iaddr != LFS_UNUSED_DADDR) { |
682 | /* load it */ |
683 | LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp); |
684 | #ifdef DIAGNOSTIC |
685 | /* the number of bytes in the segment should not become < 0 */ |
686 | if (sup->su_nbytes < DINOSIZE(fs)) { |
687 | printf("lfs_vfree: negative byte count" |
688 | " (segment %" PRIu32 " short by %d)\n" , |
689 | lfs_dtosn(fs, old_iaddr), |
690 | (int)DINOSIZE(fs) - |
691 | sup->su_nbytes); |
692 | panic("lfs_vfree: negative byte count" ); |
693 | sup->su_nbytes = DINOSIZE(fs); |
694 | } |
695 | #endif |
696 | /* update the number of bytes in the segment */ |
697 | sup->su_nbytes -= DINOSIZE(fs); |
698 | /* write the segment entry */ |
699 | LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp); /* Ifile */ |
700 | } |
701 | |
702 | /* Set superblock modified bit. */ |
703 | mutex_enter(&lfs_lock); |
704 | fs->lfs_fmod = 1; |
705 | mutex_exit(&lfs_lock); |
706 | |
707 | /* Decrement file count. */ |
708 | lfs_sb_subnfiles(fs, 1); |
709 | |
710 | lfs_segunlock(fs); |
711 | |
712 | return (0); |
713 | } |
714 | |
715 | /* |
716 | * Sort the freelist and set up the free-inode bitmap. |
717 | * To be called by lfs_mountfs(). |
718 | * |
719 | * Takes the segmenet lock. |
720 | */ |
721 | void |
722 | lfs_order_freelist(struct lfs *fs) |
723 | { |
724 | CLEANERINFO *cip; |
725 | IFILE *ifp = NULL; |
726 | struct buf *bp; |
727 | ino_t ino, firstino, lastino, maxino; |
728 | #ifdef notyet |
729 | struct vnode *vp; |
730 | #endif |
731 | |
732 | ASSERT_NO_SEGLOCK(fs); |
733 | lfs_seglock(fs, SEGM_PROT); |
734 | |
735 | /* largest inode on fs */ |
736 | maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) - |
737 | lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs); |
738 | |
739 | /* allocate the in-memory inode freemap */ |
740 | /* XXX: assert that fs->lfs_ino_bitmap is null here */ |
741 | fs->lfs_ino_bitmap = |
742 | malloc(((maxino + BMMASK) >> BMSHIFT) * sizeof(lfs_bm_t), |
743 | M_SEGMENT, M_WAITOK | M_ZERO); |
744 | KASSERT(fs->lfs_ino_bitmap != NULL); |
745 | |
746 | /* |
747 | * Scan the ifile. |
748 | */ |
749 | |
750 | firstino = lastino = LFS_UNUSED_INUM; |
751 | for (ino = 0; ino < maxino; ino++) { |
752 | /* Load this inode's ifile entry. */ |
753 | if (ino % lfs_sb_getifpb(fs) == 0) |
754 | LFS_IENTRY(ifp, fs, ino, bp); |
755 | else |
756 | LFS_IENTRY_NEXT(ifp, fs); |
757 | |
758 | /* Don't put zero or ifile on the free list */ |
759 | if (ino == LFS_UNUSED_INUM || ino == LFS_IFILE_INUM) |
760 | continue; |
761 | |
762 | #ifdef notyet |
763 | /* |
764 | * Address orphaned files. |
765 | * |
766 | * The idea of this is to free inodes belonging to |
767 | * files that were unlinked but not reclaimed, I guess |
768 | * because if we're going to scan the whole ifile |
769 | * anyway it costs very little to do this. I don't |
770 | * immediately see any reason this should be disabled, |
771 | * but presumably it doesn't work... not sure what |
772 | * happens to such files currently. -- dholland 20160806 |
773 | */ |
774 | if (lfs_if_getnextfree(fs, ifp) == LFS_ORPHAN_NEXTFREE && |
775 | VFS_VGET(fs->lfs_ivnode->v_mount, ino, &vp) == 0) { |
776 | unsigned segno; |
777 | |
778 | /* get the segment the inode in on disk */ |
779 | segno = lfs_dtosn(fs, lfs_if_getdaddr(fs, ifp)); |
780 | |
781 | /* truncate the inode */ |
782 | lfs_truncate(vp, 0, 0, NOCRED); |
783 | vput(vp); |
784 | |
785 | /* load the segment summary */ |
786 | LFS_SEGENTRY(sup, fs, segno, bp); |
787 | /* update the number of bytes in the segment */ |
788 | KASSERT(sup->su_nbytes >= DINOSIZE(fs)); |
789 | sup->su_nbytes -= DINOSIZE(fs); |
790 | /* write the segment summary */ |
791 | LFS_WRITESEGENTRY(sup, fs, segno, bp); |
792 | |
793 | /* Drop the on-disk address */ |
794 | lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR); |
795 | /* write the ifile entry */ |
796 | LFS_BWRITE_LOG(bp); |
797 | |
798 | /* |
799 | * and reload it (XXX: why? I guess |
800 | * LFS_BWRITE_LOG drops it...) |
801 | */ |
802 | LFS_IENTRY(ifp, fs, ino, bp); |
803 | |
804 | /* Fall through to next if block */ |
805 | } |
806 | #endif |
807 | |
808 | if (lfs_if_getdaddr(fs, ifp) == LFS_UNUSED_DADDR) { |
809 | |
810 | /* |
811 | * This inode is free. Put it on the free list. |
812 | */ |
813 | |
814 | if (firstino == LFS_UNUSED_INUM) { |
815 | /* XXX: assert lastino == LFS_UNUSED_INUM? */ |
816 | /* remember the first free inode */ |
817 | firstino = ino; |
818 | } else { |
819 | /* release this inode's ifile entry */ |
820 | brelse(bp, 0); |
821 | |
822 | /* XXX: assert lastino != LFS_UNUSED_INUM? */ |
823 | |
824 | /* load lastino's ifile entry */ |
825 | LFS_IENTRY(ifp, fs, lastino, bp); |
826 | /* set the list pointer */ |
827 | lfs_if_setnextfree(fs, ifp, ino); |
828 | /* write the block */ |
829 | LFS_BWRITE_LOG(bp); |
830 | |
831 | /* reload this inode's ifile entry */ |
832 | LFS_IENTRY(ifp, fs, ino, bp); |
833 | } |
834 | /* remember the last free inode seen so far */ |
835 | lastino = ino; |
836 | |
837 | /* Mark this inode free in the in-memory freemap */ |
838 | SET_BITMAP_FREE(fs, ino); |
839 | } |
840 | |
841 | /* If moving to the next ifile block, release the buffer. */ |
842 | if ((ino + 1) % lfs_sb_getifpb(fs) == 0) |
843 | brelse(bp, 0); |
844 | } |
845 | |
846 | /* Write the freelist head and tail pointers */ |
847 | /* XXX: do we need to mark the superblock dirty? */ |
848 | LFS_PUT_HEADFREE(fs, cip, bp, firstino); |
849 | LFS_PUT_TAILFREE(fs, cip, bp, lastino); |
850 | |
851 | /* done */ |
852 | lfs_segunlock(fs); |
853 | } |
854 | |
855 | /* |
856 | * Mark a file orphaned (unlinked but not yet reclaimed) by inode |
857 | * number. Do this with a magic freelist next pointer. |
858 | * |
859 | * XXX: howzabout some locking? |
860 | */ |
861 | void |
862 | lfs_orphan(struct lfs *fs, ino_t ino) |
863 | { |
864 | IFILE *ifp; |
865 | struct buf *bp; |
866 | |
867 | LFS_IENTRY(ifp, fs, ino, bp); |
868 | lfs_if_setnextfree(fs, ifp, LFS_ORPHAN_NEXTFREE); |
869 | LFS_BWRITE_LOG(bp); |
870 | } |
871 | |