/*	$NetBSD: lfs_bio.c,v 1.135 2015/10/03 09:31:29 hannken Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_bio.c	8.10 (Berkeley) 6/10/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.135 2015/10/03 09:31:29 hannken Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/resourcevar.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/kauth.h>

#include <ufs/lfs/ulfs_inode.h>
#include <ufs/lfs/ulfsmount.h>
#include <ufs/lfs/ulfs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_accessors.h>
#include <ufs/lfs/lfs_extern.h>
#include <ufs/lfs/lfs_kernel.h>

#include <uvm/uvm.h>

/*
 * LFS block write function.
 *
 * XXX
 * No write cost accounting is done.
 * This is almost certainly wrong for synchronous operations and NFS.
 *
 * The variables below are protected by lfs_lock.
 */
int	locked_queue_count = 0;		/* Count of locked-down buffers. */
long	locked_queue_bytes = 0L;	/* Total size of locked buffers. */
int	lfs_subsys_pages = 0;		/* Total number of LFS-written pages */
int	lfs_fs_pagetrip = 0;		/* # of pages to trip per-fs write */
int	lfs_writing = 0;		/* Set if already kicked off a writer
					   because of buffer space */
int	locked_queue_waiters = 0;	/* Number of processes waiting on lq */

/* Lock and condition variables for above. */
kcondvar_t	locked_queue_cv;
kcondvar_t	lfs_writing_cv;
kmutex_t	lfs_lock;

extern int lfs_dostats;

/*
 * reserved number/bytes of locked buffers
 */
int locked_queue_rcount = 0;
long locked_queue_rbytes = 0L;

static int lfs_fits_buf(struct lfs *, int, int);
static int lfs_reservebuf(struct lfs *, struct vnode *vp, struct vnode *vp2,
    int, int);
static int lfs_reserveavail(struct lfs *, struct vnode *vp, struct vnode *vp2,
    int);

static int
lfs_fits_buf(struct lfs *fs, int n, int bytes)
{
	int count_fit, bytes_fit;

	ASSERT_NO_SEGLOCK(fs);
	KASSERT(mutex_owned(&lfs_lock));

	count_fit =
	    (locked_queue_count + locked_queue_rcount + n <= LFS_WAIT_BUFS);
	bytes_fit =
	    (locked_queue_bytes + locked_queue_rbytes + bytes <= LFS_WAIT_BYTES);

#ifdef DEBUG
	if (!count_fit) {
		DLOG((DLOG_AVAIL, "lfs_fits_buf: no fit count: %d + %d + %d >= %d\n",
		      locked_queue_count, locked_queue_rcount,
		      n, LFS_WAIT_BUFS));
	}
	if (!bytes_fit) {
		DLOG((DLOG_AVAIL, "lfs_fits_buf: no fit bytes: %ld + %ld + %d >= %ld\n",
		      locked_queue_bytes, locked_queue_rbytes,
		      bytes, LFS_WAIT_BYTES));
	}
#endif /* DEBUG */

	return (count_fit && bytes_fit);
}

/* ARGSUSED */
static int
lfs_reservebuf(struct lfs *fs, struct vnode *vp,
    struct vnode *vp2, int n, int bytes)
{
	int cantwait;

	ASSERT_MAYBE_SEGLOCK(fs);
	KASSERT(locked_queue_rcount >= 0);
	KASSERT(locked_queue_rbytes >= 0);

	cantwait = (VTOI(vp)->i_flag & IN_ADIROP) || fs->lfs_unlockvp == vp;
	mutex_enter(&lfs_lock);
	while (!cantwait && n > 0 && !lfs_fits_buf(fs, n, bytes)) {
		int error;

		lfs_flush(fs, 0, 0);

		DLOG((DLOG_AVAIL, "lfs_reservebuf: waiting: count=%d, bytes=%ld\n",
		      locked_queue_count, locked_queue_bytes));
		++locked_queue_waiters;
		error = cv_timedwait_sig(&locked_queue_cv, &lfs_lock,
		    hz * LFS_BUFWAIT);
		--locked_queue_waiters;
		if (error && error != EWOULDBLOCK) {
			mutex_exit(&lfs_lock);
			return error;
		}
	}

	locked_queue_rcount += n;
	locked_queue_rbytes += bytes;

	if (n < 0 && locked_queue_waiters > 0) {
		DLOG((DLOG_AVAIL, "lfs_reservebuf: broadcast: count=%d, bytes=%ld\n",
		      locked_queue_count, locked_queue_bytes));
		cv_broadcast(&locked_queue_cv);
	}

	mutex_exit(&lfs_lock);

	KASSERT(locked_queue_rcount >= 0);
	KASSERT(locked_queue_rbytes >= 0);

	return 0;
}

/*
 * Try to reserve some blocks, prior to performing a sensitive operation that
 * requires the vnode lock to be honored.  If there is not enough space, wait
 * for the space to become available.
 *
 * Called with vp locked.  (Note however that if fsb < 0, vp is ignored.)
 */
static int
lfs_reserveavail(struct lfs *fs, struct vnode *vp,
    struct vnode *vp2, int fsb)
{
	CLEANERINFO *cip;
	struct buf *bp;
	int error, slept;
	int cantwait;

	ASSERT_MAYBE_SEGLOCK(fs);
	slept = 0;
	mutex_enter(&lfs_lock);
	cantwait = (VTOI(vp)->i_flag & IN_ADIROP) || fs->lfs_unlockvp == vp;
	while (!cantwait && fsb > 0 &&
	       !lfs_fits(fs, fsb + fs->lfs_ravail + fs->lfs_favail)) {
		mutex_exit(&lfs_lock);

		if (!slept) {
			DLOG((DLOG_AVAIL, "lfs_reserve: waiting for %ld (bfree = %jd,"
			      " est_bfree = %jd)\n",
			      fsb + fs->lfs_ravail + fs->lfs_favail,
			      (intmax_t)lfs_sb_getbfree(fs),
			      (intmax_t)LFS_EST_BFREE(fs)));
		}
		++slept;

		/* Wake up the cleaner */
		LFS_CLEANERINFO(cip, fs, bp);
		LFS_SYNC_CLEANERINFO(cip, fs, bp, 0);
		lfs_wakeup_cleaner(fs);

		mutex_enter(&lfs_lock);
		/* Cleaner might have run while we were reading, check again */
		if (lfs_fits(fs, fsb + fs->lfs_ravail + fs->lfs_favail))
			break;

		error = mtsleep(&fs->lfs_availsleep, PCATCH | PUSER,
		    "lfs_reserve", 0, &lfs_lock);
		if (error) {
			mutex_exit(&lfs_lock);
			return error;
		}
	}
#ifdef DEBUG
	if (slept) {
		DLOG((DLOG_AVAIL, "lfs_reserve: woke up\n"));
	}
#endif
	fs->lfs_ravail += fsb;
	mutex_exit(&lfs_lock);

	return 0;
}

#ifdef DIAGNOSTIC
int lfs_rescount;
int lfs_rescountdirop;
#endif

int
lfs_reserve(struct lfs *fs, struct vnode *vp, struct vnode *vp2, int fsb)
{
	int error;

	ASSERT_MAYBE_SEGLOCK(fs);
	if (vp2) {
		/* Make sure we're not in the process of reclaiming vp2 */
		mutex_enter(&lfs_lock);
		while (fs->lfs_flags & LFS_UNDIROP) {
			mtsleep(&fs->lfs_flags, PRIBIO + 1, "lfsrundirop", 0,
			    &lfs_lock);
		}
		mutex_exit(&lfs_lock);
	}

	KASSERT(fsb < 0 || VOP_ISLOCKED(vp));
	KASSERT(vp2 == NULL || fsb < 0 || VOP_ISLOCKED(vp2));
	KASSERT(vp2 == NULL || vp2 != fs->lfs_unlockvp);

#ifdef DIAGNOSTIC
	mutex_enter(&lfs_lock);
	if (fsb > 0)
		lfs_rescount++;
	else if (fsb < 0)
		lfs_rescount--;
	if (lfs_rescount < 0)
		panic("lfs_rescount");
	mutex_exit(&lfs_lock);
#endif

	error = lfs_reserveavail(fs, vp, vp2, fsb);
	if (error)
		return error;

	/*
	 * XXX just a guess. should be more precise.
	 */
	error = lfs_reservebuf(fs, vp, vp2, fsb, lfs_fsbtob(fs, fsb));
	if (error)
		lfs_reserveavail(fs, vp, vp2, -fsb);

	return error;
}
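
/*
 * Example (illustrative only, not a call site in this file): a caller
 * about to dirty up to "nblocks" file system blocks reserves them first
 * and releases the reservation with a negative count when done:
 *
 *	error = lfs_reserve(fs, vp, NULL, nblocks);
 *	if (error)
 *		return error;
 *	... allocate and dirty the blocks ...
 *	lfs_reserve(fs, vp, NULL, -nblocks);
 */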

int
lfs_bwrite(void *v)
{
	struct vop_bwrite_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct buf *bp = ap->a_bp;

#ifdef DIAGNOSTIC
	if (VTOI(bp->b_vp)->i_lfs->lfs_ronly == 0 && (bp->b_flags & B_ASYNC)) {
		panic("bawrite LFS buffer");
	}
#endif /* DIAGNOSTIC */
	return lfs_bwrite_ext(bp, 0);
}

/*
 * Determine if there is enough room currently available to write fsb
 * blocks.  We need enough blocks for the new blocks, the current
 * inode blocks (including potentially the ifile inode), a summary block,
 * and the segment usage table, plus an ifile block.
 */
int
lfs_fits(struct lfs *fs, int fsb)
{
	int64_t needed;

	ASSERT_NO_SEGLOCK(fs);
	needed = fsb + lfs_btofsb(fs, lfs_sb_getsumsize(fs)) +
		 ((howmany(lfs_sb_getuinodes(fs) + 1, LFS_INOPB(fs)) +
		   lfs_sb_getsegtabsz(fs) +
		   1) << (lfs_sb_getbshift(fs) - lfs_sb_getffshift(fs)));

	if (needed >= lfs_sb_getavail(fs)) {
#ifdef DEBUG
		DLOG((DLOG_AVAIL, "lfs_fits: no fit: fsb = %ld, uinodes = %ld, "
		      "needed = %jd, avail = %jd\n",
		      (long)fsb, (long)lfs_sb_getuinodes(fs), (intmax_t)needed,
		      (intmax_t)lfs_sb_getavail(fs)));
#endif
		return 0;
	}
	return 1;
}
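
/*
 * Worked example of the computation above (the numbers are made up for
 * illustration): with bshift - ffshift = 3 (8KB blocks, 1KB fragments),
 * segtabsz = 2, uinodes = 10, and LFS_INOPB = 64, a request for
 * fsb = 4 fragments needs
 *
 *	needed = 4 + btofsb(sumsize)
 *	       + (howmany(10 + 1, 64) + 2 + 1) << 3
 *	       = 4 + btofsb(sumsize) + (1 + 2 + 1) * 8
 *	       = 36 + btofsb(sumsize)
 *
 * fragments; the write fits only if that total is strictly less than
 * lfs_sb_getavail(fs).
 */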

int
lfs_availwait(struct lfs *fs, int fsb)
{
	int error;
	CLEANERINFO *cip;
	struct buf *cbp;

	ASSERT_NO_SEGLOCK(fs);
	/* Push cleaner blocks through regardless */
	mutex_enter(&lfs_lock);
	if (LFS_SEGLOCK_HELD(fs) &&
	    fs->lfs_sp->seg_flags & (SEGM_CLEAN | SEGM_FORCE_CKP)) {
		mutex_exit(&lfs_lock);
		return 0;
	}
	mutex_exit(&lfs_lock);

	while (!lfs_fits(fs, fsb)) {
		/*
		 * Out of space, need cleaner to run.
		 * Update the cleaner info, then wake it up.
		 * Note the cleanerinfo block is on the ifile
		 * so it CANT_WAIT.
		 */
		LFS_CLEANERINFO(cip, fs, cbp);
		LFS_SYNC_CLEANERINFO(cip, fs, cbp, 0);

#ifdef DEBUG
		DLOG((DLOG_AVAIL, "lfs_availwait: out of available space, "
		      "waiting on cleaner\n"));
#endif

		lfs_wakeup_cleaner(fs);
#ifdef DIAGNOSTIC
		if (LFS_SEGLOCK_HELD(fs))
			panic("lfs_availwait: deadlock");
#endif
		error = tsleep(&fs->lfs_availsleep, PCATCH | PUSER,
		    "cleaner", 0);
		if (error)
			return (error);
	}
	return 0;
}

int
lfs_bwrite_ext(struct buf *bp, int flags)
{
	struct lfs *fs;
	struct inode *ip;
	struct vnode *vp;
	int fsb;

	vp = bp->b_vp;
	fs = VFSTOULFS(vp->v_mount)->um_lfs;

	ASSERT_MAYBE_SEGLOCK(fs);
	KASSERT(bp->b_cflags & BC_BUSY);
	KASSERT(flags & BW_CLEAN || !LFS_IS_MALLOC_BUF(bp));
	KASSERT(((bp->b_oflags | bp->b_flags) & (BO_DELWRI|B_LOCKED))
	    != BO_DELWRI);

	/*
	 * Don't write *any* blocks if we're mounted read-only, or
	 * if we are "already unmounted".
	 *
	 * In particular the cleaner can't write blocks either.
	 */
	if (fs->lfs_ronly || (lfs_sb_getpflags(fs) & LFS_PF_CLEAN)) {
		bp->b_oflags &= ~BO_DELWRI;
		bp->b_flags |= B_READ; /* XXX is this right? --ks */
		bp->b_error = 0;
		mutex_enter(&bufcache_lock);
		LFS_UNLOCK_BUF(bp);
		if (LFS_IS_MALLOC_BUF(bp))
			bp->b_cflags &= ~BC_BUSY;
		else
			brelsel(bp, 0);
		mutex_exit(&bufcache_lock);
		return (fs->lfs_ronly ? EROFS : 0);
	}

	/*
	 * Set the delayed write flag and use reassignbuf to move the buffer
	 * from the clean list to the dirty one.
	 *
	 * Set the B_LOCKED flag and unlock the buffer, causing brelse to move
	 * the buffer onto the LOCKED free list.  This is necessary, otherwise
	 * getnewbuf() would try to reclaim the buffers using bawrite, which
	 * isn't going to work.
	 *
	 * XXX we don't let meta-data writes run out of space because they can
	 * come from the segment writer.  We need to make sure that there is
	 * enough space reserved so that there's room to write meta-data
	 * blocks.
	 */
	if ((bp->b_flags & B_LOCKED) == 0) {
		fsb = lfs_numfrags(fs, bp->b_bcount);

		ip = VTOI(vp);
		mutex_enter(&lfs_lock);
		if (flags & BW_CLEAN) {
			LFS_SET_UINO(ip, IN_CLEANING);
		} else {
			LFS_SET_UINO(ip, IN_MODIFIED);
		}
		mutex_exit(&lfs_lock);
		lfs_sb_subavail(fs, fsb);

		mutex_enter(&bufcache_lock);
		mutex_enter(vp->v_interlock);
		bp->b_oflags = (bp->b_oflags | BO_DELWRI) & ~BO_DONE;
		LFS_LOCK_BUF(bp);
		bp->b_flags &= ~B_READ;
		bp->b_error = 0;
		reassignbuf(bp, bp->b_vp);
		mutex_exit(vp->v_interlock);
	} else {
		mutex_enter(&bufcache_lock);
	}

	if (bp->b_iodone != NULL)
		bp->b_cflags &= ~BC_BUSY;
	else
		brelsel(bp, 0);
	mutex_exit(&bufcache_lock);

	return (0);
}

/*
 * Called and returns with the lfs_lock held.
 */
void
lfs_flush_fs(struct lfs *fs, int flags)
{
	ASSERT_NO_SEGLOCK(fs);
	KASSERT(mutex_owned(&lfs_lock));
	if (fs->lfs_ronly)
		return;

	if (lfs_dostats)
		++lfs_stats.flush_invoked;

	fs->lfs_pdflush = 0;
	mutex_exit(&lfs_lock);
	lfs_writer_enter(fs, "fldirop");
	lfs_segwrite(fs->lfs_ivnode->v_mount, flags);
	lfs_writer_leave(fs);
	mutex_enter(&lfs_lock);
	fs->lfs_favail = 0; /* XXX */
}

/*
 * This routine initiates segment writes when LFS is consuming too many
 * resources.  Ideally the pageout daemon would be able to direct LFS
 * more subtly.
 * XXX We have one static count of locked buffers;
 * XXX need to think more about the multiple filesystem case.
 *
 * Called and returns with lfs_lock held.
 * If fs != NULL, we must not hold the segment lock for fs
 * (see the KDASSERT below).
 */
void
lfs_flush(struct lfs *fs, int flags, int only_onefs)
{
	extern u_int64_t locked_fakequeue_count;
	struct mount *mp, *nmp;
	struct lfs *tfs;

	KASSERT(mutex_owned(&lfs_lock));
	KDASSERT(fs == NULL || !LFS_SEGLOCK_HELD(fs));

	if (lfs_dostats)
		++lfs_stats.write_exceeded;
	/* XXX should we include SEGM_CKP here? */
	if (lfs_writing && !(flags & SEGM_SYNC)) {
		DLOG((DLOG_FLUSH, "lfs_flush: not flushing because another flush is active\n"));
		return;
	}
	while (lfs_writing)
		cv_wait(&lfs_writing_cv, &lfs_lock);
	lfs_writing = 1;

	mutex_exit(&lfs_lock);

	if (only_onefs) {
		KASSERT(fs != NULL);
		if (vfs_busy(fs->lfs_ivnode->v_mount, NULL))
			goto errout;
		mutex_enter(&lfs_lock);
		lfs_flush_fs(fs, flags);
		mutex_exit(&lfs_lock);
		vfs_unbusy(fs->lfs_ivnode->v_mount, false, NULL);
	} else {
		locked_fakequeue_count = 0;
		mutex_enter(&mountlist_lock);
		for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
			if (vfs_busy(mp, &nmp)) {
				DLOG((DLOG_FLUSH, "lfs_flush: fs vfs_busy\n"));
				continue;
			}
			if (strncmp(&mp->mnt_stat.f_fstypename[0], MOUNT_LFS,
			    sizeof(mp->mnt_stat.f_fstypename)) == 0) {
				tfs = VFSTOULFS(mp)->um_lfs;
				mutex_enter(&lfs_lock);
				lfs_flush_fs(tfs, flags);
				mutex_exit(&lfs_lock);
			}
			vfs_unbusy(mp, false, &nmp);
		}
		mutex_exit(&mountlist_lock);
	}
	LFS_DEBUG_COUNTLOCKED("flush");
	wakeup(&lfs_subsys_pages);

errout:
	mutex_enter(&lfs_lock);
	KASSERT(lfs_writing);
	lfs_writing = 0;
	wakeup(&lfs_writing);
}

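/*
 * INOCOUNT and INOBYTES estimate, respectively, how many buffers and
 * how many bytes it will take to write out the currently dirty inodes
 * (lfs_sb_getuinodes() of them, packed LFS_INOPB() per block).  They
 * are added to the locked-buffer totals in lfs_check() below when
 * comparing against the LFS_MAX_* and LFS_WAIT_* watermarks.
 */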
#define INOCOUNT(fs) howmany(lfs_sb_getuinodes(fs), LFS_INOPB(fs))
#define INOBYTES(fs) (lfs_sb_getuinodes(fs) * DINOSIZE(fs))

/*
 * Make sure that we don't have too many locked buffers.
 * Flush buffers if needed.
 */
int
lfs_check(struct vnode *vp, daddr_t blkno, int flags)
{
	int error;
	struct lfs *fs;
	struct inode *ip;
	extern pid_t lfs_writer_daemon;

	error = 0;
	ip = VTOI(vp);

	/* If out of buffers, wait on writer */
	/* XXX KS - if it's the Ifile, we're probably the cleaner! */
	if (ip->i_number == LFS_IFILE_INUM)
		return 0;
	/* If we're being called from inside a dirop, don't sleep */
	if (ip->i_flag & IN_ADIROP)
		return 0;

	fs = ip->i_lfs;

	ASSERT_NO_SEGLOCK(fs);

	/*
	 * If we would flush below, but dirops are active, sleep.
	 * Note that a dirop cannot ever reach this code!
	 */
	mutex_enter(&lfs_lock);
	while (fs->lfs_dirops > 0 &&
	       (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS ||
		locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES ||
		lfs_subsys_pages > LFS_MAX_PAGES ||
		fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) ||
		lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0))
	{
		++fs->lfs_diropwait;
		mtsleep(&fs->lfs_writer, PRIBIO+1, "bufdirop", 0,
		    &lfs_lock);
		--fs->lfs_diropwait;
	}

#ifdef DEBUG
	if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS)
		DLOG((DLOG_FLUSH, "lfs_check: lqc = %d, max %d\n",
		      locked_queue_count + INOCOUNT(fs), LFS_MAX_BUFS));
	if (locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES)
		DLOG((DLOG_FLUSH, "lfs_check: lqb = %ld, max %ld\n",
		      locked_queue_bytes + INOBYTES(fs), LFS_MAX_BYTES));
	if (lfs_subsys_pages > LFS_MAX_PAGES)
		DLOG((DLOG_FLUSH, "lfs_check: lssp = %d, max %d\n",
		      lfs_subsys_pages, LFS_MAX_PAGES));
	if (lfs_fs_pagetrip && fs->lfs_pages > lfs_fs_pagetrip)
		DLOG((DLOG_FLUSH, "lfs_check: fssp = %d, trip at %d\n",
		      fs->lfs_pages, lfs_fs_pagetrip));
	if (lfs_dirvcount > LFS_MAX_DIROP)
		DLOG((DLOG_FLUSH, "lfs_check: ldvc = %d, max %d\n",
		      lfs_dirvcount, LFS_MAX_DIROP));
	if (fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs))
		DLOG((DLOG_FLUSH, "lfs_check: lfdvc = %d, max %d\n",
		      fs->lfs_dirvcount, LFS_MAX_FSDIROP(fs)));
	if (fs->lfs_diropwait > 0)
		DLOG((DLOG_FLUSH, "lfs_check: ldvw = %d\n",
		      fs->lfs_diropwait));
#endif

	/* If there are too many pending dirops, we have to flush them. */
	if (fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) ||
	    lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0) {
		mutex_exit(&lfs_lock);
		lfs_flush_dirops(fs);
		mutex_enter(&lfs_lock);
	} else if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS ||
		   locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES ||
		   lfs_subsys_pages > LFS_MAX_PAGES ||
		   fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) ||
		   lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0) {
		lfs_flush(fs, flags, 0);
	} else if (lfs_fs_pagetrip && fs->lfs_pages > lfs_fs_pagetrip) {
		/*
		 * If we didn't flush the whole thing, some filesystems
		 * still might want to be flushed.
		 */
		++fs->lfs_pdflush;
		wakeup(&lfs_writer_daemon);
	}

	while (locked_queue_count + INOCOUNT(fs) >= LFS_WAIT_BUFS ||
	       locked_queue_bytes + INOBYTES(fs) >= LFS_WAIT_BYTES ||
	       lfs_subsys_pages > LFS_WAIT_PAGES ||
	       fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) ||
	       lfs_dirvcount > LFS_MAX_DIROP) {

		if (lfs_dostats)
			++lfs_stats.wait_exceeded;
		DLOG((DLOG_AVAIL, "lfs_check: waiting: count=%d, bytes=%ld\n",
		      locked_queue_count, locked_queue_bytes));
		++locked_queue_waiters;
		error = cv_timedwait_sig(&locked_queue_cv, &lfs_lock,
		    hz * LFS_BUFWAIT);
		--locked_queue_waiters;
		if (error != EWOULDBLOCK)
			break;

		/*
		 * lfs_flush might not flush all the buffers, if some of the
		 * inodes were locked or if most of them were Ifile blocks
		 * and we weren't asked to checkpoint.  Try flushing again
		 * to keep us from blocking indefinitely.
		 */
		if (locked_queue_count + INOCOUNT(fs) >= LFS_MAX_BUFS ||
		    locked_queue_bytes + INOBYTES(fs) >= LFS_MAX_BYTES) {
			lfs_flush(fs, flags | SEGM_CKP, 0);
		}
	}
	mutex_exit(&lfs_lock);
	return (error);
}
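
/*
 * Example (hypothetical caller): a write path throttles itself by
 * calling lfs_check() before dirtying a block, e.g.:
 *
 *	if ((error = lfs_check(vp, lbn, 0)) != 0)
 *		return error;
 *	... dirty the block via lfs_bwrite() ...
 */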

/*
 * Allocate a new buffer header.
 */
struct buf *
lfs_newbuf(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size, int type)
{
	struct buf *bp;
	size_t nbytes;

	ASSERT_MAYBE_SEGLOCK(fs);
	nbytes = roundup(size, lfs_fsbtob(fs, 1));

	bp = getiobuf(NULL, true);
	if (nbytes) {
		bp->b_data = lfs_malloc(fs, nbytes, type);
		/* memset(bp->b_data, 0, nbytes); */
	}
#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vp is NULL in lfs_newbuf");
	if (bp == NULL)
		panic("bp is NULL after malloc in lfs_newbuf");
#endif

	bp->b_bufsize = size;
	bp->b_bcount = size;
	bp->b_lblkno = daddr;
	bp->b_blkno = daddr;
	bp->b_error = 0;
	bp->b_resid = 0;
	bp->b_iodone = lfs_callback;
	bp->b_cflags = BC_BUSY | BC_NOCACHE;
	bp->b_private = fs;

	mutex_enter(&bufcache_lock);
	mutex_enter(vp->v_interlock);
	bgetvp(vp, bp);
	mutex_exit(vp->v_interlock);
	mutex_exit(&bufcache_lock);

	return (bp);
}

void
lfs_freebuf(struct lfs *fs, struct buf *bp)
{
	struct vnode *vp;

	if ((vp = bp->b_vp) != NULL) {
		mutex_enter(&bufcache_lock);
		mutex_enter(vp->v_interlock);
		brelvp(bp);
		mutex_exit(vp->v_interlock);
		mutex_exit(&bufcache_lock);
	}
	if (!(bp->b_cflags & BC_INVAL)) { /* BC_INVAL indicates a "fake" buffer */
		lfs_free(fs, bp->b_data, LFS_NB_UNKNOWN);
		bp->b_data = NULL;
	}
	putiobuf(bp);
}
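
/*
 * Example (illustrative only): lfs_newbuf() and lfs_freebuf() are used
 * as a pair for buffers that bypass the buffer cache, e.g.:
 *
 *	bp = lfs_newbuf(fs, vp, daddr, size, LFS_NB_UNKNOWN);
 *	... fill bp->b_data and hand bp to the segment writer ...
 *
 * The b_iodone callback set above (lfs_callback) normally disposes of
 * the buffer after I/O; a caller that keeps the buffer itself would
 * call lfs_freebuf(fs, bp) instead.
 */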

/*
 * Count buffers on the "locked" queue, and compare it to a pro-forma count.
 * Don't count malloced buffers, since they don't detract from the total.
 */
void
lfs_countlocked(int *count, long *bytes, const char *msg)
{
	struct buf *bp;
	int n = 0;
	long int size = 0L;

	mutex_enter(&bufcache_lock);
	TAILQ_FOREACH(bp, &bufqueues[BQ_LOCKED].bq_queue, b_freelist) {
		KASSERT(bp->b_iodone == NULL);
		n++;
		size += bp->b_bufsize;
#ifdef DIAGNOSTIC
		if (n > nbuf)
			panic("lfs_countlocked: this can't happen: more"
			      " buffers locked than exist");
#endif
	}
	/*
	 * Theoretically this function never really does anything.
	 * Give a warning if we have to fix the accounting.
	 */
	if (n != *count) {
		DLOG((DLOG_LLIST, "lfs_countlocked: %s: adjusted buf count"
		      " from %d to %d\n", msg, *count, n));
	}
	if (size != *bytes) {
		DLOG((DLOG_LLIST, "lfs_countlocked: %s: adjusted byte count"
		      " from %ld to %ld\n", msg, *bytes, size));
	}
	*count = n;
	*bytes = size;
	mutex_exit(&bufcache_lock);
	return;
}

int
lfs_wait_pages(void)
{
	int active, inactive;

	uvm_estimatepageable(&active, &inactive);
	return LFS_WAIT_RESOURCE(active + inactive + uvmexp.free, 1);
}

int
lfs_max_pages(void)
{
	int active, inactive;

	uvm_estimatepageable(&active, &inactive);
	return LFS_MAX_RESOURCE(active + inactive + uvmexp.free, 1);
}
