lfs_pages.c source code [src/src/sys/ufs/lfs/lfs_pages.c]

/*	$NetBSD: lfs_pages.c,v 1.9 2016/10/04 16:46:20 christos Exp $	*/
2
/*-
4	* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
5	* All rights reserved.
6	*
7	* This code is derived from software contributed to The NetBSD Foundation
8	* by Konrad E. Schroder <perseant@hhhh.org>.
9	*
10	* Redistribution and use in source and binary forms, with or without
11	* modification, are permitted provided that the following conditions
12	* are met:
13	* 1. Redistributions of source code must retain the above copyright
14	* notice, this list of conditions and the following disclaimer.
15	* 2. Redistributions in binary form must reproduce the above copyright
16	* notice, this list of conditions and the following disclaimer in the
17	* documentation and/or other materials provided with the distribution.
18	*
19	* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20	* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21	* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29	* POSSIBILITY OF SUCH DAMAGE.
30	*/
31	/*
32	* Copyright (c) 1986, 1989, 1991, 1993, 1995
33	* The Regents of the University of California. All rights reserved.
34	*
35	* Redistribution and use in source and binary forms, with or without
36	* modification, are permitted provided that the following conditions
37	* are met:
38	* 1. Redistributions of source code must retain the above copyright
39	* notice, this list of conditions and the following disclaimer.
40	* 2. Redistributions in binary form must reproduce the above copyright
41	* notice, this list of conditions and the following disclaimer in the
42	* documentation and/or other materials provided with the distribution.
43	* 3. Neither the name of the University nor the names of its contributors
44	* may be used to endorse or promote products derived from this software
45	* without specific prior written permission.
46	*
47	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57	* SUCH DAMAGE.
58	*
59	* @(#)lfs_vnops.c 8.13 (Berkeley) 6/10/95
60	*/
61
62	#include <sys/cdefs.h>
63	__KERNEL_RCSID(`0`, "$NetBSD: lfs_pages.c,v 1.9 2016/10/04 16:46:20 christos Exp $");
64
65	#ifdef _KERNEL_OPT
66	#include "opt_compat_netbsd.h"
67	#include "opt_uvm_page_trkown.h"
68	#endif
69
70	#include <sys/param.h>
71	#include <sys/systm.h>
72	#include <sys/namei.h>
73	#include <sys/resourcevar.h>
74	#include <sys/kernel.h>
75	#include <sys/file.h>
76	#include <sys/stat.h>
77	#include <sys/buf.h>
78	#include <sys/proc.h>
79	#include <sys/mount.h>
80	#include <sys/vnode.h>
81	#include <sys/pool.h>
82	#include <sys/signalvar.h>
83	#include <sys/kauth.h>
84	#include <sys/syslog.h>
85	#include <sys/fstrans.h>
86
87	#include <miscfs/fifofs/fifo.h>
88	#include <miscfs/genfs/genfs.h>
89	#include <miscfs/specfs/specdev.h>
90
91	#include <ufs/lfs/ulfs_inode.h>
92	#include <ufs/lfs/ulfsmount.h>
93	#include <ufs/lfs/ulfs_bswap.h>
94	#include <ufs/lfs/ulfs_extern.h>
95
96	#include <uvm/uvm.h>
97	#include <uvm/uvm_pmap.h>
98	#include <uvm/uvm_stat.h>
99	#include <uvm/uvm_pager.h>
100
101	#include <ufs/lfs/lfs.h>
102	#include <ufs/lfs/lfs_accessors.h>
103	#include <ufs/lfs/lfs_kernel.h>
104	#include <ufs/lfs/lfs_extern.h>
105
106	extern pid_t lfs_writer_daemon;
107
108	static int check_dirty(struct lfs , struct* vnode , off_t, off_t, off_t, int, int, struct* vm_page **);
109
110	int
111	lfs_getpages(void *v)
112	{
113	struct vop_getpages_args / {*
114	struct vnode a_vp;*
115	voff_t a_offset;
116	struct vm_page a_m;
117	int a_count;*
118	int a_centeridx;
119	vm_prot_t a_access_type;
120	int a_advice;
121	int a_flags;
122	} /* *ap = v;
123
124	if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM &&
125	(ap->a_access_type & VM_PROT_WRITE) != `0`) {
126	return EPERM;
127	}
128	if ((ap->a_access_type & VM_PROT_WRITE) != `0`) {
129	mutex_enter(&lfs_lock);
130	LFS_SET_UINO(VTOI(ap->a_vp), IN_MODIFIED);
131	mutex_exit(&lfs_lock);
132	}
133
134	/*
135	* we're relying on the fact that genfs_getpages() always read in
136	* entire filesystem blocks.
137	*/
138	return genfs_getpages(v);
139	}
140
141	/*
142	* Wait for a page to become unbusy, possibly printing diagnostic messages
143	* as well.
144	*
145	* Called with vp->v_interlock held; return with it held.
146	*/
147	static void
148	wait_for_page(struct vnode vp, struct* vm_page pg, const* char *label)
149	{
150	KASSERT(mutex_owned(vp->v_interlock));
151	if ((pg->flags & PG_BUSY) == `0`)
152	return; / Nothing to wait for! /
153
154	#if defined(DEBUG) && defined(UVM_PAGE_TRKOWN)
155	static struct vm_page *lastpg;
156
157	if (label != NULL && pg != lastpg) {
158	if (pg->owner_tag) {
159	printf("lfs_putpages[%d.%d]: %s: page %p owner %d.%d [%s]\n",
160	curproc->p_pid, curlwp->l_lid, label,
161	pg, pg->owner, pg->lowner, pg->owner_tag);
162	} else {
163	printf("lfs_putpages[%d.%d]: %s: page %p unowned?!\n",
164	curproc->p_pid, curlwp->l_lid, label, pg);
165	}
166	}
167	lastpg = pg;
168	#endif
169
170	pg->flags \|= PG_WANTED;
171	UVM_UNLOCK_AND_WAIT(pg, vp->v_interlock, `0`, "lfsput", `0`);
172	mutex_enter(vp->v_interlock);
173	}
174
175	/*
176	* This routine is called by lfs_putpages() when it can't complete the
177	* write because a page is busy. This means that either (1) someone,
178	* possibly the pagedaemon, is looking at this page, and will give it up
179	* presently; or (2) we ourselves are holding the page busy in the
180	* process of being written (either gathered or actually on its way to
181	* disk). We don't need to give up the segment lock, but we might need
182	* to call lfs_writeseg() to expedite the page's journey to disk.
183	*
184	* Called with vp->v_interlock held; return with it held.
185	*/
186	/ #define BUSYWAIT /
187	static void
188	write_and_wait(struct lfs fs, struct* vnode vp, struct* vm_page *pg,
189	int seglocked, const char *label)
190	{
191	KASSERT(mutex_owned(vp->v_interlock));
192	#ifndef BUSYWAIT
193	struct inode *ip = VTOI(vp);
194	struct segment *sp = fs->lfs_sp;
195	int count = `0`;
196
197	if (pg == NULL)
198	return;
199
200	while (pg->flags & PG_BUSY &&
201	pg->uobject == &vp->v_uobj) {
202	mutex_exit(vp->v_interlock);
203	if (sp->cbpp - sp->bpp > `1`) {
204	/ Write gathered pages /
205	lfs_updatemeta(sp);
206	lfs_release_finfo(fs);
207	(void) lfs_writeseg(fs, sp);
208
209	/*
210	* Reinitialize FIP
211	*/
212	KASSERT(sp->vp == vp);
213	lfs_acquire_finfo(fs, ip->i_number,
214	ip->i_gen);
215	}
216	++count;
217	mutex_enter(vp->v_interlock);
218	wait_for_page(vp, pg, label);
219	}
220	if (label != NULL && count > `1`) {
221	DLOG((DLOG_PAGE, "lfs_putpages[%d]: %s: %sn = %d\n",
222	curproc->p_pid, label, (count > `0` ? "looping, " : ""),
223	count));
224	}
225	#else
226	preempt(`1`);
227	#endif
228	KASSERT(mutex_owned(vp->v_interlock));
229	}
230
231	/*
232	* Make sure that for all pages in every block in the given range,
233	* either all are dirty or all are clean. If any of the pages
234	* we've seen so far are dirty, put the vnode on the paging chain,
235	* and mark it IN_PAGING.
236	*
237	* If checkfirst != 0, don't check all the pages but return at the
238	* first dirty page.
239	*/
240	static int
241	check_dirty(struct lfs fs, struct* vnode *vp,
242	off_t startoffset, off_t endoffset, off_t blkeof,
243	int flags, int checkfirst, struct vm_page **pgp)
244	{
245	int by_list;
246	struct vm_page curpg = NULL; /* XXX: gcc /
247	struct vm_page pgs[MAXBSIZE / MIN_PAGE_SIZE], pg;
248	off_t soff = `0`; / XXX: gcc /
249	voff_t off;
250	int i;
251	int nonexistent;
252	int any_dirty; / number of dirty pages /
253	int dirty; / number of dirty pages in a block /
254	int tdirty;
255	int pages_per_block = lfs_sb_getbsize(fs) >> PAGE_SHIFT;
256	int pagedaemon = (curlwp == uvm.pagedaemon_lwp);
257
258	KASSERT(mutex_owned(vp->v_interlock));
259	ASSERT_MAYBE_SEGLOCK(fs);
260	top:
261	by_list = (vp->v_uobj.uo_npages <=
262	((endoffset - startoffset) >> PAGE_SHIFT) *
263	UVM_PAGE_TREE_PENALTY);
264	any_dirty = `0`;
265
266	if (by_list) {
267	curpg = TAILQ_FIRST(&vp->v_uobj.memq);
268	} else {
269	soff = startoffset;
270	}
271	while (by_list \|\| soff < MIN(blkeof, endoffset)) {
272	if (by_list) {
273	/*
274	* Find the first page in a block. Skip
275	* blocks outside our area of interest or beyond
276	* the end of file.
277	*/
278	KASSERT(curpg == NULL
279	\|\| (curpg->flags & PG_MARKER) == `0`);
280	if (pages_per_block > `1`) {
281	while (curpg &&
282	((curpg->offset & lfs_sb_getbmask(fs)) \|\|
283	curpg->offset >= vp->v_size \|\|
284	curpg->offset >= endoffset)) {
285	curpg = TAILQ_NEXT(curpg, listq.queue);
286	KASSERT(curpg == NULL \|\|
287	(curpg->flags & PG_MARKER) == `0`);
288	}
289	}
290	if (curpg == NULL)
291	break;
292	soff = curpg->offset;
293	}
294
295	/*
296	* Mark all pages in extended range busy; find out if any
297	* of them are dirty.
298	*/
299	nonexistent = dirty = `0`;
300	for (i = `0`; i == `0` \|\| i < pages_per_block; i++) {
301	KASSERT(mutex_owned(vp->v_interlock));
302	if (by_list && pages_per_block <= `1`) {
303	pgs[i] = pg = curpg;
304	} else {
305	off = soff + (i << PAGE_SHIFT);
306	pgs[i] = pg = uvm_pagelookup(&vp->v_uobj, off);
307	if (pg == NULL) {
308	++nonexistent;
309	continue;
310	}
311	}
312	KASSERT(pg != NULL);
313
314	/*
315	* If we're holding the segment lock, we can deadlock
316	* against a process that has our page and is waiting
317	* for the cleaner, while the cleaner waits for the
318	* segment lock. Just bail in that case.
319	*/
320	if ((pg->flags & PG_BUSY) &&
321	(pagedaemon \|\| LFS_SEGLOCK_HELD(fs))) {
322	if (i > `0`)
323	uvm_page_unbusy(pgs, i);
324	DLOG((DLOG_PAGE, "lfs_putpages: avoiding 3-way or pagedaemon deadlock\n"));
325	if (pgp)
326	*pgp = pg;
327	KASSERT(mutex_owned(vp->v_interlock));
328	return -`1`;
329	}
330
331	while (pg->flags & PG_BUSY) {
332	wait_for_page(vp, pg, NULL);
333	KASSERT(mutex_owned(vp->v_interlock));
334	if (i > `0`)
335	uvm_page_unbusy(pgs, i);
336	KASSERT(mutex_owned(vp->v_interlock));
337	goto top;
338	}
339	pg->flags \|= PG_BUSY;
340	UVM_PAGE_OWN(pg, "lfs_putpages");
341
342	pmap_page_protect(pg, VM_PROT_NONE);
343	tdirty = (pmap_clear_modify(pg) \|\|
344	(pg->flags & PG_CLEAN) == `0`);
345	dirty += tdirty;
346	}
347	if (pages_per_block > `0` && nonexistent >= pages_per_block) {
348	if (by_list) {
349	curpg = TAILQ_NEXT(curpg, listq.queue);
350	} else {
351	soff += lfs_sb_getbsize(fs);
352	}
353	continue;
354	}
355
356	any_dirty += dirty;
357	KASSERT(nonexistent == `0`);
358	KASSERT(mutex_owned(vp->v_interlock));
359
360	/*
361	* If any are dirty make all dirty; unbusy them,
362	* but if we were asked to clean, wire them so that
363	* the pagedaemon doesn't bother us about them while
364	* they're on their way to disk.
365	*/
366	for (i = `0`; i == `0` \|\| i < pages_per_block; i++) {
367	KASSERT(mutex_owned(vp->v_interlock));
368	pg = pgs[i];
369	KASSERT(!((pg->flags & PG_CLEAN) && (pg->flags & PG_DELWRI)));
370	KASSERT(pg->flags & PG_BUSY);
371	if (dirty) {
372	pg->flags &= ~PG_CLEAN;
373	if (flags & PGO_FREE) {
374	/*
375	* Wire the page so that
376	* pdaemon doesn't see it again.
377	*/
378	mutex_enter(&uvm_pageqlock);
379	uvm_pagewire(pg);
380	mutex_exit(&uvm_pageqlock);
381
382	/ Suspended write flag /
383	pg->flags \|= PG_DELWRI;
384	}
385	}
386	if (pg->flags & PG_WANTED)
387	wakeup(pg);
388	pg->flags &= ~(PG_WANTED\|PG_BUSY);
389	UVM_PAGE_OWN(pg, NULL);
390	}
391
392	if (checkfirst && any_dirty)
393	break;
394
395	if (by_list) {
396	curpg = TAILQ_NEXT(curpg, listq.queue);
397	} else {
398	soff += MAX(PAGE_SIZE, lfs_sb_getbsize(fs));
399	}
400	}
401
402	KASSERT(mutex_owned(vp->v_interlock));
403	return any_dirty;
404	}
405
406	/*
407	* lfs_putpages functions like genfs_putpages except that
408	*
409	* (1) It needs to bounds-check the incoming requests to ensure that
410	* they are block-aligned; if they are not, expand the range and
411	* do the right thing in case, e.g., the requested range is clean
412	* but the expanded range is dirty.
413	*
414	* (2) It needs to explicitly send blocks to be written when it is done.
415	* If VOP_PUTPAGES is called without the seglock held, we simply take
416	* the seglock and let lfs_segunlock wait for us.
417	* XXX There might be a bad situation if we have to flush a vnode while
418	* XXX lfs_markv is in operation. As of this writing we panic in this
419	* XXX case.
420	*
421	* Assumptions:
422	*
423	* (1) The caller does not hold any pages in this vnode busy. If it does,
424	* there is a danger that when we expand the page range and busy the
425	* pages we will deadlock.
426	*
427	* (2) We are called with vp->v_interlock held; we must return with it
428	* released.
429	*
430	* (3) We don't absolutely have to free pages right away, provided that
431	* the request does not have PGO_SYNCIO. When the pagedaemon gives
432	* us a request with PGO_FREE, we take the pages out of the paging
433	* queue and wake up the writer, which will handle freeing them for us.
434	*
435	* We ensure that for any filesystem block, all pages for that
436	* block are either resident or not, even if those pages are higher
437	* than EOF; that means that we will be getting requests to free
438	* "unused" pages above EOF all the time, and should ignore them.
439	*
440	* (4) If we are called with PGO_LOCKED, the finfo array we are to write
441	* into has been set up for us by lfs_writefile. If not, we will
442	* have to handle allocating and/or freeing an finfo entry.
443	*
444	* XXX note that we're (ab)using PGO_LOCKED as "seglock held".
445	*/
446
447	/ How many times to loop before we should start to worry /
448	#define TOOMANY 4
449
450	int
451	lfs_putpages(void *v)
452	{
453	int error;
454	struct vop_putpages_args / {*
455	struct vnode a_vp;*
456	voff_t a_offlo;
457	voff_t a_offhi;
458	int a_flags;
459	} /* *ap = v;
460	struct vnode *vp;
461	struct inode *ip;
462	struct lfs *fs;
463	struct segment *sp;
464	off_t origoffset, startoffset, endoffset, origendoffset, blkeof;
465	off_t off, max_endoffset;
466	bool seglocked, sync, pagedaemon, reclaim;
467	struct vm_page pg, busypg;
468	UVMHIST_FUNC("lfs_putpages"); UVMHIST_CALLED(ubchist);
469	int oreclaim = `0`;
470	int donewriting = `0`;
471	#ifdef DEBUG
472	int debug_n_again, debug_n_dirtyclean;
473	#endif
474
475	vp = ap->a_vp;
476	ip = VTOI(vp);
477	fs = ip->i_lfs;
478	sync = (ap->a_flags & PGO_SYNCIO) != `0`;
479	reclaim = (ap->a_flags & PGO_RECLAIM) != `0`;
480	pagedaemon = (curlwp == uvm.pagedaemon_lwp);
481
482	KASSERT(mutex_owned(vp->v_interlock));
483
484	/ Putpages does nothing for metadata. /
485	if (vp == fs->lfs_ivnode \|\| vp->v_type != VREG) {
486	mutex_exit(vp->v_interlock);
487	return `0`;
488	}
489
490	/*
491	* If there are no pages, don't do anything.
492	*/
493	if (vp->v_uobj.uo_npages == `0`) {
494	if (TAILQ_EMPTY(&vp->v_uobj.memq) &&
495	(vp->v_iflag & VI_ONWORKLST) &&
496	LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
497	vp->v_iflag &= ~VI_WRMAPDIRTY;
498	vn_syncer_remove_from_worklist(vp);
499	}
500	mutex_exit(vp->v_interlock);
501
502	/ Remove us from paging queue, if we were on it /
503	mutex_enter(&lfs_lock);
504	if (ip->i_flags & IN_PAGING) {
505	ip->i_flags &= ~IN_PAGING;
506	TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
507	}
508	mutex_exit(&lfs_lock);
509
510	KASSERT(!mutex_owned(vp->v_interlock));
511	return `0`;
512	}
513
514	blkeof = lfs_blkroundup(fs, ip->i_size);
515
516	/*
517	* Ignore requests to free pages past EOF but in the same block
518	* as EOF, unless the vnode is being reclaimed or the request
519	* is synchronous. (If the request is sync, it comes from
520	* lfs_truncate.)
521	*
522	* To avoid being flooded with this request, make these pages
523	* look "active".
524	*/
525	if (!sync && !reclaim &&
526	ap->a_offlo >= ip->i_size && ap->a_offlo < blkeof) {
527	origoffset = ap->a_offlo;
528	for (off = origoffset; off < blkeof; off += lfs_sb_getbsize(fs)) {
529	pg = uvm_pagelookup(&vp->v_uobj, off);
530	KASSERT(pg != NULL);
531	while (pg->flags & PG_BUSY) {
532	pg->flags \|= PG_WANTED;
533	UVM_UNLOCK_AND_WAIT(pg, vp->v_interlock, `0`,
534	"lfsput2", `0`);
535	mutex_enter(vp->v_interlock);
536	}
537	mutex_enter(&uvm_pageqlock);
538	uvm_pageactivate(pg);
539	mutex_exit(&uvm_pageqlock);
540	}
541	ap->a_offlo = blkeof;
542	if (ap->a_offhi > `0` && ap->a_offhi <= ap->a_offlo) {
543	mutex_exit(vp->v_interlock);
544	return `0`;
545	}
546	}
547
548	/*
549	* Extend page range to start and end at block boundaries.
550	* (For the purposes of VOP_PUTPAGES, fragments don't exist.)
551	*/
552	origoffset = ap->a_offlo;
553	origendoffset = ap->a_offhi;
554	startoffset = origoffset & ~(lfs_sb_getbmask(fs));
555	max_endoffset = (trunc_page(LLONG_MAX) >> lfs_sb_getbshift(fs))
556	<< lfs_sb_getbshift(fs);
557
558	if (origendoffset == `0` \|\| ap->a_flags & PGO_ALLPAGES) {
559	endoffset = max_endoffset;
560	origendoffset = endoffset;
561	} else {
562	origendoffset = round_page(ap->a_offhi);
563	endoffset = round_page(lfs_blkroundup(fs, origendoffset));
564	}
565
566	KASSERT(startoffset > `0` \|\| endoffset >= startoffset);
567	if (startoffset == endoffset) {
568	/ Nothing to do, why were we called? /
569	mutex_exit(vp->v_interlock);
570	DLOG((DLOG_PAGE, "lfs_putpages: startoffset = endoffset = %"
571	PRId64 "\n", startoffset));
572	return `0`;
573	}
574
575	ap->a_offlo = startoffset;
576	ap->a_offhi = endoffset;
577
578	/*
579	* If not cleaning, just send the pages through genfs_putpages
580	* to be returned to the pool.
581	*/
582	if (!(ap->a_flags & PGO_CLEANIT)) {
583	DLOG((DLOG_PAGE, "lfs_putpages: no cleanit vn %p ino %d (flags %x)\n",
584	vp, (int)ip->i_number, ap->a_flags));
585	int r = genfs_putpages(v);
586	KASSERT(!mutex_owned(vp->v_interlock));
587	return r;
588	}
589
590	/ Set PGO_BUSYFAIL to avoid deadlocks /
591	ap->a_flags \|= PGO_BUSYFAIL;
592
593	/*
594	* Likewise, if we are asked to clean but the pages are not
595	* dirty, we can just free them using genfs_putpages.
596	*/
597	#ifdef DEBUG
598	debug_n_dirtyclean = `0`;
599	#endif
600	do {
601	int r;
602	KASSERT(mutex_owned(vp->v_interlock));
603
604	/ Count the number of dirty pages /
605	r = check_dirty(fs, vp, startoffset, endoffset, blkeof,
606	ap->a_flags, `1`, NULL);
607	if (r < `0`) {
608	/ Pages are busy with another process /
609	mutex_exit(vp->v_interlock);
610	return EDEADLK;
611	}
612	if (r > `0`) / Some pages are dirty /
613	break;
614
615	/*
616	* Sometimes pages are dirtied between the time that
617	* we check and the time we try to clean them.
618	* Instruct lfs_gop_write to return EDEADLK in this case
619	* so we can write them properly.
620	*/
621	ip->i_lfs_iflags \|= LFSI_NO_GOP_WRITE;
622	r = genfs_do_putpages(vp, startoffset, endoffset,
623	ap->a_flags & ~PGO_SYNCIO, &busypg);
624	ip->i_lfs_iflags &= ~LFSI_NO_GOP_WRITE;
625	if (r != EDEADLK) {
626	KASSERT(!mutex_owned(vp->v_interlock));
627	return r;
628	}
629
630	/ One of the pages was busy. Start over. /
631	mutex_enter(vp->v_interlock);
632	wait_for_page(vp, busypg, "dirtyclean");
633	#ifdef DEBUG
634	++debug_n_dirtyclean;
635	#endif
636	} while(`1`);
637
638	#ifdef DEBUG
639	if (debug_n_dirtyclean > TOOMANY)
640	DLOG((DLOG_PAGE, "lfs_putpages: dirtyclean: looping, n = %d\n",
641	debug_n_dirtyclean));
642	#endif
643
644	/*
645	* Dirty and asked to clean.
646	*
647	* Pagedaemon can't actually write LFS pages; wake up
648	* the writer to take care of that. The writer will
649	* notice the pager inode queue and act on that.
650	*
651	* XXX We must drop the vp->interlock before taking the lfs_lock or we
652	* get a nasty deadlock with lfs_flush_pchain().
653	*/
654	if (pagedaemon) {
655	mutex_exit(vp->v_interlock);
656	mutex_enter(&lfs_lock);
657	if (!(ip->i_flags & IN_PAGING)) {
658	ip->i_flags \|= IN_PAGING;
659	TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, i_lfs_pchain);
660	}
661	wakeup(&lfs_writer_daemon);
662	mutex_exit(&lfs_lock);
663	preempt();
664	KASSERT(!mutex_owned(vp->v_interlock));
665	return EWOULDBLOCK;
666	}
667
668	/*
669	* If this is a file created in a recent dirop, we can't flush its
670	* inode until the dirop is complete. Drain dirops, then flush the
671	* filesystem (taking care of any other pending dirops while we're
672	* at it).
673	*/
674	if ((ap->a_flags & (PGO_CLEANIT\|PGO_LOCKED)) == PGO_CLEANIT &&
675	(vp->v_uflag & VU_DIROP)) {
676	DLOG((DLOG_PAGE, "lfs_putpages: flushing VU_DIROP\n"));
677
678	lfs_writer_enter(fs, "ppdirop");
679
680	/ Note if we hold the vnode locked /
681	if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE)
682	{
683	DLOG((DLOG_PAGE, "lfs_putpages: dirop inode already locked\n"));
684	} else {
685	DLOG((DLOG_PAGE, "lfs_putpages: dirop inode not locked\n"));
686	}
687	mutex_exit(vp->v_interlock);
688
689	mutex_enter(&lfs_lock);
690	lfs_flush_fs(fs, sync ? SEGM_SYNC : `0`);
691	mutex_exit(&lfs_lock);
692
693	mutex_enter(vp->v_interlock);
694	lfs_writer_leave(fs);
695
696	/ The flush will have cleaned out this vnode as well,*
697	no need to do more to it. /*
698	}
699
700	/*
701	* This is it. We are going to write some pages. From here on
702	* down it's all just mechanics.
703	*
704	* Don't let genfs_putpages wait; lfs_segunlock will wait for us.
705	*/
706	ap->a_flags &= ~PGO_SYNCIO;
707
708	/*
709	* If we've already got the seglock, flush the node and return.
710	* The FIP has already been set up for us by lfs_writefile,
711	* and FIP cleanup and lfs_updatemeta will also be done there,
712	* unless genfs_putpages returns EDEADLK; then we must flush
713	* what we have, and correct FIP and segment header accounting.
714	*/
715	get_seglock:
716	/*
717	* If we are not called with the segment locked, lock it.
718	* Account for a new FIP in the segment header, and set sp->vp.
719	* (This should duplicate the setup at the top of lfs_writefile().)
720	*/
721	seglocked = (ap->a_flags & PGO_LOCKED) != `0`;
722	if (!seglocked) {
723	mutex_exit(vp->v_interlock);
724	error = lfs_seglock(fs, SEGM_PROT \| (sync ? SEGM_SYNC : `0`));
725	if (error != `0`) {
726	KASSERT(!mutex_owned(vp->v_interlock));
727	return error;
728	}
729	mutex_enter(vp->v_interlock);
730	lfs_acquire_finfo(fs, ip->i_number, ip->i_gen);
731	}
732	sp = fs->lfs_sp;
733	KASSERT(sp->vp == NULL);
734	sp->vp = vp;
735
736	/ Note segments written by reclaim; only for debugging /
737	if (vdead_check(vp, VDEAD_NOWAIT) != `0`) {
738	sp->seg_flags \|= SEGM_RECLAIM;
739	fs->lfs_reclino = ip->i_number;
740	}
741
742	/*
743	* Ensure that the partial segment is marked SS_DIROP if this
744	* vnode is a DIROP.
745	*/
746	if (!seglocked && vp->v_uflag & VU_DIROP) {
747	SEGSUM *ssp = sp->segsum;
748
749	lfs_ss_setflags(fs, ssp,
750	lfs_ss_getflags(fs, ssp) \| (SS_DIROP\|SS_CONT));
751	}
752
753	/*
754	* Loop over genfs_putpages until all pages are gathered.
755	* genfs_putpages() drops the interlock, so reacquire it if necessary.
756	* Whenever we lose the interlock we have to rerun check_dirty, as
757	* well, since more pages might have been dirtied in our absence.
758	*/
759	#ifdef DEBUG
760	debug_n_again = `0`;
761	#endif
762	do {
763	busypg = NULL;
764	KASSERT(mutex_owned(vp->v_interlock));
765	if (check_dirty(fs, vp, startoffset, endoffset, blkeof,
766	ap->a_flags, `0`, &busypg) < `0`) {
767	mutex_exit(vp->v_interlock);
768	/ XXX why? --ks /
769	mutex_enter(vp->v_interlock);
770	write_and_wait(fs, vp, busypg, seglocked, NULL);
771	if (!seglocked) {
772	mutex_exit(vp->v_interlock);
773	lfs_release_finfo(fs);
774	lfs_segunlock(fs);
775	mutex_enter(vp->v_interlock);
776	}
777	sp->vp = NULL;
778	goto get_seglock;
779	}
780
781	busypg = NULL;
782	KASSERT(!mutex_owned(&uvm_pageqlock));
783	oreclaim = (ap->a_flags & PGO_RECLAIM);
784	ap->a_flags &= ~PGO_RECLAIM;
785	error = genfs_do_putpages(vp, startoffset, endoffset,
786	ap->a_flags, &busypg);
787	ap->a_flags \|= oreclaim;
788
789	if (error == EDEADLK \|\| error == EAGAIN) {
790	DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned"
791	" %d ino %d off %jx (seg %d)\n", error,
792	ip->i_number, (uintmax_t)lfs_sb_getoffset(fs),
793	lfs_dtosn(fs, lfs_sb_getoffset(fs))));
794
795	if (oreclaim) {
796	mutex_enter(vp->v_interlock);
797	write_and_wait(fs, vp, busypg, seglocked, "again");
798	mutex_exit(vp->v_interlock);
799	} else {
800	if ((sp->seg_flags & SEGM_SINGLE) &&
801	lfs_sb_getcurseg(fs) != fs->lfs_startseg)
802	donewriting = `1`;
803	}
804	} else if (error) {
805	DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned"
806	" %d ino %d off %jx (seg %d)\n", error,
807	(int)ip->i_number, (uintmax_t)lfs_sb_getoffset(fs),
808	lfs_dtosn(fs, lfs_sb_getoffset(fs))));
809	}
810	/ genfs_do_putpages loses the interlock /
811	#ifdef DEBUG
812	++debug_n_again;
813	#endif
814	if (oreclaim && error == EAGAIN) {
815	DLOG((DLOG_PAGE, "vp %p ino %d vi_flags %x a_flags %x avoiding vclean panic\n",
816	vp, (int)ip->i_number, vp->v_iflag, ap->a_flags));
817	mutex_enter(vp->v_interlock);
818	}
819	if (error == EDEADLK)
820	mutex_enter(vp->v_interlock);
821	} while (error == EDEADLK \|\| (oreclaim && error == EAGAIN));
822	#ifdef DEBUG
823	if (debug_n_again > TOOMANY)
824	DLOG((DLOG_PAGE, "lfs_putpages: again: looping, n = %d\n", debug_n_again));
825	#endif
826
827	KASSERT(sp != NULL && sp->vp == vp);
828	if (!seglocked && !donewriting) {
829	sp->vp = NULL;
830
831	/ Write indirect blocks as well /
832	lfs_gather(fs, fs->lfs_sp, vp, lfs_match_indir);
833	lfs_gather(fs, fs->lfs_sp, vp, lfs_match_dindir);
834	lfs_gather(fs, fs->lfs_sp, vp, lfs_match_tindir);
835
836	KASSERT(sp->vp == NULL);
837	sp->vp = vp;
838	}
839
840	/*
841	* Blocks are now gathered into a segment waiting to be written.
842	* All that's left to do is update metadata, and write them.
843	*/
844	lfs_updatemeta(sp);
845	KASSERT(sp->vp == vp);
846	sp->vp = NULL;
847
848	/*
849	* If we were called from lfs_writefile, we don't need to clean up
850	* the FIP or unlock the segment lock. We're done.
851	*/
852	if (seglocked) {
853	KASSERT(!mutex_owned(vp->v_interlock));
854	return error;
855	}
856
857	/ Clean up FIP and send it to disk. /
858	lfs_release_finfo(fs);
859	lfs_writeseg(fs, fs->lfs_sp);
860
861	/*
862	* Remove us from paging queue if we wrote all our pages.
863	*/
864	if (origendoffset == `0` \|\| ap->a_flags & PGO_ALLPAGES) {
865	mutex_enter(&lfs_lock);
866	if (ip->i_flags & IN_PAGING) {
867	ip->i_flags &= ~IN_PAGING;
868	TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
869	}
870	mutex_exit(&lfs_lock);
871	}
872
873	/*
874	* XXX - with the malloc/copy writeseg, the pages are freed by now
875	* even if we don't wait (e.g. if we hold a nested lock). This
876	* will not be true if we stop using malloc/copy.
877	*/
878	KASSERT(fs->lfs_sp->seg_flags & SEGM_PROT);
879	lfs_segunlock(fs);
880
881	/*
882	* Wait for v_numoutput to drop to zero. The seglock should
883	* take care of this, but there is a slight possibility that
884	* aiodoned might not have got around to our buffers yet.
885	*/
886	if (sync) {
887	mutex_enter(vp->v_interlock);
888	while (vp->v_numoutput > `0`) {
889	DLOG((DLOG_PAGE, "lfs_putpages: ino %d sleeping on"
890	" num %d\n", ip->i_number, vp->v_numoutput));
891	cv_wait(&vp->v_cv, vp->v_interlock);
892	}
893	mutex_exit(vp->v_interlock);
894	}
895	KASSERT(!mutex_owned(vp->v_interlock));
896	return error;
897	}
898
899

Browse the source code of src/src/sys/ufs/lfs/lfs_pages.c