/* $NetBSD: lfs_syscalls.c,v 1.172 2015/10/15 06:15:48 dholland Exp $ */

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007, 2008
 * The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*-
 * Copyright (c) 1991, 1993, 1994
 *	The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_syscalls.c	8.10 (Berkeley) 5/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_syscalls.c,v 1.172 2015/10/15 06:15:48 dholland Exp $");

#ifndef LFS
# define LFS	/* for prototypes in syscallargs.h */
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/kernel.h>
#include <sys/kauth.h>
#include <sys/syscallargs.h>

#include <ufs/lfs/ulfs_inode.h>
#include <ufs/lfs/ulfsmount.h>
#include <ufs/lfs/ulfs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_accessors.h>
#include <ufs/lfs/lfs_kernel.h>
#include <ufs/lfs/lfs_extern.h>

static int lfs_fastvget(struct mount *, ino_t, BLOCK_INFO *, int,
    struct vnode **);
static struct buf *lfs_fakebuf(struct lfs *, struct vnode *, daddr_t,
    size_t, void *);

/*
 * sys_lfs_markv:
 *
 * This will mark inodes and blocks dirty, so they are written into the log.
 * It will block until all the blocks have been written. The segment create
 * time passed in the block_info and inode_info structures is used to decide
 * if the data is valid for each block (in case some process dirtied a block
 * or inode that is being cleaned between the determination that a block is
 * live and the lfs_markv call).
 *
 * 0 on success
 * -1/errno is returned on error.
 */
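/*
 * Illustrative cleaner-side use (a sketch only, not code taken from
 * lfs_cleanerd; see lfs_markv(2) for the authoritative interface):
 *
 *	struct block_info bi[n];
 *	... fill in bi_inode, bi_lbn, bi_daddr, bi_segcreate, bi_version,
 *	    bi_bp and bi_size for each block believed to be live ...
 *	if (lfs_markv(&fsid, bi, n) < 0 && errno == EAGAIN)
 *		... some inodes were busy; try this segment again later ...
 */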
#ifdef USE_64BIT_SYSCALLS
int
sys_lfs_markv(struct lwp *l, const struct sys_lfs_markv_args *uap, register_t *retval)
{
	/* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */
	BLOCK_INFO *blkiov;
	int blkcnt, error;
	fsid_t fsid;
	struct lfs *fs;
	struct mount *mntp;

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);
	fs = VFSTOULFS(mntp)->um_lfs;

	blkcnt = SCARG(uap, blkcnt);
	if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
		return (EINVAL);

	KERNEL_LOCK(1, NULL);
	blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
	if ((error = copyin(SCARG(uap, blkiov), blkiov,
	    blkcnt * sizeof(BLOCK_INFO))) != 0)
		goto out;

	if ((error = lfs_markv(l, &fsid, blkiov, blkcnt)) == 0)
		copyout(blkiov, SCARG(uap, blkiov),
		    blkcnt * sizeof(BLOCK_INFO));
    out:
	lfs_free(fs, blkiov, LFS_NB_BLKIOV);
	KERNEL_UNLOCK_ONE(NULL);
	return error;
}
#else
int
sys_lfs_markv(struct lwp *l, const struct sys_lfs_markv_args *uap, register_t *retval)
{
	/* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */
	BLOCK_INFO *blkiov;
	BLOCK_INFO_15 *blkiov15;
	int i, blkcnt, error;
	fsid_t fsid;
	struct lfs *fs;
	struct mount *mntp;

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);
	fs = VFSTOULFS(mntp)->um_lfs;

	blkcnt = SCARG(uap, blkcnt);
	if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
		return (EINVAL);

	KERNEL_LOCK(1, NULL);
	blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
	blkiov15 = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO_15), LFS_NB_BLKIOV);
	if ((error = copyin(SCARG(uap, blkiov), blkiov15,
	    blkcnt * sizeof(BLOCK_INFO_15))) != 0)
		goto out;

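	/*
	 * The userland cleaner hands us an array of BLOCK_INFO_15, the
	 * block_info layout used by the existing userland interface;
	 * convert it field by field into the kernel's current BLOCK_INFO
	 * (and back again below) so lfs_markv() only has to deal with one
	 * representation.
	 */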
	for (i = 0; i < blkcnt; i++) {
		blkiov[i].bi_inode = blkiov15[i].bi_inode;
		blkiov[i].bi_lbn = blkiov15[i].bi_lbn;
		blkiov[i].bi_daddr = blkiov15[i].bi_daddr;
		blkiov[i].bi_segcreate = blkiov15[i].bi_segcreate;
		blkiov[i].bi_version = blkiov15[i].bi_version;
		blkiov[i].bi_bp = blkiov15[i].bi_bp;
		blkiov[i].bi_size = blkiov15[i].bi_size;
	}

	if ((error = lfs_markv(l, &fsid, blkiov, blkcnt)) == 0) {
		for (i = 0; i < blkcnt; i++) {
			blkiov15[i].bi_inode = blkiov[i].bi_inode;
			blkiov15[i].bi_lbn = blkiov[i].bi_lbn;
			blkiov15[i].bi_daddr = blkiov[i].bi_daddr;
			blkiov15[i].bi_segcreate = blkiov[i].bi_segcreate;
			blkiov15[i].bi_version = blkiov[i].bi_version;
			blkiov15[i].bi_bp = blkiov[i].bi_bp;
			blkiov15[i].bi_size = blkiov[i].bi_size;
		}
		copyout(blkiov15, SCARG(uap, blkiov),
		    blkcnt * sizeof(BLOCK_INFO_15));
	}
    out:
	lfs_free(fs, blkiov, LFS_NB_BLKIOV);
	lfs_free(fs, blkiov15, LFS_NB_BLKIOV);
	KERNEL_UNLOCK_ONE(NULL);
	return error;
}
#endif

#define LFS_MARKV_MAX_BLOCKS	(LFS_MAX_BUFS)
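/* Write a partial segment after roughly this many blocks have been queued. */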

int
lfs_markv(struct lwp *l, fsid_t *fsidp, BLOCK_INFO *blkiov,
    int blkcnt)
{
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp;
	struct inode *ip = NULL;
	struct lfs *fs;
	struct mount *mntp;
	struct ulfsmount *ump;
	struct vnode *vp;
	ino_t lastino;
	daddr_t b_daddr;
	int cnt, error;
	int do_again = 0;
	int numrefed = 0;
	ino_t maxino;
	size_t obsize;

	/* number of blocks/inodes that we have already bwrite'ed */
	int nblkwritten, ninowritten;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS,
	    KAUTH_REQ_SYSTEM_LFS_MARKV, NULL, NULL, NULL);
	if (error)
		return (error);

	if ((mntp = vfs_getvfs(fsidp)) == NULL)
		return (ENOENT);

	ump = VFSTOULFS(mntp);
	fs = ump->um_lfs;

	if (fs->lfs_ronly)
		return EROFS;

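	/*
	 * The Ifile has one entry per inode, so its size (less the
	 * cleanerinfo and segment-table blocks) bounds the largest inode
	 * number this filesystem can currently have; use that bound to
	 * sanity-check the cleaner's bi_inode values below.
	 */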
	maxino = (lfs_fragstoblks(fs, lfs_dino_getblocks(fs, VTOI(fs->lfs_ivnode)->i_din)) -
	    lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);

	cnt = blkcnt;

	if ((error = vfs_busy(mntp, NULL)) != 0)
		return (error);

	/*
	 * This seglock is just to prevent the fact that we might have to sleep
	 * from allowing the possibility that our blocks might become
	 * invalid.
	 *
	 * It is also important to note here that unless we specify SEGM_CKP,
	 * any Ifile blocks that we might be asked to clean will never get
	 * to the disk.
	 */
	lfs_seglock(fs, SEGM_CLEAN | SEGM_CKP | SEGM_SYNC);

	/* Mark blocks/inodes dirty. */
	error = 0;

	/* these were inside the initialization for the for loop */
	vp = NULL;
	lastino = LFS_UNUSED_INUM;
	nblkwritten = ninowritten = 0;
	for (blkp = blkiov; cnt--; ++blkp)
	{
		/* Bounds-check incoming data, avoid panic for failed VGET */
		if (blkp->bi_inode <= 0 || blkp->bi_inode >= maxino) {
			error = EINVAL;
			goto err3;
		}
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			/*
			 * Finish the old file, if there was one.
			 */
			if (vp != NULL) {
				vput(vp);
				vp = NULL;
				numrefed--;
			}

			/*
			 * Start a new file
			 */
			lastino = blkp->bi_inode;

			/* Get the vnode/inode. */
			error = lfs_fastvget(mntp, blkp->bi_inode, blkp,
			    LK_EXCLUSIVE | LK_NOWAIT, &vp);
			if (error) {
				DLOG((DLOG_CLEAN, "lfs_markv: lfs_fastvget"
				    " failed with %d (ino %d, segment %d)\n",
				    error, blkp->bi_inode,
				    lfs_dtosn(fs, blkp->bi_daddr)));
				/*
				 * If we got EAGAIN, that means that the
				 * Inode was locked. This is
				 * recoverable: just clean the rest of
				 * this segment, and let the cleaner try
				 * again with another. (When the
				 * cleaner runs again, this segment will
				 * sort high on the list, since it is
				 * now almost entirely empty.)
				 */
				if (error == EAGAIN) {
					error = 0;
					do_again++;
				} else
					KASSERT(error == ENOENT);
				KASSERT(vp == NULL);
				ip = NULL;
				continue;
			}

			ip = VTOI(vp);
			numrefed++;
			ninowritten++;
		} else if (vp == NULL) {
			/*
			 * This can only happen if the vnode is dead (or
			 * in any case we can't get it...e.g., it is
			 * locked). Keep going.
			 */
			continue;
		}

		/* Past this point we are guaranteed that vp, ip are valid. */

		/* Can't clean VU_DIROP directories in case of truncation */
		/* XXX - maybe we should mark removed dirs specially? */
		if (vp->v_type == VDIR && (vp->v_uflag & VU_DIROP)) {
			do_again++;
			continue;
		}

		/* If this BLOCK_INFO didn't contain a block, keep going. */
		if (blkp->bi_lbn == LFS_UNUSED_LBN) {
			/* XXX need to make sure that the inode gets written in this case */
			/* XXX but only write the inode if it's the right one */
			if (blkp->bi_inode != LFS_IFILE_INUM) {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				if (lfs_if_getdaddr(fs, ifp) == blkp->bi_daddr) {
					mutex_enter(&lfs_lock);
					LFS_SET_UINO(ip, IN_CLEANING);
					mutex_exit(&lfs_lock);
				}
				brelse(bp, 0);
			}
			continue;
		}

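		/*
		 * Check that the block still lives where the cleaner thinks
		 * it does. If VOP_BMAP now reports a different address, the
		 * block has been rewritten since the cleaner examined it, so
		 * the copy we were handed is stale and is skipped.
		 */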
		b_daddr = 0;
		if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
		    LFS_DBTOFSB(fs, b_daddr) != blkp->bi_daddr)
		{
			if (lfs_dtosn(fs, LFS_DBTOFSB(fs, b_daddr)) ==
			    lfs_dtosn(fs, blkp->bi_daddr))
			{
				DLOG((DLOG_CLEAN, "lfs_markv: wrong da same seg: %jx vs %jx\n",
				    (intmax_t)blkp->bi_daddr, (intmax_t)LFS_DBTOFSB(fs, b_daddr)));
			}
			do_again++;
			continue;
		}

		/*
		 * Check block sizes. The blocks being cleaned come from
		 * disk, so they should have the same size as their on-disk
		 * counterparts.
		 */
		if (blkp->bi_lbn >= 0)
			obsize = lfs_blksize(fs, ip, blkp->bi_lbn);
		else
			obsize = lfs_sb_getbsize(fs);
		/* Check for fragment size change */
		if (blkp->bi_lbn >= 0 && blkp->bi_lbn < ULFS_NDADDR) {
			obsize = ip->i_lfs_fragsize[blkp->bi_lbn];
		}
		if (obsize != blkp->bi_size) {
			DLOG((DLOG_CLEAN, "lfs_markv: ino %d lbn %jd wrong"
			    " size (%ld != %d), try again\n",
			    blkp->bi_inode, (intmax_t)blkp->bi_lbn,
			    (long) obsize, blkp->bi_size));
			do_again++;
			continue;
		}

		/*
		 * If we get to here, then we are keeping the block. If
		 * it is an indirect block, we want to actually put it
		 * in the buffer cache so that it can be updated in the
		 * finish_meta section. If it's not, we need to
		 * allocate a fake buffer so that writeseg can perform
		 * the copyin and write the buffer.
		 */
		if (ip->i_number != LFS_IFILE_INUM && blkp->bi_lbn >= 0) {
			/* Data Block */
			bp = lfs_fakebuf(fs, vp, blkp->bi_lbn,
			    blkp->bi_size, blkp->bi_bp);
			/* Pretend we used bread() to get it */
			bp->b_blkno = LFS_FSBTODB(fs, blkp->bi_daddr);
		} else {
			/* Indirect block or ifile */
			if (blkp->bi_size != lfs_sb_getbsize(fs) &&
			    ip->i_number != LFS_IFILE_INUM)
				panic("lfs_markv: partial indirect block?"
				    " size=%d\n", blkp->bi_size);
			bp = getblk(vp, blkp->bi_lbn, blkp->bi_size, 0, 0);
			if (!(bp->b_oflags & (BO_DONE|BO_DELWRI))) {
				/*
				 * The block in question was not found
				 * in the cache; i.e., the block that
				 * getblk() returned is empty. So, we
				 * can (and should) copy in the
				 * contents, because we've already
				 * determined that this was the right
				 * version of this block on disk.
				 *
				 * And, it can't have changed underneath
				 * us, because we have the segment lock.
				 */
				error = copyin(blkp->bi_bp, bp->b_data, blkp->bi_size);
				if (error)
					goto err2;
			}
		}
		if ((error = lfs_bwrite_ext(bp, BW_CLEAN)) != 0)
			goto err2;

		nblkwritten++;
		/*
		 * XXX should account indirect blocks and ifile pages as well
		 */
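		/*
		 * Once enough blocks have accumulated (LFS_MARKV_MAX_BLOCKS,
		 * counting an estimate of the inode blocks as well), write a
		 * partial segment now rather than letting dirty cleaner
		 * buffers pile up without bound.
		 */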
		if (nblkwritten + lfs_lblkno(fs, ninowritten * DINOSIZE(fs))
		    > LFS_MARKV_MAX_BLOCKS) {
			DLOG((DLOG_CLEAN, "lfs_markv: writing %d blks %d inos\n",
			    nblkwritten, ninowritten));
			lfs_segwrite(mntp, SEGM_CLEAN);
			nblkwritten = ninowritten = 0;
		}
	}

	/*
	 * Finish the old file, if there was one
	 */
	if (vp != NULL) {
		vput(vp);
		vp = NULL;
		numrefed--;
	}

#ifdef DIAGNOSTIC
	if (numrefed != 0)
		panic("lfs_markv: numrefed=%d", numrefed);
#endif
	DLOG((DLOG_CLEAN, "lfs_markv: writing %d blks %d inos (check point)\n",
	    nblkwritten, ninowritten));

	/*
	 * The last write has to be SEGM_SYNC, because of calling semantics.
	 * It also has to be SEGM_CKP, because otherwise we could write
	 * over the newly cleaned data contained in a checkpoint, and then
	 * we'd be unhappy at recovery time.
	 */
	lfs_segwrite(mntp, SEGM_CLEAN | SEGM_CKP | SEGM_SYNC);

	lfs_segunlock(fs);

	vfs_unbusy(mntp, false, NULL);
	if (error)
		return (error);
	else if (do_again)
		return EAGAIN;

	return 0;

err2:
	DLOG((DLOG_CLEAN, "lfs_markv err2\n"));

	/*
	 * XXX we're here because copyin() failed.
	 * XXX it means that we can't trust the cleanerd. too bad.
	 * XXX how can we recover from this?
	 */

err3:
	/*
	 * XXX should do segwrite here anyway?
	 */

	if (vp != NULL) {
		vput(vp);
		vp = NULL;
		--numrefed;
	}

	lfs_segunlock(fs);
	vfs_unbusy(mntp, false, NULL);
#ifdef DIAGNOSTIC
	if (numrefed != 0)
		panic("lfs_markv: numrefed=%d", numrefed);
#endif

	return (error);
}

/*
 * sys_lfs_bmapv:
 *
 * This will fill in the current disk address for arrays of blocks.
 *
 * 0 on success
 * -1/errno is returned on error.
 */
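/*
 * The cleaner typically calls lfs_bmapv(2) on the blocks it found in a
 * segment and then passes the ones whose bi_daddr still matches to
 * lfs_markv(2); see the sketch above sys_lfs_markv() for the calling
 * pattern (illustrative only).
 */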
#ifdef USE_64BIT_SYSCALLS
int
sys_lfs_bmapv(struct lwp *l, const struct sys_lfs_bmapv_args *uap, register_t *retval)
{
	/* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */
	BLOCK_INFO *blkiov;
	int blkcnt, error;
	fsid_t fsid;
	struct lfs *fs;
	struct mount *mntp;

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);
	fs = VFSTOULFS(mntp)->um_lfs;

	blkcnt = SCARG(uap, blkcnt);
#if SIZE_T_MAX <= UINT_MAX
	if ((u_int) blkcnt > SIZE_T_MAX / sizeof(BLOCK_INFO))
		return (EINVAL);
#endif
	KERNEL_LOCK(1, NULL);
	blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
	if ((error = copyin(SCARG(uap, blkiov), blkiov,
	    blkcnt * sizeof(BLOCK_INFO))) != 0)
		goto out;

	if ((error = lfs_bmapv(l, &fsid, blkiov, blkcnt)) == 0)
		copyout(blkiov, SCARG(uap, blkiov),
		    blkcnt * sizeof(BLOCK_INFO));
    out:
	lfs_free(fs, blkiov, LFS_NB_BLKIOV);
	KERNEL_UNLOCK_ONE(NULL);
	return error;
}
#else
int
sys_lfs_bmapv(struct lwp *l, const struct sys_lfs_bmapv_args *uap, register_t *retval)
{
	/* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */
	BLOCK_INFO *blkiov;
	BLOCK_INFO_15 *blkiov15;
	int i, blkcnt, error;
	fsid_t fsid;
	struct lfs *fs;
	struct mount *mntp;

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);
	fs = VFSTOULFS(mntp)->um_lfs;

	blkcnt = SCARG(uap, blkcnt);
	if ((size_t) blkcnt > SIZE_T_MAX / sizeof(BLOCK_INFO))
		return (EINVAL);
	KERNEL_LOCK(1, NULL);
	blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
	blkiov15 = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO_15), LFS_NB_BLKIOV);
	if ((error = copyin(SCARG(uap, blkiov), blkiov15,
	    blkcnt * sizeof(BLOCK_INFO_15))) != 0)
		goto out;

	for (i = 0; i < blkcnt; i++) {
		blkiov[i].bi_inode = blkiov15[i].bi_inode;
		blkiov[i].bi_lbn = blkiov15[i].bi_lbn;
		blkiov[i].bi_daddr = blkiov15[i].bi_daddr;
		blkiov[i].bi_segcreate = blkiov15[i].bi_segcreate;
		blkiov[i].bi_version = blkiov15[i].bi_version;
		blkiov[i].bi_bp = blkiov15[i].bi_bp;
		blkiov[i].bi_size = blkiov15[i].bi_size;
	}

	if ((error = lfs_bmapv(l, &fsid, blkiov, blkcnt)) == 0) {
		for (i = 0; i < blkcnt; i++) {
			blkiov15[i].bi_inode = blkiov[i].bi_inode;
			blkiov15[i].bi_lbn = blkiov[i].bi_lbn;
			blkiov15[i].bi_daddr = blkiov[i].bi_daddr;
			blkiov15[i].bi_segcreate = blkiov[i].bi_segcreate;
			blkiov15[i].bi_version = blkiov[i].bi_version;
			blkiov15[i].bi_bp = blkiov[i].bi_bp;
			blkiov15[i].bi_size = blkiov[i].bi_size;
		}
		copyout(blkiov15, SCARG(uap, blkiov),
		    blkcnt * sizeof(BLOCK_INFO_15));
	}
    out:
	lfs_free(fs, blkiov, LFS_NB_BLKIOV);
	lfs_free(fs, blkiov15, LFS_NB_BLKIOV);
	KERNEL_UNLOCK_ONE(NULL);
	return error;
}
#endif

int
lfs_bmapv(struct lwp *l, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt)
{
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp;
	struct inode *ip = NULL;
	struct lfs *fs;
	struct mount *mntp;
	struct ulfsmount *ump;
	struct vnode *vp;
	ino_t lastino;
	daddr_t v_daddr;
	int cnt, error;
	int numrefed = 0;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS,
	    KAUTH_REQ_SYSTEM_LFS_BMAPV, NULL, NULL, NULL);
	if (error)
		return (error);

	if ((mntp = vfs_getvfs(fsidp)) == NULL)
		return (ENOENT);

	if ((error = vfs_busy(mntp, NULL)) != 0)
		return (error);

	ump = VFSTOULFS(mntp);
	fs = ump->um_lfs;

	if (fs->lfs_cleaner_thread == NULL)
		fs->lfs_cleaner_thread = curlwp;
	KASSERT(fs->lfs_cleaner_thread == curlwp);

	cnt = blkcnt;

	error = 0;

	/* these were inside the initialization for the for loop */
	vp = NULL;
	v_daddr = LFS_UNUSED_DADDR;
	lastino = LFS_UNUSED_INUM;
	for (blkp = blkiov; cnt--; ++blkp)
	{
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			/*
			 * Finish the old file, if there was one.
			 */
			if (vp != NULL) {
				vput(vp);
				vp = NULL;
				numrefed--;
			}

			/*
			 * Start a new file
			 */
			lastino = blkp->bi_inode;
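			/*
			 * Find the inode's current disk address: the
			 * Ifile's own address lives in the superblock,
			 * everything else has an IFILE entry.
			 */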
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = lfs_sb_getidaddr(fs);
			else {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				v_daddr = lfs_if_getdaddr(fs, ifp);
				brelse(bp, 0);
			}
			if (v_daddr == LFS_UNUSED_DADDR) {
				blkp->bi_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			error = lfs_fastvget(mntp, blkp->bi_inode, NULL,
			    LK_SHARED, &vp);
			if (error) {
				DLOG((DLOG_CLEAN, "lfs_bmapv: lfs_fastvget ino"
				    " %d failed with %d\n",
				    blkp->bi_inode, error));
				KASSERT(vp == NULL);
				continue;
			} else {
				KASSERT(VOP_ISLOCKED(vp));
				numrefed++;
			}
			ip = VTOI(vp);
		} else if (vp == NULL) {
			/*
			 * This can only happen if the vnode is dead.
			 * Keep going. Note that we DO NOT set the
			 * bi_addr to anything -- if we failed to get
			 * the vnode, for example, we want to assume
			 * conservatively that all of its blocks *are*
			 * located in the segment in question.
			 * lfs_markv will throw them out if we are
			 * wrong.
			 */
			continue;
		}

		/* Past this point we are guaranteed that vp, ip are valid. */

		if (blkp->bi_lbn == LFS_UNUSED_LBN) {
			/*
			 * We just want the inode address, which is
			 * conveniently in v_daddr.
			 */
			blkp->bi_daddr = v_daddr;
		} else {
			daddr_t bi_daddr;

			error = VOP_BMAP(vp, blkp->bi_lbn, NULL,
			    &bi_daddr, NULL);
			if (error)
			{
				blkp->bi_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			blkp->bi_daddr = LFS_DBTOFSB(fs, bi_daddr);
			/* Fill in the block size, too */
			if (blkp->bi_lbn >= 0)
				blkp->bi_size = lfs_blksize(fs, ip, blkp->bi_lbn);
			else
				blkp->bi_size = lfs_sb_getbsize(fs);
		}
	}

	/*
	 * Finish the old file, if there was one.
	 */
	if (vp != NULL) {
		vput(vp);
		vp = NULL;
		numrefed--;
	}

#ifdef DIAGNOSTIC
	if (numrefed != 0)
		panic("lfs_bmapv: numrefed=%d", numrefed);
#endif

	vfs_unbusy(mntp, false, NULL);

	return 0;
}

/*
 * sys_lfs_segclean:
 *
 * Mark the segment clean.
 *
 * 0 on success
 * -1/errno is returned on error.
 */
int
sys_lfs_segclean(struct lwp *l, const struct sys_lfs_segclean_args *uap, register_t *retval)
{
	/* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(u_long) segment;
	} */
	struct lfs *fs;
	struct mount *mntp;
	fsid_t fsid;
	int error;
	unsigned long segnum;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS,
	    KAUTH_REQ_SYSTEM_LFS_SEGCLEAN, NULL, NULL, NULL);
	if (error)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);
	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);

	fs = VFSTOULFS(mntp)->um_lfs;
	segnum = SCARG(uap, segment);

	if ((error = vfs_busy(mntp, NULL)) != 0)
		return (error);

	KERNEL_LOCK(1, NULL);
	lfs_seglock(fs, SEGM_PROT);
	error = lfs_do_segclean(fs, segnum);
	lfs_segunlock(fs);
	KERNEL_UNLOCK_ONE(NULL);
	vfs_unbusy(mntp, false, NULL);
	return error;
}

/*
 * Actually mark the segment clean.
 * Must be called with the segment lock held.
 */
int
lfs_do_segclean(struct lfs *fs, unsigned long segnum)
{
	extern int lfs_dostats;
	struct buf *bp;
	CLEANERINFO *cip;
	SEGUSE *sup;

	if (lfs_dtosn(fs, lfs_sb_getcurseg(fs)) == segnum) {
		return (EBUSY);
	}

	LFS_SEGENTRY(sup, fs, segnum, bp);
	if (sup->su_nbytes) {
		DLOG((DLOG_CLEAN, "lfs_segclean: not cleaning segment %lu:"
		    " %d live bytes\n", segnum, sup->su_nbytes));
		brelse(bp, 0);
		return (EBUSY);
	}
	if (sup->su_flags & SEGUSE_ACTIVE) {
		DLOG((DLOG_CLEAN, "lfs_segclean: not cleaning segment %lu:"
		    " segment is active\n", segnum));
		brelse(bp, 0);
		return (EBUSY);
	}
	if (!(sup->su_flags & SEGUSE_DIRTY)) {
		DLOG((DLOG_CLEAN, "lfs_segclean: not cleaning segment %lu:"
		    " segment is already clean\n", segnum));
		brelse(bp, 0);
		return (EALREADY);
	}

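	/*
	 * The segment is dirty, inactive and has no live bytes, so it can
	 * be reclaimed: return its space to the available and bfree counts,
	 * allowing for any superblock copy or disk label it contains.
	 */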
	lfs_sb_addavail(fs, lfs_segtod(fs, 1));
	if (sup->su_flags & SEGUSE_SUPERBLOCK)
		lfs_sb_subavail(fs, lfs_btofsb(fs, LFS_SBPAD));
	if (lfs_sb_getversion(fs) > 1 && segnum == 0 &&
	    lfs_sb_gets0addr(fs) < lfs_btofsb(fs, LFS_LABELPAD))
		lfs_sb_subavail(fs, lfs_btofsb(fs, LFS_LABELPAD) - lfs_sb_gets0addr(fs));
	mutex_enter(&lfs_lock);
	lfs_sb_addbfree(fs, sup->su_nsums * lfs_btofsb(fs, lfs_sb_getsumsize(fs)) +
	    lfs_btofsb(fs, sup->su_ninos * lfs_sb_getibsize(fs)));
	lfs_sb_subdmeta(fs, sup->su_nsums * lfs_btofsb(fs, lfs_sb_getsumsize(fs)) +
	    lfs_btofsb(fs, sup->su_ninos * lfs_sb_getibsize(fs)));
	if (lfs_sb_getdmeta(fs) < 0)
		lfs_sb_setdmeta(fs, 0);
	mutex_exit(&lfs_lock);
	sup->su_flags &= ~SEGUSE_DIRTY;
	LFS_WRITESEGENTRY(sup, fs, segnum, bp);

	LFS_CLEANERINFO(cip, fs, bp);
	lfs_ci_shiftdirtytoclean(fs, cip, 1);
	lfs_sb_setnclean(fs, lfs_ci_getclean(fs, cip));
	mutex_enter(&lfs_lock);
	lfs_ci_setbfree(fs, cip, lfs_sb_getbfree(fs));
	lfs_ci_setavail(fs, cip, lfs_sb_getavail(fs)
	    - fs->lfs_ravail - fs->lfs_favail);
	wakeup(&fs->lfs_availsleep);
	mutex_exit(&lfs_lock);
	(void) LFS_BWRITE_LOG(bp);

	if (lfs_dostats)
		++lfs_stats.segs_reclaimed;

	return (0);
}

/*
 * This will block until a segment in file system fsid is written. A timeout
 * in milliseconds may be specified which will wake the cleaner automatically.
 * An fsid of -1 means any file system, and a timeout of 0 means forever.
 */
int
lfs_segwait(fsid_t *fsidp, struct timeval *tv)
{
	struct mount *mntp;
	void *addr;
	u_long timeout;
	int error;

	KERNEL_LOCK(1, NULL);
	if (fsidp == NULL || (mntp = vfs_getvfs(fsidp)) == NULL)
		addr = &lfs_allclean_wakeup;
	else
		addr = &VFSTOULFS(mntp)->um_lfs->lfs_nextsegsleep;
	/*
	 * XXX THIS COULD SLEEP FOREVER IF TIMEOUT IS {0,0}!
	 * XXX IS THAT WHAT IS INTENDED?
	 */
	timeout = tvtohz(tv);
	error = tsleep(addr, PCATCH | PVFS, "segment", timeout);
	KERNEL_UNLOCK_ONE(NULL);
	return (error == ERESTART ? EINTR : 0);
}

/*
 * sys_lfs_segwait:
 *
 * System call wrapper around lfs_segwait().
 *
 * 0 on success
 * 1 on timeout
 * -1/errno is returned on error.
 */
int
sys___lfs_segwait50(struct lwp *l, const struct sys___lfs_segwait50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct timeval *) tv;
	} */
	struct timeval atv;
	fsid_t fsid;
	int error;

	/* XXX need we be su to segwait? */
	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS,
	    KAUTH_REQ_SYSTEM_LFS_SEGWAIT, NULL, NULL, NULL);
	if (error)
		return (error);
	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), &atv, sizeof(struct timeval));
		if (error)
			return (error);
		if (itimerfix(&atv))
			return (EINVAL);
	} else /* NULL or invalid */
		atv.tv_sec = atv.tv_usec = 0;
	return lfs_segwait(&fsid, &atv);
}

/*
 * VFS_VGET call specialized for the cleaner. If the cleaner is
 * processing IINFO structures, it may have the ondisk inode already, so
 * don't go retrieving it again.
 *
 * Return the vnode referenced and locked.
 */

static int
lfs_fastvget(struct mount *mp, ino_t ino, BLOCK_INFO *blkp, int lk_flags,
    struct vnode **vpp)
{
	struct ulfsmount *ump;
	struct lfs *fs;
	int error;

	ump = VFSTOULFS(mp);
	fs = ump->um_lfs;
	fs->lfs_cleaner_hint = blkp;
	error = vcache_get(mp, &ino, sizeof(ino), vpp);
	fs->lfs_cleaner_hint = NULL;
	if (error)
		return error;
	error = vn_lock(*vpp, lk_flags);
	if (error) {
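		/*
		 * With LK_NOWAIT the lock attempt can fail with EBUSY;
		 * report that as EAGAIN so lfs_markv() treats the inode as
		 * merely busy (the cleaner will retry) instead of failing
		 * hard.
		 */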
		if (error == EBUSY)
			error = EAGAIN;
		vrele(*vpp);
		*vpp = NULL;
		return error;
	}

	return 0;
}

/*
 * Make up a "fake" cleaner buffer, copy the data from userland into it.
 */
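/*
 * The copy is made into an LFS_NB_CLEAN buffer allocated by lfs_newbuf(),
 * so by the time the segment writer sees it, the data no longer depends
 * on the cleaner's userland address remaining mapped.
 */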
static struct buf *
lfs_fakebuf(struct lfs *fs, struct vnode *vp, daddr_t lbn, size_t size, void *uaddr)
{
	struct buf *bp;
	int error;

	KASSERT(VTOI(vp)->i_number != LFS_IFILE_INUM);

	bp = lfs_newbuf(VTOI(vp)->i_lfs, vp, lbn, size, LFS_NB_CLEAN);
	error = copyin(uaddr, bp->b_data, size);
	if (error) {
		lfs_freebuf(fs, bp);
		return NULL;
	}
	KDASSERT(bp->b_iodone == lfs_callback);

#if 0
	mutex_enter(&lfs_lock);
	++fs->lfs_iocount;
	mutex_exit(&lfs_lock);
#endif
	bp->b_bufsize = size;
	bp->b_bcount = size;
	return (bp);
}