vfs_lockf.c source code [src/src/sys/kern/vfs_lockf.c]

1	/ $NetBSD: vfs_lockf.c,v 1.73 2011/01/31 08:25:32 dholland Exp $ /
2
3	/*
4	* Copyright (c) 1982, 1986, 1989, 1993
5	* The Regents of the University of California. All rights reserved.
6	*
7	* This code is derived from software contributed to Berkeley by
8	* Scooter Morris at Genentech Inc.
9	*
10	* Redistribution and use in source and binary forms, with or without
11	* modification, are permitted provided that the following conditions
12	* are met:
13	* 1. Redistributions of source code must retain the above copyright
14	* notice, this list of conditions and the following disclaimer.
15	* 2. Redistributions in binary form must reproduce the above copyright
16	* notice, this list of conditions and the following disclaimer in the
17	* documentation and/or other materials provided with the distribution.
18	* 3. Neither the name of the University nor the names of its contributors
19	* may be used to endorse or promote products derived from this software
20	* without specific prior written permission.
21	*
22	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32	* SUCH DAMAGE.
33	*
34	* @(#)ufs_lockf.c 8.4 (Berkeley) 10/26/94
35	*/
36
37	#include <sys/cdefs.h>
38	__KERNEL_RCSID(`0`, "$NetBSD: vfs_lockf.c,v 1.73 2011/01/31 08:25:32 dholland Exp $");
39
40	#include <sys/param.h>
41	#include <sys/systm.h>
42	#include <sys/kernel.h>
43	#include <sys/file.h>
44	#include <sys/proc.h>
45	#include <sys/vnode.h>
46	#include <sys/pool.h>
47	#include <sys/fcntl.h>
48	#include <sys/lockf.h>
49	#include <sys/atomic.h>
50	#include <sys/kauth.h>
51	#include <sys/uidinfo.h>
52
53	/*
54	* The lockf structure is a kernel structure which contains the information
55	* associated with a byte range lock. The lockf structures are linked into
56	* the vnode structure. Locks are sorted by the starting byte of the lock for
57	* efficiency.
58	*
59	* lf_next is used for two purposes, depending on whether the lock is
60	* being held, or is in conflict with an existing lock. If this lock
61	* is held, it indicates the next lock on the same vnode.
62	* For pending locks, if lock->lf_next is non-NULL, then lock->lf_block
63	* must be queued on the lf_blkhd TAILQ of lock->lf_next.
64	*/
65
66	TAILQ_HEAD(locklist, lockf);
67
68	struct lockf {
69	kcondvar_t lf_cv; / Signalling /
70	short lf_flags; / Lock semantics: F_POSIX, F_FLOCK, F_WAIT /
71	short lf_type; / Lock type: F_RDLCK, F_WRLCK /
72	off_t lf_start; / The byte # of the start of the lock /
73	off_t lf_end; / The byte # of the end of the lock (-1=EOF)/
74	void lf_id; /* process or file description holding lock /
75	struct lockf *lf_head; /* Back pointer to the head of lockf list /
76	struct lockf lf_next; /* Next lock on this vnode, or blocking lock /
77	struct locklist lf_blkhd; / List of requests blocked on this lock /
78	TAILQ_ENTRY(lockf) lf_block;/ A request waiting for a lock /
79	uid_t lf_uid; / User ID responsible /
80	};
81
82	/ Maximum length of sleep chains to traverse to try and detect deadlock. /
83	#define MAXDEPTH 50
84
85	static pool_cache_t lockf_cache;
86	static kmutex_t *lockf_lock;
87	static char lockstr[] = "lockf";
88
89	/*
90	* This variable controls the maximum number of processes that will
91	* be checked in doing deadlock detection.
92	*/
93	int maxlockdepth = MAXDEPTH;
94
95	#ifdef LOCKF_DEBUG
96	int lockf_debug = `0`;
97	#endif
98
99	#define SELF 0x1
100	#define OTHERS 0x2
101
102	/*
103	* XXX TODO
104	* Misc cleanups: "void *id" should be visible in the API as a
105	* "struct proc *".
106	* (This requires rototilling all VFS's which support advisory locking).
107	*/
108
109	/*
110	* If there's a lot of lock contention on a single vnode, locking
111	* schemes which allow for more paralleism would be needed. Given how
112	* infrequently byte-range locks are actually used in typical BSD
113	* code, a more complex approach probably isn't worth it.
114	*/
115
116	/*
117	* We enforce a limit on locks by uid, so that a single user cannot
118	* run the kernel out of memory. For now, the limit is pretty coarse.
119	* There is no limit on root.
120	*
121	* Splitting a lock will always succeed, regardless of current allocations.
122	* If you're slightly above the limit, we still have to permit an allocation
123	* so that the unlock can succeed. If the unlocking causes too many splits,
124	* however, you're totally cutoff.
125	*/
126	int maxlocksperuid = `1024`;
127
128	#ifdef LOCKF_DEBUG
129	/*
130	* Print out a lock.
131	*/
132	static void
133	lf_print(const char tag, struct* lockf *lock)
134	{
135
136	printf("%s: lock %p for ", tag, lock);
137	if (lock->lf_flags & F_POSIX)
138	printf("proc %d", ((struct proc *)lock->lf_id)->p_pid);
139	else
140	printf("file %p", (struct file *)lock->lf_id);
141	printf(" %s, start %jd, end %jd",
142	lock->lf_type == F_RDLCK ? "shared" :
143	lock->lf_type == F_WRLCK ? "exclusive" :
144	lock->lf_type == F_UNLCK ? "unlock" :
145	"unknown", (intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
146	if (TAILQ_FIRST(&lock->lf_blkhd))
147	printf(" block %p\n", TAILQ_FIRST(&lock->lf_blkhd));
148	else
149	printf("\n");
150	}
151
152	static void
153	lf_printlist(const char tag, struct* lockf *lock)
154	{
155	struct lockf lf, blk;
156
157	printf("%s: Lock list:\n", tag);
158	for (lf = *lock->lf_head; lf; lf = lf->lf_next) {
159	printf("\tlock %p for ", lf);
160	if (lf->lf_flags & F_POSIX)
161	printf("proc %d", ((struct proc *)lf->lf_id)->p_pid);
162	else
163	printf("file %p", (struct file *)lf->lf_id);
164	printf(", %s, start %jd, end %jd",
165	lf->lf_type == F_RDLCK ? "shared" :
166	lf->lf_type == F_WRLCK ? "exclusive" :
167	lf->lf_type == F_UNLCK ? "unlock" :
168	"unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end);
169	TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
170	if (blk->lf_flags & F_POSIX)
171	printf("; proc %d",
172	((struct proc *)blk->lf_id)->p_pid);
173	else
174	printf("; file %p", (struct file *)blk->lf_id);
175	printf(", %s, start %jd, end %jd",
176	blk->lf_type == F_RDLCK ? "shared" :
177	blk->lf_type == F_WRLCK ? "exclusive" :
178	blk->lf_type == F_UNLCK ? "unlock" :
179	"unknown", (intmax_t)blk->lf_start, (intmax_t)blk->lf_end);
180	if (TAILQ_FIRST(&blk->lf_blkhd))
181	panic("lf_printlist: bad list");
182	}
183	printf("\n");
184	}
185	}
186	#endif /* LOCKF_DEBUG */
187
188	/*
189	* 3 options for allowfail.
190	* 0 - always allocate. 1 - cutoff at limit. 2 - cutoff at double limit.
191	*/
192	static struct lockf *
193	lf_alloc(int allowfail)
194	{
195	struct uidinfo *uip;
196	struct lockf *lock;
197	u_long lcnt;
198	const uid_t uid = kauth_cred_geteuid(kauth_cred_get());
199
200	uip = uid_find(uid);
201	lcnt = atomic_inc_ulong_nv(&uip->ui_lockcnt);
202	if (uid && allowfail && lcnt >
203	(allowfail == `1` ? maxlocksperuid : (maxlocksperuid * `2`))) {
204	atomic_dec_ulong(&uip->ui_lockcnt);
205	return NULL;
206	}
207
208	lock = pool_cache_get(lockf_cache, PR_WAITOK);
209	lock->lf_uid = uid;
210	return lock;
211	}
212
213	static void
214	lf_free(struct lockf *lock)
215	{
216	struct uidinfo *uip;
217
218	uip = uid_find(lock->lf_uid);
219	atomic_dec_ulong(&uip->ui_lockcnt);
220	pool_cache_put(lockf_cache, lock);
221	}
222
223	static int
224	lf_ctor(void arg, void* obj, int* flag)
225	{
226	struct lockf *lock;
227
228	lock = obj;
229	cv_init(&lock->lf_cv, lockstr);
230
231	return `0`;
232	}
233
234	static void
235	lf_dtor(void arg, void* *obj)
236	{
237	struct lockf *lock;
238
239	lock = obj;
240	cv_destroy(&lock->lf_cv);
241	}
242
243	/*
244	* Walk the list of locks for an inode to
245	* find an overlapping lock (if any).
246	*
247	* NOTE: this returns only the FIRST overlapping lock. There
248	* may be more than one.
249	*/
250	static int
251	lf_findoverlap(struct lockf lf, struct* lockf lock, int* type,
252	struct lockf *prev, struct lockf overlap)
253	{
254	off_t start, end;
255
256	*overlap = lf;
257	if (lf == NULL)
258	return `0`;
259	#ifdef LOCKF_DEBUG
260	if (lockf_debug & `2`)
261	lf_print("lf_findoverlap: looking for overlap in", lock);
262	#endif /* LOCKF_DEBUG */
263	start = lock->lf_start;
264	end = lock->lf_end;
265	while (lf != NULL) {
266	if (((type == SELF) && lf->lf_id != lock->lf_id) \|\|
267	((type == OTHERS) && lf->lf_id == lock->lf_id)) {
268	*prev = &lf->lf_next;
269	*overlap = lf = lf->lf_next;
270	continue;
271	}
272	#ifdef LOCKF_DEBUG
273	if (lockf_debug & `2`)
274	lf_print("\tchecking", lf);
275	#endif /* LOCKF_DEBUG */
276	/*
277	* OK, check for overlap
278	*
279	* Six cases:
280	* 0) no overlap
281	* 1) overlap == lock
282	* 2) overlap contains lock
283	* 3) lock contains overlap
284	* 4) overlap starts before lock
285	* 5) overlap ends after lock
286	*/
287	if ((lf->lf_end != -`1` && start > lf->lf_end) \|\|
288	(end != -`1` && lf->lf_start > end)) {
289	/ Case 0 /
290	#ifdef LOCKF_DEBUG
291	if (lockf_debug & `2`)
292	printf("no overlap\n");
293	#endif /* LOCKF_DEBUG */
294	if ((type & SELF) && end != -`1` && lf->lf_start > end)
295	return `0`;
296	*prev = &lf->lf_next;
297	*overlap = lf = lf->lf_next;
298	continue;
299	}
300	if ((lf->lf_start == start) && (lf->lf_end == end)) {
301	/ Case 1 /
302	#ifdef LOCKF_DEBUG
303	if (lockf_debug & `2`)
304	printf("overlap == lock\n");
305	#endif /* LOCKF_DEBUG */
306	return `1`;
307	}
308	if ((lf->lf_start <= start) &&
309	(end != -`1`) &&
310	((lf->lf_end >= end) \|\| (lf->lf_end == -`1`))) {
311	/ Case 2 /
312	#ifdef LOCKF_DEBUG
313	if (lockf_debug & `2`)
314	printf("overlap contains lock\n");
315	#endif /* LOCKF_DEBUG */
316	return `2`;
317	}
318	if (start <= lf->lf_start &&
319	(end == -`1` \|\|
320	(lf->lf_end != -`1` && end >= lf->lf_end))) {
321	/ Case 3 /
322	#ifdef LOCKF_DEBUG
323	if (lockf_debug & `2`)
324	printf("lock contains overlap\n");
325	#endif /* LOCKF_DEBUG */
326	return `3`;
327	}
328	if ((lf->lf_start < start) &&
329	((lf->lf_end >= start) \|\| (lf->lf_end == -`1`))) {
330	/ Case 4 /
331	#ifdef LOCKF_DEBUG
332	if (lockf_debug & `2`)
333	printf("overlap starts before lock\n");
334	#endif /* LOCKF_DEBUG */
335	return `4`;
336	}
337	if ((lf->lf_start > start) &&
338	(end != -`1`) &&
339	((lf->lf_end > end) \|\| (lf->lf_end == -`1`))) {
340	/ Case 5 /
341	#ifdef LOCKF_DEBUG
342	if (lockf_debug & `2`)
343	printf("overlap ends after lock\n");
344	#endif /* LOCKF_DEBUG */
345	return `5`;
346	}
347	panic("lf_findoverlap: default");
348	}
349	return `0`;
350	}
351
352	/*
353	* Split a lock and a contained region into
354	* two or three locks as necessary.
355	*/
356	static void
357	lf_split(struct lockf lock1, struct* lockf lock2, struct* lockf **sparelock)
358	{
359	struct lockf *splitlock;
360
361	#ifdef LOCKF_DEBUG
362	if (lockf_debug & `2`) {
363	lf_print("lf_split", lock1);
364	lf_print("splitting from", lock2);
365	}
366	#endif /* LOCKF_DEBUG */
367	/*
368	* Check to see if spliting into only two pieces.
369	*/
370	if (lock1->lf_start == lock2->lf_start) {
371	lock1->lf_start = lock2->lf_end + `1`;
372	lock2->lf_next = lock1;
373	return;
374	}
375	if (lock1->lf_end == lock2->lf_end) {
376	lock1->lf_end = lock2->lf_start - `1`;
377	lock2->lf_next = lock1->lf_next;
378	lock1->lf_next = lock2;
379	return;
380	}
381	/*
382	* Make a new lock consisting of the last part of
383	* the encompassing lock
384	*/
385	splitlock = *sparelock;
386	*sparelock = NULL;
387	cv_destroy(&splitlock->lf_cv);
388	memcpy(splitlock, lock1, sizeof(*splitlock));
389	cv_init(&splitlock->lf_cv, lockstr);
390
391	splitlock->lf_start = lock2->lf_end + `1`;
392	TAILQ_INIT(&splitlock->lf_blkhd);
393	lock1->lf_end = lock2->lf_start - `1`;
394	/*
395	* OK, now link it in
396	*/
397	splitlock->lf_next = lock1->lf_next;
398	lock2->lf_next = splitlock;
399	lock1->lf_next = lock2;
400	}
401
402	/*
403	* Wakeup a blocklist
404	*/
405	static void
406	lf_wakelock(struct lockf *listhead)
407	{
408	struct lockf *wakelock;
409
410	while ((wakelock = TAILQ_FIRST(&listhead->lf_blkhd))) {
411	KASSERT(wakelock->lf_next == listhead);
412	TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
413	wakelock->lf_next = NULL;
414	#ifdef LOCKF_DEBUG
415	if (lockf_debug & `2`)
416	lf_print("lf_wakelock: awakening", wakelock);
417	#endif
418	cv_broadcast(&wakelock->lf_cv);
419	}
420	}
421
422	/*
423	* Remove a byte-range lock on an inode.
424	*
425	* Generally, find the lock (or an overlap to that lock)
426	* and remove it (or shrink it), then wakeup anyone we can.
427	*/
428	static int
429	lf_clearlock(struct lockf unlock, struct* lockf **sparelock)
430	{
431	struct lockf **head = unlock->lf_head;
432	struct lockf lf = head;
433	struct lockf overlap, *prev;
434	int ovcase;
435
436	if (lf == NULL)
437	return `0`;
438	#ifdef LOCKF_DEBUG
439	if (unlock->lf_type != F_UNLCK)
440	panic("lf_clearlock: bad type");
441	if (lockf_debug & `1`)
442	lf_print("lf_clearlock", unlock);
443	#endif /* LOCKF_DEBUG */
444	prev = head;
445	while ((ovcase = lf_findoverlap(lf, unlock, SELF,
446	&prev, &overlap)) != `0`) {
447	/*
448	* Wakeup the list of locks to be retried.
449	*/
450	lf_wakelock(overlap);
451
452	switch (ovcase) {
453
454	case `1`: / overlap == lock /
455	*prev = overlap->lf_next;
456	lf_free(overlap);
457	break;
458
459	case `2`: / overlap contains lock: split it /
460	if (overlap->lf_start == unlock->lf_start) {
461	overlap->lf_start = unlock->lf_end + `1`;
462	break;
463	}
464	lf_split(overlap, unlock, sparelock);
465	overlap->lf_next = unlock->lf_next;
466	break;
467
468	case `3`: / lock contains overlap /
469	*prev = overlap->lf_next;
470	lf = overlap->lf_next;
471	lf_free(overlap);
472	continue;
473
474	case `4`: / overlap starts before lock /
475	overlap->lf_end = unlock->lf_start - `1`;
476	prev = &overlap->lf_next;
477	lf = overlap->lf_next;
478	continue;
479
480	case `5`: / overlap ends after lock /
481	overlap->lf_start = unlock->lf_end + `1`;
482	break;
483	}
484	break;
485	}
486	#ifdef LOCKF_DEBUG
487	if (lockf_debug & `1`)
488	lf_printlist("lf_clearlock", unlock);
489	#endif /* LOCKF_DEBUG */
490	return `0`;
491	}
492
493	/*
494	* Walk the list of locks for an inode and
495	* return the first blocking lock.
496	*/
497	static struct lockf *
498	lf_getblock(struct lockf *lock)
499	{
500	struct lockf *prev, overlap, lf = (lock->lf_head);
501
502	prev = lock->lf_head;
503	while (lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != `0`) {
504	/*
505	* We've found an overlap, see if it blocks us
506	*/
507	if ((lock->lf_type == F_WRLCK \|\| overlap->lf_type == F_WRLCK))
508	return overlap;
509	/*
510	* Nope, point to the next one on the list and
511	* see if it blocks us
512	*/
513	lf = overlap->lf_next;
514	}
515	return NULL;
516	}
517
518	/*
519	* Set a byte-range lock.
520	*/
521	static int
522	lf_setlock(struct lockf lock, struct* lockf **sparelock,
523	kmutex_t *interlock)
524	{
525	struct lockf *block;
526	struct lockf **head = lock->lf_head;
527	struct lockf *prev, overlap, *ltmp;
528	int ovcase, needtolink, error;
529
530	#ifdef LOCKF_DEBUG
531	if (lockf_debug & `1`)
532	lf_print("lf_setlock", lock);
533	#endif /* LOCKF_DEBUG */
534
535	/*
536	* Scan lock list for this file looking for locks that would block us.
537	*/
538	while ((block = lf_getblock(lock)) != NULL) {
539	/*
540	* Free the structure and return if nonblocking.
541	*/
542	if ((lock->lf_flags & F_WAIT) == `0`) {
543	lf_free(lock);
544	return EAGAIN;
545	}
546	/*
547	* We are blocked. Since flock style locks cover
548	* the whole file, there is no chance for deadlock.
549	* For byte-range locks we must check for deadlock.
550	*
551	* Deadlock detection is done by looking through the
552	* wait channels to see if there are any cycles that
553	* involve us. MAXDEPTH is set just to make sure we
554	* do not go off into neverneverland.
555	*/
556	if ((lock->lf_flags & F_POSIX) &&
557	(block->lf_flags & F_POSIX)) {
558	struct lwp *wlwp;
559	volatile const struct lockf *waitblock;
560	int i = `0`;
561	struct proc *p;
562
563	p = (struct proc *)block->lf_id;
564	KASSERT(p != NULL);
565	while (i++ < maxlockdepth) {
566	mutex_enter(p->p_lock);
567	if (p->p_nlwps > `1`) {
568	mutex_exit(p->p_lock);
569	break;
570	}
571	wlwp = LIST_FIRST(&p->p_lwps);
572	lwp_lock(wlwp);
573	if (wlwp->l_wchan == NULL \|\|
574	wlwp->l_wmesg != lockstr) {
575	lwp_unlock(wlwp);
576	mutex_exit(p->p_lock);
577	break;
578	}
579	waitblock = wlwp->l_wchan;
580	lwp_unlock(wlwp);
581	mutex_exit(p->p_lock);
582	/ Get the owner of the blocking lock /
583	waitblock = waitblock->lf_next;
584	if ((waitblock->lf_flags & F_POSIX) == `0`)
585	break;
586	p = (struct proc *)waitblock->lf_id;
587	if (p == curproc) {
588	lf_free(lock);
589	return EDEADLK;
590	}
591	}
592	/*
593	* If we're still following a dependency chain
594	* after maxlockdepth iterations, assume we're in
595	* a cycle to be safe.
596	*/
597	if (i >= maxlockdepth) {
598	lf_free(lock);
599	return EDEADLK;
600	}
601	}
602	/*
603	* For flock type locks, we must first remove
604	* any shared locks that we hold before we sleep
605	* waiting for an exclusive lock.
606	*/
607	if ((lock->lf_flags & F_FLOCK) &&
608	lock->lf_type == F_WRLCK) {
609	lock->lf_type = F_UNLCK;
610	(void) lf_clearlock(lock, NULL);
611	lock->lf_type = F_WRLCK;
612	}
613	/*
614	* Add our lock to the blocked list and sleep until we're free.
615	* Remember who blocked us (for deadlock detection).
616	*/
617	lock->lf_next = block;
618	TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
619	#ifdef LOCKF_DEBUG
620	if (lockf_debug & `1`) {
621	lf_print("lf_setlock: blocking on", block);
622	lf_printlist("lf_setlock", block);
623	}
624	#endif /* LOCKF_DEBUG */
625	error = cv_wait_sig(&lock->lf_cv, interlock);
626
627	/*
628	* We may have been awoken by a signal (in
629	* which case we must remove ourselves from the
630	* blocked list) and/or by another process
631	* releasing a lock (in which case we have already
632	* been removed from the blocked list and our
633	* lf_next field set to NULL).
634	*/
635	if (lock->lf_next != NULL) {
636	TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
637	lock->lf_next = NULL;
638	}
639	if (error) {
640	lf_free(lock);
641	return error;
642	}
643	}
644	/*
645	* No blocks!! Add the lock. Note that we will
646	* downgrade or upgrade any overlapping locks this
647	* process already owns.
648	*
649	* Skip over locks owned by other processes.
650	* Handle any locks that overlap and are owned by ourselves.
651	*/
652	prev = head;
653	block = *head;
654	needtolink = `1`;
655	for (;;) {
656	ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
657	if (ovcase)
658	block = overlap->lf_next;
659	/*
660	* Six cases:
661	* 0) no overlap
662	* 1) overlap == lock
663	* 2) overlap contains lock
664	* 3) lock contains overlap
665	* 4) overlap starts before lock
666	* 5) overlap ends after lock
667	*/
668	switch (ovcase) {
669	case `0`: / no overlap /
670	if (needtolink) {
671	*prev = lock;
672	lock->lf_next = overlap;
673	}
674	break;
675
676	case `1`: / overlap == lock /
677	/*
678	* If downgrading lock, others may be
679	* able to acquire it.
680	*/
681	if (lock->lf_type == F_RDLCK &&
682	overlap->lf_type == F_WRLCK)
683	lf_wakelock(overlap);
684	overlap->lf_type = lock->lf_type;
685	lf_free(lock);
686	lock = overlap; / for debug output below /
687	break;
688
689	case `2`: / overlap contains lock /
690	/*
691	* Check for common starting point and different types.
692	*/
693	if (overlap->lf_type == lock->lf_type) {
694	lf_free(lock);
695	lock = overlap; / for debug output below /
696	break;
697	}
698	if (overlap->lf_start == lock->lf_start) {
699	*prev = lock;
700	lock->lf_next = overlap;
701	overlap->lf_start = lock->lf_end + `1`;
702	} else
703	lf_split(overlap, lock, sparelock);
704	lf_wakelock(overlap);
705	break;
706
707	case `3`: / lock contains overlap /
708	/*
709	* If downgrading lock, others may be able to
710	* acquire it, otherwise take the list.
711	*/
712	if (lock->lf_type == F_RDLCK &&
713	overlap->lf_type == F_WRLCK) {
714	lf_wakelock(overlap);
715	} else {
716	while ((ltmp = TAILQ_FIRST(&overlap->lf_blkhd))) {
717	KASSERT(ltmp->lf_next == overlap);
718	TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
719	lf_block);
720	ltmp->lf_next = lock;
721	TAILQ_INSERT_TAIL(&lock->lf_blkhd,
722	ltmp, lf_block);
723	}
724	}
725	/*
726	* Add the new lock if necessary and delete the overlap.
727	*/
728	if (needtolink) {
729	*prev = lock;
730	lock->lf_next = overlap->lf_next;
731	prev = &lock->lf_next;
732	needtolink = `0`;
733	} else
734	*prev = overlap->lf_next;
735	lf_free(overlap);
736	continue;
737
738	case `4`: / overlap starts before lock /
739	/*
740	* Add lock after overlap on the list.
741	*/
742	lock->lf_next = overlap->lf_next;
743	overlap->lf_next = lock;
744	overlap->lf_end = lock->lf_start - `1`;
745	prev = &lock->lf_next;
746	lf_wakelock(overlap);
747	needtolink = `0`;
748	continue;
749
750	case `5`: / overlap ends after lock /
751	/*
752	* Add the new lock before overlap.
753	*/
754	if (needtolink) {
755	*prev = lock;
756	lock->lf_next = overlap;
757	}
758	overlap->lf_start = lock->lf_end + `1`;
759	lf_wakelock(overlap);
760	break;
761	}
762	break;
763	}
764	#ifdef LOCKF_DEBUG
765	if (lockf_debug & `1`) {
766	lf_print("lf_setlock: got the lock", lock);
767	lf_printlist("lf_setlock", lock);
768	}
769	#endif /* LOCKF_DEBUG */
770	return `0`;
771	}
772
773	/*
774	* Check whether there is a blocking lock,
775	* and if so return its process identifier.
776	*/
777	static int
778	lf_getlock(struct lockf lock, struct* flock *fl)
779	{
780	struct lockf *block;
781
782	#ifdef LOCKF_DEBUG
783	if (lockf_debug & `1`)
784	lf_print("lf_getlock", lock);
785	#endif /* LOCKF_DEBUG */
786
787	if ((block = lf_getblock(lock)) != NULL) {
788	fl->l_type = block->lf_type;
789	fl->l_whence = SEEK_SET;
790	fl->l_start = block->lf_start;
791	if (block->lf_end == -`1`)
792	fl->l_len = `0`;
793	else
794	fl->l_len = block->lf_end - block->lf_start + `1`;
795	if (block->lf_flags & F_POSIX)
796	fl->l_pid = ((struct proc *)block->lf_id)->p_pid;
797	else
798	fl->l_pid = -`1`;
799	} else {
800	fl->l_type = F_UNLCK;
801	}
802	return `0`;
803	}
804
805	/*
806	* Do an advisory lock operation.
807	*/
808	int
809	lf_advlock(struct vop_advlock_args ap, struct* lockf **head, off_t size)
810	{
811	struct flock *fl = ap->a_fl;
812	struct lockf *lock = NULL;
813	struct lockf *sparelock;
814	kmutex_t *interlock = lockf_lock;
815	off_t start, end;
816	int error = `0`;
817
818	/*
819	* Convert the flock structure into a start and end.
820	*/
821	switch (fl->l_whence) {
822	case SEEK_SET:
823	case SEEK_CUR:
824	/*
825	* Caller is responsible for adding any necessary offset
826	* when SEEK_CUR is used.
827	*/
828	start = fl->l_start;
829	break;
830
831	case SEEK_END:
832	start = size + fl->l_start;
833	break;
834
835	default:
836	return EINVAL;
837	}
838
839	if (fl->l_len == `0`)
840	end = -`1`;
841	else {
842	if (fl->l_len > `0`)
843	end = start + fl->l_len - `1`;
844	else {
845	/ lockf() allows -ve lengths /
846	end = start - `1`;
847	start += fl->l_len;
848	}
849	}
850	if (start < `0`)
851	return EINVAL;
852
853	/*
854	* Allocate locks before acquiring the interlock. We need two
855	* locks in the worst case.
856	*/
857	switch (ap->a_op) {
858	case F_SETLK:
859	case F_UNLCK:
860	/*
861	* XXX For F_UNLCK case, we can re-use the lock.
862	*/
863	if ((ap->a_flags & F_FLOCK) == `0`) {
864	/*
865	* Byte-range lock might need one more lock.
866	*/
867	sparelock = lf_alloc(`0`);
868	if (sparelock == NULL) {
869	error = ENOMEM;
870	goto quit;
871	}
872	break;
873	}
874	/ FALLTHROUGH /
875
876	case F_GETLK:
877	sparelock = NULL;
878	break;
879
880	default:
881	return EINVAL;
882	}
883
884	switch (ap->a_op) {
885	case F_SETLK:
886	lock = lf_alloc(`1`);
887	break;
888	case F_UNLCK:
889	if (start == `0` \|\| end == -`1`) {
890	/ never split /
891	lock = lf_alloc(`0`);
892	} else {
893	/ might split /
894	lock = lf_alloc(`2`);
895	}
896	break;
897	case F_GETLK:
898	lock = lf_alloc(`0`);
899	break;
900	}
901	if (lock == NULL) {
902	error = ENOMEM;
903	goto quit;
904	}
905
906	mutex_enter(interlock);
907
908	/*
909	* Avoid the common case of unlocking when inode has no locks.
910	*/
911	if (head == (struct* lockf *)`0`) {
912	if (ap->a_op != F_SETLK) {
913	fl->l_type = F_UNLCK;
914	error = `0`;
915	goto quit_unlock;
916	}
917	}
918
919	/*
920	* Create the lockf structure.
921	*/
922	lock->lf_start = start;
923	lock->lf_end = end;
924	lock->lf_head = head;
925	lock->lf_type = fl->l_type;
926	lock->lf_next = (struct lockf *)`0`;
927	TAILQ_INIT(&lock->lf_blkhd);
928	lock->lf_flags = ap->a_flags;
929	if (lock->lf_flags & F_POSIX) {
930	KASSERT(curproc == (struct proc *)ap->a_id);
931	}
932	lock->lf_id = ap->a_id;
933
934	/*
935	* Do the requested operation.
936	*/
937	switch (ap->a_op) {
938
939	case F_SETLK:
940	error = lf_setlock(lock, &sparelock, interlock);
941	lock = NULL; / lf_setlock freed it /
942	break;
943
944	case F_UNLCK:
945	error = lf_clearlock(lock, &sparelock);
946	break;
947
948	case F_GETLK:
949	error = lf_getlock(lock, fl);
950	break;
951
952	default:
953	break;
954	/ NOTREACHED /
955	}
956
957	quit_unlock:
958	mutex_exit(interlock);
959	quit:
960	if (lock)
961	lf_free(lock);
962	if (sparelock)
963	lf_free(sparelock);
964
965	return error;
966	}
967
968	/*
969	* Initialize subsystem. XXX We use a global lock. This could be the
970	* vnode interlock, but the deadlock detection code may need to inspect
971	* locks belonging to other files.
972	*/
973	void
974	lf_init(void)
975	{
976
977	lockf_cache = pool_cache_init(sizeof(struct lockf), `0`, `0`, `0`, "lockf",
978	NULL, IPL_NONE, lf_ctor, lf_dtor, NULL);
979	lockf_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
980	}
981

Browse the source code of src/src/sys/kern/vfs_lockf.c