/*	$NetBSD: uvm_loan.c,v 1.104 2020/06/11 22:21:05 ad Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp
 */

/*
 * uvm_loan.c: page loanout handler
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.104 2020/06/11 22:21:05 ad Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mman.h>

#include <uvm/uvm.h>

#ifdef UVMHIST
UVMHIST_DEFINE(loanhist);
#endif

/*
 * "loaned" pages are pages which are (read-only, copy-on-write) loaned
 * from the VM system to other parts of the kernel.   this allows page
 * copying to be avoided (e.g. you can loan pages from objs/anons to
 * the mbuf system).
 *
 * there are 3 types of loans possible:
 *  O->K  uvm_object page to wired kernel page (e.g. mbuf data area)
 *  A->K  anon page to wired kernel page (e.g. mbuf data area)
 *  O->A  uvm_object to anon loan (e.g. vnode page to an anon)
 * note that it is possible to have an O page loaned to both an A and K
 * at the same time.
 *
 * loans are tracked by pg->loan_count.  an O->A page will have both
 * a uvm_object and a vm_anon, but PG_ANON will not be set.   this sort
 * of page is considered "owned" by the uvm_object (not the anon).
 *
 * each loan of a page to the kernel bumps the pg->wire_count.  the
 * kernel mappings for these pages will be read-only and wired.  since
 * the page will also be wired, it will not be a candidate for pageout,
 * and thus will never be pmap_page_protect()'d with VM_PROT_NONE.  a
 * write fault in the kernel to one of these pages will not cause
 * copy-on-write.  instead, the page fault is considered fatal.  this
 * is because the kernel mapping will have no way to look up the
 * object/anon which the page is owned by.  this is a good side-effect,
 * since a kernel write to a loaned page is an error.
 *
 * owners that want to free their pages and discover that they are
 * loaned out simply "disown" them (the page becomes an orphan).  these
 * pages should be freed when the last loan is dropped.   in some cases
 * an anon may "adopt" an orphaned page.
 *
 * locking: to read pg->loan_count either the owner or pg->interlock
 * must be locked.   to modify pg->loan_count, both the owner of the page
 * and pg->interlock must be locked.   pg->flags is (as always) locked by
 * the owner of the page.
 *
 * note that locking from the "loaned" side is tricky since the object
 * getting the loaned page has no reference to the page's owner and thus
 * the owner could "die" at any time.   in order to prevent the owner
 * from dying pg->interlock should be locked.   this forces us to sometimes
 * use "try" locking.
 *
 * loans are typically broken by the following events:
 *  1. user-level write fault to a loaned page
 *  2. pageout of clean+inactive O->A loaned page
 *  3. owner frees page (e.g. pager flush)
 *
 * note that loaning a page causes all mappings of the page to become
 * read-only (via pmap_page_protect).   this could have an unexpected
 * effect on normal "wired" pages if one is not careful (XXX).
 */
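
/*
 * Illustrative sketch (not part of this file's interface): how a
 * hypothetical kernel subsystem might borrow wired, read-only pages
 * from a user map (O->K / A->K) and later drop the loan.  "uva" is an
 * assumed user virtual address; error handling and the kernel mapping
 * step (e.g. pmap_kenter_pa() with VM_PROT_READ) are elided.
 *
 *	struct vm_page *pgs[4];
 *	int error;
 *
 *	error = uvm_loan(&curproc->p_vmspace->vm_map, trunc_page(uva),
 *	    4 * PAGE_SIZE, pgs, UVM_LOAN_TOPAGE);
 *	if (error)
 *		return error;
 *	(map the pages read-only, use the data, then drop the loans:)
 *	uvm_unloan(pgs, 4, UVM_LOAN_TOPAGE);
 */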

/*
 * local prototypes
 */

static int	uvm_loananon(struct uvm_faultinfo *, void ***,
			     int, struct vm_anon *);
static int	uvm_loanuobj(struct uvm_faultinfo *, void ***,
			     int, vaddr_t);
static int	uvm_loanzero(struct uvm_faultinfo *, void ***, int);
static void	uvm_unloananon(struct vm_anon **, int);
static void	uvm_unloanpage(struct vm_page **, int);
static int	uvm_loanpage(struct vm_page **, int, bool);


/*
 * inlines
 */

/*
 * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan())
 *
 * => "ufi" is the result of a successful map lookup (meaning that
 *	on entry the map is locked by the caller)
 * => we may unlock and then relock the map if needed (for I/O)
 * => we put our output result in "output"
 * => we always return with the map unlocked
 * => possible return values:
 *	-1 == error, map is unlocked
 *	 0 == map relock error (try again!), map is unlocked
 *	>0 == number of pages we loaned, map is unlocked
 *
 * NOTE: We can live with this being an inline, because it is only called
 * from one place.
 */

static inline int
uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	vaddr_t curaddr = ufi->orig_rvaddr;
	vsize_t togo = ufi->size;
	struct vm_aref *aref = &ufi->entry->aref;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_anon *anon;
	int rv, result = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * lock us the rest of the way down (we unlock before return)
	 */
	if (aref->ar_amap) {
		amap_lock(aref->ar_amap, RW_WRITER);
	}

	/*
	 * loop until done
	 */
	while (togo) {

		/*
		 * find the page we want.   check the anon layer first.
		 */

		if (aref->ar_amap) {
			anon = amap_lookup(aref, curaddr - ufi->entry->start);
		} else {
			anon = NULL;
		}

		/* locked: map, amap, uobj */
		if (anon) {
			rv = uvm_loananon(ufi, output, flags, anon);
		} else if (uobj) {
			rv = uvm_loanuobj(ufi, output, flags, curaddr);
		} else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
			rv = uvm_loanzero(ufi, output, flags);
		} else {
			uvmfault_unlockall(ufi, aref->ar_amap, uobj);
			rv = -1;
		}
		/* locked: if (rv > 0) => map, amap, uobj  [o.w. unlocked] */
		KASSERT(rv > 0 || aref->ar_amap == NULL ||
		    !rw_write_held(aref->ar_amap->am_lock));
		KASSERT(rv > 0 || uobj == NULL ||
		    !rw_write_held(uobj->vmobjlock));

		/* total failure */
		if (rv < 0) {
			UVMHIST_LOG(loanhist, "failure %jd", rv, 0,0,0);
			return (-1);
		}

		/* relock failed, need to do another lookup */
		if (rv == 0) {
			UVMHIST_LOG(loanhist, "relock failure %jd", result
			    ,0,0,0);
			return (result);
		}

		/*
		 * got it... advance to next page
		 */

		result++;
		togo -= PAGE_SIZE;
		curaddr += PAGE_SIZE;
	}

	/*
	 * unlock what we locked, unlock the maps and return
	 */

	if (aref->ar_amap) {
		amap_unlock(aref->ar_amap);
	}
	uvmfault_unlockmaps(ufi, false);
	UVMHIST_LOG(loanhist, "done %jd", result, 0,0,0);
	return (result);
}

/*
 * normal functions
 */

/*
 * uvm_loan: loan pages in a map out to anons or to the kernel
 *
 * => map should be unlocked
 * => start and len should be multiples of PAGE_SIZE
 * => result is either an array of anon's or vm_pages (depending on flags)
 * => flag values: UVM_LOAN_TOANON - loan to anons
 *                 UVM_LOAN_TOPAGE - loan to wired kernel page
 *    one and only one of these flags must be set!
 * => returns 0 (success), or an appropriate error number
 */

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{
	struct uvm_faultinfo ufi;
	void **result, **output;
	int rv, error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * ensure that one and only one of the flags is set
	 */

	KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^
		((flags & UVM_LOAN_TOPAGE) == 0));

	/*
	 * "output" is a pointer to the current place to put the loaned page.
	 */

	result = v;
	output = &result[0];	/* start at the beginning ... */

	/*
	 * while we've got pages to do
	 */

	while (len > 0) {

		/*
		 * fill in params for a call to uvmfault_lookup
		 */

		ufi.orig_map = map;
		ufi.orig_rvaddr = start;
		ufi.orig_size = len;

		/*
		 * do the lookup, the only time this will fail is if we hit on
		 * an unmapped region (an error)
		 */

		if (!uvmfault_lookup(&ufi, false)) {
			error = ENOENT;
			goto fail;
		}

		/*
		 * map now locked.  now do the loanout...
		 */

		rv = uvm_loanentry(&ufi, &output, flags);
		if (rv < 0) {
			/* all unlocked due to error */
			error = EINVAL;
			goto fail;
		}

		/*
		 * done!  the map is unlocked.  advance, if possible.
		 *
		 * XXXCDC: could be recoded to hold the map lock with
		 *	   smarter code (but it only happens on map entry
		 *	   boundaries, so it isn't that bad).
		 */

		if (rv) {
			rv <<= PAGE_SHIFT;
			len -= rv;
			start += rv;
		}
	}
	UVMHIST_LOG(loanhist, "success", 0,0,0,0);
	return 0;

fail:
	/*
	 * failed to complete loans.  drop any loans and return failure code.
	 * map is already unlocked.
	 */

	if (output - result) {
		if (flags & UVM_LOAN_TOANON) {
			uvm_unloananon((struct vm_anon **)result,
			    output - result);
		} else {
			uvm_unloanpage((struct vm_page **)result,
			    output - result);
		}
	}
	UVMHIST_LOG(loanhist, "error %jd", error,0,0,0);
	return (error);
}

/*
 * uvm_loananon: loan a page from an anon out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static int
uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
    struct vm_anon *anon)
{
	struct vm_page *pg;
	int error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * if we are loaning to "another" anon then it is easy, we just
	 * bump the reference count on the current anon and return a
	 * pointer to it (it becomes copy-on-write shared).
	 */

	if (flags & UVM_LOAN_TOANON) {
		KASSERT(rw_write_held(anon->an_lock));
		pg = anon->an_page;
		if (pg && (pg->flags & PG_ANON) != 0 && anon->an_ref == 1) {
			if (pg->wire_count > 0) {
				UVMHIST_LOG(loanhist, "->A wired %#jx",
				    (uintptr_t)pg, 0, 0, 0);
				uvmfault_unlockall(ufi,
				    ufi->entry->aref.ar_amap,
				    ufi->entry->object.uvm_obj);
				return (-1);
			}
			pmap_page_protect(pg, VM_PROT_READ);
		}
		anon->an_ref++;
		**output = anon;
		(*output)++;
		UVMHIST_LOG(loanhist, "->A done", 0,0,0,0);
		return (1);
	}

	/*
	 * we are loaning to a kernel page.   we need to get the page
	 * resident so we can wire it.   uvmfault_anonget will handle
	 * this for us.
	 */

	KASSERT(rw_write_held(anon->an_lock));
	error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);

	/*
	 * if we were unable to get the anon, then uvmfault_anonget has
	 * unlocked everything and returned an error code.
	 */

	if (error) {
		UVMHIST_LOG(loanhist, "error %jd", error,0,0,0);
		KASSERT(error != ENOLCK);

		/* need to refault (i.e. refresh our lookup) ? */
		if (error == ERESTART) {
			return (0);
		}

		/* "try again"?   sleep a bit and retry ... */
		if (error == EAGAIN) {
			kpause("loanagain", false, hz/2, NULL);
			return (0);
		}

		/* otherwise flag it as an error */
		return (-1);
	}

	/*
	 * we have the page and its owner locked: do the loan now.
	 */

	pg = anon->an_page;
	if (pg->wire_count > 0) {
		UVMHIST_LOG(loanhist, "->K wired %#jx", (uintptr_t)pg, 0, 0, 0);
		KASSERT(pg->uobject == NULL);
		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, NULL);
		return (-1);
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	uvm_pagelock(pg);
	pg->loan_count++;
	KASSERT(pg->loan_count > 0);	/* detect wrap-around */
	uvm_pageactivate(pg);
	uvm_pageunlock(pg);
	**output = pg;
	(*output)++;

	/* unlock and return success */
	if (pg->uobject)
		rw_exit(pg->uobject->vmobjlock);
	UVMHIST_LOG(loanhist, "->K done", 0,0,0,0);
	return (1);
}

/*
 * uvm_loanpage: loan out pages to kernel (->K)
 *
 * => pages should be object-owned and the object should be locked.
 * => in the case of error, the object might be unlocked and relocked.
 * => pages will be unbusied (if busied is true).
 * => fail with EBUSY if we meet a wired page.
 */
static int
uvm_loanpage(struct vm_page **pgpp, int npages, bool busied)
{
	int i;
	int error = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgpp[i];

		KASSERT(pg->uobject != NULL);
		KASSERT(pg->uobject == pgpp[0]->uobject);
		KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
		KASSERT(rw_write_held(pg->uobject->vmobjlock));
		KASSERT(busied == ((pg->flags & PG_BUSY) != 0));

		if (pg->wire_count > 0) {
			UVMHIST_LOG(loanhist, "wired %#jx", (uintptr_t)pg,
			    0, 0, 0);
			error = EBUSY;
			break;
		}
		if (pg->loan_count == 0) {
			pmap_page_protect(pg, VM_PROT_READ);
		}
		uvm_pagelock(pg);
		pg->loan_count++;
		KASSERT(pg->loan_count > 0);	/* detect wrap-around */
		uvm_pageactivate(pg);
		uvm_pageunlock(pg);
	}

	if (busied) {
		uvm_page_unbusy(pgpp, npages);
	}

	if (error) {
		/*
		 * backout what we've done
		 */
		krwlock_t *slock = pgpp[0]->uobject->vmobjlock;

		rw_exit(slock);
		uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE);
		rw_enter(slock, RW_WRITER);
	}

	UVMHIST_LOG(loanhist, "done %jd", error, 0, 0, 0);
	return error;
}

/*
 * XXX UBC temp limit
 * number of pages to get at once.
 * should be <= MAX_READ_AHEAD in genfs_vnops.c
 */
#define	UVM_LOAN_GET_CHUNK	16

/*
 * uvm_loanuobjchunk: helper for uvm_loanuobjpages()
 */
static int
uvm_loanuobjchunk(struct uvm_object *uobj, voff_t pgoff, int orignpages,
    struct vm_page **pgpp)
{
	int error, npages;

	rw_enter(uobj->vmobjlock, RW_WRITER);
 reget:
	npages = orignpages;
	error = (*uobj->pgops->pgo_get)(uobj, pgoff, pgpp, &npages, 0,
	    VM_PROT_READ, 0, PGO_SYNCIO);
	switch (error) {
	case 0:
		KASSERT(npages == orignpages);

		/* check for released pages */
		rw_enter(uobj->vmobjlock, RW_WRITER);
		for (int i = 0; i < npages; i++) {
			KASSERT(pgpp[i]->uobject->vmobjlock == uobj->vmobjlock);
			if ((pgpp[i]->flags & PG_RELEASED) != 0) {
				/*
				 * release pages and try again.
				 */
				uvm_page_unbusy(pgpp, npages);
				goto reget;
			}
		}

		/* loan out pages.  they will be unbusied whatever happens. */
		error = uvm_loanpage(pgpp, npages, true);
		rw_exit(uobj->vmobjlock);
		if (error != 0) {
			memset(pgpp, 0, sizeof(pgpp[0]) * npages);
		}
		return error;

	case EAGAIN:
		kpause("loanuopg", false, hz/2, NULL);
		rw_enter(uobj->vmobjlock, RW_WRITER);
		goto reget;

	default:
		return error;
	}
}

/*
 * uvm_loanuobjpages: loan pages from a uobj out (O->K)
 *
 * => uobj shouldn't be locked.  (we'll lock it)
 * => fail with EBUSY if we meet a wired page.
 */
int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int npages,
    struct vm_page **pgpp)
{
	int ndone, error, chunk;

	KASSERT(npages > 0);

	memset(pgpp, 0, sizeof(pgpp[0]) * npages);
	for (ndone = 0; ndone < npages; ndone += chunk) {
		chunk = MIN(UVM_LOAN_GET_CHUNK, npages - ndone);
		error = uvm_loanuobjchunk(uobj, pgoff + (ndone << PAGE_SHIFT),
		    chunk, pgpp + ndone);
		if (error != 0) {
			if (ndone != 0) {
				uvm_unloan(pgpp, ndone, UVM_LOAN_TOPAGE);
			}
			break;
		}
	}

	return error;
}
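
/*
 * Illustrative sketch (hypothetical caller, not part of this file):
 * loaning a vnode's pages out to the kernel with uvm_loanuobjpages()
 * above, e.g. for a zero-copy read path.  "vp" is an assumed vnode
 * whose object covers the requested range.
 *
 *	struct vm_page *pgs[8];
 *	int error;
 *
 *	error = uvm_loanuobjpages(&vp->v_uobj, 0, 8, pgs);
 *	if (error == 0) {
 *		(use the loaned pages; all mappings are now read-only)
 *		uvm_unloan(pgs, 8, UVM_LOAN_TOPAGE);
 *	}
 */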

/*
 * uvm_loanuobj: loan a page from a uobj out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static int
uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
{
	struct vm_amap *amap = ufi->entry->aref.ar_amap;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_page *pg;
	int error, npages;
	bool locked;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * first we must make sure the page is resident.
	 *
	 * XXXCDC: duplicate code with uvm_fault().
	 */

	/* locked: maps(read), amap(if there) */
	rw_enter(uobj->vmobjlock, RW_WRITER);
	/* locked: maps(read), amap(if there), uobj */

	if (uobj->pgops->pgo_get) {	/* try locked pgo_get */
		npages = 1;
		pg = NULL;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
	} else {
		error = EIO;		/* must have pgo_get op */
	}

	/*
	 * check the result of the locked pgo_get.  if there is a problem,
	 * then we fail the loan.
	 */

	if (error && error != EBUSY) {
		uvmfault_unlockall(ufi, amap, uobj);
		return (-1);
	}

	/*
	 * if we need to unlock for I/O, do so now.
	 */

	if (error == EBUSY) {
		uvmfault_unlockall(ufi, amap, NULL);

		/* locked: uobj */
		npages = 1;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO);
		/* locked: <nothing> */

		if (error) {
			if (error == EAGAIN) {
				kpause("fltagain2", false, hz/2, NULL);
				return (0);
			}
			return (-1);
		}

		/*
		 * pgo_get was a success.   attempt to relock everything.
		 */

		locked = uvmfault_relock(ufi);
		if (locked && amap)
			amap_lock(amap, RW_WRITER);
		uobj = pg->uobject;
		rw_enter(uobj->vmobjlock, RW_WRITER);

		/*
		 * verify that the page has not been released and re-verify
		 * that the amap slot is still free.   if there is a problem
		 * we drop our lock (thus forcing a lookup refresh/retry).
		 */

		if ((pg->flags & PG_RELEASED) != 0 ||
		    (locked && amap && amap_lookup(&ufi->entry->aref,
		    ufi->orig_rvaddr - ufi->entry->start))) {
			if (locked)
				uvmfault_unlockall(ufi, amap, NULL);
			locked = false;
		}

		/*
		 * unbusy the page.
		 */

		if ((pg->flags & PG_RELEASED) == 0) {
			uvm_pagelock(pg);
			uvm_pagewakeup(pg);
			uvm_pageunlock(pg);
			pg->flags &= ~PG_BUSY;
			UVM_PAGE_OWN(pg, NULL);
		}

		/*
		 * didn't get the lock?   release the page and retry.
		 */

		if (locked == false) {
			if (pg->flags & PG_RELEASED) {
				uvm_pagefree(pg);
			}
			rw_exit(uobj->vmobjlock);
			return (0);
		}
	}

	/*
	 * for tmpfs vnodes, the page will be from a UAO rather than
	 * the vnode.  just check that the locks match.
	 */

	KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock);

	/*
	 * at this point we have the page we want ("pg") and we have
	 * all data structures locked.  do the loanout.  page can not
	 * be PG_RELEASED (we caught this above).
	 */

	if ((flags & UVM_LOAN_TOANON) == 0) {
		if (uvm_loanpage(&pg, 1, false)) {
			uvmfault_unlockall(ufi, amap, uobj);
			return (-1);
		}
		rw_exit(uobj->vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

#ifdef notdef
	/*
	 * must be a loan to an anon.   check to see if there is already
	 * an anon associated with this page.  if so, then just return
	 * a reference to this object.   the page should already be
	 * mapped read-only because it is already on loan.
	 */

	if (pg->uanon) {
		/* XXX: locking */
		anon = pg->uanon;
		anon->an_ref++;
		uvm_pagelock(pg);
		uvm_pagewakeup(pg);
		uvm_pageunlock(pg);
		pg->flags &= ~PG_BUSY;
		UVM_PAGE_OWN(pg, NULL);
		rw_exit(uobj->vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		goto fail;
	}
	if (pg->wire_count > 0) {
		UVMHIST_LOG(loanhist, "wired %#jx", (uintptr_t)pg, 0, 0, 0);
		goto fail;
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	uvm_pagelock(pg);
	pg->loan_count++;
	KASSERT(pg->loan_count > 0);	/* detect wrap-around */
	pg->uanon = anon;
	anon->an_page = pg;
	anon->an_lock = /* TODO: share amap lock */
	uvm_pageactivate(pg);
	uvm_pagewakeup(pg);
	uvm_pageunlock(pg);
	pg->flags &= ~PG_BUSY;
	UVM_PAGE_OWN(pg, NULL);
	rw_exit(uobj->vmobjlock);
	rw_exit(&anon->an_lock);
	**output = anon;
	(*output)++;
	return (1);

fail:
	UVMHIST_LOG(loanhist, "fail", 0,0,0,0);
	/*
	 * unlock everything and bail out.
	 */
	uvm_pagelock(pg);
	uvm_pagewakeup(pg);
	uvm_pageunlock(pg);
	pg->flags &= ~PG_BUSY;
	UVM_PAGE_OWN(pg, NULL);
	uvmfault_unlockall(ufi, amap, uobj, NULL);
	if (anon) {
		anon->an_ref--;
		uvm_anfree(anon);
	}
#endif	/* notdef */
	return (-1);
}

/*
 * uvm_loanzero: loan a zero-fill page out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static struct uvm_object uvm_loanzero_object;
static krwlock_t uvm_loanzero_lock __cacheline_aligned;

static int
uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	struct vm_page *pg;
	struct vm_amap *amap = ufi->entry->aref.ar_amap;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
again:
	rw_enter(uvm_loanzero_object.vmobjlock, RW_WRITER);

	/*
	 * first, get ahold of our single zero page.
	 */

	pg = uvm_pagelookup(&uvm_loanzero_object, 0);
	if (__predict_false(pg == NULL)) {
		while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL,
					   UVM_PGA_ZERO)) == NULL) {
			rw_exit(uvm_loanzero_object.vmobjlock);
			uvmfault_unlockall(ufi, amap, NULL);
			uvm_wait("loanzero");
			if (!uvmfault_relock(ufi)) {
				return (0);
			}
			if (amap) {
				amap_lock(amap, RW_WRITER);
			}
			goto again;
		}

		/* got a zero'd page. */
		pg->flags &= ~(PG_BUSY|PG_FAKE);
		pg->flags |= PG_RDONLY;
		uvm_pagelock(pg);
		uvm_pageactivate(pg);
		uvm_pagewakeup(pg);
		uvm_pageunlock(pg);
		UVM_PAGE_OWN(pg, NULL);
	}

	if ((flags & UVM_LOAN_TOANON) == 0) {	/* loaning to kernel-page */
		mutex_enter(&pg->interlock);
		pg->loan_count++;
		KASSERT(pg->loan_count > 0);	/* detect wrap-around */
		mutex_exit(&pg->interlock);
		rw_exit(uvm_loanzero_object.vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

#ifdef notdef
	/*
	 * loaning to an anon.  check to see if there is already an anon
	 * associated with this page.  if so, then just return a reference
	 * to this object.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		rw_enter(&anon->an_lock, RW_WRITER);
		anon->an_ref++;
		rw_exit(&anon->an_lock);
		rw_exit(uvm_loanzero_object.vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		/* out of swap causes us to fail */
		rw_exit(uvm_loanzero_object.vmobjlock);
		uvmfault_unlockall(ufi, amap, NULL, NULL);
		return (-1);
	}
	anon->an_page = pg;
	pg->uanon = anon;
	uvm_pagelock(pg);
	pg->loan_count++;
	KASSERT(pg->loan_count > 0);	/* detect wrap-around */
	uvm_pageactivate(pg);
	uvm_pageunlock(pg);
	rw_exit(&anon->an_lock);
	rw_exit(uvm_loanzero_object.vmobjlock);
	**output = anon;
	(*output)++;
	return (1);
#else
	return (-1);
#endif
}


/*
 * uvm_unloananon: kill loans on anons (basically a normal ref drop)
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloananon(struct vm_anon **aloans, int nanons)
{
#ifdef notdef
	struct vm_anon *anon, *to_free = NULL;

	/* TODO: locking */
	amap_lock(amap, RW_WRITER);
	while (nanons-- > 0) {
		anon = *aloans++;
		if (--anon->an_ref == 0) {
			uvm_anfree(anon);
		}
	}
	amap_unlock(amap);
#endif	/* notdef */
}

/*
 * uvm_unloanpage: kill loans on pages loaned out to the kernel
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloanpage(struct vm_page **ploans, int npages)
{
	struct vm_page *pg;
	krwlock_t *slock;

	while (npages-- > 0) {
		pg = *ploans++;

		/*
		 * do a little dance to acquire the object or anon lock
		 * as appropriate.  we are locking in the wrong order,
		 * so we have to do a try-lock here.
		 */

		mutex_enter(&pg->interlock);
		slock = NULL;
		while (pg->uobject != NULL || pg->uanon != NULL) {
			if (pg->uobject != NULL) {
				slock = pg->uobject->vmobjlock;
			} else {
				slock = pg->uanon->an_lock;
			}
			if (rw_tryenter(slock, RW_WRITER)) {
				break;
			}
			/* XXX Better than yielding but inadequate. */
			kpause("livelock", false, 1, &pg->interlock);
			slock = NULL;
		}

		/*
		 * drop our loan.  if page is owned by an anon but
		 * PG_ANON is not set, the page was loaned to the anon
		 * from an object which dropped ownership, so resolve
		 * this by turning the anon's loan into real ownership
		 * (ie. decrement loan_count again and set PG_ANON).
		 * after all this, if there are no loans left, put the
		 * page back on a paging queue (if the page is owned by
		 * an anon) or free it (if the page is now unowned).
		 */

		KASSERT(pg->loan_count > 0);
		pg->loan_count--;
		if (pg->uobject == NULL && pg->uanon != NULL &&
		    (pg->flags & PG_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->flags |= PG_ANON;
		}
		mutex_exit(&pg->interlock);
		if (pg->loan_count == 0 && pg->uobject == NULL &&
		    pg->uanon == NULL) {
			KASSERT((pg->flags & PG_BUSY) == 0);
			uvm_pagefree(pg);
		}
		if (slock != NULL) {
			rw_exit(slock);
		}
	}
}

/*
 * uvm_unloan: kill loans on pages or anons.
 */

void
uvm_unloan(void *v, int npages, int flags)
{
	if (flags & UVM_LOAN_TOANON) {
		uvm_unloananon(v, npages);
	} else {
		uvm_unloanpage(v, npages);
	}
}

/*
 * Minimal pager for uvm_loanzero_object.  We need to provide a "put"
 * method, because the page can end up on a paging queue, and the
 * page daemon will want to call pgo_put when it encounters the page
 * on the inactive list.
 */

static int
ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	KDASSERT(uobj == &uvm_loanzero_object);

	/*
	 * Don't need to do any work here if we're not freeing pages.
	 */

	if ((flags & PGO_FREE) == 0) {
		rw_exit(uobj->vmobjlock);
		return 0;
	}

	/*
	 * we don't actually want to ever free the page in
	 * uvm_loanzero_object, so just reactivate or dequeue it.
	 */

	pg = uvm_pagelookup(uobj, 0);
	KASSERT(pg != NULL);

	uvm_pagelock(pg);
	if (pg->uanon) {
		uvm_pageactivate(pg);
	} else {
		uvm_pagedequeue(pg);
	}
	uvm_pageunlock(pg);

	rw_exit(uobj->vmobjlock);
	return 0;
}

static const struct uvm_pagerops ulz_pager = {
	.pgo_put = ulz_put,
};

/*
 * uvm_loan_init(): initialize the uvm_loan() facility.
 */

void
uvm_loan_init(void)
{

	rw_init(&uvm_loanzero_lock);
	uvm_obj_init(&uvm_loanzero_object, &ulz_pager, false, 0);
	uvm_obj_setlock(&uvm_loanzero_object, &uvm_loanzero_lock);

	UVMHIST_INIT(loanhist, 300);
}

/*
 * uvm_loanbreak: break loan on a uobj page
 *
 * => called with uobj locked
 * => the page may be busy; if it's busy, it will be unbusied
 * => return value:
 *	newly allocated page if succeeded, NULL if page allocation failed
 */
struct vm_page *
uvm_loanbreak(struct vm_page *uobjpage)
{
	struct vm_page *pg;
	struct uvm_object *uobj __diagused = uobjpage->uobject;

	KASSERT(uobj != NULL);
	KASSERT(rw_write_held(uobj->vmobjlock));

	/* alloc new un-owned page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL)
		return NULL;

	/*
	 * copy the data from the old page to the new
	 * one and clear the PG_FAKE flag on the new page (keep it busy).
	 * force a reload of the old page by clearing it from all
	 * pmaps.
	 * then replace the old page with the new one.
	 */

	uvm_pagecopy(uobjpage, pg);	/* old -> new */
	pg->flags &= ~PG_FAKE;
	KASSERT(uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_DIRTY);
	pmap_page_protect(uobjpage, VM_PROT_NONE);
	/* uobj still locked */
	if ((uobjpage->flags & PG_BUSY) != 0) {
		uobjpage->flags &= ~PG_BUSY;
		UVM_PAGE_OWN(uobjpage, NULL);
	}

	/*
	 * if the page is no longer referenced by
	 * an anon (i.e. we are breaking an O->K
	 * loan), then remove it from any pageq's.
	 */

	uvm_pagelock2(uobjpage, pg);
	uvm_pagewakeup(uobjpage);
	if (uobjpage->uanon == NULL)
		uvm_pagedequeue(uobjpage);

	/*
	 * replace uobjpage with new page.
	 */

	uvm_pagereplace(uobjpage, pg);

	/*
	 * at this point we have absolutely no
	 * control over uobjpage
	 */

	uvm_pageactivate(pg);
	uvm_pageunlock2(uobjpage, pg);

	/*
	 * done!  loan is broken and "pg" is
	 * PG_BUSY.   it can now replace uobjpage.
	 */

	return pg;
}
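
/*
 * Illustrative sketch (assumptions noted): the caller-side pattern for
 * uvm_loanbreak() above, as a fault handler might use it before
 * permitting a write.  "uobjpage" is the loaned page the caller looked
 * up with the object locked; the wmesg string is arbitrary, and the
 * retry of the lookup is elided.
 *
 *	if (uobjpage->loan_count) {
 *		pg = uvm_loanbreak(uobjpage);
 *		if (pg == NULL) {
 *			rw_exit(uobj->vmobjlock);
 *			uvm_wait("loanbrk");
 *			(restart the fault/lookup)
 *		}
 *		(pg is busy and has replaced uobjpage in the object)
 *	}
 */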

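/*
 * uvm_loanbreak_anon: break the loan on an anon's page
 *
 * => called with the anon's lock held, and with uobj locked if the
 *	page is object-owned (i.e. the anon was the receiver of an
 *	O->A loan)
 * => on success the anon gets a fresh, unloaned copy of the page and
 *	the uobj's lock (if any) is dropped
 * => returns 0 on success, ENOMEM if a replacement page can't be
 *	allocated
 */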
int
uvm_loanbreak_anon(struct vm_anon *anon, struct uvm_object *uobj)
{
	struct vm_page *newpg, *oldpg;
	unsigned oldstatus;

	KASSERT(rw_write_held(anon->an_lock));
	KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
	KASSERT(anon->an_page->loan_count > 0);

	/* get new un-owned replacement page */
	newpg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (newpg == NULL) {
		return ENOMEM;
	}

	oldpg = anon->an_page;
	/* copy old -> new */
	uvm_pagecopy(oldpg, newpg);
	KASSERT(uvm_pagegetdirty(newpg) == UVM_PAGE_STATUS_DIRTY);

	/* force reload */
	pmap_page_protect(oldpg, VM_PROT_NONE);
	oldstatus = uvm_pagegetdirty(anon->an_page);

	uvm_pagelock2(oldpg, newpg);
	if (uobj == NULL) {
		/*
		 * we were the lender (A->K); need to remove the page from
		 * pageq's.
		 *
		 * PG_ANON is updated by the caller.
		 */
		KASSERT((oldpg->flags & PG_ANON) != 0);
		oldpg->flags &= ~PG_ANON;
		uvm_pagedequeue(oldpg);
	}
	oldpg->uanon = NULL;

	if (uobj) {
		/* if we were receiver of loan */
		KASSERT((oldpg->flags & PG_ANON) == 0);
		oldpg->loan_count--;
	}

	/* install new page in anon */
	anon->an_page = newpg;
	newpg->uanon = anon;
	newpg->flags |= PG_ANON;

	uvm_pageactivate(newpg);
	uvm_pageunlock2(oldpg, newpg);

	newpg->flags &= ~(PG_BUSY|PG_FAKE);
	UVM_PAGE_OWN(newpg, NULL);

	if (uobj) {
		rw_exit(uobj->vmobjlock);
	}

	/* done! */
	kpreempt_disable();
	if (uobj == NULL) {
		CPU_COUNT(CPU_COUNT_ANONUNKNOWN + oldstatus, -1);
	}
	CPU_COUNT(CPU_COUNT_ANONDIRTY, 1);
	kpreempt_enable();
	return 0;
}