1
0
mirror of https://git.FreeBSD.org/src.git synced 2024-12-19 10:53:58 +00:00

Import 4.4BSD-Lite2 onto the vendor branch, note that in the kernel, all

files are off the vendor branch, so this should not change anything.

A "U" marker generally means that the file was not changed in between
the 4.4Lite and Lite-2 releases, and does not need a merge.  "C" generally
means that there was a change.
This commit is contained in:
Peter Wemm 1996-03-11 19:54:35 +00:00
parent e1eec28a95
commit 8169788f40
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/vendor/CSRG/dist/; revision=14564
22 changed files with 11676 additions and 0 deletions

531
sys/vm/kern_lock.c Normal file
View File

@ -0,0 +1,531 @@
/*
* Copyright (c) 1995
* The Regents of the University of California. All rights reserved.
*
* This code contains ideas from software contributed to Berkeley by
* Avadis Tevanian, Jr., Michael Wayne Young, and the Mach Operating
* System project at Carnegie-Mellon University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)kern_lock.c 8.18 (Berkeley) 5/21/95
*/
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <machine/cpu.h>
/*
* Locking primitives implementation.
* Locks provide shared/exclusive synchronization.
*/
/*
 * COUNT(p, x): under DEBUG, add x to the p_locks field of process p;
 * a null p is ignored.  Without DEBUG the macro expands to nothing.
 *
 * The DEBUG form is wrapped in do { } while (0) so that it is a single
 * statement.  A bare "if (p) ..." would capture the "else" of any
 * enclosing if/else in which the macro is used.
 */
#ifdef DEBUG
#define COUNT(p, x)	do { if (p) (p)->p_locks += (x); } while (0)
#else
#define COUNT(p, x)
#endif
#if NCPUS > 1
/*
 * For multiprocessor system, try spin lock first.
 *
 * This should be inline expanded below, but we cannot have #if
 * inside a multiline define.
 *
 * PAUSE(lkp, wanted) expects lkp->lk_interlock to be held.  If
 * lock_wait_time is positive it drops the interlock, re-evaluates
 * `wanted' up to lock_wait_time times (stopping early once it is false),
 * and takes the interlock again.  It never sleeps and never alters the
 * caller's control flow: the caller re-tests `wanted' itself afterwards.
 *
 * The macro is a single do { } while (0) statement.  It must not end in
 * a bare `break': every user expands it inside the switch in lockmgr(),
 * where such a break would leave the switch and report success without
 * the lock having been obtained.
 */
int lock_wait_time = 100;
#define PAUSE(lkp, wanted)						\
	do {								\
		if (lock_wait_time > 0) {				\
			int i;						\
									\
			simple_unlock(&(lkp)->lk_interlock);		\
			for (i = lock_wait_time; i > 0; i--)		\
				if (!(wanted))				\
					break;				\
			simple_lock(&(lkp)->lk_interlock);		\
		}							\
	} while (0)
#else /* NCPUS == 1 */
/*
 * It is an error to spin on a uniprocessor as nothing will ever cause
 * the simple lock to clear while we are executing.
 */
#define PAUSE(lkp, wanted)
#endif /* NCPUS == 1 */
/*
 * Acquire a resource.
 *
 * ACQUIRE(lkp, error, extflags, wanted) expects lkp->lk_interlock to be
 * held.  After the optional PAUSE spin it sleeps on lkp for as long as
 * `wanted' evaluates true, dropping the interlock across each tsleep()
 * and counting itself in lk_waitcount while asleep.  On exit `error' is:
 *	0		`wanted' became false;
 *	tsleep() error	the sleep itself failed;
 *	ENOLCK		LK_SLEEPFAIL is set in extflags and a sleep was done.
 *
 * The body is wrapped in do { } while (0) so that the macro is a single
 * statement and so that a `break' coming out of PAUSE terminates the
 * macro rather than the switch statement of the caller.  `error' is
 * cleared before PAUSE so that it is defined on that early exit too.
 */
#define ACQUIRE(lkp, error, extflags, wanted)				\
	do {								\
		(error) = 0;						\
		PAUSE(lkp, wanted);					\
		while (wanted) {					\
			(lkp)->lk_waitcount++;				\
			simple_unlock(&(lkp)->lk_interlock);		\
			(error) = tsleep((void *)(lkp), (lkp)->lk_prio,	\
			    (lkp)->lk_wmesg, (lkp)->lk_timo);		\
			simple_lock(&(lkp)->lk_interlock);		\
			(lkp)->lk_waitcount--;				\
			if (error)					\
				break;					\
			if ((extflags) & LK_SLEEPFAIL) {		\
				(error) = ENOLCK;			\
				break;					\
			}						\
		}							\
	} while (0)
/*
 * Prepare a lock for first use; must be called before any lockmgr()
 * request on it.
 *
 * The whole structure is zeroed and its interlock initialized.  The sleep
 * priority, wait message and timeout are stored for the tsleep() calls
 * made while waiting for the lock.  Only the bits of `flags' that fall
 * inside LK_EXTFLG_MASK are kept, and the lock starts out with no
 * exclusive holder (LK_NOPROC).
 */
void
lockinit(lkp, prio, wmesg, timo, flags)
	struct lock *lkp;
	int prio;
	char *wmesg;
	int timo;
	int flags;
{

	bzero(lkp, sizeof(*lkp));
	simple_lock_init(&lkp->lk_interlock);
	lkp->lk_lockholder = LK_NOPROC;
	lkp->lk_wmesg = wmesg;
	lkp->lk_timo = timo;
	lkp->lk_prio = prio;
	lkp->lk_flags = flags & LK_EXTFLG_MASK;
}
/*
 * Report how a lock is currently held.
 *
 * Returns LK_EXCLUSIVE if the exclusive count is non-zero, otherwise
 * LK_SHARED if the shared count is non-zero, otherwise 0.  The counts
 * are sampled under the lock's interlock.
 */
int
lockstatus(lkp)
	struct lock *lkp;
{
	int held;

	simple_lock(&lkp->lk_interlock);
	if (lkp->lk_exclusivecount != 0)
		held = LK_EXCLUSIVE;
	else
		held = (lkp->lk_sharecount != 0) ? LK_SHARED : 0;
	simple_unlock(&lkp->lk_interlock);
	return (held);
}
/*
 * Set, change, or release a lock.
 *
 * Shared requests increment the shared count. Exclusive requests set the
 * LK_WANT_EXCL flag (preventing further shared locks), and wait for already
 * accepted shared locks and shared-to-exclusive upgrades to go away.
 *
 * Arguments:
 *	lkp	 the lock to operate on.
 *	flags	 one request type (LK_TYPE_MASK), optionally or'ed with
 *		 external flags (merged with those stored in the lock),
 *		 LK_REENABLE and LK_INTERLOCK.
 *	interlkp simple lock that is released once lk_interlock has been
 *		 taken; only used when LK_INTERLOCK is set.
 *	p	 requesting process; a null p is accounted as LK_KERNPROC.
 *
 * Returns 0 on success.  Otherwise returns EBUSY (LK_NOWAIT and the
 * request would sleep, or LK_EXCLUPGRADE while another upgrade is
 * pending), ENOLCK (LK_SLEEPFAIL and a sleep was done), or the error
 * returned by tsleep().  A failed upgrade has given up its shared lock.
 */
int
lockmgr(lkp, flags, interlkp, p)
	__volatile struct lock *lkp;
	u_int flags;
	struct simplelock *interlkp;
	struct proc *p;
{
	int error;
	pid_t pid;
	int extflags;

	error = 0;
	if (p)
		pid = p->p_pid;
	else
		pid = LK_KERNPROC;
	simple_lock(&lkp->lk_interlock);
	if (flags & LK_INTERLOCK)
		simple_unlock(interlkp);
	extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;
#ifdef DIAGNOSTIC
	/*
	 * Once a lock has drained, the LK_DRAINING flag is set and an
	 * exclusive lock is returned. The only valid operation thereafter
	 * is a single release of that exclusive lock. This final release
	 * clears the LK_DRAINING flag and sets the LK_DRAINED flag. Any
	 * further requests of any sort will result in a panic. The bits
	 * selected for these two flags are chosen so that they will be set
	 * in memory that is freed (freed memory is filled with 0xdeadbeef).
	 * The final release is permitted to give a new lease on life to
	 * the lock by specifying LK_REENABLE.
	 */
	if (lkp->lk_flags & (LK_DRAINING|LK_DRAINED)) {
		if (lkp->lk_flags & LK_DRAINED)
			panic("lockmgr: using decommissioned lock");
		if ((flags & LK_TYPE_MASK) != LK_RELEASE ||
		    lkp->lk_lockholder != pid)
			panic("lockmgr: non-release on draining lock: %d\n",
			    flags & LK_TYPE_MASK);
		lkp->lk_flags &= ~LK_DRAINING;
		if ((flags & LK_REENABLE) == 0)
			lkp->lk_flags |= LK_DRAINED;
	}
#endif /* DIAGNOSTIC */

	switch (flags & LK_TYPE_MASK) {

	case LK_SHARED:
		if (lkp->lk_lockholder != pid) {
			/*
			 * If just polling, check to see if we will block.
			 */
			if ((extflags & LK_NOWAIT) && (lkp->lk_flags &
			    (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE))) {
				error = EBUSY;
				break;
			}
			/*
			 * Wait for exclusive locks and upgrades to clear.
			 */
			ACQUIRE(lkp, error, extflags, lkp->lk_flags &
			    (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE));
			if (error)
				break;
			lkp->lk_sharecount++;
			COUNT(p, 1);
			break;
		}
		/*
		 * We hold an exclusive lock, so downgrade it to shared.
		 * An alternative would be to fail with EDEADLK.
		 */
		lkp->lk_sharecount++;
		COUNT(p, 1);
		/* fall into downgrade */

	case LK_DOWNGRADE:
		if (lkp->lk_lockholder != pid || lkp->lk_exclusivecount == 0)
			panic("lockmgr: not holding exclusive lock");
		lkp->lk_sharecount += lkp->lk_exclusivecount;
		lkp->lk_exclusivecount = 0;
		lkp->lk_flags &= ~LK_HAVE_EXCL;
		lkp->lk_lockholder = LK_NOPROC;
		if (lkp->lk_waitcount)
			wakeup((void *)lkp);
		break;

	case LK_EXCLUPGRADE:
		/*
		 * If another process is ahead of us to get an upgrade,
		 * then we want to fail rather than have an intervening
		 * exclusive access.
		 */
		if (lkp->lk_flags & LK_WANT_UPGRADE) {
			lkp->lk_sharecount--;
			COUNT(p, -1);
			/*
			 * The pending upgrade sleeps until the shared count
			 * reaches zero.  If ours was the last shared lock,
			 * wake it, as nobody else will.
			 */
			if (lkp->lk_sharecount == 0 && lkp->lk_waitcount)
				wakeup((void *)lkp);
			error = EBUSY;
			break;
		}
		/* fall into normal upgrade */

	case LK_UPGRADE:
		/*
		 * Upgrade a shared lock to an exclusive one. If another
		 * shared lock has already requested an upgrade to an
		 * exclusive lock, our shared lock is released and an
		 * exclusive lock is requested (which will be granted
		 * after the upgrade). If we return an error, the file
		 * will always be unlocked.
		 */
		if (lkp->lk_lockholder == pid || lkp->lk_sharecount <= 0)
			panic("lockmgr: upgrade exclusive lock");
		lkp->lk_sharecount--;
		COUNT(p, -1);
		/*
		 * If we are just polling, check to see if we will block.
		 * Our own shared lock has already been dropped above, so
		 * any remaining shared lock (count > 0) would make us sleep.
		 */
		if ((extflags & LK_NOWAIT) &&
		    ((lkp->lk_flags & LK_WANT_UPGRADE) ||
		    lkp->lk_sharecount > 0)) {
			/*
			 * If we held the last shared lock, a pending
			 * upgrade is waiting for exactly this; wake it.
			 */
			if (lkp->lk_sharecount == 0 && lkp->lk_waitcount)
				wakeup((void *)lkp);
			error = EBUSY;
			break;
		}
		if ((lkp->lk_flags & LK_WANT_UPGRADE) == 0) {
			/*
			 * We are first shared lock to request an upgrade, so
			 * request upgrade and wait for the shared count to
			 * drop to zero, then take exclusive lock.
			 */
			lkp->lk_flags |= LK_WANT_UPGRADE;
			ACQUIRE(lkp, error, extflags, lkp->lk_sharecount);
			lkp->lk_flags &= ~LK_WANT_UPGRADE;
			if (error)
				break;
			lkp->lk_flags |= LK_HAVE_EXCL;
			lkp->lk_lockholder = pid;
			if (lkp->lk_exclusivecount != 0)
				panic("lockmgr: non-zero exclusive count");
			lkp->lk_exclusivecount = 1;
			COUNT(p, 1);
			break;
		}
		/*
		 * Someone else has requested upgrade. Release our shared
		 * lock, awaken upgrade requestor if we are the last shared
		 * lock, then request an exclusive lock.
		 */
		if (lkp->lk_sharecount == 0 && lkp->lk_waitcount)
			wakeup((void *)lkp);
		/* fall into exclusive request */

	case LK_EXCLUSIVE:
		if (lkp->lk_lockholder == pid && pid != LK_KERNPROC) {
			/*
			 * Recursive lock.
			 */
			if ((extflags & LK_CANRECURSE) == 0)
				panic("lockmgr: locking against myself");
			lkp->lk_exclusivecount++;
			COUNT(p, 1);
			break;
		}
		/*
		 * If we are just polling, check to see if we will sleep.
		 */
		if ((extflags & LK_NOWAIT) && ((lkp->lk_flags &
		    (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) ||
		    lkp->lk_sharecount != 0)) {
			error = EBUSY;
			break;
		}
		/*
		 * Try to acquire the want_exclusive flag.
		 */
		ACQUIRE(lkp, error, extflags, lkp->lk_flags &
		    (LK_HAVE_EXCL | LK_WANT_EXCL));
		if (error)
			break;
		lkp->lk_flags |= LK_WANT_EXCL;
		/*
		 * Wait for shared locks and upgrades to finish.
		 */
		ACQUIRE(lkp, error, extflags, lkp->lk_sharecount != 0 ||
		    (lkp->lk_flags & LK_WANT_UPGRADE));
		lkp->lk_flags &= ~LK_WANT_EXCL;
		if (error)
			break;
		lkp->lk_flags |= LK_HAVE_EXCL;
		lkp->lk_lockholder = pid;
		if (lkp->lk_exclusivecount != 0)
			panic("lockmgr: non-zero exclusive count");
		lkp->lk_exclusivecount = 1;
		COUNT(p, 1);
		break;

	case LK_RELEASE:
		if (lkp->lk_exclusivecount != 0) {
			if (pid != lkp->lk_lockholder)
				panic("lockmgr: pid %d, not %s %d unlocking",
				    pid, "exclusive lock holder",
				    lkp->lk_lockholder);
			lkp->lk_exclusivecount--;
			COUNT(p, -1);
			if (lkp->lk_exclusivecount == 0) {
				lkp->lk_flags &= ~LK_HAVE_EXCL;
				lkp->lk_lockholder = LK_NOPROC;
			}
		} else if (lkp->lk_sharecount != 0) {
			lkp->lk_sharecount--;
			COUNT(p, -1);
		}
		if (lkp->lk_waitcount)
			wakeup((void *)lkp);
		break;

	case LK_DRAIN:
		/*
		 * Check that we do not already hold the lock, as it can
		 * never drain if we do. Unfortunately, we have no way to
		 * check for holding a shared lock, but at least we can
		 * check for an exclusive one.
		 */
		if (lkp->lk_lockholder == pid)
			panic("lockmgr: draining against myself");
		/*
		 * If we are just polling, check to see if we will sleep.
		 */
		if ((extflags & LK_NOWAIT) && ((lkp->lk_flags &
		    (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) ||
		    lkp->lk_sharecount != 0 || lkp->lk_waitcount != 0)) {
			error = EBUSY;
			break;
		}
		PAUSE(lkp, ((lkp->lk_flags &
		    (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) ||
		    lkp->lk_sharecount != 0 || lkp->lk_waitcount != 0));
		/*
		 * Sleep on &lk_flags (not on lkp, which ordinary waiters
		 * use) until the lock is completely idle.  The early
		 * returns below happen with the interlock already released.
		 */
		for (error = 0; ((lkp->lk_flags &
		    (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) ||
		    lkp->lk_sharecount != 0 || lkp->lk_waitcount != 0); ) {
			lkp->lk_flags |= LK_WAITDRAIN;
			simple_unlock(&lkp->lk_interlock);
			error = tsleep((void *)&lkp->lk_flags, lkp->lk_prio,
			    lkp->lk_wmesg, lkp->lk_timo);
			if (error != 0)
				return (error);
			if (extflags & LK_SLEEPFAIL)
				return (ENOLCK);
			simple_lock(&lkp->lk_interlock);
		}
		lkp->lk_flags |= LK_DRAINING | LK_HAVE_EXCL;
		lkp->lk_lockholder = pid;
		lkp->lk_exclusivecount = 1;
		COUNT(p, 1);
		break;

	default:
		simple_unlock(&lkp->lk_interlock);
		panic("lockmgr: unknown locktype request %d",
		    flags & LK_TYPE_MASK);
		/* NOTREACHED */
	}
	/*
	 * If a drain request is sleeping and the lock has just become
	 * completely idle, let it proceed.
	 */
	if ((lkp->lk_flags & LK_WAITDRAIN) && ((lkp->lk_flags &
	    (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) == 0 &&
	    lkp->lk_sharecount == 0 && lkp->lk_waitcount == 0)) {
		lkp->lk_flags &= ~LK_WAITDRAIN;
		wakeup((void *)&lkp->lk_flags);
	}
	simple_unlock(&lkp->lk_interlock);
	return (error);
}
/*
 * Print out information about state of a lock. Used by VOP_PRINT
 * routines to display status about contained locks.
 *
 * Prints the shared count if any shared locks are held, otherwise the
 * exclusive count and holder pid if LK_HAVE_EXCL is set, followed by the
 * number of waiters when that is positive.  No trailing newline is
 * printed.  The fields are read without taking lk_interlock.
 *
 * The function produces no value, so it is declared void instead of
 * relying on an implicit int return type.
 */
void
lockmgr_printinfo(lkp)
	struct lock *lkp;
{

	if (lkp->lk_sharecount)
		printf(" lock type %s: SHARED (count %d)", lkp->lk_wmesg,
		    lkp->lk_sharecount);
	else if (lkp->lk_flags & LK_HAVE_EXCL)
		printf(" lock type %s: EXCL (count %d) by pid %d",
		    lkp->lk_wmesg, lkp->lk_exclusivecount, lkp->lk_lockholder);
	if (lkp->lk_waitcount > 0)
		printf(" with %d pending", lkp->lk_waitcount);
}
#if defined(DEBUG) && NCPUS == 1
#include <sys/kernel.h>
#include <vm/vm.h>
#include <sys/sysctl.h>
/*
 * Controls what the debugging simple lock routines below do when they
 * detect a violation (locking a held lock, unlocking a free one):
 *	-1	panic;
 *	 0	print a message only;
 *	 1	print a message, then BACKTRACE(curproc);
 *	>1	print a message, then tsleep() with a timeout of
 *		lockpausetime * hz.
 */
int lockpausetime = 0;
/* NOTE(review): presumably makes lockpausetime settable through the debug sysctl table -- confirm. */
struct ctldebug debug2 = { "lockpausetime", &lockpausetime };
/*
 * When non-zero, _simple_lock() and _simple_unlock() return immediately
 * without checking or changing the lock, and _simple_lock_try() reports
 * success on a free lock without marking it held.
 */
int simplelockrecurse;
/*
 * Simple lock functions so that the debugger can see from whence
 * they are being called.
 */

/*
 * Initialize a simple lock to the unlocked state (lock_data == 0).
 */
void
simple_lock_init(alp)
struct simplelock *alp;
{
alp->lock_data = 0;
}
/*
 * Debugging version of simple_lock().
 *
 * `id' and `l' identify the caller (the simple_lock() wrapper passes
 * __FILE__ and __LINE__) and are used only in diagnostics.  Nothing is
 * done while simplelockrecurse is set.  Finding the lock already held is
 * reported according to lockpausetime; unless that panics, the lock is
 * then marked held anyway and the current process, if any, has its
 * p_simple_locks count incremented.
 */
void
_simple_lock(alp, id, l)
	__volatile struct simplelock *alp;
	const char *id;
	int l;
{

	if (!simplelockrecurse) {
		if (alp->lock_data == 1) {
			if (lockpausetime == -1)
				panic("%s:%d: simple_lock: lock held", id, l);
			printf("%s:%d: simple_lock: lock held\n", id, l);
			if (lockpausetime > 1) {
				printf("%s:%d: simple_lock: lock held...",
				    id, l);
				tsleep(&lockpausetime, PCATCH | PPAUSE,
				    "slock", lockpausetime * hz);
				printf(" continuing\n");
			} else if (lockpausetime == 1) {
				BACKTRACE(curproc);
			}
		}
		alp->lock_data = 1;
		if (curproc)
			curproc->p_simple_locks++;
	}
}
/*
 * Debugging version of simple_lock_try().
 *
 * Returns 0 without side effects if the lock is already held, 1
 * otherwise.  On success the lock is marked held and the current
 * process, if any, has its p_simple_locks count incremented -- unless
 * simplelockrecurse is set, in which case success is reported without
 * touching anything.  `id' and `l' are accepted for symmetry with the
 * other routines and are not used.
 */
int
_simple_lock_try(alp, id, l)
	__volatile struct simplelock *alp;
	const char *id;
	int l;
{

	if (alp->lock_data)
		return (0);
	if (!simplelockrecurse) {
		alp->lock_data = 1;
		if (curproc)
			curproc->p_simple_locks++;
	}
	return (1);
}
/*
 * Debugging version of simple_unlock().
 *
 * `id' and `l' identify the caller (the simple_unlock() wrapper passes
 * __FILE__ and __LINE__) and are used only in diagnostics.  Nothing is
 * done while simplelockrecurse is set.  Releasing a lock that is not
 * held is reported according to lockpausetime; unless that panics, the
 * lock is then marked free and the current process, if any, has its
 * p_simple_locks count decremented.
 */
void
_simple_unlock(alp, id, l)
	__volatile struct simplelock *alp;
	const char *id;
	int l;
{

	if (!simplelockrecurse) {
		if (alp->lock_data == 0) {
			if (lockpausetime == -1)
				panic("%s:%d: simple_unlock: lock not held",
				    id, l);
			printf("%s:%d: simple_unlock: lock not held\n", id, l);
			if (lockpausetime > 1) {
				printf("%s:%d: simple_unlock: lock not held...",
				    id, l);
				tsleep(&lockpausetime, PCATCH | PPAUSE,
				    "sunlock", lockpausetime * hz);
				printf(" continuing\n");
			} else if (lockpausetime == 1) {
				BACKTRACE(curproc);
			}
		}
		alp->lock_data = 0;
		if (curproc)
			curproc->p_simple_locks--;
	}
}
#endif /* DEBUG && NCPUS == 1 */

180
sys/vm/lock.h Normal file
View File

@ -0,0 +1,180 @@
/*
* Copyright (c) 1995
* The Regents of the University of California. All rights reserved.
*
* This code contains ideas from software contributed to Berkeley by
* Avadis Tevanian, Jr., Michael Wayne Young, and the Mach Operating
* System project at Carnegie-Mellon University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)lock.h 8.12 (Berkeley) 5/19/95
*/
#ifndef _LOCK_H_
#define _LOCK_H_
/*
 * The general lock structure. Provides for multiple shared locks,
 * upgrading from shared to exclusive, and sleeping until the lock
 * can be gained. The simple locks are defined in <machine/param.h>.
 *
 * A lock must be set up with lockinit() before use; all other fields
 * are protected by lk_interlock.
 */
struct lock {
struct simplelock lk_interlock; /* lock on remaining fields */
u_int lk_flags; /* external mode flags and internal state bits, see below */
int lk_sharecount; /* # of accepted shared locks */
int lk_waitcount; /* # of processes sleeping for lock */
short lk_exclusivecount; /* # of recursive exclusive locks */
short lk_prio; /* priority at which to sleep (passed to tsleep) */
char *lk_wmesg; /* wait message naming the resource (passed to tsleep) */
int lk_timo; /* maximum sleep time (passed to tsleep) */
pid_t lk_lockholder; /* pid of exclusive lock holder, LK_NOPROC if none */
};
/*
 * Lock request types:
 * LK_SHARED - get one of many possible shared locks. If a process
 * holding an exclusive lock requests a shared lock, the exclusive
 * lock(s) will be downgraded to shared locks.
 * LK_EXCLUSIVE - stop further shared locks, when they are cleared,
 * grant a pending upgrade if it exists, then grant an exclusive
 * lock. Only one exclusive lock may exist at a time, except that
 * a process holding an exclusive lock may get additional exclusive
 * locks if it explicitly sets the LK_CANRECURSE flag in the lock
 * request, or if the LK_CANRECURSE flag was set when the lock was
 * initialized.
 * LK_UPGRADE - the process must hold a shared lock that it wants to
 * have upgraded to an exclusive lock. Other processes may get
 * exclusive access to the resource between the time that the upgrade
 * is requested and the time that it is granted.
 * LK_EXCLUPGRADE - the process must hold a shared lock that it wants to
 * have upgraded to an exclusive lock. If the request succeeds, no
 * other processes will have gotten exclusive access to the resource
 * between the time that the upgrade is requested and the time that
 * it is granted. However, if another process has already requested
 * an upgrade, the request will fail (see error returns below).
 * LK_DOWNGRADE - the process must hold an exclusive lock that it wants
 * to have downgraded to a shared lock. If the process holds multiple
 * (recursive) exclusive locks, they will all be downgraded to shared
 * locks.
 * LK_RELEASE - release one instance of a lock.
 * LK_DRAIN - wait for all activity on the lock to end, then mark it
 * decommissioned. This feature is used before freeing a lock that
 * is part of a piece of memory that is about to be freed.
 *
 * These are flags that are passed to the lockmgr routine. The request
 * type is a small integer held in the low four bits (LK_TYPE_MASK), not
 * a bit mask: exactly one type is given per request.
 */
#define LK_TYPE_MASK 0x0000000f /* type of lock sought */
#define LK_SHARED 0x00000001 /* shared lock */
#define LK_EXCLUSIVE 0x00000002 /* exclusive lock */
#define LK_UPGRADE 0x00000003 /* shared-to-exclusive upgrade */
#define LK_EXCLUPGRADE 0x00000004 /* first shared-to-exclusive upgrade */
#define LK_DOWNGRADE 0x00000005 /* exclusive-to-shared downgrade */
#define LK_RELEASE 0x00000006 /* release any type of lock */
#define LK_DRAIN 0x00000007 /* wait for all lock activity to end */
/*
 * External lock flags.
 *
 * The first three flags may be set in lockinit() to set their mode
 * permanently, or passed in as arguments to the lock manager. The
 * LK_REENABLE flag may be set only at the release of a lock obtained by
 * drain; it lies outside LK_EXTFLG_MASK, so lockinit() never stores it
 * and lockmgr() looks for it in the request flags only.
 */
#define LK_EXTFLG_MASK 0x00000070 /* mask of external flags */
#define LK_NOWAIT 0x00000010 /* do not sleep to await lock */
#define LK_SLEEPFAIL 0x00000020 /* sleep, then return failure */
#define LK_CANRECURSE 0x00000040 /* allow recursive exclusive lock */
#define LK_REENABLE 0x00000080 /* lock is to be reenabled after drain */
/*
 * Internal lock flags.
 *
 * These flags are used internally to the lock manager; they live in
 * lk_flags alongside the external flags and are never passed by callers.
 *
 * NOTE(review): the DIAGNOSTIC comment in lockmgr() says the bits for
 * LK_DRAINING and LK_DRAINED were chosen to be set in the 0xdeadbeef
 * fill pattern of freed memory. 0x8000 is set in 0xbeef, but 0x4000 is
 * not -- confirm which is intended.
 */
#define LK_WANT_UPGRADE 0x00000100 /* waiting for share-to-excl upgrade */
#define LK_WANT_EXCL 0x00000200 /* exclusive lock sought */
#define LK_HAVE_EXCL 0x00000400 /* exclusive lock obtained */
#define LK_WAITDRAIN 0x00000800 /* process waiting for lock to drain */
#define LK_DRAINING 0x00004000 /* lock is being drained */
#define LK_DRAINED 0x00008000 /* lock has been decommissioned */
/*
 * Control flags
 *
 * Non-persistent external flags: they apply to a single request and are
 * not covered by LK_EXTFLG_MASK, so they are never stored in the lock.
 */
#define LK_INTERLOCK 0x00010000 /* unlock passed simple lock after
getting lk_interlock */
#define LK_RETRY 0x00020000 /* vn_lock: retry until locked */
/*
* Lock return status.
*
* Successfully obtained locks return 0. Locks will always succeed
* unless one of the following is true:
* LK_EXCLUPGRADE is requested and some other process has already
* requested a lock upgrade (returns EBUSY).
* LK_NOWAIT is set and a sleep would be required (returns EBUSY).
* LK_SLEEPFAIL is set and a sleep was done (returns ENOLCK).
* PCATCH is set in lock priority and a signal arrives (returns
* either EINTR or ERESTART if the system call is to be restarted).
* Non-null lock timeout and timeout expires (returns EWOULDBLOCK).
* A failed lock attempt always returns a non-zero error value. No lock
* is held after an error return (in particular, a failed LK_UPGRADE
* or LK_EXCLUPGRADE will have released its shared access lock).
*/
/*
 * Special values for lk_lockholder and for the pid lockmgr() works with:
 * LK_NOPROC indicates that no process holds the exclusive lock;
 * LK_KERNPROC is the pid used when lockmgr() is called with a null
 * process pointer.
 */
#define LK_KERNPROC ((pid_t) -2)
#define LK_NOPROC ((pid_t) -1)
struct proc;
/* Initialize a lock; required before any other use. */
void lockinit __P((struct lock *, int prio, char *wmesg, int timo,
int flags));
/* Set, change, or release a lock; returns 0 or an errno value. */
int lockmgr __P((__volatile struct lock *, u_int flags,
struct simplelock *, struct proc *p));
/* Returns LK_EXCLUSIVE, LK_SHARED, or 0 if the lock is not held. */
int lockstatus __P((struct lock *));
/*
 * Simple lock entry points.
 *
 * With DEBUG, every operation is routed to a checking function that also
 * receives the caller's __FILE__ and __LINE__ for its diagnostics.
 * Without DEBUG on a uniprocessor (NCPUS == 1), the operations compile
 * away and simple_lock_try() always succeeds.
 * NOTE(review): nothing is defined here for !DEBUG with NCPUS > 1;
 * presumably the machine-dependent headers supply the real locks --
 * confirm.
 */
#ifdef DEBUG
void _simple_unlock __P((__volatile struct simplelock *alp, const char *, int));
#define simple_unlock(alp) _simple_unlock(alp, __FILE__, __LINE__)
int _simple_lock_try __P((__volatile struct simplelock *alp, const char *, int));
#define simple_lock_try(alp) _simple_lock_try(alp, __FILE__, __LINE__)
void _simple_lock __P((__volatile struct simplelock *alp, const char *, int));
#define simple_lock(alp) _simple_lock(alp, __FILE__, __LINE__)
void simple_lock_init __P((struct simplelock *alp));
#else /* !DEBUG */
#if NCPUS == 1 /* no multiprocessor locking is necessary */
#define simple_lock_init(alp)
#define simple_lock(alp)
#define simple_lock_try(alp) (1) /* always succeeds */
#define simple_unlock(alp)
#endif /* NCPUS == 1 */
#endif /* !DEBUG */
#endif /* !_LOCK_H_ */

99
sys/vm/vm.h Normal file
View File

@ -0,0 +1,99 @@
/*
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vm.h 8.5 (Berkeley) 5/11/95
*/
#ifndef VM_H
#define VM_H
/*
 * Forward declarations and handle types for the VM data structures.
 * Except for vm_inherit_t (an int) and vm_map_object_t (the union
 * itself), each *_t name below is a pointer to the structure named
 * beside it.
 */
typedef int vm_inherit_t; /* XXX: inheritance codes */
union vm_map_object;
typedef union vm_map_object vm_map_object_t;
struct vm_map_entry;
typedef struct vm_map_entry *vm_map_entry_t;
struct vm_map;
typedef struct vm_map *vm_map_t;
struct vm_object;
typedef struct vm_object *vm_object_t;
struct vm_page;
typedef struct vm_page *vm_page_t;
struct pager_struct;
typedef struct pager_struct *vm_pager_t;
/*
 * MACH VM locking type mappings to kernel types: the *_data_t names are
 * the lock structures themselves, the *_t names are pointers to them.
 */
typedef struct simplelock simple_lock_data_t;
typedef struct simplelock *simple_lock_t;
typedef struct lock lock_data_t;
typedef struct lock *lock_t;
#include <sys/vmmeter.h>
#include <sys/queue.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_prot.h>
#include <vm/vm_inherit.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
/*
 * Shareable process virtual address space.
 * May eventually be merged with vm_map.
 * Several fields are temporary (text, data stuff).
 *
 * The structure is laid out in two parts: the fields before
 * vm_startcopy, and the fields from vm_startcopy (an alias for
 * vm_rssize) to the end, which are copied as one unit on fork.
 */
struct vmspace {
struct vm_map vm_map; /* VM address map */
struct pmap vm_pmap; /* private physical map */
int vm_refcnt; /* number of references */
caddr_t vm_shm; /* SYS5 shared memory private data XXX */
/* we copy from vm_startcopy to the end of the structure on fork */
#define vm_startcopy vm_rssize
segsz_t vm_rssize; /* current resident set size in pages */
segsz_t vm_swrss; /* resident set size before last swap */
segsz_t vm_tsize; /* text size (pages) XXX */
segsz_t vm_dsize; /* data size (pages) XXX */
segsz_t vm_ssize; /* stack size (pages) */
caddr_t vm_taddr; /* user virtual address of text XXX */
caddr_t vm_daddr; /* user virtual address of data XXX */
caddr_t vm_maxsaddr; /* user VA at max stack growth */
};
#endif /* VM_H */

125
sys/vm/vm_extern.h Normal file
View File

@ -0,0 +1,125 @@
/*-
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vm_extern.h 8.5 (Berkeley) 5/3/95
*/
/*
 * Forward declarations of the structures that appear only as pointers
 * in the prototypes below, so this header does not need their full
 * definitions.
 */
struct buf;
struct loadavg;
struct proc;
struct vmspace;
struct vmtotal;
struct mount;
struct vnode;
/* Declared only for kernels built with KGDB. */
#ifdef KGDB
void chgkprot __P((caddr_t, int, int));
#endif
/*
 * Prototypes of the VM routines; visible only when KERNEL is defined.
 */
#ifdef KERNEL
/*
 * Declared only when TYPEDEF_FOR_UAP is defined. All of these share the
 * (struct proc *, void *, int *) signature.
 */
#ifdef TYPEDEF_FOR_UAP
int compat_43_getpagesize __P((struct proc *p, void *, int *));
int madvise __P((struct proc *, void *, int *));
int mincore __P((struct proc *, void *, int *));
int mprotect __P((struct proc *, void *, int *));
int msync __P((struct proc *, void *, int *));
int munmap __P((struct proc *, void *, int *));
int obreak __P((struct proc *, void *, int *));
int sbrk __P((struct proc *, void *, int *));
int smmap __P((struct proc *, void *, int *));
int sstk __P((struct proc *, void *, int *));
#endif
void assert_wait __P((void *, boolean_t));
int grow __P((struct proc *, vm_offset_t));
void iprintf __P((const char *, ...));
int kernacc __P((caddr_t, int, int));
/* kmem_*: allocation routines taking a vm_map_t and a size. */
vm_offset_t kmem_alloc __P((vm_map_t, vm_size_t));
vm_offset_t kmem_alloc_pageable __P((vm_map_t, vm_size_t));
vm_offset_t kmem_alloc_wait __P((vm_map_t, vm_size_t));
void kmem_free __P((vm_map_t, vm_offset_t, vm_size_t));
void kmem_free_wakeup __P((vm_map_t, vm_offset_t, vm_size_t));
void kmem_init __P((vm_offset_t, vm_offset_t));
vm_offset_t kmem_malloc __P((vm_map_t, vm_size_t, boolean_t));
vm_map_t kmem_suballoc __P((vm_map_t, vm_offset_t *, vm_offset_t *,
vm_size_t, boolean_t));
void loadav __P((struct loadavg *));
void munmapfd __P((struct proc *, int));
int pager_cache __P((vm_object_t, boolean_t));
void sched __P((void));
int svm_allocate __P((struct proc *, void *, int *));
int svm_deallocate __P((struct proc *, void *, int *));
int svm_inherit __P((struct proc *, void *, int *));
int svm_protect __P((struct proc *, void *, int *));
void swapinit __P((void));
int swapon __P((struct proc *, void *, int *));
void swapout __P((struct proc *));
void swapout_threads __P((void));
int swfree __P((struct proc *, int));
void swstrategy __P((struct buf *));
void thread_block __P((void));
void thread_sleep __P((void *, simple_lock_t, boolean_t));
void thread_wakeup __P((void *));
int useracc __P((caddr_t, int, int));
/* vm_*: operations on a vm_map_t address map. */
int vm_allocate __P((vm_map_t,
vm_offset_t *, vm_size_t, boolean_t));
int vm_allocate_with_pager __P((vm_map_t, vm_offset_t *,
vm_size_t, boolean_t, vm_pager_t, vm_offset_t, boolean_t));
int vm_deallocate __P((vm_map_t, vm_offset_t, vm_size_t));
int vm_fault __P((vm_map_t, vm_offset_t, vm_prot_t, boolean_t));
void vm_fault_copy_entry __P((vm_map_t,
vm_map_t, vm_map_entry_t, vm_map_entry_t));
void vm_fault_unwire __P((vm_map_t, vm_offset_t, vm_offset_t));
int vm_fault_wire __P((vm_map_t, vm_offset_t, vm_offset_t));
int vm_fork __P((struct proc *, struct proc *, int));
int vm_inherit __P((vm_map_t,
vm_offset_t, vm_size_t, vm_inherit_t));
void vm_init_limits __P((struct proc *));
void vm_mem_init __P((void));
int vm_mmap __P((vm_map_t, vm_offset_t *, vm_size_t,
vm_prot_t, vm_prot_t, int, caddr_t, vm_offset_t));
int vm_protect __P((vm_map_t,
vm_offset_t, vm_size_t, boolean_t, vm_prot_t));
void vm_set_page_size __P((void));
int vm_sysctl __P((int *, u_int, void *, size_t *, void *,
size_t, struct proc *));
void vmmeter __P((void));
/* vmspace_*: creation, duplication and release of struct vmspace. */
struct vmspace *vmspace_alloc __P((vm_offset_t, vm_offset_t, int));
struct vmspace *vmspace_fork __P((struct vmspace *));
void vmspace_free __P((struct vmspace *));
void vmtotal __P((struct vmtotal *));
void vnode_pager_setsize __P((struct vnode *, u_long));
void vnode_pager_umount __P((struct mount *));
boolean_t vnode_pager_uncache __P((struct vnode *));
void vslock __P((caddr_t, u_int));
void vsunlock __P((caddr_t, u_int, int));
#endif /* KERNEL */

1037
sys/vm/vm_fault.c Normal file

File diff suppressed because it is too large Load Diff

607
sys/vm/vm_glue.c Normal file
View File

@ -0,0 +1,607 @@
/*
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vm_glue.c 8.9 (Berkeley) 3/4/95
*
*
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
* All rights reserved.
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/user.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>
#include <machine/cpu.h>
/*
* Miscellaneous tunables/globals; all were flagged XXX by the original authors.
*/
int avefree = 0; /* XXX not referenced anywhere else in this file */
unsigned maxdmap = MAXDSIZ; /* XXX starts at MAXDSIZ, the RLIMIT_DATA hard limit set in vm_init_limits() */
int readbuffers = 0; /* XXX allow kgdb to read kernel buffer pool: non-zero disables kernacc()'s buffer-cache exclusion */
/*
 * kernacc:
 *
 *	Report whether the kernel map permits the requested access to
 *	the page-rounded range covering [addr, addr+len).
 *
 *	rw == B_READ asks for read permission; any other value asks
 *	for write permission.  Returns 1 if access is allowed, else 0.
 */
int
kernacc(addr, len, rw)
	caddr_t addr;
	int len, rw;
{
	vm_offset_t first, last;
	vm_prot_t wanted;
	boolean_t ok;

	if (rw == B_READ)
		wanted = VM_PROT_READ;
	else
		wanted = VM_PROT_WRITE;

	first = trunc_page(addr);
	last = round_page(addr+len);
	ok = vm_map_check_protection(kernel_map, first, last, wanted);

	/*
	 * XXX some regions (e.g. the buffer cache) are managed behind
	 * the VM system's back, so an address the VM system considers
	 * accessible may have no physical page behind it.  Touching it
	 * could lead to bogus page allocation in the kernel address
	 * space or, worse, pmap-level inconsistencies.  Only the buffer
	 * cache is guarded against for now: unless readbuffers is set,
	 * any range overlapping the buffer pool is refused.
	 */
	if (ok && !readbuffers) {
		if (last > (vm_offset_t)buffers &&
		    first < (vm_offset_t)buffers + MAXBSIZE * nbuf)
			ok = FALSE;
	}
	return(ok == TRUE);
}
/*
 * useracc:
 *
 *	Report whether the current process's address map permits the
 *	requested access to the page-rounded range covering
 *	[addr, addr+len).  rw == B_READ asks for read permission; any
 *	other value asks for write permission.  Returns 1 or 0.
 */
int
useracc(addr, len, rw)
	caddr_t addr;
	int len, rw;
{
	vm_prot_t wanted;
	boolean_t ok;

	if (rw == B_READ)
		wanted = VM_PROT_READ;
	else
		wanted = VM_PROT_WRITE;

	ok = vm_map_check_protection(&curproc->p_vmspace->vm_map,
	    trunc_page(addr), round_page(addr+len), wanted);
	return(ok == TRUE);
}
#ifdef KGDB
/*
 * chgkprot:
 *
 *	Change the protection of the kernel pages spanning addr to
 *	addr+len (presumably so the debugger can plant a breakpoint).
 *	rw == B_READ yields read-only; anything else yields read/write.
 *
 *	The change is forced at the pmap level.  vm_map_protect would
 *	apply a write-enable lazily, so we would still take a protection
 *	fault, and it would also fragment the kernel map needlessly.
 *	pmap_protect will not enforce a write-enable request either.
 *	Re-entering each page with pmap_enter is the only way to make
 *	sure the change really takes effect.
 */
void
chgkprot(addr, len, rw)
	register caddr_t addr;
	int len, rw;
{
	vm_offset_t va, limit, phys;
	vm_prot_t newprot;

	newprot = VM_PROT_READ;
	if (rw != B_READ)
		newprot |= VM_PROT_WRITE;

	limit = round_page(addr + len);
	va = trunc_page(addr);
	while (va < limit) {
		/*
		 * Look up the physical address with the low bit of the
		 * virtual address set: a cheesy hack so that physical
		 * page 0 can be told apart from an invalid mapping
		 * (not that it really matters...).  The bit is masked
		 * off again before the page is re-entered.
		 */
		phys = pmap_extract(kernel_pmap, va|1);
		if (phys == 0)
			panic("chgkprot: invalid page");
		pmap_enter(kernel_pmap, va, phys&~1, newprot, TRUE);
		va += PAGE_SIZE;
	}
}
#endif
void
vslock(addr, len)
caddr_t addr;
u_int len;
{
vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
round_page(addr+len), FALSE);
}
void
vsunlock(addr, len, dirtied)
caddr_t addr;
u_int len;
int dirtied;
{
#ifdef lint
dirtied++;
#endif
vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
round_page(addr+len), TRUE);
}
/*
* Implement fork's actions on an address space.
* Here we arrange for the address space to be copied or referenced,
* allocate a user struct (pcb and kernel stack), then call the
* machine-dependent layer to fill those in and make the new process
* ready to run.
* NOTE: the kernel stack may be at a different location in the child
* process, and thus addresses of automatic variables may be invalid
* after cpu_fork returns in the child process. We do nothing here
* after cpu_fork returns.
*
* p1 is the process whose vmspace, signal actions and stats are the
* source; p2 receives the new vmspace and user area.
* isvfork is only forwarded to shmfork() (SYSVSHM kernels).
* Returns whatever cpu_fork(p1, p2) returns.
*/
int
vm_fork(p1, p2, isvfork)
register struct proc *p1, *p2;
int isvfork;
{
register struct user *up;
vm_offset_t addr;
#ifdef i386
/*
* avoid copying any of the parent's pagetables or other per-process
* objects that reside in the map by marking all of them non-inheritable
*/
(void)vm_map_inherit(&p1->p_vmspace->vm_map,
UPT_MIN_ADDRESS-UPAGES*NBPG, VM_MAX_ADDRESS, VM_INHERIT_NONE);
#endif
/* Give the child its own vmspace derived from the parent's. */
p2->p_vmspace = vmspace_fork(p1->p_vmspace);
#ifdef SYSVSHM
/* Only call into SysV shared memory if the parent has vm_shm set. */
if (p1->p_vmspace->vm_shm)
shmfork(p1, p2, isvfork);
#endif
#ifndef i386
/*
* Allocate a wired-down (for now) pcb and kernel stack for the process
*/
/* Pageable allocation first, then wire it explicitly. */
addr = kmem_alloc_pageable(kernel_map, ctob(UPAGES));
if (addr == 0)
panic("vm_fork: no more kernel virtual memory");
vm_map_pageable(kernel_map, addr, addr + ctob(UPAGES), FALSE);
#else
/* XXX somehow, on 386, occasionally pageout removes active, wired down kstack,
and pagetables, WITHOUT going thru vm_page_unwire! Why this appears to work is
not yet clear, yet it does... */
addr = kmem_alloc(kernel_map, ctob(UPAGES));
if (addr == 0)
panic("vm_fork: no more kernel virtual memory");
#endif
/* The freshly allocated region becomes the child's user area. */
up = (struct user *)addr;
p2->p_addr = up;
/*
* p_stats and p_sigacts currently point at fields
* in the user struct but not at &u, instead at p_addr.
* Copy p_sigacts and parts of p_stats; zero the rest
* of p_stats (statistics).
*/
p2->p_stats = &up->u_stats;
p2->p_sigacts = &up->u_sigacts;
up->u_sigacts = *p1->p_sigacts;
/* Zero the [pstat_startzero, pstat_endzero) span of the child's stats. */
bzero(&up->u_stats.pstat_startzero,
(unsigned) ((caddr_t)&up->u_stats.pstat_endzero -
(caddr_t)&up->u_stats.pstat_startzero));
/* Copy the [pstat_startcopy, pstat_endcopy) span from the parent. */
bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy,
((caddr_t)&up->u_stats.pstat_endcopy -
(caddr_t)&up->u_stats.pstat_startcopy));
#ifdef i386
/*
* In the child's map, replace the range from
* UPT_MIN_ADDRESS - UPAGES*NBPG up to UPT_MAX_ADDRESS with a fresh
* allocation and mark it non-inheritable.
* NOTE: this inner `addr' (u_int) shadows the outer vm_offset_t addr,
* which has already been consumed above.
*/
{ u_int addr = UPT_MIN_ADDRESS - UPAGES*NBPG; struct vm_map *vp;
vp = &p2->p_vmspace->vm_map;
(void)vm_deallocate(vp, addr, UPT_MAX_ADDRESS - addr);
(void)vm_allocate(vp, &addr, UPT_MAX_ADDRESS - addr, FALSE);
(void)vm_map_inherit(vp, addr, UPT_MAX_ADDRESS, VM_INHERIT_NONE);
}
#endif
/*
* cpu_fork will copy and update the kernel stack and pcb,
* and make the child ready to run. It marks the child
* so that it can return differently than the parent.
* It returns twice, once in the parent process and
* once in the child.
*/
return (cpu_fork(p1, p2));
}
/*
 * Establish the default VM resource limits.
 * Invoked for proc 0; every later process inherits the result.
 */
void
vm_init_limits(p)
	register struct proc *p;
{
	/* Data segment: default soft limit, maximum hard limit. */
	p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
	p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;

	/* Stack: default soft limit, maximum hard limit. */
	p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
	p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;

	/*
	 * Resident set size: allow all of the (reasonably) available
	 * memory, i.e. the current free page count expressed in bytes.
	 * A single large process therefore starts random page
	 * replacement once it has filled memory.  Only the soft limit
	 * is set here.
	 */
	p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(cnt.v_free_count);
}
#include <vm/vm_pageout.h>
#ifdef DEBUG
/*
* Debugging knobs for the swapper (DEBUG kernels only).
*/
int enableswap = 1; /* zero: scheduler() sleeps instead of swapping in, swapout_threads() returns at once */
int swapdebug = 0; /* bitmask of the SDB_* flags below selecting debug printfs */
#define SDB_FOLLOW 1 /* trace scheduler() decisions */
#define SDB_SWAPIN 2 /* report each swapin in scheduler() */
#define SDB_SWAPOUT 4 /* report swapout() / swapout_threads() activity */
#endif
/*
* Brutally simple:
* 1. Attempt to swapin every swapped-out, runnable process in
* order of priority.
* 2. If not enough memory, wake the pageout daemon and let it
* clear some space.
*
* This function never returns: every path ends in `goto loop'.
*/
void
scheduler()
{
register struct proc *p;
register int pri;
struct proc *pp;
int ppri;
vm_offset_t addr;
vm_size_t size;
loop:
#ifdef DEBUG
/* Swapping disabled by the debugging knob: park until it is re-enabled. */
while (!enableswap)
tsleep((caddr_t)&proc0, PVM, "noswap", 0);
#endif
/*
* Pick the best swapin candidate: a runnable (SRUN) process that is
* not in memory, with the largest value of
* p_swtime + p_slptime - 8 * p_nice.
*/
pp = NULL;
ppri = INT_MIN;
for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
if (p->p_stat == SRUN && (p->p_flag & P_INMEM) == 0) {
/* XXX should also penalize based on vm_swrss */
pri = p->p_swtime + p->p_slptime - p->p_nice * 8;
if (pri > ppri) {
pp = p;
ppri = pri;
}
}
}
#ifdef DEBUG
if (swapdebug & SDB_FOLLOW)
printf("scheduler: running, procp %x pri %d\n", pp, ppri);
#endif
/*
* Nothing to do, back to sleep
*/
if ((p = pp) == NULL) {
tsleep((caddr_t)&proc0, PVM, "scheduler", 0);
goto loop;
}
/*
* We would like to bring someone in.
* This part is really bogus cuz we could deadlock on memory
* despite our feeble check.
* XXX should require at least vm_swrss / 2
*/
size = round_page(ctob(UPAGES));
addr = (vm_offset_t) p->p_addr;
/* Only proceed if more pages are free than the u-area needs. */
if (cnt.v_free_count > atop(size)) {
#ifdef DEBUG
if (swapdebug & SDB_SWAPIN)
printf("swapin: pid %d(%s)@%x, pri %d free %d\n",
p->p_pid, p->p_comm, p->p_addr,
ppri, cnt.v_free_count);
#endif
/* Wire the u-area back down (new_pageable == FALSE). */
vm_map_pageable(kernel_map, addr, addr+size, FALSE);
/*
* Some architectures need to be notified when the
* user area has moved to new physical page(s) (e.g.
* see pmax/pmax/vm_machdep.c).
*/
cpu_swapin(p);
/*
* Queue the process and set P_INMEM at raised spl.
* NOTE(review): presumably this keeps the clock interrupt
* from observing the two updates half-done -- confirm.
*/
(void) splstatclock();
if (p->p_stat == SRUN)
setrunqueue(p);
p->p_flag |= P_INMEM;
(void) spl0();
p->p_swtime = 0;
goto loop;
}
/*
* Not enough memory, jab the pageout daemon and wait til the
* coast is clear.
*/
#ifdef DEBUG
if (swapdebug & SDB_FOLLOW)
printf("scheduler: no room for pid %d(%s), free %d\n",
p->p_pid, p->p_comm, cnt.v_free_count);
#endif
/* VM_WAIT is issued at splhigh; the level is dropped to spl0 afterwards. */
(void) splhigh();
VM_WAIT;
(void) spl0();
#ifdef DEBUG
if (swapdebug & SDB_FOLLOW)
printf("scheduler: room again, free %d\n", cnt.v_free_count);
#endif
goto loop;
}
/*
* A process may be swapped out only if, among the five flags tested,
* P_INMEM is set and none of P_SYSTEM, P_NOSWAP, P_WEXIT or P_PHYSIO is.
* The argument is evaluated exactly once.
*/
#define swappable(p) \
(((p)->p_flag & \
(P_SYSTEM | P_INMEM | P_NOSWAP | P_WEXIT | P_PHYSIO)) == P_INMEM)
/*
* Swapout is driven by the pageout daemon. Very simple, we find eligible
* procs and unwire their u-areas. We try to always "swap" at least one
* process in case we need the room for a swapin.
* If any procs have been sleeping/stopped for at least maxslp seconds,
* they are swapped. Else, we swap the longest-sleeping or stopped process,
* if any, otherwise the longest-resident process.
*/
void
swapout_threads()
{
register struct proc *p;
struct proc *outp, *outp2;
int outpri, outpri2;
int didswap = 0;
extern int maxslp;
#ifdef DEBUG
if (!enableswap)
return;
#endif
/*
* outp/outpri: longest-sleeping SSLEEP/SSTOP process below maxslp.
* outp2/outpri2: SRUN process with the largest p_swtime.
* Both start at 0, so a candidate needs a strictly positive time.
*/
outp = outp2 = NULL;
outpri = outpri2 = 0;
for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
if (!swappable(p))
continue;
switch (p->p_stat) {
case SRUN:
if (p->p_swtime > outpri2) {
outp2 = p;
outpri2 = p->p_swtime;
}
continue;
case SSLEEP:
case SSTOP:
/* Asleep/stopped for maxslp or longer: swap out immediately. */
if (p->p_slptime >= maxslp) {
swapout(p);
didswap++;
} else if (p->p_slptime > outpri) {
outp = p;
outpri = p->p_slptime;
}
continue;
}
/* Any other p_stat value falls out of the switch and is ignored. */
}
/*
* If we didn't get rid of any real duds, toss out the next most
* likely sleeping/stopped or running candidate. We only do this
* if we are real low on memory since we don't gain much by doing
* it (UPAGES pages).
*/
if (didswap == 0 &&
cnt.v_free_count <= atop(round_page(ctob(UPAGES)))) {
/* Prefer the sleeping/stopped candidate over the running one. */
if ((p = outp) == 0)
p = outp2;
#ifdef DEBUG
if (swapdebug & SDB_SWAPOUT)
printf("swapout_threads: no duds, try procp %x\n", p);
#endif
if (p)
swapout(p);
}
}
/*
* swapout:
*
* "Swap out" process p: make its u-area pageable again (except on
* i386), clear P_INMEM, take it off the run queue if it is SRUN,
* and reset p_swtime.
*/
void
swapout(p)
register struct proc *p;
{
vm_offset_t addr;
vm_size_t size;
#ifdef DEBUG
/* Note: the value printed under the "pri" label is p_slptime. */
if (swapdebug & SDB_SWAPOUT)
printf("swapout: pid %d(%s)@%x, stat %x pri %d free %d\n",
p->p_pid, p->p_comm, p->p_addr, p->p_stat,
p->p_slptime, cnt.v_free_count);
#endif
size = round_page(ctob(UPAGES));
addr = (vm_offset_t) p->p_addr;
#if defined(hp300) || defined(luna68k)
/*
* Ugh! u-area is double mapped to a fixed address behind the
* back of the VM system and accesses are usually through that
* address rather than the per-process address. Hence reference
* and modify information are recorded at the fixed address and
* lost at context switch time. We assume the u-struct and
* kernel stack are always accessed/modified and force it to be so.
*/
{
register int i;
volatile long tmp;
/* Read and write back one long in each of the UPAGES pages. */
for (i = 0; i < UPAGES; i++) {
tmp = *(long *)addr; *(long *)addr = tmp;
addr += NBPG;
}
/* Restore addr, which the loop advanced past the u-area. */
addr = (vm_offset_t) p->p_addr;
}
#endif
#ifdef mips
/*
* Be sure to save the floating point coprocessor state before
* paging out the u-struct.
*/
{
extern struct proc *machFPCurProcPtr;
if (p == machFPCurProcPtr) {
MachSaveCurFPState(p);
machFPCurProcPtr = (struct proc *)0;
}
}
#endif
#ifndef i386 /* temporary measure till we find spontaneous unwire of kstack */
/* Make the u-area pageable, then call pmap_collect() on the process's pmap. */
vm_map_pageable(kernel_map, addr, addr+size, TRUE);
pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map));
#endif
/* Update the flag and run queue at splhigh; the level is dropped to spl0 afterwards. */
(void) splhigh();
p->p_flag &= ~P_INMEM;
if (p->p_stat == SRUN)
remrq(p);
(void) spl0();
p->p_swtime = 0;
}
/*
* The rest of these routines fake thread handling
*/
void
assert_wait(event, ruptible)
void *event;
boolean_t ruptible;
{
#ifdef lint
ruptible++;
#endif
curproc->p_thread = event;
}
/*
* thread_block:
*
* Sleep on the event recorded in curproc->p_thread (as stored by
* assert_wait() or thread_sleep()), if there is one. The check and
* the tsleep are done at splhigh; the previous level is restored
* before returning. p_thread is not cleared here.
*/
void
thread_block()
{
int s = splhigh();
if (curproc->p_thread)
tsleep(curproc->p_thread, PVM, "thrd_block", 0);
splx(s);
}
/*
* thread_sleep:
*
* Record `event' in curproc->p_thread, release the simple lock
* `lock', and then sleep on the event if p_thread is non-NULL.
* All of this is done at splhigh; the previous level is restored
* before returning. The ruptible argument is not used (it is only
* touched to keep lint quiet).
*/
void
thread_sleep(event, lock, ruptible)
void *event;
simple_lock_t lock;
boolean_t ruptible;
{
int s = splhigh();
#ifdef lint
ruptible++;
#endif
curproc->p_thread = event;
/* Drop the lock only after the event has been recorded. */
simple_unlock(lock);
if (curproc->p_thread)
tsleep(event, PVM, "thrd_sleep", 0);
splx(s);
}
/*
 * thread_wakeup:
 *
 *	Issue wakeup() on `event' at splhigh, then restore the
 *	previous interrupt priority level.
 */
void
thread_wakeup(event)
	void *event;
{
	int saved_spl;

	saved_spl = splhigh();
	wakeup(event);
	splx(saved_spl);
}
/*
* DEBUG stuff
*/
int indent = 0; /* current indentation, in columns, applied by iprintf() */
#include <machine/stdarg.h> /* see subr_prf.c */
/*
* iprintf:
*
* printf-style output preceded by `indent' columns of leading
* whitespace: one tab for every full 8 columns, then single spaces
* for the remainder. A zero or negative indent emits no padding.
*/
/*ARGSUSED2*/
void
#if __STDC__
iprintf(const char *fmt, ...)
#else
iprintf(fmt /* , va_alist */)
char *fmt;
/* va_dcl */
#endif
{
register int i;
va_list ap;
/* Tabs for each full group of 8 columns; i keeps the remainder. */
for (i = indent; i >= 8; i -= 8)
printf("\t");
/* One space per remaining column. */
while (--i >= 0)
printf(" ");
va_start(ap, fmt);
/*
* NOTE(review): "%r" is presumably the kernel printf's directive that
* consumes a format string plus va_list (see subr_prf.c) -- confirm.
*/
printf("%r", fmt, ap);
va_end(ap);
}

456
sys/vm/vm_kern.c Normal file
View File

@ -0,0 +1,456 @@
/*
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vm_kern.c 8.4 (Berkeley) 1/9/95
*
*
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
* All rights reserved.
*
* Authors: Avadis Tevanian, Jr., Michael Wayne Young
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* Kernel memory management.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
/*
 *	kmem_alloc_pageable:
 *
 *	Reserve `size' bytes (rounded up to whole pages) of pageable
 *	memory in the kernel's address map.  map must be "kernel_map"
 *	(the check for this is currently compiled out).
 *
 *	Returns the address of the new region, or 0 if vm_map_find
 *	does not report KERN_SUCCESS.
 */
vm_offset_t
kmem_alloc_pageable(map, size)
	vm_map_t map;
	register vm_size_t size;
{
	vm_offset_t va;

#if 0
	if (map != kernel_map)
		panic("kmem_alloc_pageable: not called with kernel_map");
#endif
	size = round_page(size);

	/* Start the search at the bottom of the map. */
	va = vm_map_min(map);
	if (vm_map_find(map, NULL, (vm_offset_t) 0,
	    &va, size, TRUE) != KERN_SUCCESS)
		return(0);
	return(va);
}
/*
* Allocate wired-down memory in the kernel's address map
* or a submap.
*
* size is rounded up to whole pages. Returns the address of the new
* region, or 0 if vm_map_findspace() finds no room in the map.
* May wait (VM_WAIT) for free pages.
*/
vm_offset_t
kmem_alloc(map, size)
register vm_map_t map;
register vm_size_t size;
{
vm_offset_t addr;
register vm_offset_t offset;
extern vm_object_t kernel_object;
vm_offset_t i;
size = round_page(size);
/*
* Use the kernel object for wired-down kernel pages.
* Assume that no region of the kernel object is
* referenced more than once.
*/
/*
* Locate sufficient space in the map. This will give us the
* final virtual address for the new memory, and thus will tell
* us the offset within the kernel map.
*/
vm_map_lock(map);
if (vm_map_findspace(map, 0, size, &addr)) {
vm_map_unlock(map);
return (0);
}
/* The object offset is the address's distance from the kernel base. */
offset = addr - VM_MIN_KERNEL_ADDRESS;
/* Take an object reference before the object goes into the map entry. */
vm_object_reference(kernel_object);
vm_map_insert(map, kernel_object, offset, addr, addr + size);
vm_map_unlock(map);
/*
* Guarantee that there are pages already in this object
* before calling vm_map_pageable. This is to prevent the
* following scenario:
*
* 1) Threads have swapped out, so that there is a
* pager for the kernel_object.
* 2) The kmsg zone is empty, and so we are kmem_allocing
* a new page for it.
* 3) vm_map_pageable calls vm_fault; there is no page,
* but there is a pager, so we call
* pager_data_request. But the kmsg zone is empty,
* so we must kmem_alloc.
* 4) goto 1
* 5) Even if the kmsg zone is not empty: when we get
* the data back from the pager, it will be (very
* stale) non-zero data. kmem_alloc is defined to
* return zero-filled memory.
*
* We're intentionally not activating the pages we allocate
* to prevent a race with page-out. vm_map_pageable will wire
* the pages.
*/
vm_object_lock(kernel_object);
for (i = 0 ; i < size; i+= PAGE_SIZE) {
vm_page_t mem;
/* Out of pages: drop the object lock while waiting, then retry. */
while ((mem = vm_page_alloc(kernel_object, offset+i)) == NULL) {
vm_object_unlock(kernel_object);
VM_WAIT;
vm_object_lock(kernel_object);
}
vm_page_zero_fill(mem);
mem->flags &= ~PG_BUSY;
}
vm_object_unlock(kernel_object);
/*
* And finally, mark the data as non-pageable.
*/
(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE);
/*
* Try to coalesce the map
*/
vm_map_simplify(map, addr);
return(addr);
}
/*
 *	kmem_free:
 *
 *	Give back a region of kernel virtual memory obtained from
 *	kmem_alloc, together with the physical pages backing it.
 *	The range removed is [addr, addr + size) widened outward to
 *	page boundaries.
 */
void
kmem_free(map, addr, size)
	vm_map_t map;
	register vm_offset_t addr;
	vm_size_t size;
{
	vm_offset_t first, last;

	first = trunc_page(addr);
	last = round_page(addr + size);
	(void) vm_map_remove(map, first, last);
}
/*
 *	kmem_suballoc:
 *
 *	Create a map that manages a subrange of the kernel virtual
 *	address space.
 *
 *	Arguments, in call order:
 *
 *	parent		Map to take the range from
 *	min, max	Returned endpoints of the new map
 *	size		Size of range to find (rounded up to pages)
 *	pageable	Can the region be paged
 *
 *	Panics rather than returning on any failure.
 */
vm_map_t
kmem_suballoc(parent, min, max, size, pageable)
	register vm_map_t parent;
	vm_offset_t *min, *max;
	register vm_size_t size;
	boolean_t pageable;
{
	vm_map_t submap;
	register int status;

	size = round_page(size);

	/* Carve the range out of the parent, searching from its base. */
	*min = (vm_offset_t) vm_map_min(parent);
	status = vm_map_find(parent, NULL, (vm_offset_t) 0,
	    min, size, TRUE);
	if (status != KERN_SUCCESS) {
		printf("kmem_suballoc: bad status return of %d.\n", status);
		panic("kmem_suballoc");
	}
	*max = *min + size;

	/* The submap shares the parent's pmap, so take a reference on it. */
	pmap_reference(vm_map_pmap(parent));
	submap = vm_map_create(vm_map_pmap(parent), *min, *max, pageable);
	if (submap == NULL)
		panic("kmem_suballoc: cannot create submap");

	/* Install the new map as a submap over the reserved range. */
	status = vm_map_submap(parent, *min, *max, submap);
	if (status != KERN_SUCCESS)
		panic("kmem_suballoc: unable to change range to submap");
	return(submap);
}
/*
 * Allocate wired-down memory in the kernel's address map for the higher
 * level kernel memory allocator (kern/kern_malloc.c).  We cannot use
 * kmem_alloc() because we may need to allocate memory at interrupt
 * level where we cannot block (canwait == FALSE).
 *
 * This routine has its own private kernel submap (kmem_map) and object
 * (kmem_object).  This, combined with the fact that only malloc uses
 * this routine, ensures that we will never block in map or object waits.
 *
 * Note that this still only works in a uni-processor environment and
 * when called at splhigh().
 *
 * We don't worry about expanding the map (adding entries) since entries
 * for wired maps are statically allocated.
 *
 * Arguments:
 *	map	must be kmem_map or mb_map (anything else panics)
 *	size	bytes wanted; rounded up to whole pages
 *	canwait	TRUE if the caller may block
 *
 * Returns the address of the new region, or 0 when canwait is FALSE
 * and either the map has no room or a page cannot be allocated.
 * When canwait is TRUE, a full map is a panic.
 */
vm_offset_t
kmem_malloc(map, size, canwait)
	register vm_map_t map;
	register vm_size_t size;
	boolean_t canwait;
{
	register vm_offset_t offset, i;
	vm_map_entry_t entry;
	vm_offset_t addr;
	vm_page_t m;
	extern vm_object_t kmem_object;

	/* The message names this function so the panic can be traced. */
	if (map != kmem_map && map != mb_map)
		panic("kmem_malloc: map != {kmem,mb}_map");

	size = round_page(size);
	addr = vm_map_min(map);

	/*
	 * Locate sufficient space in the map.  This will give us the
	 * final virtual address for the new memory, and thus will tell
	 * us the offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, 0, size, &addr)) {
		vm_map_unlock(map);
		if (canwait)		/* XXX  should wait */
			panic("kmem_malloc: %s too small",
			    map == kmem_map ? "kmem_map" : "mb_map");
		return (0);
	}
	/* Object offsets are relative to the base of kmem_map, for both maps. */
	offset = addr - vm_map_min(kmem_map);
	vm_object_reference(kmem_object);
	vm_map_insert(map, kmem_object, offset, addr, addr + size);

	/*
	 * If we can wait, just mark the range as wired
	 * (will fault pages as necessary).
	 */
	if (canwait) {
		vm_map_unlock(map);
		(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size,
		    FALSE);
		vm_map_simplify(map, addr);
		return(addr);
	}

	/*
	 * If we cannot wait then we must allocate all memory up front,
	 * pulling it off the active queue to prevent pageout.
	 */
	vm_object_lock(kmem_object);
	for (i = 0; i < size; i += PAGE_SIZE) {
		m = vm_page_alloc(kmem_object, offset + i);

		/*
		 * Ran out of space, free everything up and return.
		 * Don't need to lock page queues here as we know
		 * that the pages we got aren't on any queues.
		 */
		if (m == NULL) {
			/* Free the pages obtained so far, last one first. */
			while (i != 0) {
				i -= PAGE_SIZE;
				m = vm_page_lookup(kmem_object, offset + i);
				vm_page_free(m);
			}
			vm_object_unlock(kmem_object);
			vm_map_delete(map, addr, addr + size);
			vm_map_unlock(map);
			return(0);
		}
#if 0
		vm_page_zero_fill(m);
#endif
		m->flags &= ~PG_BUSY;
	}
	vm_object_unlock(kmem_object);

	/*
	 * Mark map entry as non-pageable.
	 * Assert: vm_map_insert() will never be able to extend the previous
	 * entry so there will be a new entry exactly corresponding to this
	 * address range and it will have wired_count == 0.
	 */
	if (!vm_map_lookup_entry(map, addr, &entry) ||
	    entry->start != addr || entry->end != addr + size ||
	    entry->wired_count)
		panic("kmem_malloc: entry not found or misaligned");
	entry->wired_count++;

	/*
	 * Loop thru pages, entering them in the pmap.
	 * (We cannot add them to the wired count without
	 * wrapping the vm_page_queue_lock in splimp...)
	 */
	for (i = 0; i < size; i += PAGE_SIZE) {
		vm_object_lock(kmem_object);
		m = vm_page_lookup(kmem_object, offset + i);
		vm_object_unlock(kmem_object);
		pmap_enter(map->pmap, addr + i, VM_PAGE_TO_PHYS(m),
		    VM_PROT_DEFAULT, TRUE);
	}
	vm_map_unlock(map);

	vm_map_simplify(map, addr);
	return(addr);
}
/*
* kmem_alloc_wait
*
* Allocates pageable memory from a sub-map of the kernel. If the submap
* has no room, the caller sleeps waiting for more memory in the submap.
*
* size is rounded up to whole pages. Returns the address of the new
* region, or 0 if the request is larger than the map's whole span
* (vm_map_max - vm_map_min) and so could never be satisfied.
* The sleep uses the map pointer itself as the wait event, which is
* the event kmem_free_wakeup() passes to thread_wakeup().
*/
vm_offset_t
kmem_alloc_wait(map, size)
vm_map_t map;
vm_size_t size;
{
vm_offset_t addr;
size = round_page(size);
for (;;) {
/*
* To make this work for more than one map,
* use the map's lock to lock out sleepers/wakers.
*/
vm_map_lock(map);
/* Space found: leave the loop with the map still locked. */
if (vm_map_findspace(map, 0, size, &addr) == 0)
break;
/* no space now; see if we can ever get space */
if (vm_map_max(map) - vm_map_min(map) < size) {
vm_map_unlock(map);
return (0);
}
/* Register the wait before dropping the map lock, then block. */
assert_wait(map, TRUE);
vm_map_unlock(map);
thread_block();
}
/* Map is locked here; insert an entry with no backing object. */
vm_map_insert(map, NULL, (vm_offset_t)0, addr, addr + size);
vm_map_unlock(map);
return (addr);
}
/*
 *	kmem_free_wakeup
 *
 *	Give memory back to a submap of the kernel and wake any threads
 *	sleeping on that map while waiting for space.  The range deleted
 *	is [addr, addr + size) widened outward to page boundaries; the
 *	delete and the wakeup both happen with the map locked.
 */
void
kmem_free_wakeup(map, addr, size)
	vm_map_t map;
	vm_offset_t addr;
	vm_size_t size;
{
	vm_offset_t first, last;

	first = trunc_page(addr);
	last = round_page(addr + size);

	vm_map_lock(map);
	(void) vm_map_delete(map, first, last);
	thread_wakeup(map);
	vm_map_unlock(map);
}
/*
* Create the kernel map; insert a mapping covering kernel text, data, bss,
* and all space allocated thus far (`bootstrap' data). The new map will thus
* map the range between VM_MIN_KERNEL_ADDRESS and `start' as allocated, and
* the range between `start' and `end' as free.
*
* start: first kernel virtual address not yet in use.
* end: upper bound of the kernel map being created.
* Sets the global kernel_map.
*/
void
kmem_init(start, end)
vm_offset_t start, end;
{
register vm_map_t m;
/* The map spans [VM_MIN_KERNEL_ADDRESS, end) and uses kernel_pmap. */
m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end, FALSE);
vm_map_lock(m);
/* N.B.: cannot use kgdb to debug, starting with this assignment ... */
kernel_map = m;
/* Cover the already-used range with an entry that has no object. */
(void) vm_map_insert(m, NULL, (vm_offset_t)0,
VM_MIN_KERNEL_ADDRESS, start);
/* ... and ending with the completion of the above `insert' */
vm_map_unlock(m);
}

2648
sys/vm/vm_map.c Normal file

File diff suppressed because it is too large Load Diff

256
sys/vm/vm_map.h Normal file
View File

@ -0,0 +1,256 @@
/*
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vm_map.h 8.9 (Berkeley) 5/17/95
*
*
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
* All rights reserved.
*
* Authors: Avadis Tevanian, Jr., Michael Wayne Young
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* Virtual memory map module definitions.
*/
#ifndef _VM_MAP_
#define _VM_MAP_
/*
* Types defined:
*
* vm_map_t the high-level address map data structure.
* vm_map_entry_t an entry in an address map.
* vm_map_version_t a timestamp of a map, for use with vm_map_lookup
*/
/*
* Objects which live in maps may be either VM objects, or
* another map (called a "sharing map") which denotes read-write
* sharing with other maps.
*
* Which member is meaningful is recorded outside the union, in the
* is_a_map and is_sub_map fields of struct vm_map_entry.
*/
union vm_map_object {
struct vm_object *vm_object; /* VM object */
struct vm_map *share_map; /* share map */
struct vm_map *sub_map; /* belongs to another map */
};
/*
* Address map entries consist of start and end addresses,
* a VM object (or sharing map) and offset into that object,
* and user-exported inheritance and protection information.
* Also included is control information for virtual copy operations.
*
* Entries are linked into a doubly-linked list through prev/next.
*/
struct vm_map_entry {
struct vm_map_entry *prev; /* previous entry */
struct vm_map_entry *next; /* next entry */
vm_offset_t start; /* start address */
vm_offset_t end; /* end address */
union vm_map_object object; /* object I point to */
vm_offset_t offset; /* offset into object */
boolean_t is_a_map; /* Is "object" a map? */
boolean_t is_sub_map; /* Is "object" a submap? */
/* Only in sharing maps: */
boolean_t copy_on_write; /* is data copy-on-write */
boolean_t needs_copy; /* does object need to be copied */
/* Only in task maps: */
vm_prot_t protection; /* protection code */
vm_prot_t max_protection; /* maximum protection */
vm_inherit_t inheritance; /* inheritance */
int wired_count; /* wiring count; can be paged if = 0 */
};
/*
* Maps are doubly-linked lists of map entries, kept sorted
* by address. A single hint is provided to start
* searches again from the last successful search,
* insertion, or removal.
*/
/*
 * An address map.  "header" is the anchor of the entry list; its start
 * and end fields double as the map's minimum and maximum offsets (see
 * the min_offset/max_offset aliases below).
 */
struct vm_map {
struct pmap * pmap; /* Physical map */
lock_data_t lock; /* Lock for map data */
struct vm_map_entry header; /* List of entries */
int nentries; /* Number of entries */
vm_size_t size; /* virtual size */
boolean_t is_main_map; /* Am I a main map? */
int ref_count; /* Reference count */
simple_lock_data_t ref_lock; /* Lock for ref_count field */
vm_map_entry_t hint; /* hint for quick lookups */
simple_lock_data_t hint_lock; /* lock for hint storage */
vm_map_entry_t first_free; /* First free space hint */
boolean_t entries_pageable; /* map entries pageable?? */
/* Incremented by vm_map_lock() and vm_map_lock_drain_interlock(). */
unsigned int timestamp; /* Version number */
/*
 * Preprocessor aliases, not members: although written inside the
 * struct, these #defines are visible for the rest of the translation
 * unit and rewrite every later use of the two identifiers.
 */
#define min_offset header.start
#define max_offset header.end
};
/*
* Map versions are used to validate a previous lookup attempt.
*
* Since lookup operations may involve both a main map and
* a sharing map, it is necessary to have a timestamp from each.
* [If the main map timestamp has changed, the share_map and
* associated timestamp are no longer valid; the map version
* does not include a reference for the imbedded share_map.]
*/
/*
 * Snapshot of the timestamps seen during a lookup.
 * NOTE(review): the timestamps are plain int here while
 * struct vm_map.timestamp is unsigned int -- confirm that comparisons
 * against the live value behave as intended once the counter wraps.
 */
typedef struct {
int main_timestamp; /* timestamp of the main map */
vm_map_t share_map; /* share map involved in the lookup, if any */
int share_timestamp; /* timestamp of share_map */
} vm_map_version_t;
/*
* Macros: vm_map_lock, etc.
* Function:
* Perform locking on the data portion of a map.
*/
#include <sys/proc.h> /* XXX for curproc and p_pid */
/*
 * All of the macros below operate on (map)->lock through lockmgr() on
 * behalf of curproc.
 *
 * The brace-block forms ({ ... }) expand to a compound statement, so
 * "if (x) vm_map_lock(m); else ..." will not parse; callers must brace
 * such uses themselves.
 */
/*
 * LK_DRAIN request on the map lock, passing the map's ref_lock as the
 * interlock (LK_INTERLOCK); the timestamp is bumped afterwards.  The
 * lockmgr() return value is ignored.
 */
#define vm_map_lock_drain_interlock(map) { \
lockmgr(&(map)->lock, LK_DRAIN|LK_INTERLOCK, \
&(map)->ref_lock, curproc); \
(map)->timestamp++; \
}
/*
 * Exclusive lock; every acquisition bumps the map timestamp.  The
 * DIAGNOSTIC variant panics if lockmgr() reports failure, the regular
 * variant ignores the return value.
 */
#ifdef DIAGNOSTIC
#define vm_map_lock(map) { \
if (lockmgr(&(map)->lock, LK_EXCLUSIVE, (void *)0, curproc) != 0) { \
panic("vm_map_lock: failed to get lock"); \
} \
(map)->timestamp++; \
}
#else
#define vm_map_lock(map) { \
lockmgr(&(map)->lock, LK_EXCLUSIVE, (void *)0, curproc); \
(map)->timestamp++; \
}
#endif /* DIAGNOSTIC */
/* Release a lock taken with vm_map_lock(); expands to an expression. */
#define vm_map_unlock(map) \
lockmgr(&(map)->lock, LK_RELEASE, (void *)0, curproc)
/* Shared lock; does not touch the timestamp. */
#define vm_map_lock_read(map) \
lockmgr(&(map)->lock, LK_SHARED, (void *)0, curproc)
/* Release a lock taken with vm_map_lock_read(); same as vm_map_unlock(). */
#define vm_map_unlock_read(map) \
lockmgr(&(map)->lock, LK_RELEASE, (void *)0, curproc)
/*
 * vm_map_set_recursive / vm_map_clear_recursive:
 *	Set or clear LK_CANRECURSE in the flag word of the map's lock.
 *	The update is bracketed by simple_lock()/simple_unlock() on the
 *	lock's own interlock.
 *
 * lk_interlock and lk_flags are members of the lock embedded in the
 * map as (map)->lock; struct vm_map itself has no such members, so
 * they must be reached through the "lock" field.
 *
 * As with vm_map_lock(), each macro expands to a brace block and is
 * meant to be used as a stand-alone statement.
 */
#define vm_map_set_recursive(map) { \
	simple_lock(&(map)->lock.lk_interlock); \
	(map)->lock.lk_flags |= LK_CANRECURSE; \
	simple_unlock(&(map)->lock.lk_interlock); \
}
#define vm_map_clear_recursive(map) { \
	simple_lock(&(map)->lock.lk_interlock); \
	(map)->lock.lk_flags &= ~LK_CANRECURSE; \
	simple_unlock(&(map)->lock.lk_interlock); \
}
/*
* Functions implemented as macros
*/
/*
 * Simple accessors.  min_offset and max_offset are themselves macros
 * for header.start and header.end, so the first two read the bounds
 * stored in the map's header entry.
 */
#define vm_map_min(map) ((map)->min_offset)
#define vm_map_max(map) ((map)->max_offset)
#define vm_map_pmap(map) ((map)->pmap)
/* XXX: number of kernel maps and entries to statically allocate */
#define MAX_KMAP 10
#define MAX_KMAPENT 500
#ifdef KERNEL
boolean_t vm_map_check_protection __P((vm_map_t,
vm_offset_t, vm_offset_t, vm_prot_t));
int vm_map_copy __P((vm_map_t, vm_map_t, vm_offset_t,
vm_size_t, vm_offset_t, boolean_t, boolean_t));
void vm_map_copy_entry __P((vm_map_t,
vm_map_t, vm_map_entry_t, vm_map_entry_t));
struct pmap;
vm_map_t vm_map_create __P((struct pmap *,
vm_offset_t, vm_offset_t, boolean_t));
void vm_map_deallocate __P((vm_map_t));
int vm_map_delete __P((vm_map_t, vm_offset_t, vm_offset_t));
vm_map_entry_t vm_map_entry_create __P((vm_map_t));
void vm_map_entry_delete __P((vm_map_t, vm_map_entry_t));
void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t));
void vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t));
int vm_map_find __P((vm_map_t, vm_object_t,
vm_offset_t, vm_offset_t *, vm_size_t, boolean_t));
int vm_map_findspace __P((vm_map_t,
vm_offset_t, vm_size_t, vm_offset_t *));
int vm_map_inherit __P((vm_map_t,
vm_offset_t, vm_offset_t, vm_inherit_t));
void vm_map_init __P((struct vm_map *,
vm_offset_t, vm_offset_t, boolean_t));
int vm_map_insert __P((vm_map_t,
vm_object_t, vm_offset_t, vm_offset_t, vm_offset_t));
int vm_map_lookup __P((vm_map_t *, vm_offset_t, vm_prot_t,
vm_map_entry_t *, vm_object_t *, vm_offset_t *, vm_prot_t *,
boolean_t *, boolean_t *));
void vm_map_lookup_done __P((vm_map_t, vm_map_entry_t));
boolean_t vm_map_lookup_entry __P((vm_map_t,
vm_offset_t, vm_map_entry_t *));
int vm_map_pageable __P((vm_map_t,
vm_offset_t, vm_offset_t, boolean_t));
int vm_map_clean __P((vm_map_t,
vm_offset_t, vm_offset_t, boolean_t, boolean_t));
void vm_map_print __P((vm_map_t, boolean_t));
int vm_map_protect __P((vm_map_t,
vm_offset_t, vm_offset_t, vm_prot_t, boolean_t));
void vm_map_reference __P((vm_map_t));
int vm_map_remove __P((vm_map_t, vm_offset_t, vm_offset_t));
void vm_map_simplify __P((vm_map_t, vm_offset_t));
void vm_map_simplify_entry __P((vm_map_t, vm_map_entry_t));
void vm_map_startup __P((void));
int vm_map_submap __P((vm_map_t,
vm_offset_t, vm_offset_t, vm_map_t));
#endif
#endif /* _VM_MAP_ */

235
sys/vm/vm_meter.c Normal file
View File

@ -0,0 +1,235 @@
/*
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vm_meter.c 8.7 (Berkeley) 5/10/95
*/
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <vm/vm.h>
#include <sys/sysctl.h>
/* Updated by loadav(); exported read-only through vm_sysctl(VM_LOADAVG). */
struct loadavg averunnable; /* load average, of runnable procs */
/* Sleep-time threshold used by vmmeter() and vmtotal(). */
int maxslp = MAXSLP;
/* Initialised from SAFERSS; not referenced elsewhere in this file. */
int saferss = SAFERSS;
/*
 * Periodic VM bookkeeping: refresh the load averages whenever
 * time.tv_sec is a multiple of five, and wake proc0 once it has been
 * asleep for more than half of maxslp.
 */
void
vmmeter()
{
	if ((time.tv_sec % 5) == 0) {
		loadav(&averunnable);
	}
	if (maxslp/2 < proc0.p_slptime) {
		wakeup((caddr_t)&proc0);
	}
}
/*
* Constants for averages over 1, 5, and 15 minutes
* when sampling at 5 second intervals.
*/
/*
 * Decay factors, scaled by FSCALE, indexed in the same order as
 * loadavg.ldavg[]: one sample every 5 seconds gives 12, 60 and 180
 * samples per 1, 5 and 15 minutes respectively.
 */
fixpt_t cexp[3] = {
0.9200444146293232 * FSCALE, /* exp(-1/12) */
0.9834714538216174 * FSCALE, /* exp(-1/60) */
0.9944598480048967 * FSCALE, /* exp(-1/180) */
};
/*
 * Fold the current number of runnable processes into the three
 * load averages held in *avg (tenex style, 1, 5 and 15 minute
 * intervals).
 *
 * A process counts as runnable when it is in state SRUN or SIDL, or
 * when it is in SSLEEP with priority <= PZERO and a zero p_slptime.
 */
void
loadav(avg)
	register struct loadavg *avg;
{
	register int i, nrun;
	register struct proc *p;

	nrun = 0;
	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
		if (p->p_stat == SRUN || p->p_stat == SIDL) {
			nrun++;
		} else if (p->p_stat == SSLEEP &&
		    p->p_priority <= PZERO && p->p_slptime == 0) {
			nrun++;
		}
	}

	/* Same fixed-point blend for each interval, using its own factor. */
	i = 0;
	while (i < 3) {
		avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
		    nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
		i++;
	}
}
/*
 * Attributes associated with virtual memory.
 *
 * sysctl handler for the VM names.  Every name at this level is a
 * leaf, so exactly one name component is accepted:
 *
 *	VM_LOADAVG	read-only copy of averunnable (fscale is
 *			refreshed to FSCALE first)
 *	VM_METER	read-only struct vmtotal computed on demand by
 *			vmtotal()
 *
 * Returns ENOTDIR when namelen != 1, EOPNOTSUPP for an unknown name,
 * otherwise whatever sysctl_rdstruct() returns.  newlen and p are not
 * used.
 *
 * The return type is spelled out: relying on implicit int is not valid
 * since C99.
 */
int
vm_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	struct vmtotal vmtotals;

	/* all sysctl names at this level are terminal */
	if (namelen != 1)
		return (ENOTDIR);		/* overloaded */

	switch (name[0]) {
	case VM_LOADAVG:
		averunnable.fscale = FSCALE;
		return (sysctl_rdstruct(oldp, oldlenp, newp, &averunnable,
		    sizeof(averunnable)));
	case VM_METER:
		vmtotal(&vmtotals);
		return (sysctl_rdstruct(oldp, oldlenp, newp, &vmtotals,
		    sizeof(vmtotals)));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}
/*
 * Fill *totalp with a snapshot of process and memory statistics.
 * Called on demand (in this file, from vm_sysctl for VM_METER).
 *
 * Three passes:
 *   1. clear OBJ_ACTIVE on every object on vm_object_list;
 *   2. classify every non-system process and flag the objects mapped
 *      by the ones that are considered active;
 *   3. add up virtual and resident page counts over all objects.
 */
void
vmtotal(totalp)
	register struct vmtotal *totalp;
{
	register struct proc *p;
	register vm_map_entry_t entry;
	register vm_object_t object;
	register vm_map_t map;
	int paging;

	bzero(totalp, sizeof *totalp);

	/*
	 * Pass 1: start with every object marked inactive.
	 */
	simple_lock(&vm_object_list_lock);
	object = vm_object_list.tqh_first;
	while (object != NULL) {
		object->flags &= ~OBJ_ACTIVE;
		object = object->object_list.tqe_next;
	}
	simple_unlock(&vm_object_list_lock);

	/*
	 * Pass 2: per-process counters.
	 */
	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
		if (p->p_flag & P_SYSTEM)
			continue;

		switch (p->p_stat) {
		case 0:
			continue;

		case SSLEEP:
		case SSTOP:
			if ((p->p_flag & P_INMEM) == 0) {
				if (p->p_slptime < maxslp)
					totalp->t_sw++;
			} else if (p->p_priority <= PZERO) {
				totalp->t_dw++;
			} else if (p->p_slptime < maxslp) {
				totalp->t_sl++;
			}
			/* Long sleepers do not make their objects active. */
			if (p->p_slptime >= maxslp)
				continue;
			break;

		case SRUN:
		case SIDL:
			if ((p->p_flag & P_INMEM) == 0)
				totalp->t_sw++;
			else
				totalp->t_rq++;
			/* Processes still in SIDL are counted but not walked. */
			if (p->p_stat == SIDL)
				continue;
			break;

		default:
			break;
		}

		/*
		 * Flag the objects this process maps.
		 *
		 * XXX shadow objects with no resident pages are skipped in
		 * favour of what they shadow.  This eliminates the forced
		 * shadows caused by MAP_PRIVATE.  For now the shadow must
		 * cover the original completely (zero offset, equal size),
		 * to catch just those cases.
		 */
		paging = 0;
		map = &p->p_vmspace->vm_map;
		for (entry = map->header.next; entry != &map->header;
		    entry = entry->next) {
			if (entry->is_a_map || entry->is_sub_map)
				continue;
			object = entry->object.vm_object;
			if (object == NULL)
				continue;
			while (object->shadow &&
			    object->resident_page_count == 0 &&
			    object->shadow_offset == 0 &&
			    object->size == object->shadow->size) {
				object = object->shadow;
			}
			object->flags |= OBJ_ACTIVE;
			paging |= object->paging_in_progress;
		}
		if (paging)
			totalp->t_pw++;
	}

	/*
	 * Pass 3: memory usage, overall / active / shared / active-shared.
	 */
	simple_lock(&vm_object_list_lock);
	object = vm_object_list.tqh_first;
	while (object != NULL) {
		totalp->t_vm += num_pages(object->size);
		totalp->t_rm += object->resident_page_count;
		if (object->flags & OBJ_ACTIVE) {
			totalp->t_avm += num_pages(object->size);
			totalp->t_arm += object->resident_page_count;
		}
		if (object->ref_count > 1) {
			/* shared object */
			totalp->t_vmshr += num_pages(object->size);
			totalp->t_rmshr += object->resident_page_count;
			if (object->flags & OBJ_ACTIVE) {
				totalp->t_avmshr += num_pages(object->size);
				totalp->t_armshr += object->resident_page_count;
			}
		}
		object = object->object_list.tqe_next;
	}
	simple_unlock(&vm_object_list_lock);

	totalp->t_free = cnt.v_free_count;
}

844
sys/vm/vm_mmap.c Normal file
View File

@ -0,0 +1,844 @@
/*
* Copyright (c) 1988 University of Utah.
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* the Systems Programming Group of the University of Utah Computer
* Science Department.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
*
* @(#)vm_mmap.c 8.10 (Berkeley) 2/19/95
*/
/*
* Mapped file (mmap) interface to VM
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <miscfs/specfs/specdev.h>
#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_prot.h>
/*
 * Debug tracing, compiled in only under DEBUG.  mmapdebug is a bit
 * mask of the MDB_* values below; it starts at 0 (no tracing).
 */
#ifdef DEBUG
int mmapdebug = 0;
#define MDB_FOLLOW 0x01 /* print arguments on system call entry */
#define MDB_SYNC 0x02 /* trace msync() */
#define MDB_MAPIT 0x04 /* trace vm_mmap() results */
#endif
/*
 * sbrk system call: placeholder that always fails with EOPNOTSUPP.
 * None of the arguments are examined.
 */
/* ARGSUSED */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args /* {
		syscallarg(int) incr;
	} */ *uap;
	register_t *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}
/*
 * sstk system call: placeholder that always fails with EOPNOTSUPP.
 * None of the arguments are examined.
 */
/* ARGSUSED */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args /* {
		syscallarg(int) incr;
	} */ *uap;
	register_t *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Compatibility getpagesize: hands PAGE_SIZE back through *retval and
 * always succeeds.  p and uap are unused.
 */
/* ARGSUSED */
int
compat_43_getpagesize(p, uap, retval)
	struct proc *p;
	void *uap;
	register_t *retval;
{

	retval[0] = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
#ifdef COMPAT_43
/*
 * Old-style mmap entry point.  Converts the old argument block into a
 * struct mmap_args and forwards it to mmap():
 *   - the low three bits of the old prot value index a table that
 *     yields the PROT_* bits;
 *   - each OMAP_* flag is mapped to its MAP_* counterpart, with
 *     MAP_PRIVATE supplied when OMAP_SHARED is absent;
 *   - addr, len, fd and pos are copied across.
 * The pad member of the new argument block is left unset.
 */
int
compat_43_mmap(p, uap, retval)
	struct proc *p;
	register struct compat_43_mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pos;
	} */ *uap;
	register_t *retval;
{
	struct mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ nargs;
	/* Indexed by the low three bits of the old protection value. */
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC|PROT_WRITE,
		PROT_READ,
		PROT_EXEC|PROT_READ,
		PROT_WRITE|PROT_READ,
		PROT_EXEC|PROT_WRITE|PROT_READ,
	};
#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800
	int oflags, nflags;

	oflags = SCARG(uap, flags);

	/* Exactly one of MAP_SHARED / MAP_PRIVATE is always chosen. */
	nflags = (oflags & OMAP_SHARED) ? MAP_SHARED : MAP_PRIVATE;
	if (oflags & OMAP_ANON)
		nflags |= MAP_ANON;
	if (oflags & OMAP_COPY)
		nflags |= MAP_COPY;
	if (oflags & OMAP_FIXED)
		nflags |= MAP_FIXED;
	if (oflags & OMAP_INHERIT)
		nflags |= MAP_INHERIT;

	SCARG(&nargs, addr) = SCARG(uap, addr);
	SCARG(&nargs, len) = SCARG(uap, len);
	SCARG(&nargs, prot) = cvtbsdprot[SCARG(uap, prot)&0x7];
	SCARG(&nargs, flags) = nflags;
	SCARG(&nargs, fd) = SCARG(uap, fd);
	SCARG(&nargs, pos) = SCARG(uap, pos);
	return (mmap(p, &nargs, retval));
}
#endif
/*
 * mmap system call.
 *
 * Validates the user arguments, works out the maximum protection the
 * mapping may ever have, and hands the request to vm_mmap().  On
 * success the chosen address is stored in *retval.
 *
 * Returns:
 *	EINVAL	misaligned MAP_FIXED address, length negative when viewed
 *		as ssize_t, MAP_ANON with fd != -1, MAP_FIXED range
 *		outside the user address limits or wrapping, descriptor
 *		that is not a vnode, or vnode that is neither VREG nor VCHR
 *	EBADF	descriptor out of range or not open
 *	EACCES	requested protection not permitted by the open mode
 *	otherwise the value returned by vm_mmap()
 */
int
mmap(p, uap, retval)
struct proc *p;
register struct mmap_args /* {
syscallarg(caddr_t) addr;
syscallarg(size_t) len;
syscallarg(int) prot;
syscallarg(int) flags;
syscallarg(int) fd;
syscallarg(long) pad;
syscallarg(off_t) pos;
} */ *uap;
register_t *retval;
{
register struct filedesc *fdp = p->p_fd;
register struct file *fp;
struct vnode *vp;
vm_offset_t addr, pos;
vm_size_t size;
vm_prot_t prot, maxprot;
caddr_t handle;
int flags, error;
/* Bits outside VM_PROT_ALL in the requested protection are dropped. */
prot = SCARG(uap, prot) & VM_PROT_ALL;
flags = SCARG(uap, flags);
/* NOTE(review): off_t is narrowed to vm_offset_t here -- confirm widths. */
pos = SCARG(uap, pos);
#ifdef DEBUG
if (mmapdebug & MDB_FOLLOW)
printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
p->p_pid, SCARG(uap, addr), SCARG(uap, len), prot,
flags, SCARG(uap, fd), pos);
#endif
/*
* Address (if FIXED) must be page aligned.
* Size is implicitly rounded to a page boundary.
*
* XXX most (all?) vendors require that the file offset be
* page aligned as well. However, we already have applications
* (e.g. nlist) that rely on unrestricted alignment. Since we
* support it, let it happen.
*/
addr = (vm_offset_t) SCARG(uap, addr);
if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
#if 0
((flags & MAP_ANON) == 0 && (pos & PAGE_MASK)) ||
#endif
(ssize_t)SCARG(uap, len) < 0 || ((flags & MAP_ANON) && SCARG(uap, fd) != -1))
return (EINVAL);
size = (vm_size_t) round_page(SCARG(uap, len));
/*
* Check for illegal addresses. Watch out for address wrap...
* Note that VM_*_ADDRESS are not constants due to casts (argh).
*/
if (flags & MAP_FIXED) {
if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
return (EINVAL);
if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
return (EINVAL);
if (addr > addr + size)
return (EINVAL);
}
/*
* XXX for non-fixed mappings where no hint is provided or
* the hint would fall in the potential heap space,
* place it after the end of the largest possible heap.
*
* There should really be a pmap call to determine a reasonable
* location.
*/
/* This "else" pairs with the "if (flags & MAP_FIXED)" above. */
else if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
if (flags & MAP_ANON) {
/*
* Mapping blank space is trivial.
*/
handle = NULL;
maxprot = VM_PROT_ALL;
pos = 0;
} else {
/*
* Mapping file, get fp for validation.
* Obtain vnode and make sure it is of appropriate type.
*/
/* The unsigned cast makes a negative fd fail the range check. */
if (((unsigned)SCARG(uap, fd)) >= fdp->fd_nfiles ||
(fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
return (EBADF);
if (fp->f_type != DTYPE_VNODE)
return (EINVAL);
vp = (struct vnode *)fp->f_data;
if (vp->v_type != VREG && vp->v_type != VCHR)
return (EINVAL);
/*
* XXX hack to handle use of /dev/zero to map anon
* memory (ala SunOS).
*/
/*
* NOTE(review): unlike the explicit MAP_ANON path above, pos is
* not reset to 0 here, so a caller-supplied offset is passed on
* to vm_mmap() -- confirm that this is intended.
*/
if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
handle = NULL;
maxprot = VM_PROT_ALL;
flags |= MAP_ANON;
} else {
/*
* Ensure that file and memory protections are
* compatible. Note that we only worry about
* writability if mapping is shared; in this case,
* current and max prot are dictated by the open file.
* XXX use the vnode instead? Problem is: what
* credentials do we use for determination?
* What if proc does a setuid?
*/
maxprot = VM_PROT_EXECUTE; /* ??? */
if (fp->f_flag & FREAD)
maxprot |= VM_PROT_READ;
else if (prot & PROT_READ)
return (EACCES);
if (flags & MAP_SHARED) {
if (fp->f_flag & FWRITE)
maxprot |= VM_PROT_WRITE;
else if (prot & PROT_WRITE)
return (EACCES);
} else
maxprot |= VM_PROT_WRITE;
handle = (caddr_t)vp;
}
}
error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
flags, handle, pos);
if (error == 0)
*retval = (register_t)addr;
return (error);
}
/*
 * msync system call: clean (and invalidate) the pages of a range of
 * the calling process's address space via vm_map_clean().
 *
 * addr must be page aligned and addr + len must not wrap.  A zero len
 * means "the whole map entry containing addr".
 *
 * Returns 0 on success, EINVAL for bad arguments, for an address with
 * no map entry (len == 0 case) or for KERN_INVALID_ADDRESS / unknown
 * results from vm_map_clean(), and EIO for KERN_FAILURE.
 */
int
msync(p, uap, retval)
	struct proc *p;
	struct msync_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
	} */ *uap;
	register_t *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	vm_map_t map;
	int rv;
	boolean_t syncio, invalidate;

#ifdef DEBUG
	if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
		printf("msync(%d): addr %x len %x\n",
		    p->p_pid, SCARG(uap, addr), SCARG(uap, len));
#endif
	if (((vm_offset_t)SCARG(uap, addr) & PAGE_MASK) ||
	    SCARG(uap, addr) + SCARG(uap, len) < SCARG(uap, addr))
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);
	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		if (rv) {
			/*
			 * Copy the bounds out while the read lock is
			 * still held; once it is dropped the entry may
			 * be changed or freed by another holder.
			 */
			size = entry->end - entry->start;
			addr = entry->start;
		}
		vm_map_unlock_read(map);
		if (!rv)
			return (EINVAL);
	}
#ifdef DEBUG
	if (mmapdebug & MDB_SYNC)
		printf("msync: cleaning/flushing address range [%x-%x)\n",
		    addr, addr+size);
#endif
	/*
	 * Could pass this in as a third flag argument to implement
	 * Sun's MS_ASYNC.
	 */
	syncio = TRUE;
	/*
	 * XXX bummer, gotta flush all cached pages to ensure
	 * consistency with the file system cache.  Otherwise, we could
	 * pass this in to implement Sun's MS_INVALIDATE.
	 */
	invalidate = TRUE;
	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr+size, syncio, invalidate);
	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
	return (0);
}
/*
 * munmap system call: remove [addr, addr + round_page(len)) from the
 * caller's address map.
 *
 * Fails with EINVAL when addr is not page aligned, len is negative, or
 * the range is outside the user address limits or wraps.  A length
 * that rounds to zero succeeds without doing anything.  The range is
 * not required to be fully mapped.
 */
int
munmap(p, uap, retval)
	register struct proc *p;
	register struct munmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
	} */ *uap;
	register_t *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	vm_map_t map;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munmap(%d): addr %x len %x\n",
		    p->p_pid, SCARG(uap, addr), SCARG(uap, len));
#endif

	addr = (vm_offset_t) SCARG(uap, addr);
	if ((addr & PAGE_MASK) || SCARG(uap, len) < 0) {
		return(EINVAL);
	}
	size = (vm_size_t) round_page(SCARG(uap, len));
	if (size == 0) {
		return(0);
	}
	/*
	 * Reject illegal ranges, including ones that wrap around.
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if ((VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS) ||
	    (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS) ||
	    addr > addr + size) {
		return (EINVAL);
	}
	map = &p->p_vmspace->vm_map;
	/*
	 * Make sure entire range is allocated.
	 * XXX this seemed overly restrictive, so we relaxed it.
	 */
#if 0
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
		return(EINVAL);
#endif
	/* returns nothing but KERN_SUCCESS anyway */
	(void) vm_map_remove(map, addr, addr+size);
	return(0);
}
void
munmapfd(p, fd)
struct proc *p;
int fd;
{
#ifdef DEBUG
if (mmapdebug & MDB_FOLLOW)
printf("munmapfd(%d): fd %d\n", p->p_pid, fd);
#endif
/*
* XXX should vm_deallocate any regions mapped to this file
*/
p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}
/*
 * mprotect system call: set the current protection of
 * [addr, addr + len) in the caller's map to prot & VM_PROT_ALL.
 *
 * Returns EINVAL for a misaligned address or negative length, 0 when
 * vm_map_protect() succeeds, EACCES when it reports a protection
 * failure and EINVAL for any other result.
 */
int
mprotect(p, uap, retval)
	struct proc *p;
	struct mprotect_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) prot;
	} */ *uap;
	register_t *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	register vm_prot_t prot;
	int rv;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mprotect(%d): addr %x len %x prot %d\n",
		    p->p_pid, SCARG(uap, addr), SCARG(uap, len), SCARG(uap, prot));
#endif

	addr = (vm_offset_t)SCARG(uap, addr);
	if ((addr & PAGE_MASK) || SCARG(uap, len) < 0) {
		return(EINVAL);
	}
	/* The length is passed through as given, without page rounding. */
	size = (vm_size_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	rv = vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
	    FALSE);
	if (rv == KERN_SUCCESS)
		return (0);
	if (rv == KERN_PROTECTION_FAILURE)
		return (EACCES);
	return (EINVAL);
}
/*
 * madvise system call: placeholder that always fails with EOPNOTSUPP.
 * None of the arguments are examined.
 */
/* ARGSUSED */
int
madvise(p, uap, retval)
	struct proc *p;
	struct madvise_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) behav;
	} */ *uap;
	register_t *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}
/*
 * mincore system call: placeholder that always fails with EOPNOTSUPP.
 * None of the arguments are examined.
 */
/* ARGSUSED */
int
mincore(p, uap, retval)
	struct proc *p;
	struct mincore_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(char *) vec;
	} */ *uap;
	register_t *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}
int
mlock(p, uap, retval)
struct proc *p;
struct mlock_args /* {
syscallarg(caddr_t) addr;
syscallarg(size_t) len;
} */ *uap;
register_t *retval;
{
vm_offset_t addr;
vm_size_t size;
int error;
extern int vm_page_max_wired;
#ifdef DEBUG
if (mmapdebug & MDB_FOLLOW)
printf("mlock(%d): addr %x len %x\n",
p->p_pid, SCARG(uap, addr), SCARG(uap, len));
#endif
addr = (vm_offset_t)SCARG(uap, addr);
if ((addr & PAGE_MASK) || SCARG(uap, addr) + SCARG(uap, len) < SCARG(uap, addr))
return (EINVAL);
size = round_page((vm_size_t)SCARG(uap, len));
if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
return (EAGAIN);
#ifdef pmap_wired_count
if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
return (EAGAIN);
#else
if (error = suser(p->p_ucred, &p->p_acflag))
return (error);
#endif
error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
int
munlock(p, uap, retval)
struct proc *p;
struct munlock_args /* {
syscallarg(caddr_t) addr;
syscallarg(size_t) len;
} */ *uap;
register_t *retval;
{
vm_offset_t addr;
vm_size_t size;
int error;
#ifdef DEBUG
if (mmapdebug & MDB_FOLLOW)
printf("munlock(%d): addr %x len %x\n",
p->p_pid, SCARG(uap, addr), SCARG(uap, len));
#endif
addr = (vm_offset_t)SCARG(uap, addr);
if ((addr & PAGE_MASK) || SCARG(uap, addr) + SCARG(uap, len) < SCARG(uap, addr))
return (EINVAL);
#ifndef pmap_wired_count
if (error = suser(p->p_ucred, &p->p_acflag))
return (error);
#endif
size = round_page((vm_size_t)SCARG(uap, len));
error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
/*
* Internal version of mmap.
* Currently used by mmap, exec, and sys5 shared memory.
* Handle is either a vnode pointer or NULL for MAP_ANON.
*
* Arguments:
*	map	target address map
*	addr	in: requested address; out: address actually used
*		(page-rounded here unless MAP_FIXED is set)
*	size	length of the mapping; 0 returns success immediately
*	prot	protection to establish now
*	maxprot	maximum protection the mapping may ever be given
*	flags	MAP_* flags (MAP_FIXED, MAP_ANON, MAP_SHARED and
*		MAP_COPY are examined here)
*	handle	see above
*	foff	offset passed to the pager and to the allocation calls
*
* Returns an errno value: 0 on success; EINVAL or ENOMEM when no pager
* can be allocated; otherwise the final KERN_* status is translated at
* "out" (ENOMEM for KERN_INVALID_ADDRESS and KERN_NO_SPACE, EACCES for
* KERN_PROTECTION_FAILURE, EINVAL for anything else).
*
* With MAP_FIXED, whatever currently occupies [*addr, *addr + size) is
* deallocated before the new mapping is attempted.
*/
int
vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
register vm_map_t map;
register vm_offset_t *addr;
register vm_size_t size;
vm_prot_t prot, maxprot;
register int flags;
caddr_t handle; /* XXX should be vp */
vm_offset_t foff;
{
register vm_pager_t pager;
boolean_t fitit;
vm_object_t object;
struct vnode *vp = NULL;
int type;
int rv = KERN_SUCCESS;
if (size == 0)
return (0);
if ((flags & MAP_FIXED) == 0) {
fitit = TRUE;
*addr = round_page(*addr);
} else {
fitit = FALSE;
(void)vm_deallocate(map, *addr, size);
}
/*
* Lookup/allocate pager. All except an unnamed anonymous lookup
* gain a reference to ensure continued existence of the object.
* (XXX the exception is to appease the pageout daemon)
*/
if (flags & MAP_ANON)
type = PG_DFLT;
else {
vp = (struct vnode *)handle;
if (vp->v_type == VCHR) {
type = PG_DEVICE;
/* For devices the pager is keyed by device number, not vnode. */
handle = (caddr_t)vp->v_rdev;
} else
type = PG_VNODE;
}
pager = vm_pager_allocate(type, handle, size, prot, foff);
if (pager == NULL)
return (type == PG_DEVICE ? EINVAL : ENOMEM);
/*
* Find object and release extra reference gained by lookup
*/
object = vm_object_lookup(pager);
vm_object_deallocate(object);
/*
* Anonymous memory.
*/
if (flags & MAP_ANON) {
rv = vm_allocate_with_pager(map, addr, size, fitit,
pager, foff, TRUE);
if (rv != KERN_SUCCESS) {
if (handle == NULL)
vm_pager_deallocate(pager);
else
vm_object_deallocate(object);
goto out;
}
/*
* Don't cache anonymous objects.
* Loses the reference gained by vm_pager_allocate.
* Note that object will be NULL when handle == NULL,
* this is ok since vm_allocate_with_pager has made
* sure that these objects are uncached.
*/
(void) pager_cache(object, FALSE);
#ifdef DEBUG
if (mmapdebug & MDB_MAPIT)
printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
curproc->p_pid, *addr, size, pager);
#endif
}
/*
* Must be a mapped file.
* Distinguish between character special and regular files.
*/
else if (vp->v_type == VCHR) {
rv = vm_allocate_with_pager(map, addr, size, fitit,
pager, foff, FALSE);
/*
* Uncache the object and lose the reference gained
* by vm_pager_allocate(). If the call to
* vm_allocate_with_pager() was successful, then we
* gained an additional reference ensuring the object
* will continue to exist. If the call failed then
* the deallocate call below will terminate the
* object which is fine.
*/
(void) pager_cache(object, FALSE);
if (rv != KERN_SUCCESS)
goto out;
}
/*
* A regular file
*/
else {
/*
* NOTE(review): a NULL object is only reported under DEBUG; the
* private-mapping path below dereferences object without a check.
*/
#ifdef DEBUG
if (object == NULL)
printf("vm_mmap: no object: vp %x, pager %x\n",
vp, pager);
#endif
/*
* Map it directly.
* Allows modifications to go out to the vnode.
*/
if (flags & MAP_SHARED) {
rv = vm_allocate_with_pager(map, addr, size,
fitit, pager,
foff, FALSE);
if (rv != KERN_SUCCESS) {
vm_object_deallocate(object);
goto out;
}
/*
* Don't cache the object. This is the easiest way
* of ensuring that data gets back to the filesystem
* because vnode_pager_deallocate() will fsync the
* vnode. pager_cache() will lose the extra ref.
*/
if (prot & VM_PROT_WRITE)
pager_cache(object, FALSE);
else
vm_object_deallocate(object);
}
/*
* Copy-on-write of file. Two flavors.
* MAP_COPY is true COW, you essentially get a snapshot of
* the region at the time of mapping. MAP_PRIVATE means only
* that your changes are not reflected back to the object.
* Changes made by others will be seen.
*/
else {
vm_map_t tmap;
vm_offset_t off;
/* locate and allocate the target address space */
rv = vm_map_find(map, NULL, (vm_offset_t)0,
addr, size, fitit);
if (rv != KERN_SUCCESS) {
vm_object_deallocate(object);
goto out;
}
/*
* Map the file into a scratch map first, then copy that
* mapping into the real map with vm_map_copy() below.
*/
tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
VM_MIN_ADDRESS+size, TRUE);
off = VM_MIN_ADDRESS;
rv = vm_allocate_with_pager(tmap, &off, size,
TRUE, pager,
foff, FALSE);
if (rv != KERN_SUCCESS) {
vm_object_deallocate(object);
vm_map_deallocate(tmap);
goto out;
}
/*
* (XXX)
* MAP_PRIVATE implies that we see changes made by
* others. To ensure that we need to guarantee that
* no copy object is created (otherwise original
* pages would be pushed to the copy object and we
* would never see changes made by others). We
* totally sleaze it right now by marking the object
* internal temporarily.
*/
if ((flags & MAP_COPY) == 0)
object->flags |= OBJ_INTERNAL;
rv = vm_map_copy(map, tmap, *addr, size, off,
FALSE, FALSE);
/* Cleared unconditionally, whether or not it was set just above. */
object->flags &= ~OBJ_INTERNAL;
/*
* (XXX)
* My oh my, this only gets worse...
* Force creation of a shadow object so that
* vm_map_fork will do the right thing.
*/
if ((flags & MAP_COPY) == 0) {
/*
* This tmap deliberately shadows the scratch map of the
* enclosing block: vm_map_lookup() takes a pointer to its
* map argument, and the outer tmap must stay intact for
* the vm_map_deallocate() call further down.
*/
vm_map_t tmap;
vm_map_entry_t tentry;
vm_object_t tobject;
vm_offset_t toffset;
vm_prot_t tprot;
boolean_t twired, tsu;
tmap = map;
/* Only the side effects of the lookup are wanted; results are unused. */
vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
&tentry, &tobject, &toffset,
&tprot, &twired, &tsu);
vm_map_lookup_done(tmap, tentry);
}
/*
* (XXX)
* Map copy code cannot detect sharing unless a
* sharing map is involved. So we cheat and write
* protect everything ourselves.
*/
vm_object_pmap_copy(object, foff, foff + size);
vm_object_deallocate(object);
vm_map_deallocate(tmap);
if (rv != KERN_SUCCESS)
goto out;
}
#ifdef DEBUG
if (mmapdebug & MDB_MAPIT)
printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
curproc->p_pid, *addr, size, pager);
#endif
}
/*
* Correct protection (default is VM_PROT_ALL).
* If maxprot is different than prot, we must set both explicitly.
*/
rv = KERN_SUCCESS;
if (maxprot != VM_PROT_ALL)
rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
if (rv == KERN_SUCCESS && prot != maxprot)
rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
if (rv != KERN_SUCCESS) {
(void) vm_deallocate(map, *addr, size);
goto out;
}
/*
* Shared memory is also shared with children.
*/
if (flags & MAP_SHARED) {
rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE);
if (rv != KERN_SUCCESS) {
(void) vm_deallocate(map, *addr, size);
goto out;
}
}
/* Common exit: translate the KERN_* status in rv to an errno value. */
out:
#ifdef DEBUG
if (mmapdebug & MDB_MAPIT)
printf("vm_mmap: rv %d\n", rv);
#endif
switch (rv) {
case KERN_SUCCESS:
return (0);
case KERN_INVALID_ADDRESS:
case KERN_NO_SPACE:
return (ENOMEM);
case KERN_PROTECTION_FAILURE:
return (EACCES);
default:
return (EINVAL);
}
}

1449
sys/vm/vm_object.c Normal file

File diff suppressed because it is too large Load Diff

173
sys/vm/vm_object.h Normal file
View File

@ -0,0 +1,173 @@
/*
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vm_object.h 8.4 (Berkeley) 1/9/95
*
*
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
* All rights reserved.
*
* Authors: Avadis Tevanian, Jr., Michael Wayne Young
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* Virtual memory object module definitions.
*/
#ifndef _VM_OBJECT_
#define _VM_OBJECT_
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
/*
* Types defined:
*
* vm_object_t Virtual memory object.
*
* A struct vm_object carries the list of pages currently resident for
* the object, its reference count and size, the pager that supplies its
* data, and the links to its copy and shadow objects.
*/
struct vm_object {
struct pglist memq; /* Resident memory: pages are linked
here through vm_page.listq */
TAILQ_ENTRY(vm_object) object_list; /* list of all objects */
u_short flags; /* OBJ_* bits, see below */
u_short paging_in_progress; /* Paging (in or out) so
don't collapse or destroy */
simple_lock_data_t Lock; /* Synchronization; taken through the
vm_object_lock*() macros below */
int ref_count; /* How many refs?? */
vm_size_t size; /* Object size */
int resident_page_count;
/* number of resident pages; adjusted
whenever a page is linked onto or
unlinked from memq */
struct vm_object *copy; /* Object that holds copies of
my changed pages */
vm_pager_t pager; /* Where to get data */
vm_offset_t paging_offset; /* Offset into paging space */
struct vm_object *shadow; /* My shadow */
vm_offset_t shadow_offset; /* Offset in shadow */
TAILQ_ENTRY(vm_object) cached_list; /* for persistence: links on the
list of cached objects */
};
/*
* Flags
*
* Bit values for the `flags' member of struct vm_object (a u_short).
*/
#define OBJ_CANPERSIST 0x0001 /* allow to persist */
#define OBJ_INTERNAL 0x0002 /* internally created object */
#define OBJ_ACTIVE 0x0004 /* used to mark active objects */
/*
* Hash chain support for locating objects.
*
* vm_object_hash_head is the head of one chain and each chain element
* names a single object.
* NOTE(review): the key that selects a chain is not visible in this
* header; presumably it is the object's pager (see vm_object_lookup and
* vm_object_enter) -- confirm against the implementation.
*/
TAILQ_HEAD(vm_object_hash_head, vm_object_hash_entry);
struct vm_object_hash_entry {
TAILQ_ENTRY(vm_object_hash_entry) hash_links; /* hash chain links */
vm_object_t object; /* object represented */
};
typedef struct vm_object_hash_entry *vm_object_hash_entry_t;
#ifdef KERNEL
/*
* Kernel-wide object bookkeeping: the list of cached (persisting)
* objects, the list of all allocated objects, and the locks that
* protect each of them.
*
* NOTE(review): these variables are written here without `extern', so
* every file that includes this header emits its own tentative
* definition. Confirm that the build relies on common-symbol merging
* before changing this or building with -fno-common.
*/
TAILQ_HEAD(object_q, vm_object);
struct object_q vm_object_cached_list; /* list of objects persisting */
int vm_object_cached; /* size of cached list */
simple_lock_data_t vm_cache_lock; /* lock for object cache */
struct object_q vm_object_list; /* list of allocated objects */
long vm_object_count; /* count of all objects */
simple_lock_data_t vm_object_list_lock;
/* lock for object list and count */
vm_object_t kernel_object; /* the single kernel object */
vm_object_t kmem_object;
/* Acquire and release the lock that covers the object cache. */
#define vm_object_cache_lock() simple_lock(&vm_cache_lock)
#define vm_object_cache_unlock() simple_unlock(&vm_cache_lock)
#endif /* KERNEL */
/*
* Per-object locking. Each macro operates on the simple lock embedded
* in the object as its `Lock' member.
*
* vm_object_sleep hands the event, that same lock and the
* interruptible flag to thread_sleep().
*/
#define vm_object_lock_init(object) simple_lock_init(&(object)->Lock)
#define vm_object_lock(object) simple_lock(&(object)->Lock)
#define vm_object_unlock(object) simple_unlock(&(object)->Lock)
#define vm_object_lock_try(object) simple_lock_try(&(object)->Lock)
#define vm_object_sleep(event, object, interruptible) \
thread_sleep((event), &(object)->Lock, (interruptible))
#ifdef KERNEL
/*
* Entry points of the object module.
*
* Only the signatures are established by this header; the implementing
* file is not part of this view, so behaviour is not described here.
* The argument lists are wrapped in __P(), which is defined elsewhere
* (presumably <sys/cdefs.h> -- confirm).
*/
vm_object_t vm_object_allocate __P((vm_size_t));
void vm_object_cache_clear __P((void));
void vm_object_cache_trim __P((void));
boolean_t vm_object_coalesce __P((vm_object_t, vm_object_t,
vm_offset_t, vm_offset_t, vm_offset_t, vm_size_t));
void vm_object_collapse __P((vm_object_t));
void vm_object_copy __P((vm_object_t, vm_offset_t, vm_size_t,
vm_object_t *, vm_offset_t *, boolean_t *));
void vm_object_deactivate_pages __P((vm_object_t));
void vm_object_deallocate __P((vm_object_t));
void vm_object_enter __P((vm_object_t, vm_pager_t));
void vm_object_init __P((vm_size_t));
vm_object_t vm_object_lookup __P((vm_pager_t));
boolean_t vm_object_page_clean __P((vm_object_t,
vm_offset_t, vm_offset_t, boolean_t, boolean_t));
void vm_object_page_remove __P((vm_object_t,
vm_offset_t, vm_offset_t));
void vm_object_pmap_copy __P((vm_object_t,
vm_offset_t, vm_offset_t));
void vm_object_pmap_remove __P((vm_object_t,
vm_offset_t, vm_offset_t));
void vm_object_print __P((vm_object_t, boolean_t));
void vm_object_reference __P((vm_object_t));
void vm_object_remove __P((vm_pager_t));
void vm_object_setpager __P((vm_object_t,
vm_pager_t, vm_offset_t, boolean_t));
void vm_object_shadow __P((vm_object_t *,
vm_offset_t *, vm_size_t));
void vm_object_terminate __P((vm_object_t));
#endif
#endif /* _VM_OBJECT_ */

710
sys/vm/vm_page.c Normal file
View File

@ -0,0 +1,710 @@
/*
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vm_page.c 8.4 (Berkeley) 1/9/95
*
*
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
* All rights reserved.
*
* Authors: Avadis Tevanian, Jr., Michael Wayne Young
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* Resident memory management module.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
/*
* Associated with each page of user-allocatable memory is a
* page structure.
*
* The variables below hold the object/offset hash table, the three
* page queues with their locks, and the description of the physical
* range covered by vm_page_array.
*/
struct pglist *vm_page_buckets; /* Array of buckets */
int vm_page_bucket_count = 0; /* How big is array? If still 0 when
vm_page_startup runs, it is computed
there */
int vm_page_hash_mask; /* Mask for hash function
(vm_page_bucket_count - 1) */
simple_lock_data_t bucket_lock; /* lock for all buckets XXX */
struct pglist vm_page_queue_free; /* pages available for allocation */
struct pglist vm_page_queue_active; /* pages flagged PG_ACTIVE */
struct pglist vm_page_queue_inactive; /* pages flagged PG_INACTIVE */
simple_lock_data_t vm_page_queue_lock; /* taken by vm_page_lock_queues() */
simple_lock_data_t vm_page_queue_free_lock; /* guards the free queue */
/* has physical page allocation been initialized? */
boolean_t vm_page_startup_initialized;
vm_page_t vm_page_array; /* one struct vm_page per managed page */
long first_page; /* page number of the first managed page */
long last_page; /* page number of the last managed page */
vm_offset_t first_phys_addr; /* ptoa(first_page) */
vm_offset_t last_phys_addr; /* ptoa(last_page) + PAGE_MASK */
vm_size_t page_mask; /* cnt.v_page_size - 1 */
int page_shift; /* log2 of cnt.v_page_size */
/*
 *	vm_set_page_size:
 *
 *	Establish the page size used by the VM system.  Must run before
 *	anything that depends on the page size.
 *
 *	A zero cnt.v_page_size is replaced by DEFAULT_PAGE_SIZE.  The
 *	globals page_mask and page_shift are then derived from it; a size
 *	that is not a power of two causes a panic.
 */
void
vm_set_page_size()
{
	if (cnt.v_page_size == 0)
		cnt.v_page_size = DEFAULT_PAGE_SIZE;

	/* A power of two shares no bits with itself minus one. */
	page_mask = cnt.v_page_size - 1;
	if ((page_mask & cnt.v_page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");

	/* Count up until the single set bit of the page size is reached. */
	page_shift = 0;
	while ((1 << page_shift) != cnt.v_page_size)
		page_shift++;
}
/*
* vm_page_startup:
*
* Initializes the resident memory module.
*
* Allocates memory for the page cells, and
* for the object/offset-to-page hash table headers.
* Each page cell is initialized and placed on the free list.
*
* start and end point at the bounds of the physical range being
* handed over; *end is truncated to a page boundary here. All
* storage is obtained from pmap_bootstrap_alloc(), and
* vm_page_startup_initialized is set on the way out to record that
* it must not be used afterwards.
*/
void
vm_page_startup(start, end)
vm_offset_t *start;
vm_offset_t *end;
{
register vm_page_t m;
register struct pglist *bucket;
vm_size_t npages;
int i;
vm_offset_t pa;
extern vm_offset_t kentry_data;
extern vm_size_t kentry_data_size;
/*
* Initialize the locks
*/
simple_lock_init(&vm_page_queue_free_lock);
simple_lock_init(&vm_page_queue_lock);
/*
* Initialize the queue headers for the free queue,
* the active queue and the inactive queue.
*/
TAILQ_INIT(&vm_page_queue_free);
TAILQ_INIT(&vm_page_queue_active);
TAILQ_INIT(&vm_page_queue_inactive);
/*
* Calculate the number of hash table buckets.
*
* The number of buckets MUST BE a power of 2, and
* the actual value is the smallest power of 2 that is
* not less than the number of physical pages in the
* range (the loop stops as soon as the count reaches it).
*
* A non-zero vm_page_bucket_count set beforehand is kept as is.
*
* Note:
* This computation can be tweaked if desired.
*/
if (vm_page_bucket_count == 0) {
vm_page_bucket_count = 1;
while (vm_page_bucket_count < atop(*end - *start))
vm_page_bucket_count <<= 1;
}
vm_page_hash_mask = vm_page_bucket_count - 1;
/*
* Allocate (and initialize) the hash table buckets.
*/
vm_page_buckets = (struct pglist *)
pmap_bootstrap_alloc(vm_page_bucket_count * sizeof(struct pglist));
bucket = vm_page_buckets;
for (i = vm_page_bucket_count; i--;) {
TAILQ_INIT(bucket);
bucket++;
}
simple_lock_init(&bucket_lock);
/*
* Truncate the remainder of physical memory to our page size.
*/
*end = trunc_page(*end);
/*
* Pre-allocate maps and map entries that cannot be dynamically
* allocated via malloc(). The maps include the kernel_map and
* kmem_map which must be initialized before malloc() will
* work (obviously). Also could include pager maps which would
* be allocated before kmeminit.
*
* Allow some kernel map entries... this should be plenty
* since people shouldn't be cluttering up the kernel
* map (they should use their own maps).
*/
kentry_data_size = round_page(MAX_KMAP*sizeof(struct vm_map) +
MAX_KMAPENT*sizeof(struct vm_map_entry));
kentry_data = (vm_offset_t) pmap_bootstrap_alloc(kentry_data_size);
/*
* Compute the number of pages of memory that will be
* available for use (taking into account the overhead
* of a page structure per page): each managed page costs
* PAGE_SIZE bytes plus one struct vm_page.
*/
cnt.v_free_count = npages = (*end - *start + sizeof(struct vm_page))
/ (PAGE_SIZE + sizeof(struct vm_page));
/*
* Record the extent of physical memory that the
* virtual memory system manages.
*
* The first managed page is the first page boundary at or
* above *start plus the size of the vm_page array.
* NOTE(review): this assumes the array allocated below begins
* at *start -- confirm against pmap_bootstrap_alloc().
*/
first_page = *start;
first_page += npages*sizeof(struct vm_page);
first_page = atop(round_page(first_page));
last_page = first_page + npages - 1;
first_phys_addr = ptoa(first_page);
last_phys_addr = ptoa(last_page) + PAGE_MASK;
/*
* Allocate and clear the mem entry structures.
* (Nothing here clears them explicitly; any clearing would have
* to be done by pmap_bootstrap_alloc() -- not visible here.)
*/
m = vm_page_array = (vm_page_t)
pmap_bootstrap_alloc(npages * sizeof(struct vm_page));
/*
* Initialize the mem entry structures now, and
* put them in the free queue. Only flags, object and
* phys_addr are set here.
*/
pa = first_phys_addr;
while (npages--) {
m->flags = 0;
m->object = NULL;
m->phys_addr = pa;
#ifdef i386
/*
* Pages rejected by pmap_isvalidphys() stay off the free queue,
* are flagged fictitious and busy, and are taken back out of
* the free count computed above.
*/
if (pmap_isvalidphys(m->phys_addr)) {
TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq);
} else {
/* perhaps iomem needs its own type, or dev pager? */
m->flags |= PG_FICTITIOUS | PG_BUSY;
cnt.v_free_count--;
}
#else /* i386 */
TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq);
#endif /* i386 */
m++;
pa += PAGE_SIZE;
}
/*
* Initialize vm_pages_needed lock here - don't wait for pageout
* daemon XXX
*/
simple_lock_init(&vm_pages_needed_lock);
/* from now on, pmap_bootstrap_alloc can't be used */
vm_page_startup_initialized = TRUE;
}
/*
 *	vm_page_hash:
 *
 *	Distributes the object/offset key pair among hash buckets: the
 *	object's address and the page number of the offset are added and
 *	the sum is masked with vm_page_hash_mask.
 *
 *	The object argument is parenthesized before it is cast, so an
 *	expression argument (for example `base + 1') is converted as a
 *	whole rather than having the cast bind to its first operand only.
 *
 *	NOTE: This macro depends on vm_page_bucket_count being a power of 2.
 */
#define vm_page_hash(object, offset) \
	(((unsigned long)(object) + (unsigned long)atop(offset)) & \
	    vm_page_hash_mask)
/*
 *	vm_page_insert:		[ internal use only ]
 *
 *	Enter a page into the object/offset hash table and append it to
 *	the given object's list of resident pages.
 *
 *	Panics if the page is already flagged PG_TABLED.
 *	The object and page must be locked.
 */
void
vm_page_insert(mem, object, offset)
	register vm_page_t	mem;
	register vm_object_t	object;
	register vm_offset_t	offset;
{
	register struct pglist	*chain;
	int			s;

	VM_PAGE_CHECK(mem);

	if ((mem->flags & PG_TABLED) != 0)
		panic("vm_page_insert: already inserted");

	/* Remember which object/offset this page now belongs to. */
	mem->object = object;
	mem->offset = offset;

	/* Hash chains are changed at splimp with the bucket lock held. */
	chain = vm_page_buckets + vm_page_hash(object, offset);
	s = splimp();
	simple_lock(&bucket_lock);
	TAILQ_INSERT_TAIL(chain, mem, hashq);
	simple_unlock(&bucket_lock);
	(void) splx(s);

	/* Link onto the object's page list and mark the page as entered. */
	TAILQ_INSERT_TAIL(&object->memq, mem, listq);
	mem->flags |= PG_TABLED;

	/* The object has gained a resident page. */
	object->resident_page_count++;
}
/*
* vm_page_remove: [ internal use only ]
* NOTE: used by device pager as well -wfj
*
* Removes the given mem entry from the object/offset-page
* table and the object page list.
*
* The object and page must be locked.
*/
void
vm_page_remove(mem)
register vm_page_t mem;
{
register struct pglist *bucket;
int spl;
VM_PAGE_CHECK(mem);
if (!(mem->flags & PG_TABLED))
return;
/*
* Remove from the object_object/offset hash table
*/
bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)];
spl = splimp();
simple_lock(&bucket_lock);
TAILQ_REMOVE(bucket, mem, hashq);
simple_unlock(&bucket_lock);
(void) splx(spl);
/*
* Now remove from the object's list of backed pages.
*/
TAILQ_REMOVE(&mem->object->memq, mem, listq);
/*
* And show that the object has one fewer resident
* page.
*/
mem->object->resident_page_count--;
mem->flags &= ~PG_TABLED;
}
/*
 *	vm_page_lookup:
 *
 *	Find the page entered under the given object/offset pair.
 *	Returns that page, or NULL when the hash chain holds no match.
 *
 *	The object must be locked.  No side effects.
 */
vm_page_t
vm_page_lookup(object, offset)
	register vm_object_t	object;
	register vm_offset_t	offset;
{
	register vm_page_t	pg;
	register struct pglist	*chain;
	int			s;

	chain = &vm_page_buckets[vm_page_hash(object, offset)];

	s = splimp();
	simple_lock(&bucket_lock);

	/* Walk the chain; pg ends up NULL if nothing matches. */
	pg = chain->tqh_first;
	while (pg != NULL) {
		VM_PAGE_CHECK(pg);
		if (pg->object == object && pg->offset == offset)
			break;
		pg = pg->hashq.tqe_next;
	}

	simple_unlock(&bucket_lock);
	splx(s);
	return(pg);
}
/*
 *	vm_page_rename:
 *
 *	Re-home a page: remove it from its current object and insert it
 *	into new_object at new_offset, with the page queues locked for
 *	the duration.
 *
 *	Nothing happens when the page already belongs to new_object;
 *	only the object is compared, not the offset.
 *
 *	The object must be locked.
 */
void
vm_page_rename(mem, new_object, new_offset)
	register vm_page_t	mem;
	register vm_object_t	new_object;
	vm_offset_t		new_offset;
{
	if (mem->object != new_object) {
		/* Keep page from moving out from under pageout daemon. */
		vm_page_lock_queues();
		vm_page_remove(mem);
		vm_page_insert(mem, new_object, new_offset);
		vm_page_unlock_queues();
	}
}
/*
 *	vm_page_alloc:
 *
 *	Take the first page off the free queue, initialize it for the
 *	given object/offset pair (VM_PAGE_INIT) and return it.
 *	Returns NULL when the free queue is empty.
 *
 *	Object must be locked.
 */
vm_page_t
vm_page_alloc(object, offset)
	vm_object_t	object;
	vm_offset_t	offset;
{
	register vm_page_t	pg;
	int			s;

	s = splimp();				/* XXX */
	simple_lock(&vm_page_queue_free_lock);
	pg = vm_page_queue_free.tqh_first;
	if (pg != NULL) {
		TAILQ_REMOVE(&vm_page_queue_free, pg, pageq);
		cnt.v_free_count--;
	}
	simple_unlock(&vm_page_queue_free_lock);
	splx(s);

	if (pg == NULL)
		return(NULL);

	VM_PAGE_INIT(pg, object, offset);

	/*
	 *	Wake the pageout daemon when free pages are below the low
	 *	water mark, or when they are below the high water mark and
	 *	the inactive queue is also short of its target.
	 *
	 *	The counts are read without a lock; small races are harmless.
	 */
	if (cnt.v_free_count < cnt.v_free_min ||
	    (cnt.v_free_count < cnt.v_free_target &&
	     cnt.v_inactive_count < cnt.v_inactive_target))
		thread_wakeup(&vm_pages_needed);

	return (pg);
}
/*
 *	vm_page_free:
 *
 *	Detach a page from its object and from whichever paging queue it
 *	is on, then put it at the tail of the free queue.  Pages flagged
 *	PG_FICTITIOUS are detached but never placed on the free queue.
 *
 *	Object and page must be locked prior to entry.
 */
void
vm_page_free(mem)
	register vm_page_t	mem;
{
	int	s;

	vm_page_remove(mem);

	if ((mem->flags & PG_ACTIVE) != 0) {
		TAILQ_REMOVE(&vm_page_queue_active, mem, pageq);
		cnt.v_active_count--;
		mem->flags &= ~PG_ACTIVE;
	}

	if ((mem->flags & PG_INACTIVE) != 0) {
		TAILQ_REMOVE(&vm_page_queue_inactive, mem, pageq);
		cnt.v_inactive_count--;
		mem->flags &= ~PG_INACTIVE;
	}

	/* Fictitious pages have no place on the free queue. */
	if ((mem->flags & PG_FICTITIOUS) != 0)
		return;

	s = splimp();
	simple_lock(&vm_page_queue_free_lock);
	TAILQ_INSERT_TAIL(&vm_page_queue_free, mem, pageq);
	cnt.v_free_count++;
	simple_unlock(&vm_page_queue_free_lock);
	splx(s);
}
/*
 *	vm_page_wire:
 *
 *	Add one wiring to a page.  On the first wiring the page is taken
 *	off the active or inactive queue and the global wired count is
 *	bumped.
 *
 *	The page queues must be locked.
 */
void
vm_page_wire(mem)
	register vm_page_t	mem;
{
	VM_PAGE_CHECK(mem);

	if (mem->wire_count == 0) {
		/* First wiring: the page leaves the paging queues. */
		if ((mem->flags & PG_ACTIVE) != 0) {
			TAILQ_REMOVE(&vm_page_queue_active, mem, pageq);
			mem->flags &= ~PG_ACTIVE;
			cnt.v_active_count--;
		}
		if ((mem->flags & PG_INACTIVE) != 0) {
			TAILQ_REMOVE(&vm_page_queue_inactive, mem, pageq);
			mem->flags &= ~PG_INACTIVE;
			cnt.v_inactive_count--;
		}
		cnt.v_wire_count++;
	}

	mem->wire_count++;
}
/*
 *	vm_page_unwire:
 *
 *	Drop one wiring of a page.  When the last wiring goes away the
 *	page is appended to the active queue, making it pageable again,
 *	and the global wired count is lowered.
 *
 *	The page queues must be locked.
 */
void
vm_page_unwire(mem)
	register vm_page_t	mem;
{
	VM_PAGE_CHECK(mem);

	/* Still wired by someone else: nothing more to do. */
	if (--mem->wire_count != 0)
		return;

	TAILQ_INSERT_TAIL(&vm_page_queue_active, mem, pageq);
	mem->flags |= PG_ACTIVE;
	cnt.v_active_count++;
	cnt.v_wire_count--;
}
/*
 *	vm_page_deactivate:
 *
 *	Move a page from the active queue to the tail of the inactive
 *	queue, clearing its reference bit in the pmap and recomputing
 *	PG_LAUNDRY from its clean/modified state.
 *	[Used by the physical mapping system.]
 *
 *	Pages that are not flagged PG_ACTIVE (locked or already inactive
 *	ones) are ignored.
 *
 *	The page queues must be locked.
 */
void
vm_page_deactivate(m)
	register vm_page_t	m;
{
	VM_PAGE_CHECK(m);

	if ((m->flags & PG_ACTIVE) == 0)
		return;

	pmap_clear_reference(VM_PAGE_TO_PHYS(m));

	TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
	TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
	m->flags &= ~PG_ACTIVE;
	m->flags |= PG_INACTIVE;
	cnt.v_active_count--;
	cnt.v_inactive_count++;

	/* A page the pmap reports as modified is no longer clean. */
	if (pmap_is_modified(VM_PAGE_TO_PHYS(m)))
		m->flags &= ~PG_CLEAN;

	/* Only pages that are not clean are marked for laundering. */
	if ((m->flags & PG_CLEAN) == 0)
		m->flags |= PG_LAUNDRY;
	else
		m->flags &= ~PG_LAUNDRY;
}
/*
 *	vm_page_activate:
 *
 *	Take a page off the inactive queue if it is there and, unless it
 *	is wired, append it to the active queue.  Panics if an unwired
 *	page is already flagged PG_ACTIVE.
 *
 *	The page queues must be locked.
 */
void
vm_page_activate(m)
	register vm_page_t	m;
{
	VM_PAGE_CHECK(m);

	if ((m->flags & PG_INACTIVE) != 0) {
		TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
		m->flags &= ~PG_INACTIVE;
		cnt.v_inactive_count--;
	}

	/* Wired pages stay off the paging queues. */
	if (m->wire_count != 0)
		return;

	if ((m->flags & PG_ACTIVE) != 0)
		panic("vm_page_activate: already active");

	TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
	m->flags |= PG_ACTIVE;
	cnt.v_active_count++;
}
/*
* vm_page_zero_fill:
*
* Zero-fill the specified page.
* Written as a standard pagein routine, to
* be used by the zero-fill object.
*/
boolean_t
vm_page_zero_fill(m)
vm_page_t m;
{
VM_PAGE_CHECK(m);
m->flags &= ~PG_CLEAN;
pmap_zero_page(VM_PAGE_TO_PHYS(m));
return(TRUE);
}
/*
* vm_page_copy:
*
* Copy one page to another
*/
void
vm_page_copy(src_m, dest_m)
vm_page_t src_m;
vm_page_t dest_m;
{
VM_PAGE_CHECK(src_m);
VM_PAGE_CHECK(dest_m);
dest_m->flags &= ~PG_CLEAN;
pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m));
}

242
sys/vm/vm_page.h Normal file
View File

@ -0,0 +1,242 @@
/*
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vm_page.h 8.3 (Berkeley) 1/9/95
*
*
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
* All rights reserved.
*
* Authors: Avadis Tevanian, Jr., Michael Wayne Young
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* Resident memory system definitions.
*/
#ifndef _VM_PAGE_
#define _VM_PAGE_
/*
* Management of resident (logical) pages.
*
* A small structure is kept for each resident
* page, indexed by page number. Each structure
* is an element of several lists:
*
* A hash table bucket used to quickly
* perform object/offset lookups
*
* A list of all pages for a given object,
* so they can be quickly deactivated at
* time of deallocation.
*
* An ordered list of pages due for pageout.
*
* In addition, the structure contains the object
* and offset to which this page belongs (for pageout),
* and sundry status bits.
*
* Fields in this structure are locked either by the lock on the
* object that the page belongs to (O) or by the lock on the page
* queues (P).
*/
/*
* struct pglist is the list head type shared by the page queues, the
* hash buckets and each object's resident-page list.
*
* struct vm_page describes one resident page. The letters in the
* member comments name the lock that covers the member: (O) the lock
* of the owning object, (P) the page queue lock.
*/
TAILQ_HEAD(pglist, vm_page);
struct vm_page {
TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO
* queue or free list (P) */
TAILQ_ENTRY(vm_page) hashq; /* hash table links (O)*/
TAILQ_ENTRY(vm_page) listq; /* pages in same object (O)*/
vm_object_t object; /* which object am I in (O,P)*/
vm_offset_t offset; /* offset into object (O,P) */
u_short wire_count; /* wired down maps refs (P) */
u_short flags; /* PG_* bits, see below */
vm_offset_t phys_addr; /* physical address of page; read
* through VM_PAGE_TO_PHYS() */
};
/*
* These are the flags defined for vm_page.
*
* They are bit values for the `flags' member of struct vm_page, which
* is a u_short. Bits 0x1000 and 0x2000 are not assigned here.
* The letter in each comment names the covering lock: (O) the owning
* object's lock, (P) the page queue lock.
*
* Note: PG_FILLED and PG_DIRTY are added for the filesystems.
*/
#define PG_INACTIVE 0x0001 /* page is in inactive list (P) */
#define PG_ACTIVE 0x0002 /* page is in active list (P) */
#define PG_LAUNDRY 0x0004 /* page is being cleaned now (P)*/
#define PG_CLEAN 0x0008 /* page has not been modified */
#define PG_BUSY 0x0010 /* page is in transit (O) */
#define PG_WANTED 0x0020 /* someone is waiting for page (O) */
#define PG_TABLED 0x0040 /* page is in VP table (O) */
#define PG_COPYONWRITE 0x0080 /* must copy page before changing (O) */
#define PG_FICTITIOUS 0x0100 /* physical page doesn't exist (O) */
#define PG_FAKE 0x0200 /* page is placeholder for pagein (O) */
#define PG_FILLED 0x0400 /* client flag to set when filled */
#define PG_DIRTY 0x0800 /* client flag to set when dirty */
#define PG_PAGEROWNED 0x4000 /* DEBUG: async paging op in progress */
#define PG_PTPAGE 0x8000 /* DEBUG: is a user page table page */
/*
 *	VM_PAGE_CHECK:
 *
 *	Sanity check of a vm_page pointer.  When VM_PAGE_DEBUG is nonzero
 *	it panics if `mem' lies outside
 *	vm_page_array[0 .. last_page - first_page], or if the page is
 *	flagged both PG_ACTIVE and PG_INACTIVE.  Otherwise it expands to
 *	nothing.
 *
 *	The argument is parenthesized at each use so that an expression
 *	argument is evaluated as a unit.  Addresses are compared as
 *	unsigned long rather than unsigned int, so the check does not
 *	truncate pointers on machines where a pointer is wider than an
 *	int.
 *
 *	The expansion is a bare brace block, kept that way so existing
 *	call sites keep compiling: do not use it as the unbraced body of
 *	an `if' that has an `else'.
 */
#if VM_PAGE_DEBUG
#define	VM_PAGE_CHECK(mem) { \
	if ((((unsigned long) (mem)) < ((unsigned long) &vm_page_array[0])) || \
	    (((unsigned long) (mem)) > \
		((unsigned long) &vm_page_array[last_page-first_page])) || \
	    (((mem)->flags & (PG_ACTIVE | PG_INACTIVE)) == \
		(PG_ACTIVE | PG_INACTIVE))) \
		panic("vm_page_check: not valid!"); \
}
#else /* VM_PAGE_DEBUG */
#define	VM_PAGE_CHECK(mem)
#endif /* VM_PAGE_DEBUG */
#ifdef KERNEL
/*
* Each pageable resident page falls into one of three lists:
*
* free
* Available for allocation now.
* inactive
* Not referenced in any map, but still has an
* object/offset-page mapping, and may be dirty.
* This is the list of pages that should be
* paged out next.
* active
* A list of pages which have been placed in
* at least one physical map. This list is
* ordered, in LRU-like fashion.
*/
extern
struct pglist vm_page_queue_free; /* memory free queue */
extern
struct pglist vm_page_queue_active; /* active memory queue */
extern
struct pglist vm_page_queue_inactive; /* inactive memory queue */
extern
vm_page_t vm_page_array; /* First resident page in table */
extern
long first_page; /* first physical page number */
/* ... represented in vm_page_array */
extern
long last_page; /* last physical page number */
/* ... represented in vm_page_array */
/* [INCLUSIVE] */
extern
vm_offset_t first_phys_addr; /* physical address for first_page */
extern
vm_offset_t last_phys_addr; /* physical address for last_page */
/*
* Conversions between a page structure and a physical address.
*
* IS_VM_PHYSADDR evaluates its argument twice. PHYS_TO_VM_PAGE does
* no range checking of its own; test the address with IS_VM_PHYSADDR
* first when it may lie outside the managed range.
*/
#define VM_PAGE_TO_PHYS(entry) ((entry)->phys_addr)
#define IS_VM_PHYSADDR(pa) \
((pa) >= first_phys_addr && (pa) <= last_phys_addr)
#define PHYS_TO_VM_PAGE(pa) \
(&vm_page_array[atop(pa) - first_page ])
extern
simple_lock_data_t vm_page_queue_lock; /* lock on active and inactive
page queues */
extern /* lock on free page queue */
simple_lock_data_t vm_page_queue_free_lock;
/*
* Functions implemented as macros
*
* The multi-statement macros below expand to a bare brace block, so
* they must not be used as the unbraced body of an `if' that has an
* `else'.
*/
/* Flag the page as wanted, then assert_wait() on the page itself. */
#define PAGE_ASSERT_WAIT(m, interruptible) { \
(m)->flags |= PG_WANTED; \
assert_wait((m), (interruptible)); \
}
/*
* Clear PG_BUSY and, if PG_WANTED is set, clear it too and
* thread_wakeup() on the page.
*/
#define PAGE_WAKEUP(m) { \
(m)->flags &= ~PG_BUSY; \
if ((m)->flags & PG_WANTED) { \
(m)->flags &= ~PG_WANTED; \
thread_wakeup((m)); \
} \
}
/* Acquire and release the lock on the active and inactive queues. */
#define vm_page_lock_queues() simple_lock(&vm_page_queue_lock)
#define vm_page_unlock_queues() simple_unlock(&vm_page_queue_lock)
/* Record a modification by clearing PG_CLEAN. */
#define vm_page_set_modified(m) { (m)->flags &= ~PG_CLEAN; }
/*
* Prepare a page taken from the free list: set its flags to busy,
* clean and fake, enter it under object/offset with vm_page_insert(),
* and reset its wire count.
*/
#define VM_PAGE_INIT(mem, object, offset) { \
(mem)->flags = PG_BUSY | PG_CLEAN | PG_FAKE; \
vm_page_insert((mem), (object), (offset)); \
(mem)->wire_count = 0; \
}
/*
* Entry points of the resident page module. The argument lists are
* wrapped in __P(), which is defined elsewhere (presumably
* <sys/cdefs.h> -- confirm).
*/
void vm_page_activate __P((vm_page_t));
vm_page_t vm_page_alloc __P((vm_object_t, vm_offset_t));
void vm_page_copy __P((vm_page_t, vm_page_t));
void vm_page_deactivate __P((vm_page_t));
void vm_page_free __P((vm_page_t));
void vm_page_insert __P((vm_page_t, vm_object_t, vm_offset_t));
vm_page_t vm_page_lookup __P((vm_object_t, vm_offset_t));
void vm_page_remove __P((vm_page_t));
void vm_page_rename __P((vm_page_t, vm_object_t, vm_offset_t));
void vm_page_startup __P((vm_offset_t *, vm_offset_t *));
void vm_page_unwire __P((vm_page_t));
void vm_page_wire __P((vm_page_t));
boolean_t vm_page_zero_fill __P((vm_page_t));
#endif /* KERNEL */
#endif /* !_VM_PAGE_ */

571
sys/vm/vm_pageout.c Normal file
View File

@ -0,0 +1,571 @@
/*
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vm_pageout.c 8.7 (Berkeley) 6/19/95
*
*
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
* All rights reserved.
*
* Authors: Avadis Tevanian, Jr., Michael Wayne Young
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* The proverbial page-out daemon.
*/
#include <sys/param.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
/*
 * Default for cnt.v_free_min: one twentieth of the free page count at the
 * time vm_pageout() evaluates it.  May be overridden at build time.
 */
#ifndef VM_PAGE_FREE_MIN
#define VM_PAGE_FREE_MIN (cnt.v_free_count / 20)
#endif
/*
 * Default for cnt.v_free_target: four thirds of cnt.v_free_min.  May be
 * overridden at build time.
 */
#ifndef VM_PAGE_FREE_TARGET
#define VM_PAGE_FREE_TARGET ((cnt.v_free_min * 4) / 3)
#endif
/*
 * Clamp applied to cnt.v_free_min by vm_pageout().  The initial values are
 * divided by cnt.v_page_size there before use, so they start out as byte
 * counts and end up as page counts.
 */
int vm_page_free_min_min = 16 * 1024;
int vm_page_free_min_max = 256 * 1024;
int vm_pages_needed; /* Event on which pageout daemon sleeps */
/* When left at 0, vm_pageout() sets this to a third of the free page count. */
int vm_page_max_wired = 0; /* XXX max # of wired pages system-wide */
#ifdef CLUSTERED_PAGEOUT
/* Capacity, in pages, of the page list built by vm_pageout_cluster(). */
#define MAXPOCLUSTER (MAXPHYS/NBPG) /* XXX */
/* NOTE(review): not referenced by the pageout code in this file -- confirm users. */
int doclustered_pageout = 1;
#endif
/*
 * vm_pageout_free_pages:
 *
 *	Return a snapshot of cnt.v_free_count, taken at splimp with the
 *	free page queue lock held.
 */
static int
vm_pageout_free_pages()
{
	int ipl, nfree;

	ipl = splimp();
	simple_lock(&vm_page_queue_free_lock);
	nfree = cnt.v_free_count;
	simple_unlock(&vm_page_queue_free_lock);
	splx(ipl);
	return (nfree);
}

/*
 * vm_pageout_scan:
 *
 *	One pass of the pageout daemon.  Walks the inactive queue freeing
 *	clean pages and starting pageouts for dirty ones until the free
 *	target is met or the queue is exhausted, then tops the inactive
 *	queue up from the head of the active queue.
 */
void
vm_pageout_scan()
{
	register vm_page_t pg, following;
	register int shortfall;
	register int nfreed;
	vm_object_t obj;

	cnt.v_rev++;

	/*
	 * If we are below the free target, swap out threads and bring
	 * the pmap system up to date before looking at the inactive
	 * queue.
	 */
	if (vm_pageout_free_pages() < cnt.v_free_target) {
		swapout_threads();
		pmap_update();
	}

	/*
	 * The resident page queues are about to change a lot;
	 * hold the queue lock for the whole scan.
	 */
	vm_page_lock_queues();

	nfreed = 0;
	pg = vm_page_queue_inactive.tqh_first;
	while (pg != NULL) {
		/* Stop as soon as enough pages are free. */
		if (vm_pageout_free_pages() >= cnt.v_free_target)
			break;

		cnt.v_scan++;
		following = pg->pageq.tqe_next;

		if (pmap_is_referenced(VM_PAGE_TO_PHYS(pg))) {
			/* Referenced since deactivation: back to active. */
			vm_page_activate(pg);
			cnt.v_reactivated++;
		} else if (pg->flags & PG_CLEAN) {
			/*
			 * Clean page: free it if its object can be
			 * locked without waiting, otherwise skip it.
			 */
			obj = pg->object;
			if (vm_object_lock_try(obj)) {
				pmap_page_protect(VM_PAGE_TO_PHYS(pg),
				    VM_PROT_NONE);
				vm_page_free(pg);
				nfreed++;
				cnt.v_dfree++;
				vm_object_unlock(obj);
			}
		} else if (pg->flags & PG_LAUNDRY) {
			/*
			 * Dirty and still in the laundry: start cleaning
			 * it, provided the object lock is available.
			 * (Dirty pages without PG_LAUNDRY are already
			 * being washed and are simply skipped.)
			 */
			obj = pg->object;
			if (vm_object_lock_try(obj)) {
				cnt.v_pageouts++;
#ifdef CLUSTERED_PAGEOUT
				if (obj->pager &&
				    vm_pager_cancluster(obj->pager,
				    PG_CLUSTERPUT))
					vm_pageout_cluster(pg, obj);
				else
#endif
				vm_pageout_page(pg, obj);
				thread_wakeup(obj);
				vm_object_unlock(obj);

				/*
				 * The pageout routines drop the queue lock
				 * and may block, so the saved successor may
				 * have left the inactive queue.  If it has,
				 * restart from the head.
				 */
				if (following &&
				    (following->flags & PG_INACTIVE) == 0)
					following =
					    vm_page_queue_inactive.tqh_first;
			}
		}
		pg = following;
	}

	/*
	 * Work out how many pages the inactive queue is short of its
	 * target.  If nothing was freed above, deactivate at least one
	 * page regardless.
	 */
	shortfall = cnt.v_inactive_target - cnt.v_inactive_count;
	if (shortfall <= 0 && nfreed == 0)
		shortfall = 1;

	for (; shortfall > 0; shortfall--) {
		if ((pg = vm_page_queue_active.tqh_first) == NULL)
			break;
		vm_page_deactivate(pg);
	}

	vm_page_unlock_queues();
}
/*
 * vm_pageout_page:
 *
 *	Hand a single dirty page to its object's pager.
 *
 *	Called with the object and the page queues locked; both locks are
 *	dropped around the pager call and are held again on return.
 *	On VM_PAGER_FAIL or VM_PAGER_ERROR the page is put back on the
 *	active queue; on VM_PAGER_AGAIN it is left where it is.
 */
void
vm_pageout_page(m, object)
	vm_page_t m;
	vm_object_t object;
{
	vm_pager_t pager;
	int pageout_status;

	/*
	 * Remove all mappings and mark the page busy so that faults on
	 * it block while the pageout is under way.
	 */
	pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
	m->flags |= PG_BUSY;

	/*
	 * Before a pager is created for the object, give collapse a
	 * chance; the page queues must be unlocked for that.  Then raise
	 * paging_in_progress, which stays raised until the operation
	 * finishes, and release the object.
	 */
	vm_page_unlock_queues();
	if (object->pager == NULL)
		vm_object_collapse(object);
	object->paging_in_progress++;
	vm_object_unlock(object);

	/*
	 * What follows may block, so wake anyone waiting on the free
	 * count now.
	 */
	thread_wakeup(&cnt.v_free_count);

	/*
	 * Objects without a pager get the default one.  If none can be
	 * allocated the pageout is treated as a failure.
	 */
	pager = object->pager;
	if (pager == NULL) {
		pager = vm_pager_allocate(PG_DFLT, (caddr_t)0, object->size,
		    VM_PROT_ALL, (vm_offset_t)0);
		if (pager != NULL)
			vm_object_setpager(object, pager, 0, FALSE);
	}
	if (pager)
		pageout_status = vm_pager_put(pager, m, FALSE);
	else
		pageout_status = VM_PAGER_FAIL;

	vm_object_lock(object);
	vm_page_lock_queues();

	switch (pageout_status) {
	case VM_PAGER_OK:
	case VM_PAGER_PEND:
		/* Written, or write started: no longer laundry. */
		cnt.v_pgpgout++;
		m->flags &= ~PG_LAUNDRY;
		break;
	case VM_PAGER_BAD:
		/*
		 * The page lies outside the object's range.  Pretend the
		 * write succeeded, which effectively discards the changes.
		 *
		 * XXX dubious, what should we do?
		 */
		m->flags &= ~PG_LAUNDRY;
		m->flags |= PG_CLEAN;
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
		break;
	case VM_PAGER_AGAIN:
	{
		extern int lbolt;

		/*
		 * Temporary resource shortage: drop both locks, pause on
		 * lbolt, and leave the page in the laundry so that a
		 * later scan tries again.
		 * XXX could get stuck here.
		 */
		vm_page_unlock_queues();
		vm_object_unlock(object);
		(void) tsleep((caddr_t)&lbolt, PZERO|PCATCH, "pageout", 0);
		vm_object_lock(object);
		vm_page_lock_queues();
		break;
	}
	case VM_PAGER_FAIL:
	case VM_PAGER_ERROR:
		/*
		 * The page could not be written.  Reactivate it so it does
		 * not clog the inactive list; it will be retried later.
		 */
		vm_page_activate(m);
		cnt.v_reactivated++;
		break;
	}

	pmap_clear_reference(VM_PAGE_TO_PHYS(m));

	/*
	 * An operation that is still pending keeps the page busy and
	 * paging_in_progress raised (which also prevents an object
	 * collapse).  In every other case undo both here.
	 */
	if (pageout_status != VM_PAGER_PEND) {
		m->flags &= ~PG_BUSY;
		PAGE_WAKEUP(m);
		object->paging_in_progress--;
	}
}
#ifdef CLUSTERED_PAGEOUT
/*
 * True when page p may join a pageout cluster: it carries PG_INACTIVE and
 * PG_LAUNDRY, does not carry PG_CLEAN, and the pmap layer reports it as not
 * referenced.  Evaluates p more than once.
 */
#define PAGEOUTABLE(p) \
((((p)->flags & (PG_INACTIVE|PG_CLEAN|PG_LAUNDRY)) == \
(PG_INACTIVE|PG_LAUNDRY)) && !pmap_is_referenced(VM_PAGE_TO_PHYS(p)))
/*
 * vm_pageout_cluster:
 *
 * Attempt to page out as many dirty pages contiguous to ``m'' as possible
 * from ``object''.  The pager supplies the range [loff, hoff) that may be
 * clustered around m; within it we collect the run of PAGEOUTABLE pages
 * adjacent to m, in offset order, and hand them to the pager in one call.
 *
 * Called with the paging queues and the object locked; both are dropped
 * around the pager call and are held again on return.  The object must
 * already have a pager.
 *
 * NOTE(review): nothing here checks that (hoff - loff) spans at most
 * MAXPOCLUSTER pages before plist[] is indexed by page offset from loff;
 * presumably the pager's pgo_cluster routine guarantees it -- confirm.
 */
void
vm_pageout_cluster(m, object)
vm_page_t m;
vm_object_t object;
{
vm_offset_t offset, loff, hoff;
vm_page_t plist[MAXPOCLUSTER], *plistp, p;
int postatus, ix, count;
/*
 * Determine the range of pages that can be part of a cluster
 * for this object/offset. If it is only our single page, just
 * do it normally.
 */
vm_pager_cluster(object->pager, m->offset, &loff, &hoff);
if (hoff - loff == PAGE_SIZE) {
vm_pageout_page(m, object);
return;
}
plistp = plist;
/*
 * Target page is always part of the cluster.  Its slot in the list
 * is its page index relative to loff.
 */
pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
m->flags |= PG_BUSY;
plistp[atop(m->offset - loff)] = m;
count = 1;
/*
 * Back up from the given page until we find one not fulfilling
 * the pageout criteria or we hit the lower bound for the
 * cluster. For each page determined to be part of the
 * cluster, unmap it and busy it out so it won't change.
 * The backward walk stops one short of MAXPOCLUSTER entries.
 */
ix = atop(m->offset - loff);
offset = m->offset;
while (offset > loff && count < MAXPOCLUSTER-1) {
p = vm_page_lookup(object, offset - PAGE_SIZE);
if (p == NULL || !PAGEOUTABLE(p))
break;
pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
p->flags |= PG_BUSY;
plistp[--ix] = p;
offset -= PAGE_SIZE;
count++;
}
/*
 * Rebase the list pointer and loff onto the lowest page actually
 * collected, so that plistp[0] is the first page of the cluster.
 */
plistp += atop(offset - loff);
loff = offset;
/*
 * Now do the same moving forward from the target.  ix is computed
 * against the rebased loff, so it indexes the rebased list.
 */
ix = atop(m->offset - loff) + 1;
offset = m->offset + PAGE_SIZE;
while (offset < hoff && count < MAXPOCLUSTER) {
p = vm_page_lookup(object, offset);
if (p == NULL || !PAGEOUTABLE(p))
break;
pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
p->flags |= PG_BUSY;
plistp[ix++] = p;
offset += PAGE_SIZE;
count++;
}
hoff = offset;
/*
 * Page out the cluster.
 * Unlock everything and do a wakeup prior to the pager call
 * in case it blocks.
 */
vm_page_unlock_queues();
object->paging_in_progress++;
vm_object_unlock(object);
again:
thread_wakeup(&cnt.v_free_count);
postatus = vm_pager_put_pages(object->pager, plistp, count, FALSE);
/*
 * XXX rethink this
 * VM_PAGER_AGAIN is retried indefinitely, sleeping on lbolt between
 * attempts with neither the object nor the page queues locked.
 * VM_PAGER_BAD is treated as fatal.
 */
if (postatus == VM_PAGER_AGAIN) {
extern int lbolt;
(void) tsleep((caddr_t)&lbolt, PZERO|PCATCH, "pageout", 0);
goto again;
} else if (postatus == VM_PAGER_BAD)
panic("vm_pageout_cluster: VM_PAGER_BAD");
vm_object_lock(object);
vm_page_lock_queues();
/*
 * Loop through the affected pages, reflecting the outcome of
 * the operation.  The single status returned by the pager is
 * applied to every page of the cluster.
 */
for (ix = 0; ix < count; ix++) {
p = *plistp++;
switch (postatus) {
case VM_PAGER_OK:
case VM_PAGER_PEND:
cnt.v_pgpgout++;
p->flags &= ~PG_LAUNDRY;
break;
case VM_PAGER_FAIL:
case VM_PAGER_ERROR:
/*
 * Pageout failed, reactivate the target page so it
 * doesn't clog the inactive list. Other pages are
 * left as they are.
 */
if (p == m) {
vm_page_activate(p);
cnt.v_reactivated++;
}
break;
}
pmap_clear_reference(VM_PAGE_TO_PHYS(p));
/*
 * If the operation is still going, leave the page busy
 * to block all other accesses.
 */
if (postatus != VM_PAGER_PEND) {
p->flags &= ~PG_BUSY;
PAGE_WAKEUP(p);
}
}
/*
 * If the operation is still going, leave the paging in progress
 * indicator set so that we don't attempt an object collapse.
 */
if (postatus != VM_PAGER_PEND)
object->paging_in_progress--;
}
#endif
/*
 * vm_pageout:
 *
 *	Body of the pageout daemon.  Fills in any paging thresholds that
 *	are still zero, then loops forever: sleep on vm_pages_needed,
 *	recompute the inactive target, scan if there is a shortage, let
 *	the pagers finish asynchronous work, and wake everybody waiting
 *	on the free page count.  Never returns.
 */
void
vm_pageout()
{
	(void) spl0();

	/*
	 * Paging parameters.  A non-zero value is taken as already
	 * configured and left alone.
	 */
	if (cnt.v_free_min == 0) {
		cnt.v_free_min = VM_PAGE_FREE_MIN;

		/* Convert the clamp limits to pages, then apply them. */
		vm_page_free_min_min /= cnt.v_page_size;
		vm_page_free_min_max /= cnt.v_page_size;
		if (cnt.v_free_min < vm_page_free_min_min)
			cnt.v_free_min = vm_page_free_min_min;
		if (cnt.v_free_min > vm_page_free_min_max)
			cnt.v_free_min = vm_page_free_min_max;
	}

	if (cnt.v_free_target == 0)
		cnt.v_free_target = VM_PAGE_FREE_TARGET;

	/* The target must always sit above the minimum. */
	if (cnt.v_free_target <= cnt.v_free_min)
		cnt.v_free_target = cnt.v_free_min + 1;

	/* XXX does not really belong here */
	if (vm_page_max_wired == 0)
		vm_page_max_wired = cnt.v_free_count / 3;

	/*
	 * Main loop; the daemon's work is never finished.
	 */
	simple_lock(&vm_pages_needed_lock);
	for (;;) {
		thread_sleep(&vm_pages_needed, &vm_pages_needed_lock, FALSE);

		/*
		 * Inactive target for this pass: a third of the pages on
		 * the active and inactive queues, but always more than the
		 * free target, so that the inactive list stays large
		 * enough to approximate LRU.
		 */
		cnt.v_inactive_target =
		    (cnt.v_active_count + cnt.v_inactive_count) / 3;
		if (cnt.v_inactive_target <= cnt.v_free_target)
			cnt.v_inactive_target = cnt.v_free_target + 1;

		/*
		 * A pager may have woken us merely to reap asynchronous
		 * pageouts, so scan only when there is a real shortage.
		 */
		if (cnt.v_free_count < cnt.v_free_target ||
		    cnt.v_inactive_count < cnt.v_inactive_target) {
			vm_pageout_scan();
		}

		vm_pager_sync();
		simple_lock(&vm_pages_needed_lock);
		thread_wakeup(&cnt.v_free_count);
	}
}

96
sys/vm/vm_pageout.h Normal file
View File

@ -0,0 +1,96 @@
/*
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vm_pageout.h 8.3 (Berkeley) 1/9/95
*
*
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Avadis Tevanian, Jr.
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* Header file for pageout daemon.
*/
/*
 * Exported data structures.
 *
 * vm_pages_needed is the event the pageout daemon sleeps on;
 * vm_pages_needed_lock is the lock handed to thread_sleep() together
 * with it.
 *
 * NOTE(review): vm_pages_needed_lock is written without `extern', so every
 * file including this header emits a tentative definition of it.  That only
 * links when the toolchain merges common symbols; confirm before changing,
 * since no other definition is visible here.
 */
extern int vm_pages_needed; /* should be some "event" structure */
simple_lock_data_t vm_pages_needed_lock;
/*
 * Exported routines.
 */
/*
 * Signal pageout-daemon and wait for it.
 *
 * Takes vm_pages_needed_lock, wakes whoever sleeps on vm_pages_needed, and
 * then sleeps on &cnt.v_free_count, passing the lock to thread_sleep().
 * NOTE(review): thread_sleep() presumably releases the lock while sleeping
 * and does not re-take it -- confirm against its definition.
 *
 * Expands to a bare brace block, so it must not be used as the unbraced
 * body of an if that has an else.
 */
#define VM_WAIT { \
simple_lock(&vm_pages_needed_lock); \
thread_wakeup(&vm_pages_needed); \
thread_sleep(&cnt.v_free_count, \
&vm_pages_needed_lock, FALSE); \
}
#ifdef KERNEL
/*
 * Pageout daemon entry points.  vm_pageout_cluster() is declared here
 * unconditionally but is only defined when CLUSTERED_PAGEOUT is set.
 */
void vm_pageout __P((void));
void vm_pageout_scan __P((void));
void vm_pageout_page __P((vm_page_t, vm_object_t));
void vm_pageout_cluster __P((vm_page_t, vm_object_t));
#endif

401
sys/vm/vm_pager.c Normal file
View File

@ -0,0 +1,401 @@
/*
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vm_pager.c 8.7 (Berkeley) 7/7/94
*
*
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
* All rights reserved.
*
* Authors: Avadis Tevanian, Jr., Michael Wayne Young
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* Paging space routine stubs. Emulates a matchmaker-like interface
* for builtin pagers.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>
/*
 * Operation vectors of the pagers that may be configured into the kernel.
 */
#ifdef SWAPPAGER
extern struct pagerops swappagerops;
#endif
#ifdef VNODEPAGER
extern struct pagerops vnodepagerops;
#endif
#ifdef DEVPAGER
extern struct pagerops devicepagerops;
#endif
/*
 * Table of pagers, indexed by pager type (PG_SWAP, PG_VNODE, PG_DEVICE).
 * A pager that is not configured leaves a NULL entry in its slot, so every
 * user of this table must check the entry before dereferencing it.
 */
struct pagerops *pagertab[] = {
#ifdef SWAPPAGER
&swappagerops, /* PG_SWAP */
#else
NULL,
#endif
#ifdef VNODEPAGER
&vnodepagerops, /* PG_VNODE */
#else
NULL,
#endif
#ifdef DEVPAGER
&devicepagerops, /* PG_DEVICE */
#else
NULL,
#endif
};
/* Number of slots in pagertab, configured or not. */
int npagers = sizeof (pagertab) / sizeof (pagertab[0]);
/*
 * Operations used for PG_DFLT requests.  vm_pager_init() panics if this is
 * still NULL after the pgo_init routines have run.
 * NOTE(review): presumably assigned by one of those pgo_init routines --
 * confirm in the individual pagers.
 */
struct pagerops *dfltpagerops = NULL; /* default pager */
/*
 * Kernel address space for mapping pages.
 * Used by pagers where KVAs are needed for IO.
 *
 * XXX needs to be large enough to support the number of pending async
 * cleaning requests (NPENDINGIO == 64) * the maximum swap cluster size
 * (MAXPHYS == 64k) if you want to get the most efficiency.
 */
#define PAGER_MAP_SIZE (4 * 1024 * 1024)
/* Submap of kernel_map created by vm_pager_init(). */
vm_map_t pager_map;
/*
 * Set by vm_pager_map_pages() before it sleeps for space in pager_map;
 * tested by vm_pager_unmap_pages() to decide whether to issue a wakeup.
 * Nothing in this file clears it again.
 */
boolean_t pager_map_wanted;
/* Address bounds of pager_map, filled in by kmem_suballoc(). */
vm_offset_t pager_sva, pager_eva;
/*
 * vm_pager_init:
 *
 *	Set up the pager layer: create the kernel submap used to map
 *	pages for pager I/O, then run the init routine of every pager
 *	that is configured.  Panics if no default pager has been
 *	established once the init routines have run.
 */
void
vm_pager_init()
{
	struct pagerops **pgops;

	/*
	 * Allocate a kernel submap for tracking get/put page mappings
	 */
	pager_map = kmem_suballoc(kernel_map, &pager_sva, &pager_eva,
	    PAGER_MAP_SIZE, FALSE);

	/*
	 * Initialize known pagers.  Slots of pagers that are not
	 * configured hold NULL, so test the slot's contents: the slot
	 * address itself is never NULL.
	 */
	for (pgops = pagertab; pgops < &pagertab[npagers]; pgops++)
		if (*pgops)
			(*(*pgops)->pgo_init)();

	if (dfltpagerops == NULL)
		panic("no default pager");
}
/*
* Allocate an instance of a pager of the given type.
* Size, protection and offset parameters are passed in for pagers that
* need to perform page-level validation (e.g. the device pager).
*/
vm_pager_t
vm_pager_allocate(type, handle, size, prot, off)
int type;
caddr_t handle;
vm_size_t size;
vm_prot_t prot;
vm_offset_t off;
{
struct pagerops *ops;
ops = (type == PG_DFLT) ? dfltpagerops : pagertab[type];
if (ops)
return ((*ops->pgo_alloc)(handle, size, prot, off));
return (NULL);
}
void
vm_pager_deallocate(pager)
vm_pager_t pager;
{
if (pager == NULL)
panic("vm_pager_deallocate: null pager");
(*pager->pg_ops->pgo_dealloc)(pager);
}
/*
 * vm_pager_get_pages:
 *
 *	Fetch npages pages, listed in mlist, through the given pager.
 *	With no pager the pages are zero filled instead, one after the
 *	other; the first page that cannot be zero filled stops the loop
 *	and yields VM_PAGER_FAIL.  Otherwise the result is whatever the
 *	pager's pgo_getpages routine returns.
 */
int
vm_pager_get_pages(pager, mlist, npages, sync)
	vm_pager_t pager;
	vm_page_t *mlist;
	int npages;
	boolean_t sync;
{
	if (pager != NULL)
		return ((*pager->pg_ops->pgo_getpages)(pager, mlist,
		    npages, sync));

	/* No backing store: hand back zeroed pages. */
	while (npages--) {
		if (!vm_page_zero_fill(*mlist))
			return (VM_PAGER_FAIL);
		mlist++;
	}
	return (VM_PAGER_OK);
}
int
vm_pager_put_pages(pager, mlist, npages, sync)
vm_pager_t pager;
vm_page_t *mlist;
int npages;
boolean_t sync;
{
if (pager == NULL)
panic("vm_pager_put_pages: null pager");
return ((*pager->pg_ops->pgo_putpages)(pager, mlist, npages, sync));
}
/*
 * vm_pager_get:
 *
 *	XXX compatibility: single-page form of vm_pager_get_pages().
 */
int
vm_pager_get(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	vm_page_t *single = &m;		/* one-element page list */

	return (vm_pager_get_pages(pager, single, 1, sync));
}
/*
 * vm_pager_put:
 *
 *	XXX compatibility: single-page form of vm_pager_put_pages().
 */
int
vm_pager_put(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	vm_page_t *single = &m;		/* one-element page list */

	return (vm_pager_put_pages(pager, single, 1, sync));
}
boolean_t
vm_pager_has_page(pager, offset)
vm_pager_t pager;
vm_offset_t offset;
{
if (pager == NULL)
panic("vm_pager_has_page: null pager");
return ((*pager->pg_ops->pgo_haspage)(pager, offset));
}
/*
 * vm_pager_sync:
 *
 *	Called by the pageout daemon before going back to sleep.
 *	Gives pagers a chance to clean up any completed async paging
 *	operations, by calling each configured pager's pgo_putpages
 *	routine with a NULL pager and an empty page list.
 */
void
vm_pager_sync()
{
	struct pagerops **pgops;

	/*
	 * Slots of pagers that are not configured hold NULL, so test
	 * the slot's contents: the slot address itself is never NULL.
	 */
	for (pgops = pagertab; pgops < &pagertab[npagers]; pgops++)
		if (*pgops)
			(*(*pgops)->pgo_putpages)(NULL, NULL, 0, FALSE);
}
void
vm_pager_cluster(pager, offset, loff, hoff)
vm_pager_t pager;
vm_offset_t offset;
vm_offset_t *loff;
vm_offset_t *hoff;
{
if (pager == NULL)
panic("vm_pager_cluster: null pager");
((*pager->pg_ops->pgo_cluster)(pager, offset, loff, hoff));
}
/*
 * vm_pager_clusternull:
 *
 *	Cluster routine that must never be reached: it has the
 *	pgo_cluster signature, ignores its arguments and panics.
 *	NOTE(review): presumably installed as pgo_cluster by pagers that
 *	do not support clustering -- confirm in the individual pagers.
 */
void
vm_pager_clusternull(pager, offset, loff, hoff)
	vm_pager_t pager;
	vm_offset_t offset;
	vm_offset_t *loff;
	vm_offset_t *hoff;
{
	/* Name the function as it is actually spelled, so the panic can be found. */
	panic("vm_pager_clusternull called");
}
/*
 * vm_pager_map_pages:
 *
 *	Map the npages pages listed in mlist at consecutive addresses
 *	in pager_map and return the address of the first one.  When the
 *	map has no room, either sleep until woken and retry (canwait) or
 *	return 0.
 */
vm_offset_t
vm_pager_map_pages(mlist, npages, canwait)
	vm_page_t *mlist;
	int npages;
	boolean_t canwait;
{
	vm_offset_t base, cur;
	vm_size_t len;
	vm_page_t pg;

	/*
	 * Reserve address space in the pager map.  This is basically
	 * kmem_alloc_wait expanded in place, with blocking on lack of
	 * space made optional.
	 */
	len = npages * PAGE_SIZE;
	vm_map_lock(pager_map);
	while (vm_map_findspace(pager_map, 0, len, &base)) {
		if (!canwait) {
			vm_map_unlock(pager_map);
			return (0);
		}
		/* Record that somebody is waiting, then sleep on the map. */
		pager_map_wanted = TRUE;
		vm_map_unlock(pager_map);
		(void) tsleep(pager_map, PVM, "pager_map", 0);
		vm_map_lock(pager_map);
	}
	vm_map_insert(pager_map, NULL, 0, base, base + len);
	vm_map_unlock(pager_map);

	/*
	 * Enter each page at the next address of the reserved range.
	 */
	cur = base;
	while (npages--) {
		pg = *mlist++;
#ifdef DEBUG
		if ((pg->flags & PG_BUSY) == 0)
			panic("vm_pager_map_pages: page not busy");
		if (pg->flags & PG_PAGEROWNED)
			panic("vm_pager_map_pages: page already in pager");
		pg->flags |= PG_PAGEROWNED;
#endif
		pmap_enter(vm_map_pmap(pager_map), cur, VM_PAGE_TO_PHYS(pg),
		    VM_PROT_DEFAULT, TRUE);
		cur += PAGE_SIZE;
	}
	return (base);
}
/*
 * vm_pager_unmap_pages:
 *
 *	Undo vm_pager_map_pages(): remove the npages mappings starting
 *	at kva from pager_map's pmap, delete the range from the map, and
 *	issue a wakeup on the map if pager_map_wanted is set.
 */
void
vm_pager_unmap_pages(kva, npages)
	vm_offset_t kva;
	int npages;
{
	vm_size_t len = npages * PAGE_SIZE;
#ifdef DEBUG
	vm_offset_t cur;
	vm_page_t pg;
	int left;

	/* Every page being unmapped should be marked pager-owned. */
	for (cur = kva, left = npages; left--; cur += PAGE_SIZE) {
		pg = vm_pager_atop(cur);
		if ((pg->flags & PG_PAGEROWNED) == 0) {
			printf("vm_pager_unmap_pages: %x(%x/%x) not owned\n",
			    pg, cur, VM_PAGE_TO_PHYS(pg));
			continue;
		}
		pg->flags &= ~PG_PAGEROWNED;
	}
#endif

	pmap_remove(vm_map_pmap(pager_map), kva, kva + len);

	vm_map_lock(pager_map);
	(void) vm_map_delete(pager_map, kva, kva + len);
	if (pager_map_wanted)
		wakeup(pager_map);
	vm_map_unlock(pager_map);
}
/*
 * vm_pager_atop:
 *
 *	Return the vm_page that is mapped at address kva of pager_map.
 *	Panics when pmap_extract() returns 0 for that address.
 */
vm_page_t
vm_pager_atop(kva)
	vm_offset_t kva;
{
	vm_offset_t phys;

	phys = pmap_extract(vm_map_pmap(pager_map), kva);
	if (phys != 0)
		return (PHYS_TO_VM_PAGE(phys));

	panic("vm_pager_atop");
}
/*
 * vm_pager_lookup:
 *
 *	Search the pager list pglist for the pager whose pg_handle equals
 *	handle.  Returns the first match, or NULL when there is none.
 */
vm_pager_t
vm_pager_lookup(pglist, handle)
	register struct pagerlst *pglist;
	caddr_t handle;
{
	register vm_pager_t cand;

	cand = pglist->tqh_first;
	while (cand) {
		if (cand->pg_handle == handle)
			return (cand);
		cand = cand->pg_list.tqe_next;
	}
	return (NULL);
}
/*
 * pager_cache:
 *
 *	Set (should_cache true) or clear (should_cache false) the
 *	OBJ_CANPERSIST flag of an object, under the object cache lock and
 *	the object lock, and then call vm_object_deallocate() on the
 *	object.
 *
 *	Returns KERN_INVALID_ARGUMENT for a NULL object, KERN_SUCCESS
 *	otherwise.
 *
 *	NOTE(review): because of the final vm_object_deallocate() the
 *	caller presumably has to hold a reference that this routine
 *	consumes -- confirm at the call sites.
 */
int
pager_cache(object, should_cache)
	vm_object_t object;
	boolean_t should_cache;
{
	if (object == NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_cache_lock();
	vm_object_lock(object);

	if (!should_cache)
		object->flags &= ~OBJ_CANPERSIST;
	else
		object->flags |= OBJ_CANPERSIST;

	vm_object_unlock(object);
	vm_object_cache_unlock();

	vm_object_deallocate(object);

	return (KERN_SUCCESS);
}

137
sys/vm/vm_pager.h Normal file
View File

@ -0,0 +1,137 @@
/*
* Copyright (c) 1990 University of Utah.
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* the Systems Programming Group of the University of Utah Computer
* Science Department.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vm_pager.h 8.5 (Berkeley) 7/7/94
*/
/*
* Pager routine interface definition.
* For BSD we use a cleaner version of the internal pager interface.
*/
#ifndef _VM_PAGER_
#define _VM_PAGER_
/* Head type for a tail queue of pagers, linked through pg_list. */
TAILQ_HEAD(pagerlst, pager_struct);
/*
 * One pager instance.  pg_ops points at the operations vector through which
 * the generic vm_pager_* routines dispatch; pg_handle is the key compared by
 * vm_pager_lookup().
 */
struct pager_struct {
TAILQ_ENTRY(pager_struct) pg_list; /* links for list management */
caddr_t pg_handle; /* ext. handle (vp, dev, fp) */
int pg_type; /* type of pager */
int pg_flags; /* flags */
struct pagerops *pg_ops; /* pager operations */
void *pg_data; /* private pager data */
};
/*
 * pager types
 *
 * The non-negative values are used as indices into pagertab; PG_DFLT
 * selects dfltpagerops instead.
 */
#define PG_DFLT -1
#define PG_SWAP 0
#define PG_VNODE 1
#define PG_DEVICE 2
/* flags (pg_flags); tested with vm_pager_cancluster() */
#define PG_CLUSTERGET 1
#define PG_CLUSTERPUT 2
/*
 * Operations vector implemented by each pager.  The generic vm_pager_*
 * routines dispatch through it.  Besides real requests, pgo_putpages is
 * called by vm_pager_sync() with a NULL pager, a NULL page list and a count
 * of 0, to let the pager clean up completed asynchronous operations.
 */
struct pagerops {
void (*pgo_init) /* Initialize pager. */
__P((void));
vm_pager_t (*pgo_alloc) /* Allocate pager. */
__P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
void (*pgo_dealloc) /* Disassociate. */
__P((vm_pager_t));
int (*pgo_getpages) /* Get (read) page. */
__P((vm_pager_t, vm_page_t *, int, boolean_t));
int (*pgo_putpages) /* Put (write) page. */
__P((vm_pager_t, vm_page_t *, int, boolean_t));
boolean_t (*pgo_haspage) /* Does pager have page? */
__P((vm_pager_t, vm_offset_t));
void (*pgo_cluster) /* Return range of cluster. */
__P((vm_pager_t, vm_offset_t,
vm_offset_t *, vm_offset_t *));
};
/*
 * get/put return values
 * OK operation was successful
 * BAD specified data was out of the accepted range
 * FAIL specified data was in range, but doesn't exist
 * PEND operation was initiated but not completed
 * ERROR error while accessing data that is in range and exists
 * AGAIN temporary resource shortage prevented operation from happening
 */
#define VM_PAGER_OK 0
#define VM_PAGER_BAD 1
#define VM_PAGER_FAIL 2
#define VM_PAGER_PEND 3
#define VM_PAGER_ERROR 4
#define VM_PAGER_AGAIN 5
#ifdef KERNEL
/* Operations used for PG_DFLT requests; see vm_pager_allocate(). */
extern struct pagerops *dfltpagerops;
/*
 * Generic pager interface.
 */
vm_pager_t vm_pager_allocate
__P((int, caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
vm_page_t vm_pager_atop __P((vm_offset_t));
void vm_pager_cluster
__P((vm_pager_t, vm_offset_t,
vm_offset_t *, vm_offset_t *));
void vm_pager_clusternull
__P((vm_pager_t, vm_offset_t,
vm_offset_t *, vm_offset_t *));
void vm_pager_deallocate __P((vm_pager_t));
int vm_pager_get_pages
__P((vm_pager_t, vm_page_t *, int, boolean_t));
boolean_t vm_pager_has_page __P((vm_pager_t, vm_offset_t));
void vm_pager_init __P((void));
vm_pager_t vm_pager_lookup __P((struct pagerlst *, caddr_t));
vm_offset_t vm_pager_map_pages __P((vm_page_t *, int, boolean_t));
int vm_pager_put_pages
__P((vm_pager_t, vm_page_t *, int, boolean_t));
void vm_pager_sync __P((void));
void vm_pager_unmap_pages __P((vm_offset_t, int));
/*
 * Non-zero when pager p has flag bit(s) b (PG_CLUSTERGET, PG_CLUSTERPUT)
 * set in pg_flags.  p must not be NULL.
 */
#define vm_pager_cancluster(p, b) ((p)->pg_flags & (b))
/*
 * XXX compat with old interface: single-page wrappers around
 * vm_pager_get_pages() and vm_pager_put_pages().
 */
int vm_pager_get __P((vm_pager_t, vm_page_t, boolean_t));
int vm_pager_put __P((vm_pager_t, vm_page_t, boolean_t));
#endif
#endif /* _VM_PAGER_ */

160
sys/vm/vm_param.h Normal file
View File

@ -0,0 +1,160 @@
/*
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vm_param.h 8.2 (Berkeley) 1/9/95
*
*
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
* All rights reserved.
*
* Authors: Avadis Tevanian, Jr., Michael Wayne Young
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* Machine independent virtual memory parameters.
*/
#ifndef _VM_PARAM_
#define _VM_PARAM_
#include <machine/vmparam.h>
/*
* This belongs in types.h, but breaks too many existing programs.
*/
typedef int boolean_t;
#define TRUE 1
#define FALSE 0
/*
* The machine independent pages are referred to as PAGES. A page
* is some number of hardware pages, depending on the target machine.
*/
#define DEFAULT_PAGE_SIZE 4096
/*
* All references to the size of a page should be done with PAGE_SIZE
* or PAGE_SHIFT. The fact they are variables is hidden here so that
* we can easily make them constant if we so desire.
*/
#define PAGE_SIZE cnt.v_page_size /* size of page */
#define PAGE_MASK page_mask /* size of page - 1 */
#define PAGE_SHIFT page_shift /* bits to shift for pages */
#ifdef KERNEL
extern vm_size_t page_mask;
extern int page_shift;
#endif
/*
* CTL_VM identifiers
*/
#define VM_METER 1 /* struct vmmeter */
#define VM_LOADAVG 2 /* struct loadavg */
#define VM_MAXID 3 /* number of valid vm ids */
#define CTL_VM_NAMES { \
{ 0, 0 }, \
{ "vmmeter", CTLTYPE_STRUCT }, \
{ "loadavg", CTLTYPE_STRUCT }, \
}
/*
* Return values from the VM routines.
*/
#define KERN_SUCCESS 0
#define KERN_INVALID_ADDRESS 1
#define KERN_PROTECTION_FAILURE 2
#define KERN_NO_SPACE 3
#define KERN_INVALID_ARGUMENT 4
#define KERN_FAILURE 5
#define KERN_RESOURCE_SHORTAGE 6
#define KERN_NOT_RECEIVER 7
#define KERN_NO_ACCESS 8
#ifndef ASSEMBLER
/*
* Convert addresses to pages and vice versa.
* No rounding is used.
*/
#ifdef KERNEL
/*
 * atop: byte address -> page number (truncating).
 * ptoa: page number -> byte address.  The argument is widened to
 * vm_offset_t *before* the shift so that a page count held in a
 * narrower or signed type (e.g. int) cannot overflow in the shift.
 */
#define	atop(x)		(((unsigned long)(x)) >> PAGE_SHIFT)
#define	ptoa(x)		(((vm_offset_t)(x)) << PAGE_SHIFT)
/*
* Round off or truncate to the nearest page. These will work
* for either addresses or counts (i.e., 1 byte rounds to 1 page).
*/
#define round_page(x) \
((vm_offset_t)((((vm_offset_t)(x)) + PAGE_MASK) & ~PAGE_MASK))
#define trunc_page(x) \
((vm_offset_t)(((vm_offset_t)(x)) & ~PAGE_MASK))
#define num_pages(x) \
((vm_offset_t)((((vm_offset_t)(x)) + PAGE_MASK) >> PAGE_SHIFT))
extern vm_size_t mem_size; /* size of physical memory (bytes) */
extern vm_offset_t first_addr; /* first physical page */
extern vm_offset_t last_addr; /* last physical page */
#else
/* out-of-kernel versions of round_page and trunc_page */
#define round_page(x) \
((((vm_offset_t)(x) + (vm_page_size - 1)) / vm_page_size) * \
vm_page_size)
#define trunc_page(x) \
((((vm_offset_t)(x)) / vm_page_size) * vm_page_size)
#endif /* KERNEL */
#endif /* ASSEMBLER */
#endif /* _VM_PARAM_ */

137
sys/vm/vm_unix.c Normal file
View File

@ -0,0 +1,137 @@
/*
* Copyright (c) 1988 University of Utah.
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* the Systems Programming Group of the University of Utah Computer
* Science Department.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: Utah $Hdr: vm_unix.c 1.1 89/11/07$
*
* @(#)vm_unix.c 8.2 (Berkeley) 1/9/95
*/
/*
* Traditional sbrk/grow interface to VM
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <vm/vm.h>
/*
 * Argument block for obreak(): nsiz is the requested new break
 * address; obreak() rounds it up to a page boundary before use.
 */
struct obreak_args {
char *nsiz;
};
/* ARGSUSED */
int
obreak(p, uap, retval)
struct proc *p;
struct obreak_args *uap;
int *retval;
{
register struct vmspace *vm = p->p_vmspace;
vm_offset_t new, old;
int rv;
register int diff;
old = (vm_offset_t)vm->vm_daddr;
new = round_page(uap->nsiz);
if ((int)(new - old) > p->p_rlimit[RLIMIT_DATA].rlim_cur)
return(ENOMEM);
old = round_page(old + ctob(vm->vm_dsize));
diff = new - old;
if (diff > 0) {
rv = vm_allocate(&vm->vm_map, &old, diff, FALSE);
if (rv != KERN_SUCCESS) {
uprintf("sbrk: grow failed, return = %d\n", rv);
return(ENOMEM);
}
vm->vm_dsize += btoc(diff);
} else if (diff < 0) {
diff = -diff;
rv = vm_deallocate(&vm->vm_map, new, diff);
if (rv != KERN_SUCCESS) {
uprintf("sbrk: shrink failed, return = %d\n", rv);
return(ENOMEM);
}
vm->vm_dsize -= btoc(diff);
}
return(0);
}
/*
 * Extend the recorded size of the stack segment of process p so that
 * it covers the stack pointer sp.
 *
 * Returns 1 when sp lies inside the stack segment on return (it was
 * already covered, or vm_ssize was increased to cover it) and 0 when
 * sp is below vm_maxsaddr or growing would exceed RLIMIT_STACK.
 */
int
grow(p, sp)
	struct proc *p;
	vm_offset_t sp;
{
	register struct vmspace *vm = p->p_vmspace;
	register int extra;

	/*
	 * Below vm_maxsaddr: a user defined stack (from sendsig).
	 */
	if (sp < (vm_offset_t)vm->vm_maxsaddr)
		return (0);

	/*
	 * Anything at or above the current stack bottom is already
	 * allocated (the common case, from trap); only a lower address
	 * requires the size to be checked against the limit and bumped.
	 */
	if (sp < USRSTACK - ctob(vm->vm_ssize)) {
		extra = clrnd(btoc(USRSTACK-sp) - vm->vm_ssize);
		if (vm->vm_ssize + extra >
		    btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur))
			return (0);
		vm->vm_ssize += extra;
	}
	return (1);
}
/*
 * Argument block for ovadvise(); the single field is never examined
 * by ovadvise() itself.
 */
struct ovadvise_args {
int anom;
};
/*
 * ovadvise: unconditionally fails with EINVAL; p, uap and retval are
 * all ignored.
 */
/* ARGSUSED */
int
ovadvise(p, uap, retval)
struct proc *p;
struct ovadvise_args *uap;
int *retval;
{
return (EINVAL);
}

582
sys/vm/vnode_pager.c Normal file
View File

@ -0,0 +1,582 @@
/*
* Copyright (c) 1990 University of Utah.
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* the Systems Programming Group of the University of Utah Computer
* Science Department.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vnode_pager.c 8.10 (Berkeley) 5/14/95
*/
/*
* Page to/from files (vnodes).
*
* TODO:
* pageouts
* fix credential use (uses current process credentials now)
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/mount.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vnode_pager.h>
struct pagerlst vnode_pager_list; /* list of managed vnodes */
#ifdef DEBUG
/*
 * vpagerdebug is a bit mask of the VDB_* flags below; each debug
 * printf() in this file is guarded by a test of one or more of them.
 */
int vpagerdebug = 0x00;
#define VDB_FOLLOW 0x01 /* trace entry to each routine */
#define VDB_INIT 0x02 /* not tested anywhere in this file */
#define VDB_IO 0x04 /* vnode_pager_io transfer parameters and result */
#define VDB_FAIL 0x08 /* vnode_pager_haspage failures */
#define VDB_ALLOC 0x10 /* vnode_pager_alloc arguments and result */
#define VDB_SIZE 0x20 /* size checks and size changes */
#endif
/*
 * Forward declarations of the file-local pager routines.  All of them
 * except vnode_pager_io are installed in vnodepagerops;
 * vnode_pager_io is the common worker behind getpage and putpage.
 */
static vm_pager_t vnode_pager_alloc
__P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
static void vnode_pager_cluster
__P((vm_pager_t, vm_offset_t,
vm_offset_t *, vm_offset_t *));
static void vnode_pager_dealloc __P((vm_pager_t));
static int vnode_pager_getpage
__P((vm_pager_t, vm_page_t *, int, boolean_t));
static boolean_t vnode_pager_haspage __P((vm_pager_t, vm_offset_t));
static void vnode_pager_init __P((void));
static int vnode_pager_io
__P((vn_pager_t, vm_page_t *, int,
boolean_t, enum uio_rw));
static boolean_t vnode_pager_putpage
__P((vm_pager_t, vm_page_t *, int, boolean_t));
/*
 * Operations vector for the vnode pager.  The initializers are
 * positional and follow the member order of struct pagerops.
 */
struct pagerops vnodepagerops = {
vnode_pager_init, /* pgo_init */
vnode_pager_alloc, /* pgo_alloc */
vnode_pager_dealloc, /* pgo_dealloc */
vnode_pager_getpage, /* pgo_getpages */
vnode_pager_putpage, /* pgo_putpages */
vnode_pager_haspage, /* pgo_haspage */
vnode_pager_cluster /* pgo_cluster */
};
/*
 * Initialize the vnode pager: set up vnode_pager_list as an empty
 * tail queue.
 */
static void
vnode_pager_init()
{
#ifdef DEBUG
if (vpagerdebug & VDB_FOLLOW)
printf("vnode_pager_init()\n");
#endif
TAILQ_INIT(&vnode_pager_list);
}
/*
 * Return the pager for a vnode, creating one on first use.
 * The handle is the vnode pointer.  size and prot only appear in
 * debug output; foff is not used.
 *
 * Returns NULL if handle is NULL, if either allocation fails, or if
 * the vnode's attributes cannot be fetched.
 */
static vm_pager_t
vnode_pager_alloc(handle, size, prot, foff)
	caddr_t handle;
	vm_size_t size;
	vm_prot_t prot;
	vm_offset_t foff;
{
	register vm_pager_t pager;
	register vn_pager_t vnp;
	vm_object_t object;
	struct vattr vattr;
	struct vnode *vp;
	struct proc *p = curproc;		/* XXX */

#ifdef DEBUG
	if (vpagerdebug & (VDB_FOLLOW|VDB_ALLOC))
		printf("vnode_pager_alloc(%x, %x, %x)\n", handle, size, prot);
#endif
	/*
	 * Pageout to vnode, no can do yet.
	 */
	if (handle == NULL)
		return(NULL);

	/*
	 * The vnode itself remembers its pager in v_vmdata, so there is
	 * no need to search with vm_pager_lookup.
	 */
	vp = (struct vnode *)handle;
	pager = (vm_pager_t)vp->v_vmdata;
	if (pager != NULL) {
		/*
		 * Existing pager: vm_object_lookup() pulls the object out
		 * of the cache if it is there and takes a reference on it.
		 */
		object = vm_object_lookup(pager);
#ifdef DEBUG
		vnp = (vn_pager_t)pager->pg_data;
#endif
	} else {
		/*
		 * First use: allocate the generic and the private pager
		 * structures.
		 */
		pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
		if (pager == NULL)
			return(NULL);
		vnp = (vn_pager_t)malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK);
		if (vnp == NULL) {
			free((caddr_t)pager, M_VMPAGER);
			return(NULL);
		}

		/*
		 * The object is sized from the file size, so give up if
		 * the attributes are unavailable.
		 */
		if (VOP_GETATTR(vp, &vattr, p->p_ucred, p) != 0) {
			free((caddr_t)vnp, M_VMPGDATA);
			free((caddr_t)pager, M_VMPAGER);
			return(NULL);
		}
		object = vm_object_allocate(round_page(vattr.va_size));
		vm_object_enter(object, pager);
		vm_object_setpager(object, pager, 0, TRUE);

		/*
		 * Take a reference on the vnode, then fill in the private
		 * data, link the pager on the list and tie pager and
		 * vnode together.
		 */
		VREF(vp);
		vnp->vnp_flags = 0;
		vnp->vnp_vp = vp;
		vnp->vnp_size = vattr.va_size;
		TAILQ_INSERT_TAIL(&vnode_pager_list, pager, pg_list);
		pager->pg_handle = handle;
		pager->pg_type = PG_VNODE;
		pager->pg_flags = 0;
		pager->pg_ops = &vnodepagerops;
		pager->pg_data = vnp;
		vp->v_vmdata = (caddr_t)pager;
	}
#ifdef DEBUG
	if (vpagerdebug & VDB_ALLOC)
		printf("vnode_pager_setup: vp %x sz %x pager %x object %x\n",
		       vp, vnp->vnp_size, pager, object);
#endif
	return(pager);
}
/*
 * Tear down a vnode pager: detach it from its vnode (if any), drop
 * the vnode reference, unlink the pager from vnode_pager_list and
 * free both the private data and the pager structure.
 */
static void
vnode_pager_dealloc(pager)
	vm_pager_t pager;
{
	register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
	register struct vnode *vp;
#ifdef NOTDEF
	struct proc *p = curproc;		/* XXX */
#endif

#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_dealloc(%x)\n", pager);
#endif
	vp = vnp->vnp_vp;
	if (vp != NULL) {
		/* The vnode no longer has a pager and is no longer text. */
		vp->v_vmdata = NULL;
		vp->v_flag &= ~VTEXT;
#if NOTDEF
		/* can hang if done at reboot on NFS FS */
		(void) VOP_FSYNC(vp, p->p_ucred, p);
#endif
		vrele(vp);
	}
	TAILQ_REMOVE(&vnode_pager_list, pager, pg_list);
	free((caddr_t)vnp, M_VMPGDATA);
	free((caddr_t)pager, M_VMPAGER);
}
static int
vnode_pager_getpage(pager, mlist, npages, sync)
vm_pager_t pager;
vm_page_t *mlist;
int npages;
boolean_t sync;
{
#ifdef DEBUG
if (vpagerdebug & VDB_FOLLOW)
printf("vnode_pager_getpage(%x, %x, %x, %x)\n",
pager, mlist, npages, sync);
#endif
return(vnode_pager_io((vn_pager_t)pager->pg_data,
mlist, npages, sync, UIO_READ));
}
/*
 * Write pages out to the vnode backing this pager via
 * vnode_pager_io() (UIO_WRITE).  When the write succeeds every page
 * in mlist is flagged PG_CLEAN and its modify bit is cleared.
 *
 * Returns the VM_PAGER_* status from vnode_pager_io(), or FALSE when
 * called with a NULL pager.
 */
static boolean_t
vnode_pager_putpage(pager, mlist, npages, sync)
	vm_pager_t pager;
	vm_page_t *mlist;
	int npages;
	boolean_t sync;
{
	vm_page_t *mp;
	int left, status;

#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_putpage(%x, %x, %x, %x)\n",
		       pager, mlist, npages, sync);
#endif
	if (pager == NULL)
		return (FALSE);		/* ??? */

	status = vnode_pager_io((vn_pager_t)pager->pg_data,
				mlist, npages, sync, UIO_WRITE);
	if (status != VM_PAGER_OK)
		return(status);

	/*
	 * The write went through, so the pages now match the file.
	 */
	for (mp = mlist, left = npages; left != 0; left--, mp++) {
		(*mp)->flags |= PG_CLEAN;
		pmap_clear_modify(VM_PAGE_TO_PHYS(*mp));
	}
	return(status);
}
static boolean_t
vnode_pager_haspage(pager, offset)
vm_pager_t pager;
vm_offset_t offset;
{
struct proc *p = curproc; /* XXX */
vn_pager_t vnp = (vn_pager_t)pager->pg_data;
daddr_t bn;
int err;
#ifdef DEBUG
if (vpagerdebug & VDB_FOLLOW)
printf("vnode_pager_haspage(%x, %x)\n", pager, offset);
#endif
/*
* Offset beyond end of file, do not have the page
* Lock the vnode first to make sure we have the most recent
* version of the size.
*/
vn_lock(vnp->vnp_vp, LK_EXCLUSIVE | LK_RETRY, p);
if (offset >= vnp->vnp_size) {
VOP_UNLOCK(vnp->vnp_vp, 0, p);
#ifdef DEBUG
if (vpagerdebug & (VDB_FAIL|VDB_SIZE))
printf("vnode_pager_haspage: pg %x, off %x, size %x\n",
pager, offset, vnp->vnp_size);
#endif
return(FALSE);
}
/*
* Read the index to find the disk block to read
* from. If there is no block, report that we don't
* have this data.
*
* Assumes that the vnode has whole page or nothing.
*/
err = VOP_BMAP(vnp->vnp_vp,
offset / vnp->vnp_vp->v_mount->mnt_stat.f_iosize,
(struct vnode **)0, &bn, NULL);
VOP_UNLOCK(vnp->vnp_vp, 0, p);
if (err) {
#ifdef DEBUG
if (vpagerdebug & VDB_FAIL)
printf("vnode_pager_haspage: BMAP err %d, pg %x, off %x\n",
err, pager, offset);
#endif
return(TRUE);
}
return((long)bn < 0 ? FALSE : TRUE);
}
/*
 * Compute the cluster range around a file offset.  The range starts
 * at offset and extends MAXBSIZE bytes, clipped to the page-rounded
 * file size; the bounds are returned through loffset and hoffset.
 * Panics if offset is at or beyond the recorded file size.
 */
static void
vnode_pager_cluster(pager, offset, loffset, hoffset)
	vm_pager_t pager;
	vm_offset_t offset;
	vm_offset_t *loffset;
	vm_offset_t *hoffset;
{
	vn_pager_t vnp = (vn_pager_t)pager->pg_data;
	vm_offset_t lo, hi, eof;

#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_cluster(%x, %x) ", pager, offset);
#endif
	lo = offset;
	if (lo >= vnp->vnp_size)
		panic("vnode_pager_cluster: bad offset");

	/*
	 * XXX could use VOP_BMAP to get maxcontig value
	 */
	eof = round_page(vnp->vnp_size);
	hi = lo + MAXBSIZE;
	if (hi > eof)
		hi = eof;

	*loffset = lo;
	*hoffset = hi;
#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("returns [%x-%x]\n", lo, hi);
#endif
}
/*
 * (XXX)
 * Tell the VM system that a file has changed size.
 * When the vnode has a pager (i.e. it is mapped somewhere), the
 * pager's recorded size is updated and, if the file shrank, cached
 * pages of the associated object beyond the new EOF are discarded.
 *
 * Note: a pager put operation (possibly at object termination time)
 * can lead back into this routine, so care is needed.
 */
void
vnode_pager_setsize(vp, nsize)
	struct vnode *vp;
	u_long nsize;
{
	register vn_pager_t vnp;
	register vm_object_t object;
	vm_pager_t pager;

	/* Nothing to do unless this is a regular file with a pager. */
	if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL)
		return;

	/* Nothing to do if the size is unchanged. */
	pager = (vm_pager_t)vp->v_vmdata;
	vnp = (vn_pager_t)pager->pg_data;
	if (vnp->vnp_size == nsize)
		return;

	/*
	 * The lookup can come back empty during object termination:
	 * vm_object_page_clean runs after the object has left the hash
	 * table, and cleaning may trigger vnode writes that end up here.
	 * The lookup takes a reference, released at the end.
	 */
	object = vm_object_lookup(pager);
	if (object == NULL)
		return;

#ifdef DEBUG
	if (vpagerdebug & (VDB_FOLLOW|VDB_SIZE))
		printf("vnode_pager_setsize: vp %x obj %x osz %d nsz %d\n",
		       vp, object, vnp->vnp_size, nsize);
#endif
	/*
	 * On a shrink, throw away cached pages in [nsize, old size).
	 */
	if (vnp->vnp_size > nsize) {
		vm_object_lock(object);
		vm_object_page_remove(object,
				      (vm_offset_t)nsize, vnp->vnp_size);
		vm_object_unlock(object);
	}
	vnp->vnp_size = (vm_offset_t)nsize;
	vm_object_deallocate(object);
}
/*
 * Uncache the objects of every vnode pager whose vnode lives on mount
 * point mp; a null mp selects every vnode pager on the list.
 */
void
vnode_pager_umount(mp)
	register struct mount *mp;
{
	struct proc *p = curproc;		/* XXX */
	vm_pager_t pager, npager;
	struct vnode *vp;

	pager = vnode_pager_list.tqh_first;
	while (pager != NULL) {
		/*
		 * Fetch the successor first: uncaching may terminate
		 * the object and leave this pager invalid.
		 */
		npager = pager->pg_list.tqe_next;
		vp = ((vn_pager_t)pager->pg_data)->vnp_vp;
		if (mp == (struct mount *)0 || vp->v_mount == mp) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
			(void) vnode_pager_uncache(vp);
			VOP_UNLOCK(vp, 0, p);
		}
		pager = npager;
	}
}
/*
* Remove vnode associated object from the object cache.
*
* XXX unlock the vnode if it is currently locked.
* We must do this since uncaching the object may result in its
* destruction which may initiate paging activity which may necessitate
* re-locking the vnode.
*/
boolean_t
vnode_pager_uncache(vp)
register struct vnode *vp;
{
struct proc *p = curproc; /* XXX */
vm_object_t object;
boolean_t uncached;
vm_pager_t pager;
/*
* Not a mapped vnode
*/
if (vp->v_type != VREG || (pager = (vm_pager_t)vp->v_vmdata) == NULL)
return (TRUE);
#ifdef DEBUG
if (!VOP_ISLOCKED(vp)) {
extern int (**nfsv2_vnodeop_p)();
if (vp->v_op != nfsv2_vnodeop_p)
panic("vnode_pager_uncache: vnode not locked!");
}
#endif
/*
* Must use vm_object_lookup() as it actually removes
* the object from the cache list.
*/
object = vm_object_lookup(pager);
if (object) {
uncached = (object->ref_count <= 1);
VOP_UNLOCK(vp, 0, p);
pager_cache(object, FALSE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
} else
uncached = TRUE;
return(uncached);
}
/*
 * Transfer one page between memory and the vnode behind vnp, using
 * VOP_READ or VOP_WRITE according to rw.  Only npages == 1 is
 * supported; anything else panics.
 *
 * Returns VM_PAGER_AGAIN if the page could not be mapped into kernel
 * virtual memory, VM_PAGER_BAD if the page's file offset is at or
 * past the recorded file size, VM_PAGER_ERROR if the vnode operation
 * fails or transfers nothing, and VM_PAGER_OK otherwise.  A short
 * read leaves the remainder of the page zero filled.
 */
static int
vnode_pager_io(vnp, mlist, npages, sync, rw)
	register vn_pager_t vnp;
	vm_page_t *mlist;
	int npages;
	boolean_t sync;
	enum uio_rw rw;
{
	struct uio auio;
	struct iovec aiov;
	vm_offset_t kva, foff;
	int error, size;
	struct proc *p = curproc;		/* XXX */
	vm_page_t m;

	/* XXX */
	if (npages != 1)
		panic("vnode_pager_io: cannot handle multiple pages");
	m = *mlist;
	/* XXX */

#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_io(%x, %x, %c): vnode %x\n",
		       vnp, m, rw == UIO_READ ? 'R' : 'W', vnp->vnp_vp);
#endif
	foff = m->offset + m->object->paging_offset;

	/*
	 * Map the page at a kernel virtual address so that the
	 * VOP_READ/WRITE routines can be used on it.
	 */
	kva = vm_pager_map_pages(mlist, npages, sync);
	if (kva == NULL)
		return(VM_PAGER_AGAIN);

	/*
	 * With the potentially blocking steps out of the way, do the
	 * size checks under the vnode lock:
	 *	read beyond EOF (returns error)
	 *	short read
	 */
	vn_lock(vnp->vnp_vp, LK_EXCLUSIVE | LK_RETRY, p);
	if (foff >= vnp->vnp_size) {
		VOP_UNLOCK(vnp->vnp_vp, 0, p);
		vm_pager_unmap_pages(kva, npages);
#ifdef DEBUG
		if (vpagerdebug & VDB_SIZE)
			printf("vnode_pager_io: vp %x, off %d size %d\n",
			       vnp->vnp_vp, foff, vnp->vnp_size);
#endif
		return(VM_PAGER_BAD);
	}

	/* Transfer a full page unless the file ends inside it. */
	size = PAGE_SIZE;
	if (foff + PAGE_SIZE > vnp->vnp_size)
		size = vnp->vnp_size - foff;

	/* Single-segment, kernel-space uio describing the page. */
	aiov.iov_base = (caddr_t)kva;
	aiov.iov_len = size;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = foff;
	auio.uio_resid = size;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = rw;
	auio.uio_procp = (struct proc *)0;
#ifdef DEBUG
	if (vpagerdebug & VDB_IO)
		printf("vnode_pager_io: vp %x kva %x foff %x size %x",
		       vnp->vnp_vp, kva, foff, size);
#endif
	if (rw == UIO_READ)
		error = VOP_READ(vnp->vnp_vp, &auio, 0, p->p_ucred);
	else
		error = VOP_WRITE(vnp->vnp_vp, &auio, 0, p->p_ucred);
	VOP_UNLOCK(vnp->vnp_vp, 0, p);
#ifdef DEBUG
	if (vpagerdebug & VDB_IO) {
		if (error || auio.uio_resid)
			printf(" returns error %x, resid %x",
			       error, auio.uio_resid);
		printf("\n");
	}
#endif
	if (error == 0) {
		register int moved = size - auio.uio_resid;

		if (moved == 0) {
			/* Nothing transferred at all counts as failure. */
			error = EINVAL;
		} else if (moved != PAGE_SIZE && rw == UIO_READ) {
			/* Zero the tail of a partially read page. */
			bzero((void *)(kva + moved), PAGE_SIZE - moved);
		}
	}
	vm_pager_unmap_pages(kva, npages);
	if (error)
		return (VM_PAGER_ERROR);
	return (VM_PAGER_OK);
}