1
0
mirror of https://git.FreeBSD.org/src.git synced 2025-01-04 12:52:15 +00:00

This is a major fixup of unionfs. At least 30 serious bugs have been

fixed (many due to changing semantics in other parts of the kernel and not
    the original author's fault), including one critical one: unionfs could
    cause UFS corruption in the fronting store due to calling VOP_OPEN for
    writing without turning on vmio for the UFS vnode.

    Most of the bugs were related to semantics changes in VOP calls, lock
    ordering problems (causing deadlocks), improper handling of a read-only
    backing store (such as an NFS mount), improper referencing and locking
    of vnodes, not using real struct locks for vnode locking, not using
    recursive locks when accessing the fronting store, and things like that.

    New functionality has been added:  unionfs now has mmap() support, but
    only partially tested, and rename has been enhanced considerably.

    There are still some things that unionfs cannot do.   You cannot
    rename a directory without confusing unionfs, and there are issues
    with softlinks, hardlinks, and special files.  unionfs mostly doesn't
    understand them (and never did).

    There are probably still panic situations, but hopefully no where near
    as many as before this commit.

    The unionfs in this commit has been tested overlayed on /usr/src
    (backing /usr/src being a read-only NFS mount, fronting /usr/src being
    a local filesystem).  kernel builds have been tested, buildworld is
    undergoing testing.  More testing is necessary.
This commit is contained in:
Matthew Dillon 1999-09-26 20:52:41 +00:00
parent bcf7336218
commit 2a31267e43
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=51688
8 changed files with 2530 additions and 1732 deletions

View File

@ -49,8 +49,8 @@ struct union_args {
#define UNMNT_OPMASK 0x0003
struct union_mount {
struct vnode *um_uppervp;
struct vnode *um_lowervp;
struct vnode *um_uppervp; /* UN_ULOCK holds locking state */
struct vnode *um_lowervp; /* Left unlocked */
struct ucred *um_cred; /* Credentials of user calling mount */
int um_cmode; /* cmask from mount process */
int um_op; /* Operation mode */
@ -58,6 +58,10 @@ struct union_mount {
#ifdef KERNEL
#ifndef DIAGNOSTIC
#define DIAGNOSTIC
#endif
/*
* DEFDIRMODE is the mode bits used to create a shadow directory.
*/
@ -67,9 +71,14 @@ struct union_mount {
#define UN_FILEMODE ((VRWMODE)|(VRWMODE>>3)|(VRWMODE>>6))
/*
* A cache of vnode references
* A cache of vnode references (hangs off v_data)
*
* Placing un_lock as the first elements theoretically allows us to
* use the vop_stdlock functions. However, we need to make sure of
* certain side effects so we will still punch in our own code.
*/
struct union_node {
struct lock un_lock;
LIST_ENTRY(union_node) un_cache; /* Hash chain */
struct vnode *un_vnode; /* Back pointer */
struct vnode *un_uppervp; /* overlaying object */
@ -79,6 +88,7 @@ struct union_node {
char *un_path; /* saved component name */
int un_hash; /* saved un_path hash value */
int un_openl; /* # of opens on lowervp */
int un_exclcnt; /* exclusive count */
unsigned int un_flags;
struct vnode **un_dircache; /* cached union stack */
off_t un_uppersz; /* size of upper object */
@ -88,14 +98,25 @@ struct union_node {
#endif
};
#define UN_WANT 0x01
#define UN_LOCKED 0x02
#define UN_ULOCK 0x04 /* Upper node is locked */
#define UN_KLOCK 0x08 /* Keep upper node locked on vput */
#define UN_CACHED 0x10 /* In union cache */
/*
* XXX UN_ULOCK - indicates that the uppervp is locked
*
* UN_CACHED - node is in the union cache
*/
/*#define UN_ULOCK 0x04*/ /* Upper node is locked */
#define UN_CACHED 0x10 /* In union cache */
/*
* Hash table locking flags
*/
#define UNVP_WANT 0x01
#define UNVP_LOCKED 0x02
extern int union_allocvp __P((struct vnode **, struct mount *,
struct vnode *, struct vnode *,
struct vnode *,
struct vnode *,
struct componentname *, struct vnode *,
struct vnode *, int));
extern int union_freevp __P((struct vnode *));
@ -113,6 +134,7 @@ extern int union_cn_close __P((struct vnode *, int, struct ucred *,
extern void union_removed_upper __P((struct union_node *un));
extern struct vnode *union_lowervp __P((struct vnode *));
extern void union_newsize __P((struct vnode *, off_t, off_t));
extern void union_vm_coherency __P((struct vnode *, struct uio *, int));
extern int (*union_dircheckp) __P((struct proc *, struct vnode **,
struct file *));
@ -124,6 +146,11 @@ extern int (*union_dircheckp) __P((struct proc *, struct vnode **,
#define UPPERVP(vp) (VTOUNION(vp)->un_uppervp)
#define OTHERVP(vp) (UPPERVP(vp) ? UPPERVP(vp) : LOWERVP(vp))
#define UDEBUG(x) if (uniondebug) printf x
#define UDEBUG_ENABLED 1
extern vop_t **union_vnodeop_p;
extern struct vfsops union_vfsops;
extern int uniondebug;
#endif /* KERNEL */

View File

@ -53,6 +53,7 @@
#include <vm/vm.h>
#include <vm/vm_extern.h> /* for vnode_pager_setsize */
#include <vm/vm_zone.h>
#include <vm/vm_object.h> /* for vm cache coherency */
#include <miscfs/union/union.h>
#include <sys/proc.h>
@ -97,7 +98,7 @@ union_init()
for (i = 0; i < NHASH; i++)
LIST_INIT(&unhead[i]);
bzero((caddr_t) unvplock, sizeof(unvplock));
bzero((caddr_t)unvplock, sizeof(unvplock));
return (0);
}
@ -105,15 +106,12 @@ static int
union_list_lock(ix)
int ix;
{
if (unvplock[ix] & UN_LOCKED) {
unvplock[ix] |= UN_WANT;
if (unvplock[ix] & UNVP_LOCKED) {
unvplock[ix] |= UNVP_WANT;
(void) tsleep((caddr_t) &unvplock[ix], PINOD, "unllck", 0);
return (1);
}
unvplock[ix] |= UN_LOCKED;
unvplock[ix] |= UNVP_LOCKED;
return (0);
}
@ -121,15 +119,25 @@ static void
union_list_unlock(ix)
int ix;
{
unvplock[ix] &= ~UNVP_LOCKED;
unvplock[ix] &= ~UN_LOCKED;
if (unvplock[ix] & UN_WANT) {
unvplock[ix] &= ~UN_WANT;
if (unvplock[ix] & UNVP_WANT) {
unvplock[ix] &= ~UNVP_WANT;
wakeup((caddr_t) &unvplock[ix]);
}
}
/*
* union_updatevp:
*
* The uppervp, if not NULL, must be referenced and not locked by us
* The lowervp, if not NULL, must be referenced.
*
* if uppervp and lowervp match pointers already installed, nothing
* happens. The passed vp's (when matching) are not adjusted. This
* routine may only be called by union_newupper() and union_newlower().
*/
static void
union_updatevp(un, uppervp, lowervp)
struct union_node *un;
@ -153,9 +161,10 @@ union_updatevp(un, uppervp, lowervp)
uhash = nhash;
}
if (lhash != uhash)
if (lhash != uhash) {
while (union_list_lock(lhash))
continue;
}
while (union_list_lock(uhash))
continue;
@ -177,10 +186,6 @@ union_updatevp(un, uppervp, lowervp)
free(un->un_path, M_TEMP);
un->un_path = 0;
}
if (un->un_dirvp) {
vrele(un->un_dirvp);
un->un_dirvp = NULLVP;
}
}
un->un_lowervp = lowervp;
un->un_lowersz = VNOVAL;
@ -189,7 +194,6 @@ union_updatevp(un, uppervp, lowervp)
if (un->un_uppervp != uppervp) {
if (un->un_uppervp)
vrele(un->un_uppervp);
un->un_uppervp = uppervp;
un->un_uppersz = VNOVAL;
}
@ -202,21 +206,30 @@ union_updatevp(un, uppervp, lowervp)
union_list_unlock(nhash);
}
/*
* Set a new lowervp. The passed lowervp must be referenced and will be
* stored in the vp in a referenced state.
*/
static void
union_newlower(un, lowervp)
struct union_node *un;
struct vnode *lowervp;
{
union_updatevp(un, un->un_uppervp, lowervp);
}
/*
* Set a new uppervp. The passed uppervp must be locked and will be
* stored in the vp in a locked state. The caller should not unlock
* uppervp.
*/
static void
union_newupper(un, uppervp)
struct union_node *un;
struct vnode *uppervp;
{
union_updatevp(un, uppervp, un->un_lowervp);
}
@ -253,27 +266,51 @@ union_newsize(vp, uppersz, lowersz)
}
if (sz != VNOVAL) {
#ifdef DEBUG
printf("union: %s size now %ld\n",
uppersz != VNOVAL ? "upper" : "lower", (long) sz);
#endif
UDEBUG(("union: %s size now %ld\n",
(uppersz != VNOVAL ? "upper" : "lower"), (long)sz));
vnode_pager_setsize(vp, sz);
}
}
/*
* allocate a union_node/vnode pair. the vnode is
* referenced and locked. the new vnode is returned
* via (vpp). (mp) is the mountpoint of the union filesystem,
* (dvp) is the parent directory where the upper layer object
* should exist (but doesn't) and (cnp) is the componentname
* information which is partially copied to allow the upper
* layer object to be created at a later time. (uppervp)
* and (lowervp) reference the upper and lower layer objects
* being mapped. either, but not both, can be nil.
* if supplied, (uppervp) is locked.
* the reference is either maintained in the new union_node
* object which is allocated, or they are vrele'd.
* union_allocvp: allocate a union_node and associate it with a
* parent union_node and one or two vnodes.
*
* vpp Holds the returned vnode locked and referenced if no
* error occurs.
*
* mp Holds the mount point. mp may or may not be busied.
* allocvp makes no changes to mp.
*
* dvp Holds the parent union_node to the one we wish to create.
* XXX may only be used to traverse an uncopied lowervp-based
* tree? XXX
*
* dvp may or may not be locked. allocvp makes no changes
* to dvp.
*
* upperdvp Holds the parent vnode to uppervp, generally used along
* with path component information to create a shadow of
* lowervp when uppervp does not exist.
*
* upperdvp is referenced but unlocked on entry, and will be
* dereferenced on return.
*
* uppervp Holds the new uppervp vnode to be stored in the
* union_node we are allocating. uppervp is referenced but
* not locked, and will be dereferenced on return.
*
* lowervp Holds the new lowervp vnode to be stored in the
* union_node we are allocating. uppervp is referenced but
* not locked, and will be dereferenced on return.
*
* cnp Holds path component information to be coupled with
* lowervp and upperdvp to allow unionfs to create an uppervp
* later on. Only used if lowervp is valid. The conents
* of cnp is only valid for the duration of the call.
*
* docache Determine whether this node should be entered in the
* cache or whether it should be destroyed as soon as possible.
*
* all union_nodes are maintained on a singly-linked
* list. new nodes are only allocated when they cannot
@ -292,12 +329,13 @@ union_newsize(vp, uppersz, lowersz)
* zero references to it and so it needs to removed from
* the vnode free list.
*/
int
union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
union_allocvp(vpp, mp, dvp, upperdvp, cnp, uppervp, lowervp, docache)
struct vnode **vpp;
struct mount *mp;
struct vnode *undvp; /* parent union vnode */
struct vnode *dvp; /* may be null */
struct vnode *dvp; /* parent union vnode */
struct vnode *upperdvp; /* parent vnode of uppervp */
struct componentname *cnp; /* may be null */
struct vnode *uppervp; /* may be null */
struct vnode *lowervp; /* may be null */
@ -307,6 +345,7 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
struct union_node *un = 0;
struct vnode *xlowervp = NULLVP;
struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
struct proc *p = (cnp) ? cnp->cn_proc : curproc;
int hash = 0;
int vflag;
int try;
@ -382,65 +421,76 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
if (un) {
/*
* Obtain a lock on the union_node.
* uppervp is locked, though un->un_uppervp
* may not be. this doesn't break the locking
* hierarchy since in the case that un->un_uppervp
* is not yet locked it will be vrele'd and replaced
* with uppervp.
* Obtain a lock on the union_node. Everything is unlocked
* except for dvp, so check that case. If they match, our
* new un is already locked. Otherwise we have to lock our
* new un.
*
* A potential deadlock situation occurs when we are holding
* one lock while trying to get another. We must follow
* strict ordering rules to avoid it. We try to locate dvp
* by scanning up from un_vnode, since the most likely
* scenario is un being under dvp.
*/
if ((dvp != NULLVP) && (uppervp == dvp)) {
/*
* Access ``.'', so (un) will already
* be locked. Since this process has
* the lock on (uppervp) no other
* process can hold the lock on (un).
*/
#ifdef DIAGNOSTIC
if ((un->un_flags & UN_LOCKED) == 0)
panic("union: . not locked");
else if (curproc && un->un_pid != curproc->p_pid &&
un->un_pid > -1 && curproc->p_pid > -1)
panic("union: allocvp not lock owner");
#endif
} else {
if (un->un_flags & UN_LOCKED) {
vrele(UNIONTOV(un));
un->un_flags |= UN_WANT;
(void) tsleep((caddr_t) &un->un_flags, PINOD, "unalvp", 0);
goto loop;
}
un->un_flags |= UN_LOCKED;
if (dvp && un->un_vnode != dvp) {
struct vnode *scan = un->un_vnode;
#ifdef DIAGNOSTIC
if (curproc)
un->un_pid = curproc->p_pid;
else
un->un_pid = -1;
#endif
do {
scan = VTOUNION(scan)->un_pvp;
} while (scan && scan->v_tag == VT_UNION && scan != dvp);
if (scan != dvp) {
/*
* our new un is above dvp (we never saw dvp
* while moving up the tree).
*/
VREF(dvp);
VOP_UNLOCK(dvp, 0, p);
error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p);
vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
vrele(dvp);
} else {
/*
* our new un is under dvp
*/
error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p);
}
} else if (dvp == NULLVP) {
/*
* dvp is NULL, we need to lock un.
*/
error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p);
} else {
/*
* dvp == un->un_vnode, we are already locked.
*/
error = 0;
}
/*
* At this point, the union_node is locked,
* un->un_uppervp may not be locked, and uppervp
* is locked or nil.
*/
if (error)
goto loop;
/*
* Save information about the upper layer.
* At this point, the union_node is locked and referenced.
*
* uppervp is locked and referenced or NULL, lowervp is
* referenced or NULL.
*/
UDEBUG(("Modify existing un %p vn %p upper %p(refs %d) -> %p(refs %d)\n",
un, un->un_vnode, un->un_uppervp,
(un->un_uppervp ? un->un_uppervp->v_usecount : -99),
uppervp,
(uppervp ? uppervp->v_usecount : -99)
));
if (uppervp != un->un_uppervp) {
KASSERT(uppervp == NULL || uppervp->v_usecount > 0, ("union_allocvp: too few refs %d (at least 1 required) on uppervp", uppervp->v_usecount));
union_newupper(un, uppervp);
} else if (uppervp) {
KASSERT(uppervp->v_usecount > 1, ("union_allocvp: too few refs %d (at least 2 required) on uppervp", uppervp->v_usecount));
vrele(uppervp);
}
if (un->un_uppervp) {
un->un_flags |= UN_ULOCK;
un->un_flags &= ~UN_KLOCK;
}
/*
* Save information about the lower layer.
* This needs to keep track of pathname
@ -456,12 +506,22 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
bcopy(cnp->cn_nameptr, un->un_path,
cnp->cn_namelen);
un->un_path[cnp->cn_namelen] = '\0';
VREF(dvp);
un->un_dirvp = dvp;
}
} else if (lowervp) {
vrele(lowervp);
}
/*
* and upperdvp
*/
if (upperdvp != un->un_dirvp) {
if (un->un_dirvp)
vrele(un->un_dirvp);
un->un_dirvp = upperdvp;
} else if (upperdvp) {
vrele(upperdvp);
}
*vpp = UNIONTOV(un);
return (0);
}
@ -477,17 +537,22 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
goto loop;
}
/*
* Create new node rather then replace old node
*/
error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
if (error) {
if (uppervp) {
if (dvp == uppervp)
vrele(uppervp);
else
vput(uppervp);
}
/*
* If an error occurs clear out vnodes.
*/
if (lowervp)
vrele(lowervp);
if (uppervp)
vrele(uppervp);
if (upperdvp)
vrele(upperdvp);
*vpp = NULL;
goto out;
}
@ -499,37 +564,34 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
(*vpp)->v_type = uppervp->v_type;
else
(*vpp)->v_type = lowervp->v_type;
un = VTOUNION(*vpp);
bzero(un, sizeof(*un));
lockinit(&un->un_lock, PVFS, "unlock", 0, 0);
vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);
un->un_vnode = *vpp;
un->un_uppervp = uppervp;
un->un_uppersz = VNOVAL;
un->un_lowervp = lowervp;
un->un_lowersz = VNOVAL;
un->un_pvp = undvp;
if (undvp != NULLVP)
VREF(undvp);
un->un_dirvp = upperdvp;
un->un_pvp = dvp; /* only parent dir in new allocation */
if (dvp != NULLVP)
VREF(dvp);
un->un_dircache = 0;
un->un_openl = 0;
un->un_flags = UN_LOCKED;
if (un->un_uppervp)
un->un_flags |= UN_ULOCK;
#ifdef DIAGNOSTIC
if (curproc)
un->un_pid = curproc->p_pid;
else
un->un_pid = -1;
#endif
if (cnp && (lowervp != NULLVP)) {
un->un_hash = cnp->cn_hash;
un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
un->un_path[cnp->cn_namelen] = '\0';
VREF(dvp);
un->un_dirvp = dvp;
} else {
un->un_hash = 0;
un->un_path = 0;
un->un_dirvp = 0;
un->un_dirvp = NULL;
}
if (docache) {
@ -537,10 +599,10 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
un->un_flags |= UN_CACHED;
}
out:
if (xlowervp)
vrele(xlowervp);
out:
if (docache)
union_list_unlock(hash);
@ -558,16 +620,26 @@ union_freevp(vp)
LIST_REMOVE(un, un_cache);
}
if (un->un_pvp != NULLVP)
if (un->un_pvp != NULLVP) {
vrele(un->un_pvp);
if (un->un_uppervp != NULLVP)
un->un_pvp = NULL;
}
if (un->un_uppervp != NULLVP) {
vrele(un->un_uppervp);
if (un->un_lowervp != NULLVP)
un->un_uppervp = NULL;
}
if (un->un_lowervp != NULLVP) {
vrele(un->un_lowervp);
if (un->un_dirvp != NULLVP)
un->un_lowervp = NULL;
}
if (un->un_dirvp != NULLVP) {
vrele(un->un_dirvp);
if (un->un_path)
un->un_dirvp = NULL;
}
if (un->un_path) {
free(un->un_path, M_TEMP);
un->un_path = NULL;
}
FREE(vp->v_data, M_TEMP);
vp->v_data = 0;
@ -579,6 +651,9 @@ union_freevp(vp)
* copyfile. copy the vnode (fvp) to the vnode (tvp)
* using a sequence of reads and writes. both (fvp)
* and (tvp) are locked on entry and exit.
*
* fvp and tvp are both exclusive locked on call, but their refcount's
* haven't been bumped at all.
*/
static int
union_copyfile(fvp, tvp, cred, p)
@ -600,48 +675,62 @@ union_copyfile(fvp, tvp, cred, p)
* give up at the first sign of trouble.
*/
bzero(&uio, sizeof(uio));
uio.uio_procp = p;
uio.uio_segflg = UIO_SYSSPACE;
uio.uio_offset = 0;
VOP_UNLOCK(fvp, 0, p); /* XXX */
VOP_LEASE(fvp, p, cred, LEASE_READ);
vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */
VOP_UNLOCK(tvp, 0, p); /* XXX */
VOP_LEASE(tvp, p, cred, LEASE_WRITE);
vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */
buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
/* ugly loop follows... */
do {
off_t offset = uio.uio_offset;
int count;
int bufoffset;
/*
* Setup for big read
*/
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
iov.iov_base = buf;
iov.iov_len = MAXBSIZE;
uio.uio_resid = iov.iov_len;
uio.uio_rw = UIO_READ;
error = VOP_READ(fvp, &uio, 0, cred);
if (error == 0) {
if ((error = VOP_READ(fvp, &uio, 0, cred)) != 0)
break;
/*
* Get bytes read, handle read eof case and setup for
* write loop
*/
if ((count = MAXBSIZE - uio.uio_resid) == 0)
break;
bufoffset = 0;
/*
* Write until an error occurs or our buffer has been
* exhausted, then update the offset for the next read.
*/
while (bufoffset < count) {
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
iov.iov_base = buf;
iov.iov_len = MAXBSIZE - uio.uio_resid;
uio.uio_offset = offset;
iov.iov_base = buf + bufoffset;
iov.iov_len = count - bufoffset;
uio.uio_offset = offset + bufoffset;
uio.uio_rw = UIO_WRITE;
uio.uio_resid = iov.iov_len;
if (uio.uio_resid == 0)
if ((error = VOP_WRITE(tvp, &uio, 0, cred)) != 0)
break;
do {
error = VOP_WRITE(tvp, &uio, 0, cred);
} while ((uio.uio_resid > 0) && (error == 0));
bufoffset += (count - bufoffset) - uio.uio_resid;
}
uio.uio_offset = offset + bufoffset;
} while (error == 0);
free(buf, M_TEMP);
@ -649,9 +738,10 @@ union_copyfile(fvp, tvp, cred, p)
}
/*
* (un) is assumed to be locked on entry and remains
* locked on exit.
*
* un's vnode is assumed to be locked on entry and remains locked on exit.
*/
int
union_copyup(un, docopy, cred, p)
struct union_node *un;
@ -676,12 +766,9 @@ union_copyup(un, docopy, cred, p)
if (error)
return (error);
/* at this point, uppervp is locked */
union_newupper(un, uvp);
un->un_flags |= UN_ULOCK;
lvp = un->un_lowervp;
KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
if (docopy) {
/*
* XX - should not ignore errors
@ -689,23 +776,22 @@ union_copyup(un, docopy, cred, p)
*/
vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, p);
error = VOP_OPEN(lvp, FREAD, cred, p);
if (error == 0 && vn_canvmio(lvp) == TRUE)
error = vfs_object_create(lvp, p, cred);
if (error == 0) {
error = union_copyfile(lvp, uvp, cred, p);
VOP_UNLOCK(lvp, 0, p);
(void) VOP_CLOSE(lvp, FREAD, cred, p);
}
#ifdef DEBUG
if (error == 0)
uprintf("union: copied up %s\n", un->un_path);
#endif
UDEBUG(("union: copied up %s\n", un->un_path));
}
un->un_flags &= ~UN_ULOCK;
VOP_UNLOCK(uvp, 0, p);
union_newupper(un, uvp);
KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
union_vn_close(uvp, FWRITE, cred, p);
vn_lock(uvp, LK_EXCLUSIVE | LK_RETRY, p);
un->un_flags |= UN_ULOCK;
KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
/*
* Subsequent IOs will go to the top layer, so
* call close on the lower vnode and open on the
@ -721,6 +807,8 @@ union_copyup(un, docopy, cred, p)
(void) VOP_CLOSE(lvp, FREAD, cred, p);
(void) VOP_OPEN(uvp, FREAD, cred, p);
}
if (vn_canvmio(uvp) == TRUE)
error = vfs_object_create(uvp, p, cred);
un->un_openl = 0;
}
@ -728,6 +816,17 @@ union_copyup(un, docopy, cred, p)
}
/*
* union_relookup:
*
* dvp should be locked on entry and will be locked on return. No
* net change in the ref count will occur.
*
* If an error is returned, *vpp will be invalid, otherwise it
* will hold a locked, referenced vnode. If *vpp == dvp then
* remember that only one exclusive lock is held.
*/
static int
union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
struct union_mount *um;
@ -757,7 +856,7 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
cn->cn_pnbuf[cn->cn_namelen] = '\0';
cn->cn_nameiop = CREATE;
cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
cn->cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN);
cn->cn_proc = cnp->cn_proc;
if (um->um_op == UNMNT_ABOVE)
cn->cn_cred = cnp->cn_cred;
@ -768,15 +867,30 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
cn->cn_consume = cnp->cn_consume;
VREF(dvp);
error = relookup(dvp, vpp, cn);
if (!error)
vrele(dvp);
else {
VOP_UNLOCK(dvp, 0, cnp->cn_proc);
/*
* Pass dvp unlocked and referenced on call to relookup().
*
* If an error occurs, dvp will be returned unlocked and dereferenced.
*/
if ((error = relookup(dvp, vpp, cn)) != 0) {
zfree(namei_zone, cn->cn_pnbuf);
cn->cn_pnbuf = NULL;
vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, cnp->cn_proc);
return(error);
}
return (error);
/*
* If no error occurs, dvp will be returned locked with the reference
* left as before, and vpp will be returned referenced and locked.
*
* We want to return with dvp as it was passed to us, so we get
* rid of our reference.
*/
vrele(dvp);
return (0);
}
/*
@ -785,11 +899,11 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
*
* (um) points to the union mount structure for access to the
* the mounting process's credentials.
* (dvp) is the directory in which to create the shadow directory.
* it is unlocked on entry and exit.
* (dvp) is the directory in which to create the shadow directory,
* it is locked (but not ref'd) on entry and return.
* (cnp) is the componentname to be created.
* (vpp) is the returned newly created shadow directory, which
* is returned locked.
* is returned locked and ref'd
*/
int
union_mkshadow(um, dvp, cnp, vpp)
@ -810,8 +924,10 @@ union_mkshadow(um, dvp, cnp, vpp)
if (*vpp) {
VOP_ABORTOP(dvp, &cn);
VOP_UNLOCK(dvp, 0, p);
vrele(*vpp);
if (dvp == *vpp)
vrele(*vpp);
else
vput(*vpp);
*vpp = NULLVP;
return (EEXIST);
}
@ -832,7 +948,7 @@ union_mkshadow(um, dvp, cnp, vpp)
VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE);
error = VOP_MKDIR(dvp, vpp, &cn, &va);
vput(dvp);
/*vput(dvp);*/
return (error);
}
@ -842,7 +958,7 @@ union_mkshadow(um, dvp, cnp, vpp)
* (um) points to the union mount structure for access to the
* the mounting process's credentials.
* (dvp) is the directory in which to create the whiteout.
* it is locked on entry and exit.
* it is locked on entry and return.
* (cnp) is the componentname to be created.
*/
int
@ -857,17 +973,16 @@ union_mkwhiteout(um, dvp, cnp, path)
struct vnode *wvp;
struct componentname cn;
VOP_UNLOCK(dvp, 0, p);
error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
if (error) {
vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
if (error)
return (error);
}
if (wvp) {
VOP_ABORTOP(dvp, &cn);
vrele(dvp);
vrele(wvp);
if (wvp == dvp)
vrele(wvp);
else
vput(wvp);
return (EEXIST);
}
@ -877,9 +992,6 @@ union_mkwhiteout(um, dvp, cnp, path)
error = VOP_WHITEOUT(dvp, &cn, CREATE);
if (error)
VOP_ABORTOP(dvp, &cn);
vrele(dvp);
return (error);
}
@ -890,6 +1002,12 @@ union_mkwhiteout(um, dvp, cnp, path)
* the problem with calling namei is that a) it locks too many
* things, and b) it doesn't start at the "right" directory,
* whereas relookup is told where to start.
*
* On entry, the vnode associated with un is locked. It remains locked
* on return.
*
* If no error occurs, *vpp contains a locked referenced vnode for your
* use. If an error occurs *vpp iis undefined.
*/
static int
union_vn_create(vpp, un, p)
@ -921,26 +1039,34 @@ union_vn_create(vpp, un, p)
cn.cn_pnbuf = zalloc(namei_zone);
bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
cn.cn_nameiop = CREATE;
cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
cn.cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN);
cn.cn_proc = p;
cn.cn_cred = p->p_ucred;
cn.cn_nameptr = cn.cn_pnbuf;
cn.cn_hash = un->un_hash;
cn.cn_consume = 0;
/*
* Pass dvp unlocked and referenced on call to relookup().
*
* If an error occurs, dvp will be returned unlocked and dereferenced.
*/
VREF(un->un_dirvp);
error = relookup(un->un_dirvp, &vp, &cn);
if (error)
return (error);
vrele(un->un_dirvp);
/*
* If no error occurs, dvp will be returned locked with the reference
* left as before, and vpp will be returned referenced and locked.
*/
if (vp) {
vput(un->un_dirvp);
VOP_ABORTOP(un->un_dirvp, &cn);
if (un->un_dirvp == vp)
vrele(un->un_dirvp);
if (vp == un->un_dirvp)
vrele(vp);
else
vput(un->un_dirvp);
vrele(vp);
vput(vp);
return (EEXIST);
}
@ -964,11 +1090,12 @@ union_vn_create(vpp, un, p)
return (error);
error = VOP_OPEN(vp, fmode, cred, p);
if (error == 0 && vn_canvmio(vp) == TRUE)
error = vfs_object_create(vp, p, cred);
if (error) {
vput(vp);
return (error);
}
vp->v_writecount++;
*vpp = vp;
return (0);
@ -987,6 +1114,14 @@ union_vn_close(vp, fmode, cred, p)
return (VOP_CLOSE(vp, fmode, cred, p));
}
#if 0
/*
* union_removed_upper:
*
* called with union_node unlocked. XXX
*/
void
union_removed_upper(un)
struct union_node *un;
@ -999,9 +1134,7 @@ union_removed_upper(un)
* union node will have neither uppervp nor lowervp. We remove
* the union node from cache, so that it will not be referrenced.
*/
#if 0
union_newupper(un, NULLVP);
#endif
if (un->un_dircache != 0) {
for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
vrele(*vpp);
@ -1013,28 +1146,8 @@ union_removed_upper(un)
un->un_flags &= ~UN_CACHED;
LIST_REMOVE(un, un_cache);
}
if (un->un_flags & UN_ULOCK) {
un->un_flags &= ~UN_ULOCK;
VOP_UNLOCK(un->un_uppervp, 0, p);
}
}
#if 0
struct vnode *
union_lowervp(vp)
struct vnode *vp;
{
struct union_node *un = VTOUNION(vp);
if ((un->un_lowervp != NULLVP) &&
(vp->v_type == un->un_lowervp->v_type)) {
if (vget(un->un_lowervp, 0) == 0)
return (un->un_lowervp);
}
return (NULLVP);
}
#endif
/*
@ -1104,13 +1217,12 @@ union_dircache(vp, p)
nvp = NULLVP;
if (dircache == 0) {
if (dircache == NULL) {
cnt = 0;
union_dircache_r(vp, 0, &cnt);
cnt++;
dircache = (struct vnode **)
malloc(cnt * sizeof(struct vnode *),
M_TEMP, M_WAITOK);
dircache = malloc(cnt * sizeof(struct vnode *),
M_TEMP, M_WAITOK);
vpp = dircache;
union_dircache_r(vp, &vpp, &cnt);
*vpp = NULLVP;
@ -1126,9 +1238,11 @@ union_dircache(vp, p)
if (*vpp == NULLVP)
goto out;
vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);
/*vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);*/
UDEBUG(("ALLOCVP-3 %p ref %d\n", *vpp, (*vpp ? (*vpp)->v_usecount : -99)));
VREF(*vpp);
error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0);
error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, NULL, *vpp, NULLVP, 0);
UDEBUG(("ALLOCVP-3B %p ref %d\n", nvp, (*vpp ? (*vpp)->v_usecount : -99)));
if (error)
goto out;
@ -1141,6 +1255,40 @@ union_dircache(vp, p)
return (nvp);
}
/*
* Guarentee coherency with the VM cache by invalidating any clean VM pages
* associated with this write and updating any dirty VM pages. Since our
* vnode is locked, other processes will not be able to read the pages in
* again until after our write completes.
*
* We also have to be coherent with reads, by flushing any pending dirty
* pages prior to issuing the read.
*
* XXX this is somewhat of a hack at the moment. To support this properly
* we would have to be able to run VOP_READ and VOP_WRITE through the VM
* cache. Then we wouldn't need to worry about coherency.
*/
void
union_vm_coherency(struct vnode *vp, struct uio *uio, int cleanfls)
{
vm_object_t object;
vm_pindex_t pstart;
vm_pindex_t pend;
int pgoff;
if ((object = vp->v_object) == NULL)
return;
pgoff = uio->uio_offset & PAGE_MASK;
pstart = uio->uio_offset / PAGE_SIZE;
pend = pstart + (uio->uio_resid + pgoff + PAGE_MASK) / PAGE_SIZE;
vm_object_page_clean(object, pstart, pend, OBJPC_SYNC);
if (cleanfls)
vm_object_page_remove(object, pstart, pend, TRUE);
}
/*
* Module glue to remove #ifdef UNION from vfs_syscalls.c
*/
@ -1169,6 +1317,8 @@ union_dircheck(struct proc *p, struct vnode **vp, struct file *fp)
if (lvp != NULLVP) {
error = VOP_OPEN(lvp, FREAD, fp->f_cred, p);
if (error == 0 && vn_canvmio(lvp) == TRUE)
error = vfs_object_create(lvp, p, fp->f_cred);
if (error) {
vput(lvp);
return (error);
@ -1201,9 +1351,11 @@ union_modevent(module_t mod, int type, void *data)
}
return 0;
}
static moduledata_t union_mod = {
"union_dircheck",
union_modevent,
NULL
};
DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY);

View File

@ -85,9 +85,7 @@ union_mount(mp, path, data, ndp, p)
int len;
u_int size;
#ifdef DEBUG
printf("union_mount(mp = %p)\n", (void *)mp);
#endif
UDEBUG(("union_mount(mp = %p)\n", (void *)mp));
/*
* Disable clustered write, otherwise system becomes unstable.
@ -114,24 +112,35 @@ union_mount(mp, path, data, ndp, p)
if (error)
goto bad;
/*
* Obtain lower vnode. Vnode is stored in mp->mnt_vnodecovered.
* We need to reference it but not lock it.
*/
lowerrootvp = mp->mnt_vnodecovered;
VREF(lowerrootvp);
#if 0
/*
* Unlock lower node to avoid deadlock.
*/
if (lowerrootvp->v_op == union_vnodeop_p)
VOP_UNLOCK(lowerrootvp, 0, p);
#endif
/*
* Find upper node.
* Obtain upper vnode by calling namei() on the path. The
* upperrootvp will be turned referenced but not locked.
*/
NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT,
UIO_USERSPACE, args.target, p);
error = namei(ndp);
#if 0
if (lowerrootvp->v_op == union_vnodeop_p)
vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY, p);
#endif
if (error)
goto bad;
@ -139,8 +148,11 @@ union_mount(mp, path, data, ndp, p)
vrele(ndp->ni_dvp);
ndp->ni_dvp = NULL;
UDEBUG(("mount_root UPPERVP %p locked = %d\n", upperrootvp, VOP_ISLOCKED(upperrootvp)));
/*
* Check multi union mount to avoid `lock myself again' panic.
* Also require that it be a directory.
*/
if (upperrootvp == VTOUNION(lowerrootvp)->un_uppervp) {
#ifdef DIAGNOSTIC
@ -155,35 +167,43 @@ union_mount(mp, path, data, ndp, p)
goto bad;
}
um = (struct union_mount *) malloc(sizeof(struct union_mount),
M_UNIONFSMNT, M_WAITOK); /* XXX */
/*
* Keep a held reference to the target vnodes.
* They are vrele'd in union_unmount.
*
* Depending on the _BELOW flag, the filesystems are
* viewed in a different order. In effect, this is the
* same as providing a mount under option to the mount syscall.
* Allocate our union_mount structure and populate the fields.
* The vnode references are stored in the union_mount as held,
* unlocked references. Depending on the _BELOW flag, the
* filesystems are viewed in a different order. In effect this
* is the same as providing a mount-under option to the mount
* syscall.
*/
um = (struct union_mount *) malloc(sizeof(struct union_mount),
M_UNIONFSMNT, M_WAITOK);
bzero(um, sizeof(struct union_mount));
um->um_op = args.mntflags & UNMNT_OPMASK;
switch (um->um_op) {
case UNMNT_ABOVE:
um->um_lowervp = lowerrootvp;
um->um_uppervp = upperrootvp;
upperrootvp = NULL;
lowerrootvp = NULL;
break;
case UNMNT_BELOW:
um->um_lowervp = upperrootvp;
um->um_uppervp = lowerrootvp;
upperrootvp = NULL;
lowerrootvp = NULL;
break;
case UNMNT_REPLACE:
vrele(lowerrootvp);
lowerrootvp = NULLVP;
lowerrootvp = NULL;
um->um_uppervp = upperrootvp;
um->um_lowervp = lowerrootvp;
upperrootvp = NULL;
break;
default:
@ -196,7 +216,7 @@ union_mount(mp, path, data, ndp, p)
* supports whiteout operations
*/
if ((mp->mnt_flag & MNT_RDONLY) == 0) {
error = VOP_WHITEOUT(um->um_uppervp, (struct componentname *) 0, LOOKUP);
error = VOP_WHITEOUT(um->um_uppervp, NULL, LOOKUP);
if (error)
goto bad;
}
@ -258,15 +278,19 @@ union_mount(mp, path, data, ndp, p)
(void)union_statfs(mp, &mp->mnt_stat, p);
#ifdef DEBUG
printf("union_mount: from %s, on %s\n",
mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
#endif
UDEBUG(("union_mount: from %s, on %s\n",
mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname));
return (0);
bad:
if (um)
if (um) {
if (um->um_uppervp)
vrele(um->um_uppervp);
if (um->um_lowervp)
vrele(um->um_lowervp);
/* XXX other fields */
free(um, M_UNIONFSMNT);
}
if (cred)
crfree(cred);
if (upperrootvp)
@ -291,9 +315,7 @@ union_unmount(mp, mntflags, p)
int freeing;
int flags = 0;
#ifdef DEBUG
printf("union_unmount(mp = %p)\n", (void *)mp);
#endif
UDEBUG(("union_unmount(mp = %p)\n", (void *)mp));
if (mntflags & MNT_FORCE)
flags |= FORCECLOSE;
@ -365,55 +387,25 @@ union_root(mp, vpp)
struct mount *mp;
struct vnode **vpp;
{
struct proc *p = curproc; /* XXX */
struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
int error;
int loselock;
int lockadj = 0;
if (um->um_lowervp && um->um_op != UNMNT_BELOW &&
VOP_ISLOCKED(um->um_lowervp)) {
VREF(um->um_lowervp);
VOP_UNLOCK(um->um_lowervp, 0, p);
lockadj = 1;
}
/*
* Return locked reference to root.
* Supply an unlocked reference to um_uppervp and to um_lowervp. It
* is possible for um_uppervp to be locked without the associated
* root union_node being locked. We let union_allocvp() deal with
* it.
*/
UDEBUG(("union_root UPPERVP %p locked = %d\n", um->um_uppervp, VOP_ISLOCKED(um->um_uppervp)));
VREF(um->um_uppervp);
if ((um->um_op == UNMNT_BELOW) &&
VOP_ISLOCKED(um->um_uppervp)) {
loselock = 1;
} else {
vn_lock(um->um_uppervp, LK_EXCLUSIVE | LK_RETRY, p);
loselock = 0;
}
if (um->um_lowervp)
VREF(um->um_lowervp);
error = union_allocvp(vpp, mp,
(struct vnode *) 0,
(struct vnode *) 0,
(struct componentname *) 0,
um->um_uppervp,
um->um_lowervp,
1);
if (error) {
if (loselock)
vrele(um->um_uppervp);
else
vput(um->um_uppervp);
if (um->um_lowervp)
vrele(um->um_lowervp);
} else {
if (loselock)
VTOUNION(*vpp)->un_flags &= ~UN_ULOCK;
}
if (lockadj) {
vn_lock(um->um_lowervp, LK_EXCLUSIVE | LK_RETRY, p);
vrele(um->um_lowervp);
}
error = union_allocvp(vpp, mp, NULLVP, NULLVP, NULL,
um->um_uppervp, um->um_lowervp, 1);
UDEBUG(("error %d\n", error));
UDEBUG(("union_root2 UPPERVP %p locked = %d\n", um->um_uppervp, VOP_ISLOCKED(um->um_uppervp)));
return (error);
}
@ -429,10 +421,8 @@ union_statfs(mp, sbp, p)
struct statfs mstat;
int lbsize;
#ifdef DEBUG
printf("union_statfs(mp = %p, lvp = %p, uvp = %p)\n",
(void *)mp, (void *)um->um_lowervp, (void *)um->um_uppervp);
#endif
UDEBUG(("union_statfs(mp = %p, lvp = %p, uvp = %p)\n",
(void *)mp, (void *)um->um_lowervp, (void *)um->um_uppervp));
bzero(&mstat, sizeof(mstat));

File diff suppressed because it is too large Load Diff

View File

@ -49,8 +49,8 @@ struct union_args {
#define UNMNT_OPMASK 0x0003
struct union_mount {
struct vnode *um_uppervp;
struct vnode *um_lowervp;
struct vnode *um_uppervp; /* UN_ULOCK holds locking state */
struct vnode *um_lowervp; /* Left unlocked */
struct ucred *um_cred; /* Credentials of user calling mount */
int um_cmode; /* cmask from mount process */
int um_op; /* Operation mode */
@ -58,6 +58,10 @@ struct union_mount {
#ifdef KERNEL
#ifndef DIAGNOSTIC
#define DIAGNOSTIC
#endif
/*
* DEFDIRMODE is the mode bits used to create a shadow directory.
*/
@ -67,9 +71,14 @@ struct union_mount {
#define UN_FILEMODE ((VRWMODE)|(VRWMODE>>3)|(VRWMODE>>6))
/*
* A cache of vnode references
* A cache of vnode references (hangs off v_data)
*
* Placing un_lock as the first elements theoretically allows us to
* use the vop_stdlock functions. However, we need to make sure of
* certain side effects so we will still punch in our own code.
*/
struct union_node {
struct lock un_lock;
LIST_ENTRY(union_node) un_cache; /* Hash chain */
struct vnode *un_vnode; /* Back pointer */
struct vnode *un_uppervp; /* overlaying object */
@ -79,6 +88,7 @@ struct union_node {
char *un_path; /* saved component name */
int un_hash; /* saved un_path hash value */
int un_openl; /* # of opens on lowervp */
int un_exclcnt; /* exclusive count */
unsigned int un_flags;
struct vnode **un_dircache; /* cached union stack */
off_t un_uppersz; /* size of upper object */
@ -88,14 +98,25 @@ struct union_node {
#endif
};
#define UN_WANT 0x01
#define UN_LOCKED 0x02
#define UN_ULOCK 0x04 /* Upper node is locked */
#define UN_KLOCK 0x08 /* Keep upper node locked on vput */
#define UN_CACHED 0x10 /* In union cache */
/*
* XXX UN_ULOCK - indicates that the uppervp is locked
*
* UN_CACHED - node is in the union cache
*/
/*#define UN_ULOCK 0x04*/ /* Upper node is locked */
#define UN_CACHED 0x10 /* In union cache */
/*
* Hash table locking flags
*/
#define UNVP_WANT 0x01
#define UNVP_LOCKED 0x02
extern int union_allocvp __P((struct vnode **, struct mount *,
struct vnode *, struct vnode *,
struct vnode *,
struct vnode *,
struct componentname *, struct vnode *,
struct vnode *, int));
extern int union_freevp __P((struct vnode *));
@ -113,6 +134,7 @@ extern int union_cn_close __P((struct vnode *, int, struct ucred *,
extern void union_removed_upper __P((struct union_node *un));
extern struct vnode *union_lowervp __P((struct vnode *));
extern void union_newsize __P((struct vnode *, off_t, off_t));
extern void union_vm_coherency __P((struct vnode *, struct uio *, int));
extern int (*union_dircheckp) __P((struct proc *, struct vnode **,
struct file *));
@ -124,6 +146,11 @@ extern int (*union_dircheckp) __P((struct proc *, struct vnode **,
#define UPPERVP(vp) (VTOUNION(vp)->un_uppervp)
#define OTHERVP(vp) (UPPERVP(vp) ? UPPERVP(vp) : LOWERVP(vp))
#define UDEBUG(x) if (uniondebug) printf x
#define UDEBUG_ENABLED 1
extern vop_t **union_vnodeop_p;
extern struct vfsops union_vfsops;
extern int uniondebug;
#endif /* KERNEL */

View File

@ -53,6 +53,7 @@
#include <vm/vm.h>
#include <vm/vm_extern.h> /* for vnode_pager_setsize */
#include <vm/vm_zone.h>
#include <vm/vm_object.h> /* for vm cache coherency */
#include <miscfs/union/union.h>
#include <sys/proc.h>
@ -97,7 +98,7 @@ union_init()
for (i = 0; i < NHASH; i++)
LIST_INIT(&unhead[i]);
bzero((caddr_t) unvplock, sizeof(unvplock));
bzero((caddr_t)unvplock, sizeof(unvplock));
return (0);
}
@ -105,15 +106,12 @@ static int
union_list_lock(ix)
int ix;
{
if (unvplock[ix] & UN_LOCKED) {
unvplock[ix] |= UN_WANT;
if (unvplock[ix] & UNVP_LOCKED) {
unvplock[ix] |= UNVP_WANT;
(void) tsleep((caddr_t) &unvplock[ix], PINOD, "unllck", 0);
return (1);
}
unvplock[ix] |= UN_LOCKED;
unvplock[ix] |= UNVP_LOCKED;
return (0);
}
@ -121,15 +119,25 @@ static void
union_list_unlock(ix)
int ix;
{
unvplock[ix] &= ~UNVP_LOCKED;
unvplock[ix] &= ~UN_LOCKED;
if (unvplock[ix] & UN_WANT) {
unvplock[ix] &= ~UN_WANT;
if (unvplock[ix] & UNVP_WANT) {
unvplock[ix] &= ~UNVP_WANT;
wakeup((caddr_t) &unvplock[ix]);
}
}
/*
* union_updatevp:
*
* The uppervp, if not NULL, must be referenced and not locked by us
* The lowervp, if not NULL, must be referenced.
*
* if uppervp and lowervp match pointers already installed, nothing
* happens. The passed vp's (when matching) are not adjusted. This
* routine may only be called by union_newupper() and union_newlower().
*/
static void
union_updatevp(un, uppervp, lowervp)
struct union_node *un;
@ -153,9 +161,10 @@ union_updatevp(un, uppervp, lowervp)
uhash = nhash;
}
if (lhash != uhash)
if (lhash != uhash) {
while (union_list_lock(lhash))
continue;
}
while (union_list_lock(uhash))
continue;
@ -177,10 +186,6 @@ union_updatevp(un, uppervp, lowervp)
free(un->un_path, M_TEMP);
un->un_path = 0;
}
if (un->un_dirvp) {
vrele(un->un_dirvp);
un->un_dirvp = NULLVP;
}
}
un->un_lowervp = lowervp;
un->un_lowersz = VNOVAL;
@ -189,7 +194,6 @@ union_updatevp(un, uppervp, lowervp)
if (un->un_uppervp != uppervp) {
if (un->un_uppervp)
vrele(un->un_uppervp);
un->un_uppervp = uppervp;
un->un_uppersz = VNOVAL;
}
@ -202,21 +206,30 @@ union_updatevp(un, uppervp, lowervp)
union_list_unlock(nhash);
}
/*
* Set a new lowervp. The passed lowervp must be referenced and will be
* stored in the vp in a referenced state.
*/
static void
union_newlower(un, lowervp)
struct union_node *un;
struct vnode *lowervp;
{
union_updatevp(un, un->un_uppervp, lowervp);
}
/*
* Set a new uppervp. The passed uppervp must be locked and will be
* stored in the vp in a locked state. The caller should not unlock
* uppervp.
*/
static void
union_newupper(un, uppervp)
struct union_node *un;
struct vnode *uppervp;
{
union_updatevp(un, uppervp, un->un_lowervp);
}
@ -253,27 +266,51 @@ union_newsize(vp, uppersz, lowersz)
}
if (sz != VNOVAL) {
#ifdef DEBUG
printf("union: %s size now %ld\n",
uppersz != VNOVAL ? "upper" : "lower", (long) sz);
#endif
UDEBUG(("union: %s size now %ld\n",
(uppersz != VNOVAL ? "upper" : "lower"), (long)sz));
vnode_pager_setsize(vp, sz);
}
}
/*
* allocate a union_node/vnode pair. the vnode is
* referenced and locked. the new vnode is returned
* via (vpp). (mp) is the mountpoint of the union filesystem,
* (dvp) is the parent directory where the upper layer object
* should exist (but doesn't) and (cnp) is the componentname
* information which is partially copied to allow the upper
* layer object to be created at a later time. (uppervp)
* and (lowervp) reference the upper and lower layer objects
* being mapped. either, but not both, can be nil.
* if supplied, (uppervp) is locked.
* the reference is either maintained in the new union_node
* object which is allocated, or they are vrele'd.
* union_allocvp: allocate a union_node and associate it with a
* parent union_node and one or two vnodes.
*
* vpp Holds the returned vnode locked and referenced if no
* error occurs.
*
* mp Holds the mount point. mp may or may not be busied.
* allocvp makes no changes to mp.
*
* dvp Holds the parent union_node to the one we wish to create.
* XXX may only be used to traverse an uncopied lowervp-based
* tree? XXX
*
* dvp may or may not be locked. allocvp makes no changes
* to dvp.
*
* upperdvp Holds the parent vnode to uppervp, generally used along
* with path component information to create a shadow of
* lowervp when uppervp does not exist.
*
* upperdvp is referenced but unlocked on entry, and will be
* dereferenced on return.
*
* uppervp Holds the new uppervp vnode to be stored in the
* union_node we are allocating. uppervp is referenced but
* not locked, and will be dereferenced on return.
*
* lowervp Holds the new lowervp vnode to be stored in the
* union_node we are allocating. uppervp is referenced but
* not locked, and will be dereferenced on return.
*
* cnp Holds path component information to be coupled with
* lowervp and upperdvp to allow unionfs to create an uppervp
* later on. Only used if lowervp is valid. The conents
* of cnp is only valid for the duration of the call.
*
* docache Determine whether this node should be entered in the
* cache or whether it should be destroyed as soon as possible.
*
* all union_nodes are maintained on a singly-linked
* list. new nodes are only allocated when they cannot
@ -292,12 +329,13 @@ union_newsize(vp, uppersz, lowersz)
* zero references to it and so it needs to removed from
* the vnode free list.
*/
int
union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
union_allocvp(vpp, mp, dvp, upperdvp, cnp, uppervp, lowervp, docache)
struct vnode **vpp;
struct mount *mp;
struct vnode *undvp; /* parent union vnode */
struct vnode *dvp; /* may be null */
struct vnode *dvp; /* parent union vnode */
struct vnode *upperdvp; /* parent vnode of uppervp */
struct componentname *cnp; /* may be null */
struct vnode *uppervp; /* may be null */
struct vnode *lowervp; /* may be null */
@ -307,6 +345,7 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
struct union_node *un = 0;
struct vnode *xlowervp = NULLVP;
struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
struct proc *p = (cnp) ? cnp->cn_proc : curproc;
int hash = 0;
int vflag;
int try;
@ -382,65 +421,76 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
if (un) {
/*
* Obtain a lock on the union_node.
* uppervp is locked, though un->un_uppervp
* may not be. this doesn't break the locking
* hierarchy since in the case that un->un_uppervp
* is not yet locked it will be vrele'd and replaced
* with uppervp.
* Obtain a lock on the union_node. Everything is unlocked
* except for dvp, so check that case. If they match, our
* new un is already locked. Otherwise we have to lock our
* new un.
*
* A potential deadlock situation occurs when we are holding
* one lock while trying to get another. We must follow
* strict ordering rules to avoid it. We try to locate dvp
* by scanning up from un_vnode, since the most likely
* scenario is un being under dvp.
*/
if ((dvp != NULLVP) && (uppervp == dvp)) {
/*
* Access ``.'', so (un) will already
* be locked. Since this process has
* the lock on (uppervp) no other
* process can hold the lock on (un).
*/
#ifdef DIAGNOSTIC
if ((un->un_flags & UN_LOCKED) == 0)
panic("union: . not locked");
else if (curproc && un->un_pid != curproc->p_pid &&
un->un_pid > -1 && curproc->p_pid > -1)
panic("union: allocvp not lock owner");
#endif
} else {
if (un->un_flags & UN_LOCKED) {
vrele(UNIONTOV(un));
un->un_flags |= UN_WANT;
(void) tsleep((caddr_t) &un->un_flags, PINOD, "unalvp", 0);
goto loop;
}
un->un_flags |= UN_LOCKED;
if (dvp && un->un_vnode != dvp) {
struct vnode *scan = un->un_vnode;
#ifdef DIAGNOSTIC
if (curproc)
un->un_pid = curproc->p_pid;
else
un->un_pid = -1;
#endif
do {
scan = VTOUNION(scan)->un_pvp;
} while (scan && scan->v_tag == VT_UNION && scan != dvp);
if (scan != dvp) {
/*
* our new un is above dvp (we never saw dvp
* while moving up the tree).
*/
VREF(dvp);
VOP_UNLOCK(dvp, 0, p);
error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p);
vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
vrele(dvp);
} else {
/*
* our new un is under dvp
*/
error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p);
}
} else if (dvp == NULLVP) {
/*
* dvp is NULL, we need to lock un.
*/
error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p);
} else {
/*
* dvp == un->un_vnode, we are already locked.
*/
error = 0;
}
/*
* At this point, the union_node is locked,
* un->un_uppervp may not be locked, and uppervp
* is locked or nil.
*/
if (error)
goto loop;
/*
* Save information about the upper layer.
* At this point, the union_node is locked and referenced.
*
* uppervp is locked and referenced or NULL, lowervp is
* referenced or NULL.
*/
UDEBUG(("Modify existing un %p vn %p upper %p(refs %d) -> %p(refs %d)\n",
un, un->un_vnode, un->un_uppervp,
(un->un_uppervp ? un->un_uppervp->v_usecount : -99),
uppervp,
(uppervp ? uppervp->v_usecount : -99)
));
if (uppervp != un->un_uppervp) {
KASSERT(uppervp == NULL || uppervp->v_usecount > 0, ("union_allocvp: too few refs %d (at least 1 required) on uppervp", uppervp->v_usecount));
union_newupper(un, uppervp);
} else if (uppervp) {
KASSERT(uppervp->v_usecount > 1, ("union_allocvp: too few refs %d (at least 2 required) on uppervp", uppervp->v_usecount));
vrele(uppervp);
}
if (un->un_uppervp) {
un->un_flags |= UN_ULOCK;
un->un_flags &= ~UN_KLOCK;
}
/*
* Save information about the lower layer.
* This needs to keep track of pathname
@ -456,12 +506,22 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
bcopy(cnp->cn_nameptr, un->un_path,
cnp->cn_namelen);
un->un_path[cnp->cn_namelen] = '\0';
VREF(dvp);
un->un_dirvp = dvp;
}
} else if (lowervp) {
vrele(lowervp);
}
/*
* and upperdvp
*/
if (upperdvp != un->un_dirvp) {
if (un->un_dirvp)
vrele(un->un_dirvp);
un->un_dirvp = upperdvp;
} else if (upperdvp) {
vrele(upperdvp);
}
*vpp = UNIONTOV(un);
return (0);
}
@ -477,17 +537,22 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
goto loop;
}
/*
* Create new node rather then replace old node
*/
error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
if (error) {
if (uppervp) {
if (dvp == uppervp)
vrele(uppervp);
else
vput(uppervp);
}
/*
* If an error occurs clear out vnodes.
*/
if (lowervp)
vrele(lowervp);
if (uppervp)
vrele(uppervp);
if (upperdvp)
vrele(upperdvp);
*vpp = NULL;
goto out;
}
@ -499,37 +564,34 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
(*vpp)->v_type = uppervp->v_type;
else
(*vpp)->v_type = lowervp->v_type;
un = VTOUNION(*vpp);
bzero(un, sizeof(*un));
lockinit(&un->un_lock, PVFS, "unlock", 0, 0);
vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);
un->un_vnode = *vpp;
un->un_uppervp = uppervp;
un->un_uppersz = VNOVAL;
un->un_lowervp = lowervp;
un->un_lowersz = VNOVAL;
un->un_pvp = undvp;
if (undvp != NULLVP)
VREF(undvp);
un->un_dirvp = upperdvp;
un->un_pvp = dvp; /* only parent dir in new allocation */
if (dvp != NULLVP)
VREF(dvp);
un->un_dircache = 0;
un->un_openl = 0;
un->un_flags = UN_LOCKED;
if (un->un_uppervp)
un->un_flags |= UN_ULOCK;
#ifdef DIAGNOSTIC
if (curproc)
un->un_pid = curproc->p_pid;
else
un->un_pid = -1;
#endif
if (cnp && (lowervp != NULLVP)) {
un->un_hash = cnp->cn_hash;
un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
un->un_path[cnp->cn_namelen] = '\0';
VREF(dvp);
un->un_dirvp = dvp;
} else {
un->un_hash = 0;
un->un_path = 0;
un->un_dirvp = 0;
un->un_dirvp = NULL;
}
if (docache) {
@ -537,10 +599,10 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
un->un_flags |= UN_CACHED;
}
out:
if (xlowervp)
vrele(xlowervp);
out:
if (docache)
union_list_unlock(hash);
@ -558,16 +620,26 @@ union_freevp(vp)
LIST_REMOVE(un, un_cache);
}
if (un->un_pvp != NULLVP)
if (un->un_pvp != NULLVP) {
vrele(un->un_pvp);
if (un->un_uppervp != NULLVP)
un->un_pvp = NULL;
}
if (un->un_uppervp != NULLVP) {
vrele(un->un_uppervp);
if (un->un_lowervp != NULLVP)
un->un_uppervp = NULL;
}
if (un->un_lowervp != NULLVP) {
vrele(un->un_lowervp);
if (un->un_dirvp != NULLVP)
un->un_lowervp = NULL;
}
if (un->un_dirvp != NULLVP) {
vrele(un->un_dirvp);
if (un->un_path)
un->un_dirvp = NULL;
}
if (un->un_path) {
free(un->un_path, M_TEMP);
un->un_path = NULL;
}
FREE(vp->v_data, M_TEMP);
vp->v_data = 0;
@ -579,6 +651,9 @@ union_freevp(vp)
* copyfile. copy the vnode (fvp) to the vnode (tvp)
* using a sequence of reads and writes. both (fvp)
* and (tvp) are locked on entry and exit.
*
* fvp and tvp are both exclusive locked on call, but their refcount's
* haven't been bumped at all.
*/
static int
union_copyfile(fvp, tvp, cred, p)
@ -600,48 +675,62 @@ union_copyfile(fvp, tvp, cred, p)
* give up at the first sign of trouble.
*/
bzero(&uio, sizeof(uio));
uio.uio_procp = p;
uio.uio_segflg = UIO_SYSSPACE;
uio.uio_offset = 0;
VOP_UNLOCK(fvp, 0, p); /* XXX */
VOP_LEASE(fvp, p, cred, LEASE_READ);
vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */
VOP_UNLOCK(tvp, 0, p); /* XXX */
VOP_LEASE(tvp, p, cred, LEASE_WRITE);
vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */
buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
/* ugly loop follows... */
do {
off_t offset = uio.uio_offset;
int count;
int bufoffset;
/*
* Setup for big read
*/
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
iov.iov_base = buf;
iov.iov_len = MAXBSIZE;
uio.uio_resid = iov.iov_len;
uio.uio_rw = UIO_READ;
error = VOP_READ(fvp, &uio, 0, cred);
if (error == 0) {
if ((error = VOP_READ(fvp, &uio, 0, cred)) != 0)
break;
/*
* Get bytes read, handle read eof case and setup for
* write loop
*/
if ((count = MAXBSIZE - uio.uio_resid) == 0)
break;
bufoffset = 0;
/*
* Write until an error occurs or our buffer has been
* exhausted, then update the offset for the next read.
*/
while (bufoffset < count) {
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
iov.iov_base = buf;
iov.iov_len = MAXBSIZE - uio.uio_resid;
uio.uio_offset = offset;
iov.iov_base = buf + bufoffset;
iov.iov_len = count - bufoffset;
uio.uio_offset = offset + bufoffset;
uio.uio_rw = UIO_WRITE;
uio.uio_resid = iov.iov_len;
if (uio.uio_resid == 0)
if ((error = VOP_WRITE(tvp, &uio, 0, cred)) != 0)
break;
do {
error = VOP_WRITE(tvp, &uio, 0, cred);
} while ((uio.uio_resid > 0) && (error == 0));
bufoffset += (count - bufoffset) - uio.uio_resid;
}
uio.uio_offset = offset + bufoffset;
} while (error == 0);
free(buf, M_TEMP);
@ -649,9 +738,10 @@ union_copyfile(fvp, tvp, cred, p)
}
/*
* (un) is assumed to be locked on entry and remains
* locked on exit.
*
* un's vnode is assumed to be locked on entry and remains locked on exit.
*/
int
union_copyup(un, docopy, cred, p)
struct union_node *un;
@ -676,12 +766,9 @@ union_copyup(un, docopy, cred, p)
if (error)
return (error);
/* at this point, uppervp is locked */
union_newupper(un, uvp);
un->un_flags |= UN_ULOCK;
lvp = un->un_lowervp;
KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
if (docopy) {
/*
* XX - should not ignore errors
@ -689,23 +776,22 @@ union_copyup(un, docopy, cred, p)
*/
vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, p);
error = VOP_OPEN(lvp, FREAD, cred, p);
if (error == 0 && vn_canvmio(lvp) == TRUE)
error = vfs_object_create(lvp, p, cred);
if (error == 0) {
error = union_copyfile(lvp, uvp, cred, p);
VOP_UNLOCK(lvp, 0, p);
(void) VOP_CLOSE(lvp, FREAD, cred, p);
}
#ifdef DEBUG
if (error == 0)
uprintf("union: copied up %s\n", un->un_path);
#endif
UDEBUG(("union: copied up %s\n", un->un_path));
}
un->un_flags &= ~UN_ULOCK;
VOP_UNLOCK(uvp, 0, p);
union_newupper(un, uvp);
KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
union_vn_close(uvp, FWRITE, cred, p);
vn_lock(uvp, LK_EXCLUSIVE | LK_RETRY, p);
un->un_flags |= UN_ULOCK;
KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
/*
* Subsequent IOs will go to the top layer, so
* call close on the lower vnode and open on the
@ -721,6 +807,8 @@ union_copyup(un, docopy, cred, p)
(void) VOP_CLOSE(lvp, FREAD, cred, p);
(void) VOP_OPEN(uvp, FREAD, cred, p);
}
if (vn_canvmio(uvp) == TRUE)
error = vfs_object_create(uvp, p, cred);
un->un_openl = 0;
}
@ -728,6 +816,17 @@ union_copyup(un, docopy, cred, p)
}
/*
* union_relookup:
*
* dvp should be locked on entry and will be locked on return. No
* net change in the ref count will occur.
*
* If an error is returned, *vpp will be invalid, otherwise it
* will hold a locked, referenced vnode. If *vpp == dvp then
* remember that only one exclusive lock is held.
*/
static int
union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
struct union_mount *um;
@ -757,7 +856,7 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
cn->cn_pnbuf[cn->cn_namelen] = '\0';
cn->cn_nameiop = CREATE;
cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
cn->cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN);
cn->cn_proc = cnp->cn_proc;
if (um->um_op == UNMNT_ABOVE)
cn->cn_cred = cnp->cn_cred;
@ -768,15 +867,30 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
cn->cn_consume = cnp->cn_consume;
VREF(dvp);
error = relookup(dvp, vpp, cn);
if (!error)
vrele(dvp);
else {
VOP_UNLOCK(dvp, 0, cnp->cn_proc);
/*
* Pass dvp unlocked and referenced on call to relookup().
*
* If an error occurs, dvp will be returned unlocked and dereferenced.
*/
if ((error = relookup(dvp, vpp, cn)) != 0) {
zfree(namei_zone, cn->cn_pnbuf);
cn->cn_pnbuf = NULL;
vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, cnp->cn_proc);
return(error);
}
return (error);
/*
* If no error occurs, dvp will be returned locked with the reference
* left as before, and vpp will be returned referenced and locked.
*
* We want to return with dvp as it was passed to us, so we get
* rid of our reference.
*/
vrele(dvp);
return (0);
}
/*
@ -785,11 +899,11 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
*
* (um) points to the union mount structure for access to the
* the mounting process's credentials.
* (dvp) is the directory in which to create the shadow directory.
* it is unlocked on entry and exit.
* (dvp) is the directory in which to create the shadow directory,
* it is locked (but not ref'd) on entry and return.
* (cnp) is the componentname to be created.
* (vpp) is the returned newly created shadow directory, which
* is returned locked.
* is returned locked and ref'd
*/
int
union_mkshadow(um, dvp, cnp, vpp)
@ -810,8 +924,10 @@ union_mkshadow(um, dvp, cnp, vpp)
if (*vpp) {
VOP_ABORTOP(dvp, &cn);
VOP_UNLOCK(dvp, 0, p);
vrele(*vpp);
if (dvp == *vpp)
vrele(*vpp);
else
vput(*vpp);
*vpp = NULLVP;
return (EEXIST);
}
@ -832,7 +948,7 @@ union_mkshadow(um, dvp, cnp, vpp)
VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE);
error = VOP_MKDIR(dvp, vpp, &cn, &va);
vput(dvp);
/*vput(dvp);*/
return (error);
}
@ -842,7 +958,7 @@ union_mkshadow(um, dvp, cnp, vpp)
* (um) points to the union mount structure for access to the
* the mounting process's credentials.
* (dvp) is the directory in which to create the whiteout.
* it is locked on entry and exit.
* it is locked on entry and return.
* (cnp) is the componentname to be created.
*/
int
@ -857,17 +973,16 @@ union_mkwhiteout(um, dvp, cnp, path)
struct vnode *wvp;
struct componentname cn;
VOP_UNLOCK(dvp, 0, p);
error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
if (error) {
vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
if (error)
return (error);
}
if (wvp) {
VOP_ABORTOP(dvp, &cn);
vrele(dvp);
vrele(wvp);
if (wvp == dvp)
vrele(wvp);
else
vput(wvp);
return (EEXIST);
}
@ -877,9 +992,6 @@ union_mkwhiteout(um, dvp, cnp, path)
error = VOP_WHITEOUT(dvp, &cn, CREATE);
if (error)
VOP_ABORTOP(dvp, &cn);
vrele(dvp);
return (error);
}
@ -890,6 +1002,12 @@ union_mkwhiteout(um, dvp, cnp, path)
* the problem with calling namei is that a) it locks too many
* things, and b) it doesn't start at the "right" directory,
* whereas relookup is told where to start.
*
* On entry, the vnode associated with un is locked. It remains locked
* on return.
*
* If no error occurs, *vpp contains a locked referenced vnode for your
* use. If an error occurs *vpp iis undefined.
*/
static int
union_vn_create(vpp, un, p)
@ -921,26 +1039,34 @@ union_vn_create(vpp, un, p)
cn.cn_pnbuf = zalloc(namei_zone);
bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
cn.cn_nameiop = CREATE;
cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
cn.cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN);
cn.cn_proc = p;
cn.cn_cred = p->p_ucred;
cn.cn_nameptr = cn.cn_pnbuf;
cn.cn_hash = un->un_hash;
cn.cn_consume = 0;
/*
* Pass dvp unlocked and referenced on call to relookup().
*
* If an error occurs, dvp will be returned unlocked and dereferenced.
*/
VREF(un->un_dirvp);
error = relookup(un->un_dirvp, &vp, &cn);
if (error)
return (error);
vrele(un->un_dirvp);
/*
* If no error occurs, dvp will be returned locked with the reference
* left as before, and vpp will be returned referenced and locked.
*/
if (vp) {
vput(un->un_dirvp);
VOP_ABORTOP(un->un_dirvp, &cn);
if (un->un_dirvp == vp)
vrele(un->un_dirvp);
if (vp == un->un_dirvp)
vrele(vp);
else
vput(un->un_dirvp);
vrele(vp);
vput(vp);
return (EEXIST);
}
@ -964,11 +1090,12 @@ union_vn_create(vpp, un, p)
return (error);
error = VOP_OPEN(vp, fmode, cred, p);
if (error == 0 && vn_canvmio(vp) == TRUE)
error = vfs_object_create(vp, p, cred);
if (error) {
vput(vp);
return (error);
}
vp->v_writecount++;
*vpp = vp;
return (0);
@ -987,6 +1114,14 @@ union_vn_close(vp, fmode, cred, p)
return (VOP_CLOSE(vp, fmode, cred, p));
}
#if 0
/*
* union_removed_upper:
*
* called with union_node unlocked. XXX
*/
void
union_removed_upper(un)
struct union_node *un;
@ -999,9 +1134,7 @@ union_removed_upper(un)
* union node will have neither uppervp nor lowervp. We remove
* the union node from cache, so that it will not be referrenced.
*/
#if 0
union_newupper(un, NULLVP);
#endif
if (un->un_dircache != 0) {
for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
vrele(*vpp);
@ -1013,28 +1146,8 @@ union_removed_upper(un)
un->un_flags &= ~UN_CACHED;
LIST_REMOVE(un, un_cache);
}
if (un->un_flags & UN_ULOCK) {
un->un_flags &= ~UN_ULOCK;
VOP_UNLOCK(un->un_uppervp, 0, p);
}
}
#if 0
struct vnode *
union_lowervp(vp)
struct vnode *vp;
{
struct union_node *un = VTOUNION(vp);
if ((un->un_lowervp != NULLVP) &&
(vp->v_type == un->un_lowervp->v_type)) {
if (vget(un->un_lowervp, 0) == 0)
return (un->un_lowervp);
}
return (NULLVP);
}
#endif
/*
@ -1104,13 +1217,12 @@ union_dircache(vp, p)
nvp = NULLVP;
if (dircache == 0) {
if (dircache == NULL) {
cnt = 0;
union_dircache_r(vp, 0, &cnt);
cnt++;
dircache = (struct vnode **)
malloc(cnt * sizeof(struct vnode *),
M_TEMP, M_WAITOK);
dircache = malloc(cnt * sizeof(struct vnode *),
M_TEMP, M_WAITOK);
vpp = dircache;
union_dircache_r(vp, &vpp, &cnt);
*vpp = NULLVP;
@ -1126,9 +1238,11 @@ union_dircache(vp, p)
if (*vpp == NULLVP)
goto out;
vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);
/*vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);*/
UDEBUG(("ALLOCVP-3 %p ref %d\n", *vpp, (*vpp ? (*vpp)->v_usecount : -99)));
VREF(*vpp);
error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0);
error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, NULL, *vpp, NULLVP, 0);
UDEBUG(("ALLOCVP-3B %p ref %d\n", nvp, (*vpp ? (*vpp)->v_usecount : -99)));
if (error)
goto out;
@ -1141,6 +1255,40 @@ union_dircache(vp, p)
return (nvp);
}
/*
* Guarentee coherency with the VM cache by invalidating any clean VM pages
* associated with this write and updating any dirty VM pages. Since our
* vnode is locked, other processes will not be able to read the pages in
* again until after our write completes.
*
* We also have to be coherent with reads, by flushing any pending dirty
* pages prior to issuing the read.
*
* XXX this is somewhat of a hack at the moment. To support this properly
* we would have to be able to run VOP_READ and VOP_WRITE through the VM
* cache. Then we wouldn't need to worry about coherency.
*/
void
union_vm_coherency(struct vnode *vp, struct uio *uio, int cleanfls)
{
vm_object_t object;
vm_pindex_t pstart;
vm_pindex_t pend;
int pgoff;
if ((object = vp->v_object) == NULL)
return;
pgoff = uio->uio_offset & PAGE_MASK;
pstart = uio->uio_offset / PAGE_SIZE;
pend = pstart + (uio->uio_resid + pgoff + PAGE_MASK) / PAGE_SIZE;
vm_object_page_clean(object, pstart, pend, OBJPC_SYNC);
if (cleanfls)
vm_object_page_remove(object, pstart, pend, TRUE);
}
/*
* Module glue to remove #ifdef UNION from vfs_syscalls.c
*/
@ -1169,6 +1317,8 @@ union_dircheck(struct proc *p, struct vnode **vp, struct file *fp)
if (lvp != NULLVP) {
error = VOP_OPEN(lvp, FREAD, fp->f_cred, p);
if (error == 0 && vn_canvmio(lvp) == TRUE)
error = vfs_object_create(lvp, p, fp->f_cred);
if (error) {
vput(lvp);
return (error);
@ -1201,9 +1351,11 @@ union_modevent(module_t mod, int type, void *data)
}
return 0;
}
static moduledata_t union_mod = {
"union_dircheck",
union_modevent,
NULL
};
DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY);

View File

@ -85,9 +85,7 @@ union_mount(mp, path, data, ndp, p)
int len;
u_int size;
#ifdef DEBUG
printf("union_mount(mp = %p)\n", (void *)mp);
#endif
UDEBUG(("union_mount(mp = %p)\n", (void *)mp));
/*
* Disable clustered write, otherwise system becomes unstable.
@ -114,24 +112,35 @@ union_mount(mp, path, data, ndp, p)
if (error)
goto bad;
/*
* Obtain lower vnode. Vnode is stored in mp->mnt_vnodecovered.
* We need to reference it but not lock it.
*/
lowerrootvp = mp->mnt_vnodecovered;
VREF(lowerrootvp);
#if 0
/*
* Unlock lower node to avoid deadlock.
*/
if (lowerrootvp->v_op == union_vnodeop_p)
VOP_UNLOCK(lowerrootvp, 0, p);
#endif
/*
* Find upper node.
* Obtain upper vnode by calling namei() on the path. The
* upperrootvp will be turned referenced but not locked.
*/
NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT,
UIO_USERSPACE, args.target, p);
error = namei(ndp);
#if 0
if (lowerrootvp->v_op == union_vnodeop_p)
vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY, p);
#endif
if (error)
goto bad;
@ -139,8 +148,11 @@ union_mount(mp, path, data, ndp, p)
vrele(ndp->ni_dvp);
ndp->ni_dvp = NULL;
UDEBUG(("mount_root UPPERVP %p locked = %d\n", upperrootvp, VOP_ISLOCKED(upperrootvp)));
/*
* Check multi union mount to avoid `lock myself again' panic.
* Also require that it be a directory.
*/
if (upperrootvp == VTOUNION(lowerrootvp)->un_uppervp) {
#ifdef DIAGNOSTIC
@ -155,35 +167,43 @@ union_mount(mp, path, data, ndp, p)
goto bad;
}
um = (struct union_mount *) malloc(sizeof(struct union_mount),
M_UNIONFSMNT, M_WAITOK); /* XXX */
/*
* Keep a held reference to the target vnodes.
* They are vrele'd in union_unmount.
*
* Depending on the _BELOW flag, the filesystems are
* viewed in a different order. In effect, this is the
* same as providing a mount under option to the mount syscall.
* Allocate our union_mount structure and populate the fields.
* The vnode references are stored in the union_mount as held,
* unlocked references. Depending on the _BELOW flag, the
* filesystems are viewed in a different order. In effect this
* is the same as providing a mount-under option to the mount
* syscall.
*/
um = (struct union_mount *) malloc(sizeof(struct union_mount),
M_UNIONFSMNT, M_WAITOK);
bzero(um, sizeof(struct union_mount));
um->um_op = args.mntflags & UNMNT_OPMASK;
switch (um->um_op) {
case UNMNT_ABOVE:
um->um_lowervp = lowerrootvp;
um->um_uppervp = upperrootvp;
upperrootvp = NULL;
lowerrootvp = NULL;
break;
case UNMNT_BELOW:
um->um_lowervp = upperrootvp;
um->um_uppervp = lowerrootvp;
upperrootvp = NULL;
lowerrootvp = NULL;
break;
case UNMNT_REPLACE:
vrele(lowerrootvp);
lowerrootvp = NULLVP;
lowerrootvp = NULL;
um->um_uppervp = upperrootvp;
um->um_lowervp = lowerrootvp;
upperrootvp = NULL;
break;
default:
@ -196,7 +216,7 @@ union_mount(mp, path, data, ndp, p)
* supports whiteout operations
*/
if ((mp->mnt_flag & MNT_RDONLY) == 0) {
error = VOP_WHITEOUT(um->um_uppervp, (struct componentname *) 0, LOOKUP);
error = VOP_WHITEOUT(um->um_uppervp, NULL, LOOKUP);
if (error)
goto bad;
}
@ -258,15 +278,19 @@ union_mount(mp, path, data, ndp, p)
(void)union_statfs(mp, &mp->mnt_stat, p);
#ifdef DEBUG
printf("union_mount: from %s, on %s\n",
mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
#endif
UDEBUG(("union_mount: from %s, on %s\n",
mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname));
return (0);
bad:
if (um)
if (um) {
if (um->um_uppervp)
vrele(um->um_uppervp);
if (um->um_lowervp)
vrele(um->um_lowervp);
/* XXX other fields */
free(um, M_UNIONFSMNT);
}
if (cred)
crfree(cred);
if (upperrootvp)
@ -291,9 +315,7 @@ union_unmount(mp, mntflags, p)
int freeing;
int flags = 0;
#ifdef DEBUG
printf("union_unmount(mp = %p)\n", (void *)mp);
#endif
UDEBUG(("union_unmount(mp = %p)\n", (void *)mp));
if (mntflags & MNT_FORCE)
flags |= FORCECLOSE;
@ -365,55 +387,25 @@ union_root(mp, vpp)
struct mount *mp;
struct vnode **vpp;
{
struct proc *p = curproc; /* XXX */
struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
int error;
int loselock;
int lockadj = 0;
if (um->um_lowervp && um->um_op != UNMNT_BELOW &&
VOP_ISLOCKED(um->um_lowervp)) {
VREF(um->um_lowervp);
VOP_UNLOCK(um->um_lowervp, 0, p);
lockadj = 1;
}
/*
* Return locked reference to root.
* Supply an unlocked reference to um_uppervp and to um_lowervp. It
* is possible for um_uppervp to be locked without the associated
* root union_node being locked. We let union_allocvp() deal with
* it.
*/
UDEBUG(("union_root UPPERVP %p locked = %d\n", um->um_uppervp, VOP_ISLOCKED(um->um_uppervp)));
VREF(um->um_uppervp);
if ((um->um_op == UNMNT_BELOW) &&
VOP_ISLOCKED(um->um_uppervp)) {
loselock = 1;
} else {
vn_lock(um->um_uppervp, LK_EXCLUSIVE | LK_RETRY, p);
loselock = 0;
}
if (um->um_lowervp)
VREF(um->um_lowervp);
error = union_allocvp(vpp, mp,
(struct vnode *) 0,
(struct vnode *) 0,
(struct componentname *) 0,
um->um_uppervp,
um->um_lowervp,
1);
if (error) {
if (loselock)
vrele(um->um_uppervp);
else
vput(um->um_uppervp);
if (um->um_lowervp)
vrele(um->um_lowervp);
} else {
if (loselock)
VTOUNION(*vpp)->un_flags &= ~UN_ULOCK;
}
if (lockadj) {
vn_lock(um->um_lowervp, LK_EXCLUSIVE | LK_RETRY, p);
vrele(um->um_lowervp);
}
error = union_allocvp(vpp, mp, NULLVP, NULLVP, NULL,
um->um_uppervp, um->um_lowervp, 1);
UDEBUG(("error %d\n", error));
UDEBUG(("union_root2 UPPERVP %p locked = %d\n", um->um_uppervp, VOP_ISLOCKED(um->um_uppervp)));
return (error);
}
@ -429,10 +421,8 @@ union_statfs(mp, sbp, p)
struct statfs mstat;
int lbsize;
#ifdef DEBUG
printf("union_statfs(mp = %p, lvp = %p, uvp = %p)\n",
(void *)mp, (void *)um->um_lowervp, (void *)um->um_uppervp);
#endif
UDEBUG(("union_statfs(mp = %p, lvp = %p, uvp = %p)\n",
(void *)mp, (void *)um->um_lowervp, (void *)um->um_uppervp));
bzero(&mstat, sizeof(mstat));

File diff suppressed because it is too large Load Diff