From 2a31267e436da0fa9ba9a869356e4cbb52fe703f Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sun, 26 Sep 1999 20:52:41 +0000 Subject: [PATCH] This is a major fixup of unionfs. At least 30 serious bugs have been fixed (many due to changing semantics in other parts of the kernel and not the original author's fault), including one critical one: unionfs could cause UFS corruption in the fronting store due to calling VOP_OPEN for writing without turning on vmio for the UFS vnode. Most of the bugs were related to semantics changes in VOP calls, lock ordering problems (causing deadlocks), improper handling of a read-only backing store (such as an NFS mount), improper referencing and locking of vnodes, not using real struct locks for vnode locking, not using recursive locks when accessing the fronting store, and things like that. New functionality has been added: unionfs now has mmap() support, though it is only partially tested, and rename has been enhanced considerably. There are still some things that unionfs cannot do. You cannot rename a directory without confusing unionfs, and there are issues with softlinks, hardlinks, and special files. unionfs mostly doesn't understand them (and never did). There are probably still panic situations, but hopefully nowhere near as many as before this commit. The unionfs in this commit has been tested overlaid on /usr/src (backing /usr/src being a read-only NFS mount, fronting /usr/src being a local filesystem). Kernel builds have been tested; buildworld is undergoing testing. More testing is necessary. 
--- sys/fs/unionfs/union.h | 45 +- sys/fs/unionfs/union_subr.c | 548 +++++++----- sys/fs/unionfs/union_vfsops.c | 124 ++- sys/fs/unionfs/union_vnops.c | 1414 ++++++++++++++++++------------- sys/miscfs/union/union.h | 45 +- sys/miscfs/union/union_subr.c | 548 +++++++----- sys/miscfs/union/union_vfsops.c | 124 ++- sys/miscfs/union/union_vnops.c | 1414 ++++++++++++++++++------------- 8 files changed, 2530 insertions(+), 1732 deletions(-) diff --git a/sys/fs/unionfs/union.h b/sys/fs/unionfs/union.h index 1fc5f996cab0..02a897101472 100644 --- a/sys/fs/unionfs/union.h +++ b/sys/fs/unionfs/union.h @@ -49,8 +49,8 @@ struct union_args { #define UNMNT_OPMASK 0x0003 struct union_mount { - struct vnode *um_uppervp; - struct vnode *um_lowervp; + struct vnode *um_uppervp; /* UN_ULOCK holds locking state */ + struct vnode *um_lowervp; /* Left unlocked */ struct ucred *um_cred; /* Credentials of user calling mount */ int um_cmode; /* cmask from mount process */ int um_op; /* Operation mode */ @@ -58,6 +58,10 @@ struct union_mount { #ifdef KERNEL +#ifndef DIAGNOSTIC +#define DIAGNOSTIC +#endif + /* * DEFDIRMODE is the mode bits used to create a shadow directory. */ @@ -67,9 +71,14 @@ struct union_mount { #define UN_FILEMODE ((VRWMODE)|(VRWMODE>>3)|(VRWMODE>>6)) /* - * A cache of vnode references + * A cache of vnode references (hangs off v_data) + * + * Placing un_lock as the first elements theoretically allows us to + * use the vop_stdlock functions. However, we need to make sure of + * certain side effects so we will still punch in our own code. 
*/ struct union_node { + struct lock un_lock; LIST_ENTRY(union_node) un_cache; /* Hash chain */ struct vnode *un_vnode; /* Back pointer */ struct vnode *un_uppervp; /* overlaying object */ @@ -79,6 +88,7 @@ struct union_node { char *un_path; /* saved component name */ int un_hash; /* saved un_path hash value */ int un_openl; /* # of opens on lowervp */ + int un_exclcnt; /* exclusive count */ unsigned int un_flags; struct vnode **un_dircache; /* cached union stack */ off_t un_uppersz; /* size of upper object */ @@ -88,14 +98,25 @@ struct union_node { #endif }; -#define UN_WANT 0x01 -#define UN_LOCKED 0x02 -#define UN_ULOCK 0x04 /* Upper node is locked */ -#define UN_KLOCK 0x08 /* Keep upper node locked on vput */ -#define UN_CACHED 0x10 /* In union cache */ +/* + * XXX UN_ULOCK - indicates that the uppervp is locked + * + * UN_CACHED - node is in the union cache + */ + +/*#define UN_ULOCK 0x04*/ /* Upper node is locked */ +#define UN_CACHED 0x10 /* In union cache */ + +/* + * Hash table locking flags + */ + +#define UNVP_WANT 0x01 +#define UNVP_LOCKED 0x02 extern int union_allocvp __P((struct vnode **, struct mount *, - struct vnode *, struct vnode *, + struct vnode *, + struct vnode *, struct componentname *, struct vnode *, struct vnode *, int)); extern int union_freevp __P((struct vnode *)); @@ -113,6 +134,7 @@ extern int union_cn_close __P((struct vnode *, int, struct ucred *, extern void union_removed_upper __P((struct union_node *un)); extern struct vnode *union_lowervp __P((struct vnode *)); extern void union_newsize __P((struct vnode *, off_t, off_t)); +extern void union_vm_coherency __P((struct vnode *, struct uio *, int)); extern int (*union_dircheckp) __P((struct proc *, struct vnode **, struct file *)); @@ -124,6 +146,11 @@ extern int (*union_dircheckp) __P((struct proc *, struct vnode **, #define UPPERVP(vp) (VTOUNION(vp)->un_uppervp) #define OTHERVP(vp) (UPPERVP(vp) ? 
UPPERVP(vp) : LOWERVP(vp)) +#define UDEBUG(x) if (uniondebug) printf x +#define UDEBUG_ENABLED 1 + extern vop_t **union_vnodeop_p; extern struct vfsops union_vfsops; +extern int uniondebug; + #endif /* KERNEL */ diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c index ed09a65fbb4a..c03153c4894b 100644 --- a/sys/fs/unionfs/union_subr.c +++ b/sys/fs/unionfs/union_subr.c @@ -53,6 +53,7 @@ #include #include /* for vnode_pager_setsize */ #include +#include /* for vm cache coherency */ #include #include @@ -97,7 +98,7 @@ union_init() for (i = 0; i < NHASH; i++) LIST_INIT(&unhead[i]); - bzero((caddr_t) unvplock, sizeof(unvplock)); + bzero((caddr_t)unvplock, sizeof(unvplock)); return (0); } @@ -105,15 +106,12 @@ static int union_list_lock(ix) int ix; { - - if (unvplock[ix] & UN_LOCKED) { - unvplock[ix] |= UN_WANT; + if (unvplock[ix] & UNVP_LOCKED) { + unvplock[ix] |= UNVP_WANT; (void) tsleep((caddr_t) &unvplock[ix], PINOD, "unllck", 0); return (1); } - - unvplock[ix] |= UN_LOCKED; - + unvplock[ix] |= UNVP_LOCKED; return (0); } @@ -121,15 +119,25 @@ static void union_list_unlock(ix) int ix; { + unvplock[ix] &= ~UNVP_LOCKED; - unvplock[ix] &= ~UN_LOCKED; - - if (unvplock[ix] & UN_WANT) { - unvplock[ix] &= ~UN_WANT; + if (unvplock[ix] & UNVP_WANT) { + unvplock[ix] &= ~UNVP_WANT; wakeup((caddr_t) &unvplock[ix]); } } +/* + * union_updatevp: + * + * The uppervp, if not NULL, must be referenced and not locked by us + * The lowervp, if not NULL, must be referenced. + * + * if uppervp and lowervp match pointers already installed, nothing + * happens. The passed vp's (when matching) are not adjusted. This + * routine may only be called by union_newupper() and union_newlower(). 
+ */ + static void union_updatevp(un, uppervp, lowervp) struct union_node *un; @@ -153,9 +161,10 @@ union_updatevp(un, uppervp, lowervp) uhash = nhash; } - if (lhash != uhash) + if (lhash != uhash) { while (union_list_lock(lhash)) continue; + } while (union_list_lock(uhash)) continue; @@ -177,10 +186,6 @@ union_updatevp(un, uppervp, lowervp) free(un->un_path, M_TEMP); un->un_path = 0; } - if (un->un_dirvp) { - vrele(un->un_dirvp); - un->un_dirvp = NULLVP; - } } un->un_lowervp = lowervp; un->un_lowersz = VNOVAL; @@ -189,7 +194,6 @@ union_updatevp(un, uppervp, lowervp) if (un->un_uppervp != uppervp) { if (un->un_uppervp) vrele(un->un_uppervp); - un->un_uppervp = uppervp; un->un_uppersz = VNOVAL; } @@ -202,21 +206,30 @@ union_updatevp(un, uppervp, lowervp) union_list_unlock(nhash); } +/* + * Set a new lowervp. The passed lowervp must be referenced and will be + * stored in the vp in a referenced state. + */ + static void union_newlower(un, lowervp) struct union_node *un; struct vnode *lowervp; { - union_updatevp(un, un->un_uppervp, lowervp); } +/* + * Set a new uppervp. The passed uppervp must be locked and will be + * stored in the vp in a locked state. The caller should not unlock + * uppervp. + */ + static void union_newupper(un, uppervp) struct union_node *un; struct vnode *uppervp; { - union_updatevp(un, uppervp, un->un_lowervp); } @@ -253,27 +266,51 @@ union_newsize(vp, uppersz, lowersz) } if (sz != VNOVAL) { -#ifdef DEBUG - printf("union: %s size now %ld\n", - uppersz != VNOVAL ? "upper" : "lower", (long) sz); -#endif + UDEBUG(("union: %s size now %ld\n", + (uppersz != VNOVAL ? "upper" : "lower"), (long)sz)); vnode_pager_setsize(vp, sz); } } /* - * allocate a union_node/vnode pair. the vnode is - * referenced and locked. the new vnode is returned - * via (vpp). 
(mp) is the mountpoint of the union filesystem, - * (dvp) is the parent directory where the upper layer object - * should exist (but doesn't) and (cnp) is the componentname - * information which is partially copied to allow the upper - * layer object to be created at a later time. (uppervp) - * and (lowervp) reference the upper and lower layer objects - * being mapped. either, but not both, can be nil. - * if supplied, (uppervp) is locked. - * the reference is either maintained in the new union_node - * object which is allocated, or they are vrele'd. + * union_allocvp: allocate a union_node and associate it with a + * parent union_node and one or two vnodes. + * + * vpp Holds the returned vnode locked and referenced if no + * error occurs. + * + * mp Holds the mount point. mp may or may not be busied. + * allocvp makes no changes to mp. + * + * dvp Holds the parent union_node to the one we wish to create. + * XXX may only be used to traverse an uncopied lowervp-based + * tree? XXX + * + * dvp may or may not be locked. allocvp makes no changes + * to dvp. + * + * upperdvp Holds the parent vnode to uppervp, generally used along + * with path component information to create a shadow of + * lowervp when uppervp does not exist. + * + * upperdvp is referenced but unlocked on entry, and will be + * dereferenced on return. + * + * uppervp Holds the new uppervp vnode to be stored in the + * union_node we are allocating. uppervp is referenced but + * not locked, and will be dereferenced on return. + * + * lowervp Holds the new lowervp vnode to be stored in the + * union_node we are allocating. lowervp is referenced but + * not locked, and will be dereferenced on return. + * + * cnp Holds path component information to be coupled with + * lowervp and upperdvp to allow unionfs to create an uppervp + * later on. Only used if lowervp is valid. The contents + * of cnp are only valid for the duration of the call. 
+ * + * docache Determine whether this node should be entered in the + * cache or whether it should be destroyed as soon as possible. * * all union_nodes are maintained on a singly-linked * list. new nodes are only allocated when they cannot @@ -292,12 +329,13 @@ union_newsize(vp, uppersz, lowersz) * zero references to it and so it needs to removed from * the vnode free list. */ + int -union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) +union_allocvp(vpp, mp, dvp, upperdvp, cnp, uppervp, lowervp, docache) struct vnode **vpp; struct mount *mp; - struct vnode *undvp; /* parent union vnode */ - struct vnode *dvp; /* may be null */ + struct vnode *dvp; /* parent union vnode */ + struct vnode *upperdvp; /* parent vnode of uppervp */ struct componentname *cnp; /* may be null */ struct vnode *uppervp; /* may be null */ struct vnode *lowervp; /* may be null */ @@ -307,6 +345,7 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) struct union_node *un = 0; struct vnode *xlowervp = NULLVP; struct union_mount *um = MOUNTTOUNIONMOUNT(mp); + struct proc *p = (cnp) ? cnp->cn_proc : curproc; int hash = 0; int vflag; int try; @@ -382,65 +421,76 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) if (un) { /* - * Obtain a lock on the union_node. - * uppervp is locked, though un->un_uppervp - * may not be. this doesn't break the locking - * hierarchy since in the case that un->un_uppervp - * is not yet locked it will be vrele'd and replaced - * with uppervp. + * Obtain a lock on the union_node. Everything is unlocked + * except for dvp, so check that case. If they match, our + * new un is already locked. Otherwise we have to lock our + * new un. + * + * A potential deadlock situation occurs when we are holding + * one lock while trying to get another. We must follow + * strict ordering rules to avoid it. We try to locate dvp + * by scanning up from un_vnode, since the most likely + * scenario is un being under dvp. 
*/ - if ((dvp != NULLVP) && (uppervp == dvp)) { - /* - * Access ``.'', so (un) will already - * be locked. Since this process has - * the lock on (uppervp) no other - * process can hold the lock on (un). - */ -#ifdef DIAGNOSTIC - if ((un->un_flags & UN_LOCKED) == 0) - panic("union: . not locked"); - else if (curproc && un->un_pid != curproc->p_pid && - un->un_pid > -1 && curproc->p_pid > -1) - panic("union: allocvp not lock owner"); -#endif - } else { - if (un->un_flags & UN_LOCKED) { - vrele(UNIONTOV(un)); - un->un_flags |= UN_WANT; - (void) tsleep((caddr_t) &un->un_flags, PINOD, "unalvp", 0); - goto loop; - } - un->un_flags |= UN_LOCKED; + if (dvp && un->un_vnode != dvp) { + struct vnode *scan = un->un_vnode; -#ifdef DIAGNOSTIC - if (curproc) - un->un_pid = curproc->p_pid; - else - un->un_pid = -1; -#endif + do { + scan = VTOUNION(scan)->un_pvp; + } while (scan && scan->v_tag == VT_UNION && scan != dvp); + if (scan != dvp) { + /* + * our new un is above dvp (we never saw dvp + * while moving up the tree). + */ + VREF(dvp); + VOP_UNLOCK(dvp, 0, p); + error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p); + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + vrele(dvp); + } else { + /* + * our new un is under dvp + */ + error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p); + } + } else if (dvp == NULLVP) { + /* + * dvp is NULL, we need to lock un. + */ + error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p); + } else { + /* + * dvp == un->un_vnode, we are already locked. + */ + error = 0; } - /* - * At this point, the union_node is locked, - * un->un_uppervp may not be locked, and uppervp - * is locked or nil. - */ + if (error) + goto loop; /* - * Save information about the upper layer. + * At this point, the union_node is locked and referenced. + * + * uppervp is locked and referenced or NULL, lowervp is + * referenced or NULL. */ + UDEBUG(("Modify existing un %p vn %p upper %p(refs %d) -> %p(refs %d)\n", + un, un->un_vnode, un->un_uppervp, + (un->un_uppervp ? 
un->un_uppervp->v_usecount : -99), + uppervp, + (uppervp ? uppervp->v_usecount : -99) + )); + if (uppervp != un->un_uppervp) { + KASSERT(uppervp == NULL || uppervp->v_usecount > 0, ("union_allocvp: too few refs %d (at least 1 required) on uppervp", uppervp->v_usecount)); union_newupper(un, uppervp); } else if (uppervp) { + KASSERT(uppervp->v_usecount > 1, ("union_allocvp: too few refs %d (at least 2 required) on uppervp", uppervp->v_usecount)); vrele(uppervp); } - if (un->un_uppervp) { - un->un_flags |= UN_ULOCK; - un->un_flags &= ~UN_KLOCK; - } - /* * Save information about the lower layer. * This needs to keep track of pathname @@ -456,12 +506,22 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen); un->un_path[cnp->cn_namelen] = '\0'; - VREF(dvp); - un->un_dirvp = dvp; } } else if (lowervp) { vrele(lowervp); } + + /* + * and upperdvp + */ + if (upperdvp != un->un_dirvp) { + if (un->un_dirvp) + vrele(un->un_dirvp); + un->un_dirvp = upperdvp; + } else if (upperdvp) { + vrele(upperdvp); + } + *vpp = UNIONTOV(un); return (0); } @@ -477,17 +537,22 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) goto loop; } + /* + * Create new node rather then replace old node + */ + error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp); if (error) { - if (uppervp) { - if (dvp == uppervp) - vrele(uppervp); - else - vput(uppervp); - } + /* + * If an error occurs clear out vnodes. 
+ */ if (lowervp) vrele(lowervp); - + if (uppervp) + vrele(uppervp); + if (upperdvp) + vrele(upperdvp); + *vpp = NULL; goto out; } @@ -499,37 +564,34 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) (*vpp)->v_type = uppervp->v_type; else (*vpp)->v_type = lowervp->v_type; + un = VTOUNION(*vpp); + bzero(un, sizeof(*un)); + + lockinit(&un->un_lock, PVFS, "unlock", 0, 0); + vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p); + un->un_vnode = *vpp; un->un_uppervp = uppervp; un->un_uppersz = VNOVAL; un->un_lowervp = lowervp; un->un_lowersz = VNOVAL; - un->un_pvp = undvp; - if (undvp != NULLVP) - VREF(undvp); + un->un_dirvp = upperdvp; + un->un_pvp = dvp; /* only parent dir in new allocation */ + if (dvp != NULLVP) + VREF(dvp); un->un_dircache = 0; un->un_openl = 0; - un->un_flags = UN_LOCKED; - if (un->un_uppervp) - un->un_flags |= UN_ULOCK; -#ifdef DIAGNOSTIC - if (curproc) - un->un_pid = curproc->p_pid; - else - un->un_pid = -1; -#endif + if (cnp && (lowervp != NULLVP)) { un->un_hash = cnp->cn_hash; un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK); bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen); un->un_path[cnp->cn_namelen] = '\0'; - VREF(dvp); - un->un_dirvp = dvp; } else { un->un_hash = 0; un->un_path = 0; - un->un_dirvp = 0; + un->un_dirvp = NULL; } if (docache) { @@ -537,10 +599,10 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) un->un_flags |= UN_CACHED; } +out: if (xlowervp) vrele(xlowervp); -out: if (docache) union_list_unlock(hash); @@ -558,16 +620,26 @@ union_freevp(vp) LIST_REMOVE(un, un_cache); } - if (un->un_pvp != NULLVP) + if (un->un_pvp != NULLVP) { vrele(un->un_pvp); - if (un->un_uppervp != NULLVP) + un->un_pvp = NULL; + } + if (un->un_uppervp != NULLVP) { vrele(un->un_uppervp); - if (un->un_lowervp != NULLVP) + un->un_uppervp = NULL; + } + if (un->un_lowervp != NULLVP) { vrele(un->un_lowervp); - if (un->un_dirvp != NULLVP) + un->un_lowervp = NULL; + } + if (un->un_dirvp != NULLVP) { vrele(un->un_dirvp); 
- if (un->un_path) + un->un_dirvp = NULL; + } + if (un->un_path) { free(un->un_path, M_TEMP); + un->un_path = NULL; + } FREE(vp->v_data, M_TEMP); vp->v_data = 0; @@ -579,6 +651,9 @@ union_freevp(vp) * copyfile. copy the vnode (fvp) to the vnode (tvp) * using a sequence of reads and writes. both (fvp) * and (tvp) are locked on entry and exit. + * + * fvp and tvp are both exclusive locked on call, but their refcount's + * haven't been bumped at all. */ static int union_copyfile(fvp, tvp, cred, p) @@ -600,48 +675,62 @@ union_copyfile(fvp, tvp, cred, p) * give up at the first sign of trouble. */ + bzero(&uio, sizeof(uio)); + uio.uio_procp = p; uio.uio_segflg = UIO_SYSSPACE; uio.uio_offset = 0; - VOP_UNLOCK(fvp, 0, p); /* XXX */ VOP_LEASE(fvp, p, cred, LEASE_READ); - vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ - VOP_UNLOCK(tvp, 0, p); /* XXX */ VOP_LEASE(tvp, p, cred, LEASE_WRITE); - vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK); /* ugly loop follows... */ do { off_t offset = uio.uio_offset; + int count; + int bufoffset; + /* + * Setup for big read + */ uio.uio_iov = &iov; uio.uio_iovcnt = 1; iov.iov_base = buf; iov.iov_len = MAXBSIZE; uio.uio_resid = iov.iov_len; uio.uio_rw = UIO_READ; - error = VOP_READ(fvp, &uio, 0, cred); - if (error == 0) { + if ((error = VOP_READ(fvp, &uio, 0, cred)) != 0) + break; + + /* + * Get bytes read, handle read eof case and setup for + * write loop + */ + if ((count = MAXBSIZE - uio.uio_resid) == 0) + break; + bufoffset = 0; + + /* + * Write until an error occurs or our buffer has been + * exhausted, then update the offset for the next read. 
+ */ + while (bufoffset < count) { uio.uio_iov = &iov; uio.uio_iovcnt = 1; - iov.iov_base = buf; - iov.iov_len = MAXBSIZE - uio.uio_resid; - uio.uio_offset = offset; + iov.iov_base = buf + bufoffset; + iov.iov_len = count - bufoffset; + uio.uio_offset = offset + bufoffset; uio.uio_rw = UIO_WRITE; uio.uio_resid = iov.iov_len; - if (uio.uio_resid == 0) + if ((error = VOP_WRITE(tvp, &uio, 0, cred)) != 0) break; - - do { - error = VOP_WRITE(tvp, &uio, 0, cred); - } while ((uio.uio_resid > 0) && (error == 0)); + bufoffset += (count - bufoffset) - uio.uio_resid; } - + uio.uio_offset = offset + bufoffset; } while (error == 0); free(buf, M_TEMP); @@ -649,9 +738,10 @@ union_copyfile(fvp, tvp, cred, p) } /* - * (un) is assumed to be locked on entry and remains - * locked on exit. + * + * un's vnode is assumed to be locked on entry and remains locked on exit. */ + int union_copyup(un, docopy, cred, p) struct union_node *un; @@ -676,12 +766,9 @@ union_copyup(un, docopy, cred, p) if (error) return (error); - /* at this point, uppervp is locked */ - union_newupper(un, uvp); - un->un_flags |= UN_ULOCK; - lvp = un->un_lowervp; + KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount)); if (docopy) { /* * XX - should not ignore errors @@ -689,23 +776,22 @@ union_copyup(un, docopy, cred, p) */ vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_OPEN(lvp, FREAD, cred, p); + if (error == 0 && vn_canvmio(lvp) == TRUE) + error = vfs_object_create(lvp, p, cred); if (error == 0) { error = union_copyfile(lvp, uvp, cred, p); VOP_UNLOCK(lvp, 0, p); (void) VOP_CLOSE(lvp, FREAD, cred, p); } -#ifdef DEBUG if (error == 0) - uprintf("union: copied up %s\n", un->un_path); -#endif + UDEBUG(("union: copied up %s\n", un->un_path)); } - un->un_flags &= ~UN_ULOCK; VOP_UNLOCK(uvp, 0, p); + union_newupper(un, uvp); + KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount)); union_vn_close(uvp, FWRITE, cred, p); - vn_lock(uvp, LK_EXCLUSIVE | LK_RETRY, p); - 
un->un_flags |= UN_ULOCK; - + KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount)); /* * Subsequent IOs will go to the top layer, so * call close on the lower vnode and open on the @@ -721,6 +807,8 @@ union_copyup(un, docopy, cred, p) (void) VOP_CLOSE(lvp, FREAD, cred, p); (void) VOP_OPEN(uvp, FREAD, cred, p); } + if (vn_canvmio(uvp) == TRUE) + error = vfs_object_create(uvp, p, cred); un->un_openl = 0; } @@ -728,6 +816,17 @@ union_copyup(un, docopy, cred, p) } +/* + * union_relookup: + * + * dvp should be locked on entry and will be locked on return. No + * net change in the ref count will occur. + * + * If an error is returned, *vpp will be invalid, otherwise it + * will hold a locked, referenced vnode. If *vpp == dvp then + * remember that only one exclusive lock is held. + */ + static int union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) struct union_mount *um; @@ -757,7 +856,7 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) cn->cn_pnbuf[cn->cn_namelen] = '\0'; cn->cn_nameiop = CREATE; - cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN); + cn->cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN); cn->cn_proc = cnp->cn_proc; if (um->um_op == UNMNT_ABOVE) cn->cn_cred = cnp->cn_cred; @@ -768,15 +867,30 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) cn->cn_consume = cnp->cn_consume; VREF(dvp); - error = relookup(dvp, vpp, cn); - if (!error) - vrele(dvp); - else { + VOP_UNLOCK(dvp, 0, cnp->cn_proc); + + /* + * Pass dvp unlocked and referenced on call to relookup(). + * + * If an error occurs, dvp will be returned unlocked and dereferenced. + */ + + if ((error = relookup(dvp, vpp, cn)) != 0) { zfree(namei_zone, cn->cn_pnbuf); cn->cn_pnbuf = NULL; + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, cnp->cn_proc); + return(error); } - return (error); + /* + * If no error occurs, dvp will be returned locked with the reference + * left as before, and vpp will be returned referenced and locked. 
+ * + * We want to return with dvp as it was passed to us, so we get + * rid of our reference. + */ + vrele(dvp); + return (0); } /* @@ -785,11 +899,11 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) * * (um) points to the union mount structure for access to the * the mounting process's credentials. - * (dvp) is the directory in which to create the shadow directory. - * it is unlocked on entry and exit. + * (dvp) is the directory in which to create the shadow directory, + * it is locked (but not ref'd) on entry and return. * (cnp) is the componentname to be created. * (vpp) is the returned newly created shadow directory, which - * is returned locked. + * is returned locked and ref'd */ int union_mkshadow(um, dvp, cnp, vpp) @@ -810,8 +924,10 @@ union_mkshadow(um, dvp, cnp, vpp) if (*vpp) { VOP_ABORTOP(dvp, &cn); - VOP_UNLOCK(dvp, 0, p); - vrele(*vpp); + if (dvp == *vpp) + vrele(*vpp); + else + vput(*vpp); *vpp = NULLVP; return (EEXIST); } @@ -832,7 +948,7 @@ union_mkshadow(um, dvp, cnp, vpp) VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE); error = VOP_MKDIR(dvp, vpp, &cn, &va); - vput(dvp); + /*vput(dvp);*/ return (error); } @@ -842,7 +958,7 @@ union_mkshadow(um, dvp, cnp, vpp) * (um) points to the union mount structure for access to the * the mounting process's credentials. * (dvp) is the directory in which to create the whiteout. - * it is locked on entry and exit. + * it is locked on entry and return. * (cnp) is the componentname to be created. 
*/ int @@ -857,17 +973,16 @@ union_mkwhiteout(um, dvp, cnp, path) struct vnode *wvp; struct componentname cn; - VOP_UNLOCK(dvp, 0, p); error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path)); - if (error) { - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + if (error) return (error); - } if (wvp) { VOP_ABORTOP(dvp, &cn); - vrele(dvp); - vrele(wvp); + if (wvp == dvp) + vrele(wvp); + else + vput(wvp); return (EEXIST); } @@ -877,9 +992,6 @@ union_mkwhiteout(um, dvp, cnp, path) error = VOP_WHITEOUT(dvp, &cn, CREATE); if (error) VOP_ABORTOP(dvp, &cn); - - vrele(dvp); - return (error); } @@ -890,6 +1002,12 @@ union_mkwhiteout(um, dvp, cnp, path) * the problem with calling namei is that a) it locks too many * things, and b) it doesn't start at the "right" directory, * whereas relookup is told where to start. + * + * On entry, the vnode associated with un is locked. It remains locked + * on return. + * + * If no error occurs, *vpp contains a locked referenced vnode for your + * use. If an error occurs *vpp iis undefined. */ static int union_vn_create(vpp, un, p) @@ -921,26 +1039,34 @@ union_vn_create(vpp, un, p) cn.cn_pnbuf = zalloc(namei_zone); bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1); cn.cn_nameiop = CREATE; - cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN); + cn.cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN); cn.cn_proc = p; cn.cn_cred = p->p_ucred; cn.cn_nameptr = cn.cn_pnbuf; cn.cn_hash = un->un_hash; cn.cn_consume = 0; + /* + * Pass dvp unlocked and referenced on call to relookup(). + * + * If an error occurs, dvp will be returned unlocked and dereferenced. + */ VREF(un->un_dirvp); error = relookup(un->un_dirvp, &vp, &cn); if (error) return (error); - vrele(un->un_dirvp); + /* + * If no error occurs, dvp will be returned locked with the reference + * left as before, and vpp will be returned referenced and locked. 
+ */ if (vp) { + vput(un->un_dirvp); VOP_ABORTOP(un->un_dirvp, &cn); - if (un->un_dirvp == vp) - vrele(un->un_dirvp); + if (vp == un->un_dirvp) + vrele(vp); else - vput(un->un_dirvp); - vrele(vp); + vput(vp); return (EEXIST); } @@ -964,11 +1090,12 @@ union_vn_create(vpp, un, p) return (error); error = VOP_OPEN(vp, fmode, cred, p); + if (error == 0 && vn_canvmio(vp) == TRUE) + error = vfs_object_create(vp, p, cred); if (error) { vput(vp); return (error); } - vp->v_writecount++; *vpp = vp; return (0); @@ -987,6 +1114,14 @@ union_vn_close(vp, fmode, cred, p) return (VOP_CLOSE(vp, fmode, cred, p)); } +#if 0 + +/* + * union_removed_upper: + * + * called with union_node unlocked. XXX + */ + void union_removed_upper(un) struct union_node *un; @@ -999,9 +1134,7 @@ union_removed_upper(un) * union node will have neither uppervp nor lowervp. We remove * the union node from cache, so that it will not be referrenced. */ -#if 0 union_newupper(un, NULLVP); -#endif if (un->un_dircache != 0) { for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) vrele(*vpp); @@ -1013,28 +1146,8 @@ union_removed_upper(un) un->un_flags &= ~UN_CACHED; LIST_REMOVE(un, un_cache); } - - if (un->un_flags & UN_ULOCK) { - un->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(un->un_uppervp, 0, p); - } } -#if 0 -struct vnode * -union_lowervp(vp) - struct vnode *vp; -{ - struct union_node *un = VTOUNION(vp); - - if ((un->un_lowervp != NULLVP) && - (vp->v_type == un->un_lowervp->v_type)) { - if (vget(un->un_lowervp, 0) == 0) - return (un->un_lowervp); - } - - return (NULLVP); -} #endif /* @@ -1104,13 +1217,12 @@ union_dircache(vp, p) nvp = NULLVP; - if (dircache == 0) { + if (dircache == NULL) { cnt = 0; union_dircache_r(vp, 0, &cnt); cnt++; - dircache = (struct vnode **) - malloc(cnt * sizeof(struct vnode *), - M_TEMP, M_WAITOK); + dircache = malloc(cnt * sizeof(struct vnode *), + M_TEMP, M_WAITOK); vpp = dircache; union_dircache_r(vp, &vpp, &cnt); *vpp = NULLVP; @@ -1126,9 +1238,11 @@ union_dircache(vp, p) if (*vpp == 
NULLVP) goto out; - vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p); + /*vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);*/ + UDEBUG(("ALLOCVP-3 %p ref %d\n", *vpp, (*vpp ? (*vpp)->v_usecount : -99))); VREF(*vpp); - error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0); + error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, NULL, *vpp, NULLVP, 0); + UDEBUG(("ALLOCVP-3B %p ref %d\n", nvp, (*vpp ? (*vpp)->v_usecount : -99))); if (error) goto out; @@ -1141,6 +1255,40 @@ union_dircache(vp, p) return (nvp); } +/* + * Guarentee coherency with the VM cache by invalidating any clean VM pages + * associated with this write and updating any dirty VM pages. Since our + * vnode is locked, other processes will not be able to read the pages in + * again until after our write completes. + * + * We also have to be coherent with reads, by flushing any pending dirty + * pages prior to issuing the read. + * + * XXX this is somewhat of a hack at the moment. To support this properly + * we would have to be able to run VOP_READ and VOP_WRITE through the VM + * cache. Then we wouldn't need to worry about coherency. 
+ */ + +void +union_vm_coherency(struct vnode *vp, struct uio *uio, int cleanfls) +{ + vm_object_t object; + vm_pindex_t pstart; + vm_pindex_t pend; + int pgoff; + + if ((object = vp->v_object) == NULL) + return; + + pgoff = uio->uio_offset & PAGE_MASK; + pstart = uio->uio_offset / PAGE_SIZE; + pend = pstart + (uio->uio_resid + pgoff + PAGE_MASK) / PAGE_SIZE; + + vm_object_page_clean(object, pstart, pend, OBJPC_SYNC); + if (cleanfls) + vm_object_page_remove(object, pstart, pend, TRUE); +} + /* * Module glue to remove #ifdef UNION from vfs_syscalls.c */ @@ -1169,6 +1317,8 @@ union_dircheck(struct proc *p, struct vnode **vp, struct file *fp) if (lvp != NULLVP) { error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); + if (error == 0 && vn_canvmio(lvp) == TRUE) + error = vfs_object_create(lvp, p, fp->f_cred); if (error) { vput(lvp); return (error); @@ -1201,9 +1351,11 @@ union_modevent(module_t mod, int type, void *data) } return 0; } + static moduledata_t union_mod = { "union_dircheck", union_modevent, NULL }; + DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY); diff --git a/sys/fs/unionfs/union_vfsops.c b/sys/fs/unionfs/union_vfsops.c index af828ac64a8c..1a53f88bcc7b 100644 --- a/sys/fs/unionfs/union_vfsops.c +++ b/sys/fs/unionfs/union_vfsops.c @@ -85,9 +85,7 @@ union_mount(mp, path, data, ndp, p) int len; u_int size; -#ifdef DEBUG - printf("union_mount(mp = %p)\n", (void *)mp); -#endif + UDEBUG(("union_mount(mp = %p)\n", (void *)mp)); /* * Disable clustered write, otherwise system becomes unstable. @@ -114,24 +112,35 @@ union_mount(mp, path, data, ndp, p) if (error) goto bad; + /* + * Obtain lower vnode. Vnode is stored in mp->mnt_vnodecovered. + * We need to reference it but not lock it. + */ + lowerrootvp = mp->mnt_vnodecovered; VREF(lowerrootvp); +#if 0 /* * Unlock lower node to avoid deadlock. */ if (lowerrootvp->v_op == union_vnodeop_p) VOP_UNLOCK(lowerrootvp, 0, p); +#endif /* - * Find upper node. + * Obtain upper vnode by calling namei() on the path. 
The + * upperrootvp will be turned referenced but not locked. */ NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT, UIO_USERSPACE, args.target, p); error = namei(ndp); + +#if 0 if (lowerrootvp->v_op == union_vnodeop_p) vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY, p); +#endif if (error) goto bad; @@ -139,8 +148,11 @@ union_mount(mp, path, data, ndp, p) vrele(ndp->ni_dvp); ndp->ni_dvp = NULL; + UDEBUG(("mount_root UPPERVP %p locked = %d\n", upperrootvp, VOP_ISLOCKED(upperrootvp))); + /* * Check multi union mount to avoid `lock myself again' panic. + * Also require that it be a directory. */ if (upperrootvp == VTOUNION(lowerrootvp)->un_uppervp) { #ifdef DIAGNOSTIC @@ -155,35 +167,43 @@ union_mount(mp, path, data, ndp, p) goto bad; } - um = (struct union_mount *) malloc(sizeof(struct union_mount), - M_UNIONFSMNT, M_WAITOK); /* XXX */ - /* - * Keep a held reference to the target vnodes. - * They are vrele'd in union_unmount. - * - * Depending on the _BELOW flag, the filesystems are - * viewed in a different order. In effect, this is the - * same as providing a mount under option to the mount syscall. + * Allocate our union_mount structure and populate the fields. + * The vnode references are stored in the union_mount as held, + * unlocked references. Depending on the _BELOW flag, the + * filesystems are viewed in a different order. In effect this + * is the same as providing a mount-under option to the mount + * syscall. 
*/ + um = (struct union_mount *) malloc(sizeof(struct union_mount), + M_UNIONFSMNT, M_WAITOK); + + bzero(um, sizeof(struct union_mount)); + um->um_op = args.mntflags & UNMNT_OPMASK; + switch (um->um_op) { case UNMNT_ABOVE: um->um_lowervp = lowerrootvp; um->um_uppervp = upperrootvp; + upperrootvp = NULL; + lowerrootvp = NULL; break; case UNMNT_BELOW: um->um_lowervp = upperrootvp; um->um_uppervp = lowerrootvp; + upperrootvp = NULL; + lowerrootvp = NULL; break; case UNMNT_REPLACE: vrele(lowerrootvp); - lowerrootvp = NULLVP; + lowerrootvp = NULL; um->um_uppervp = upperrootvp; um->um_lowervp = lowerrootvp; + upperrootvp = NULL; break; default: @@ -196,7 +216,7 @@ union_mount(mp, path, data, ndp, p) * supports whiteout operations */ if ((mp->mnt_flag & MNT_RDONLY) == 0) { - error = VOP_WHITEOUT(um->um_uppervp, (struct componentname *) 0, LOOKUP); + error = VOP_WHITEOUT(um->um_uppervp, NULL, LOOKUP); if (error) goto bad; } @@ -258,15 +278,19 @@ union_mount(mp, path, data, ndp, p) (void)union_statfs(mp, &mp->mnt_stat, p); -#ifdef DEBUG - printf("union_mount: from %s, on %s\n", - mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname); -#endif + UDEBUG(("union_mount: from %s, on %s\n", + mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname)); return (0); bad: - if (um) + if (um) { + if (um->um_uppervp) + vrele(um->um_uppervp); + if (um->um_lowervp) + vrele(um->um_lowervp); + /* XXX other fields */ free(um, M_UNIONFSMNT); + } if (cred) crfree(cred); if (upperrootvp) @@ -291,9 +315,7 @@ union_unmount(mp, mntflags, p) int freeing; int flags = 0; -#ifdef DEBUG - printf("union_unmount(mp = %p)\n", (void *)mp); -#endif + UDEBUG(("union_unmount(mp = %p)\n", (void *)mp)); if (mntflags & MNT_FORCE) flags |= FORCECLOSE; @@ -365,55 +387,25 @@ union_root(mp, vpp) struct mount *mp; struct vnode **vpp; { - struct proc *p = curproc; /* XXX */ struct union_mount *um = MOUNTTOUNIONMOUNT(mp); int error; - int loselock; - int lockadj = 0; - - if (um->um_lowervp && um->um_op != UNMNT_BELOW && - 
VOP_ISLOCKED(um->um_lowervp)) { - VREF(um->um_lowervp); - VOP_UNLOCK(um->um_lowervp, 0, p); - lockadj = 1; - } /* - * Return locked reference to root. + * Supply an unlocked reference to um_uppervp and to um_lowervp. It + * is possible for um_uppervp to be locked without the associated + * root union_node being locked. We let union_allocvp() deal with + * it. */ + UDEBUG(("union_root UPPERVP %p locked = %d\n", um->um_uppervp, VOP_ISLOCKED(um->um_uppervp))); + VREF(um->um_uppervp); - if ((um->um_op == UNMNT_BELOW) && - VOP_ISLOCKED(um->um_uppervp)) { - loselock = 1; - } else { - vn_lock(um->um_uppervp, LK_EXCLUSIVE | LK_RETRY, p); - loselock = 0; - } if (um->um_lowervp) VREF(um->um_lowervp); - error = union_allocvp(vpp, mp, - (struct vnode *) 0, - (struct vnode *) 0, - (struct componentname *) 0, - um->um_uppervp, - um->um_lowervp, - 1); - if (error) { - if (loselock) - vrele(um->um_uppervp); - else - vput(um->um_uppervp); - if (um->um_lowervp) - vrele(um->um_lowervp); - } else { - if (loselock) - VTOUNION(*vpp)->un_flags &= ~UN_ULOCK; - } - if (lockadj) { - vn_lock(um->um_lowervp, LK_EXCLUSIVE | LK_RETRY, p); - vrele(um->um_lowervp); - } + error = union_allocvp(vpp, mp, NULLVP, NULLVP, NULL, + um->um_uppervp, um->um_lowervp, 1); + UDEBUG(("error %d\n", error)); + UDEBUG(("union_root2 UPPERVP %p locked = %d\n", um->um_uppervp, VOP_ISLOCKED(um->um_uppervp))); return (error); } @@ -429,10 +421,8 @@ union_statfs(mp, sbp, p) struct statfs mstat; int lbsize; -#ifdef DEBUG - printf("union_statfs(mp = %p, lvp = %p, uvp = %p)\n", - (void *)mp, (void *)um->um_lowervp, (void *)um->um_uppervp); -#endif + UDEBUG(("union_statfs(mp = %p, lvp = %p, uvp = %p)\n", + (void *)mp, (void *)um->um_lowervp, (void *)um->um_uppervp)); bzero(&mstat, sizeof(mstat)); diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c index 145f8ca6f0ad..128e59ebaa21 100644 --- a/sys/fs/unionfs/union_vnops.c +++ b/sys/fs/unionfs/union_vnops.c @@ -50,13 +50,25 @@ #include #include #include 
+#include #include -#define FIXUP(un, p) { \ - if (((un)->un_flags & UN_ULOCK) == 0) { \ - union_fixup(un, p); \ - } \ -} +#include +#include + +#include +#include +#include +#include +#include + +int uniondebug = 0; + +#if UDEBUG_ENABLED +SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RW, &uniondebug, 0, ""); +#else +SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RD, &uniondebug, 0, ""); +#endif static int union_abortop __P((struct vop_abortop_args *ap)); static int union_access __P((struct vop_access_args *ap)); @@ -64,17 +76,15 @@ static int union_advlock __P((struct vop_advlock_args *ap)); static int union_bmap __P((struct vop_bmap_args *ap)); static int union_close __P((struct vop_close_args *ap)); static int union_create __P((struct vop_create_args *ap)); -static void union_fixup __P((struct union_node *un, struct proc *p)); static int union_fsync __P((struct vop_fsync_args *ap)); static int union_getattr __P((struct vop_getattr_args *ap)); static int union_inactive __P((struct vop_inactive_args *ap)); static int union_ioctl __P((struct vop_ioctl_args *ap)); -static int union_islocked __P((struct vop_islocked_args *ap)); static int union_lease __P((struct vop_lease_args *ap)); static int union_link __P((struct vop_link_args *ap)); static int union_lock __P((struct vop_lock_args *ap)); static int union_lookup __P((struct vop_lookup_args *ap)); -static int union_lookup1 __P((struct vnode *udvp, struct vnode **dvpp, +static int union_lookup1 __P((struct vnode *udvp, struct vnode **dvp, struct vnode **vpp, struct componentname *cnp)); static int union_mkdir __P((struct vop_mkdir_args *ap)); @@ -94,36 +104,89 @@ static int union_rmdir __P((struct vop_rmdir_args *ap)); static int union_poll __P((struct vop_poll_args *ap)); static int union_setattr __P((struct vop_setattr_args *ap)); static int union_strategy __P((struct vop_strategy_args *ap)); +static int union_getpages __P((struct vop_getpages_args *ap)); +static int union_putpages __P((struct vop_putpages_args 
*ap)); static int union_symlink __P((struct vop_symlink_args *ap)); static int union_unlock __P((struct vop_unlock_args *ap)); static int union_whiteout __P((struct vop_whiteout_args *ap)); static int union_write __P((struct vop_read_args *ap)); -static void -union_fixup(un, p) - struct union_node *un; - struct proc *p; +static __inline +struct vnode * +union_lock_upper(struct union_node *un, struct proc *p) { + struct vnode *uppervp; - vn_lock(un->un_uppervp, LK_EXCLUSIVE | LK_RETRY, p); - un->un_flags |= UN_ULOCK; + if ((uppervp = un->un_uppervp) != NULL) { + VREF(uppervp); + vn_lock(uppervp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, p); + } + KASSERT((uppervp == NULL || uppervp->v_usecount > 0), ("uppervp usecount is 0")); + return(uppervp); } +static __inline +void +union_unlock_upper(struct vnode *uppervp, struct proc *p) +{ + vput(uppervp); +} + +static __inline +struct vnode * +union_lock_other(struct union_node *un, struct proc *p) +{ + struct vnode *vp; + + if (un->un_uppervp != NULL) { + vp = union_lock_upper(un, p); + } else if ((vp = un->un_lowervp) != NULL) { + VREF(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, p); + } + return(vp); +} + +static __inline +void +union_unlock_other(struct vnode *vp, struct proc *p) +{ + vput(vp); +} + +/* + * union_lookup: + * + * udvp must be exclusively locked on call and will remain + * exclusively locked on return. This is the mount point + * for our filesystem. + * + * dvp Our base directory, locked and referenced. + * The passed dvp will be dereferenced and unlocked on return + * and a new dvp will be returned which is locked and + * referenced in the same variable. + * + * vpp is filled in with the result if no error occurred, + * locked and ref'd. + * + * If an error is returned, *vpp is set to NULLVP. If no + * error occurs, *vpp is returned with a reference and an + * exclusive lock.
+ */ + static int -union_lookup1(udvp, dvpp, vpp, cnp) +union_lookup1(udvp, pdvp, vpp, cnp) struct vnode *udvp; - struct vnode **dvpp; + struct vnode **pdvp; struct vnode **vpp; struct componentname *cnp; { int error; struct proc *p = cnp->cn_proc; + struct vnode *dvp = *pdvp; struct vnode *tdvp; - struct vnode *dvp; struct mount *mp; - dvp = *dvpp; - /* * If stepping up the directory tree, check for going * back across the mount point, in which case do what @@ -139,49 +202,79 @@ union_lookup1(udvp, dvpp, vpp, cnp) * filesystems. */ tdvp = dvp; - *dvpp = dvp = dvp->v_mount->mnt_vnodecovered; - vput(tdvp); + dvp = dvp->v_mount->mnt_vnodecovered; VREF(dvp); + vput(tdvp); vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); } } + /* + * Set return dvp to be the upperdvp 'parent directory. + */ + *pdvp = dvp; + + /* + * If the VOP_LOOKUP call generates an error, tdvp is invalid and no + * changes will have been made to dvp, so we are set to return. + */ + error = VOP_LOOKUP(dvp, &tdvp, cnp); - if (error) + if (error) { + UDEBUG(("dvp %p error %d flags %lx\n", dvp, error, cnp->cn_flags)); + *vpp = NULL; return (error); + } /* * The parent directory will have been unlocked, unless lookup - * found the last component. In which case, re-lock the node - * here to allow it to be unlocked again (phew) in union_lookup. + * found the last component or if dvp == tdvp (tdvp must be locked). + * + * We want our dvp to remain locked and ref'd. We also want tdvp + * to remain locked and ref'd. */ - if (dvp != tdvp && !(cnp->cn_flags & ISLASTCN)) - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + UDEBUG(("parentdir %p result %p flag %lx\n", dvp, tdvp, cnp->cn_flags)); - dvp = tdvp; + if (dvp != tdvp && (cnp->cn_flags & ISLASTCN) == 0) + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); /* * Lastly check if the current node is a mount point in * which case walk up the mount hierarchy making sure not to * bump into the root of the mount tree (ie. dvp != udvp). 
+ * + * We use dvp as a temporary variable here, it is no longer related + * to the dvp above. However, we have to ensure that both *pdvp and + * tdvp are locked on return. */ - while (dvp != udvp && (dvp->v_type == VDIR) && - (mp = dvp->v_mountedhere)) { + + dvp = tdvp; + while ( + dvp != udvp && + (dvp->v_type == VDIR) && + (mp = dvp->v_mountedhere) + ) { + int relock_pdvp = 0; if (vfs_busy(mp, 0, 0, p)) continue; - error = VFS_ROOT(mp, &tdvp); + if (dvp == *pdvp) + relock_pdvp = 1; + vput(dvp); + dvp = NULL; + error = VFS_ROOT(mp, &dvp); + vfs_unbusy(mp, p); + + if (relock_pdvp) + vn_lock(*pdvp, LK_EXCLUSIVE | LK_RETRY, p); + if (error) { - vput(dvp); + *vpp = NULL; return (error); } - - vput(dvp); - dvp = tdvp; } - *vpp = dvp; return (0); } @@ -199,8 +292,8 @@ union_lookup(ap) int uerror, lerror; struct vnode *uppervp, *lowervp; struct vnode *upperdvp, *lowerdvp; - struct vnode *dvp = ap->a_dvp; - struct union_node *dun = VTOUNION(dvp); + struct vnode *dvp = ap->a_dvp; /* starting dir */ + struct union_node *dun = VTOUNION(dvp); /* associated union node */ struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; int lockparent = cnp->cn_flags & LOCKPARENT; @@ -209,44 +302,38 @@ union_lookup(ap) int iswhiteout; struct vattr va; + *ap->a_vpp = NULLVP; /* * Disallow write attemps to the filesystem mounted read-only. */ - if ((cnp->cn_flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && - (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) + if ((cnp->cn_flags & ISLASTCN) && + (dvp->v_mount->mnt_flag & MNT_RDONLY) && + (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { return (EROFS); - -#ifdef notyet - if (cnp->cn_namelen == 3 && - cnp->cn_nameptr[2] == '.' && - cnp->cn_nameptr[1] == '.' 
&& - cnp->cn_nameptr[0] == '.') { - dvp = *ap->a_vpp = LOWERVP(ap->a_dvp); - if (dvp == NULLVP) - return (ENOENT); - VREF(dvp); - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); - if (!lockparent || !(cnp->cn_flags & ISLASTCN)) - VOP_UNLOCK(ap->a_dvp, 0, p); - return (0); } -#endif + /* + * For any lookup's we do, always return with the parent locked + */ cnp->cn_flags |= LOCKPARENT; - upperdvp = dun->un_uppervp; lowerdvp = dun->un_lowervp; uppervp = NULLVP; lowervp = NULLVP; iswhiteout = 0; - if (cnp->cn_flags & ISDOTDOT) { - if (upperdvp != NULL) - VREF(upperdvp); - if (lowerdvp != NULL) - VREF(lowerdvp); - } + uerror = ENOENT; + lerror = ENOENT; + + /* + * Get a private lock on uppervp and a reference, effectively + * taking it out of the union_node's control. + * + * We must lock upperdvp while holding our lock on dvp + * to avoid a deadlock. + */ + upperdvp = union_lock_upper(dun, p); /* * do the lookup in the upper level. @@ -255,62 +342,64 @@ union_lookup(ap) * on and just return that vnode. */ if (upperdvp != NULLVP) { - FIXUP(dun, p); /* - * If we're doing `..' in the underlying filesystem, - * we must drop our lock on the union node before - * going up the tree in the lower file system--if we block - * on the lowervp lock, and that's held by someone else - * coming down the tree and who's waiting for our lock, - * we would be hosed. + * We do not have to worry about the DOTDOT case, we've + * already unlocked dvp. */ - if (cnp->cn_flags & ISDOTDOT) { - /* retain lock on underlying VP: */ - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(dvp, 0, p); - } - uerror = union_lookup1(um->um_uppervp, &upperdvp, - &uppervp, cnp); + UDEBUG(("A %p\n", upperdvp)); + + /* + * Do the lookup. We must supply a locked and referenced + * upperdvp to the function and will get a new locked and + * referenced upperdvp back with the old having been + * dereferenced. + * + * If an error is returned, uppervp will be NULLVP. 
If no + * error occurs, uppervp will be the locked and referenced + * return vnode or possibly NULL, depending on what is being + * requested. It is possible that the returned uppervp + * will be the same as upperdvp. + */ + uerror = union_lookup1(um->um_uppervp, &upperdvp, &uppervp, cnp); + UDEBUG(( + "uerror %d upperdvp %p %d/%d, uppervp %p ref=%d/lck=%d\n", + uerror, + upperdvp, + upperdvp->v_usecount, + VOP_ISLOCKED(upperdvp), + uppervp, + (uppervp ? uppervp->v_usecount : -99), + (uppervp ? VOP_ISLOCKED(uppervp) : -99) + )); + /* * Disallow write attemps to the filesystem mounted read-only. */ if (uerror == EJUSTRETURN && (cnp->cn_flags & ISLASTCN) && - (dvp->v_mount->mnt_flag & MNT_RDONLY) && - (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) { - if (!lockparent) - cnp->cn_flags &= ~LOCKPARENT; - return (EROFS); - } - - if (cnp->cn_flags & ISDOTDOT) { - if (dun->un_uppervp == upperdvp) { - /* - * We got the underlying bugger back locked... - * now take back the union node lock. Since we - * hold the uppervp lock, we can diddle union - * locking flags at will. :) - */ - dun->un_flags |= UN_ULOCK; - } - /* - * If upperdvp got swapped out, it means we did - * some mount point magic, and we do not have - * dun->un_uppervp locked currently--so we get it - * locked here (don't set the UN_ULOCK flag). - */ - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); - } - - /*if (uppervp == upperdvp) - dun->un_flags |= UN_KLOCK;*/ - - if (cnp->cn_consume != 0) { - *ap->a_vpp = uppervp; - if (!lockparent) - cnp->cn_flags &= ~LOCKPARENT; - error = uerror; + (dvp->v_mount->mnt_flag & MNT_RDONLY) && + (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) { + error = EROFS; goto out; } + + /* + * Special case. If cn_consume != 0 skip out. The result + * of the lookup is transferred to our return variable. If + * an error occurred we have to throw away the results.
+ */ + + if (cnp->cn_consume != 0) { + if ((error = uerror) == 0) { + *ap->a_vpp = uppervp; + uppervp = NULL; + } + goto out; + } + + /* + * Calculate whiteout, fall through + */ + if (uerror == ENOENT || uerror == EJUSTRETURN) { if (cnp->cn_flags & ISWHITEOUT) { iswhiteout = 1; @@ -321,8 +410,6 @@ union_lookup(ap) iswhiteout = 1; } } - } else { - uerror = ENOENT; } /* @@ -332,13 +419,14 @@ union_lookup(ap) * back from the upper layer and return the lower vnode * instead. */ + if (lowerdvp != NULLVP && !iswhiteout) { int nameiop; - vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, p); + UDEBUG(("B %p\n", lowerdvp)); /* - * Only do a LOOKUP on the bottom node, since + * Force only LOOKUPs on the lower node, since * we won't be making changes to it anyway. */ nameiop = cnp->cn_nameiop; @@ -347,42 +435,42 @@ union_lookup(ap) saved_cred = cnp->cn_cred; cnp->cn_cred = um->um_cred; } + /* * We shouldn't have to worry about locking interactions * between the lower layer and our union layer (w.r.t. * `..' processing) because we don't futz with lowervp * locks in the union-node instantiation code path. + * + * union_lookup1() requires lowervp to be locked on entry, + * and it will be unlocked on return. The ref count will + * not change. On return lowervp doesn't represent anything + * to us so we NULL it out. 
*/ - lerror = union_lookup1(um->um_lowervp, &lowerdvp, - &lowervp, cnp); + VREF(lowerdvp); + vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, p); + lerror = union_lookup1(um->um_lowervp, &lowerdvp, &lowervp, cnp); + if (lowerdvp == lowervp) + vrele(lowerdvp); + else + vput(lowerdvp); + lowerdvp = NULL; /* lowerdvp invalid after vput */ + if (um->um_op == UNMNT_BELOW) cnp->cn_cred = saved_cred; cnp->cn_nameiop = nameiop; - if (lowervp != lowerdvp) - VOP_UNLOCK(lowerdvp, 0, p); - if (cnp->cn_consume != 0 || lerror == EACCES) { - if (lerror == EACCES) - lowervp = NULLVP; - if (uppervp != NULLVP) { - if (uppervp == upperdvp) - vrele(uppervp); - else - vput(uppervp); - uppervp = NULLVP; + if ((error = lerror) == 0) { + *ap->a_vpp = lowervp; + lowervp = NULL; } - *ap->a_vpp = lowervp; - if (!lockparent) - cnp->cn_flags &= ~LOCKPARENT; - error = lerror; goto out; } } else { - lerror = ENOENT; + UDEBUG(("C %p\n", lowerdvp)); if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) { - lowervp = LOWERVP(dun->un_pvp); - if (lowervp != NULLVP) { + if ((lowervp = LOWERVP(dun->un_pvp)) != NULL) { VREF(lowervp); vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY, p); lerror = 0; @@ -390,35 +478,27 @@ union_lookup(ap) } } - if (!lockparent) - cnp->cn_flags &= ~LOCKPARENT; - /* - * at this point, we have uerror and lerror indicating - * possible errors with the lookups in the upper and lower - * layers. additionally, uppervp and lowervp are (locked) - * references to existing vnodes in the upper and lower layers. + * Ok. Now we have uerror, uppervp, upperdvp, lerror, and lowervp. * - * there are now three cases to consider. - * 1. if both layers returned an error, then return whatever - * error the upper layer generated. + * 1. If both layers returned an error, select the upper layer. * - * 2. if the top layer failed and the bottom layer succeeded - * then two subcases occur. - * a. 
the bottom vnode is not a directory, in which - * case just return a new union vnode referencing - * an empty top layer and the existing bottom layer. - * b. the bottom vnode is a directory, in which case - * create a new directory in the top-level and - * continue as in case 3. + * 2. If the upper layer failed and the bottom layer succeeded, + * two subcases occur: * - * 3. if the top layer succeeded then return a new union + * a. The bottom vnode is not a directory, in which case + * just return a new union vnode referencing an + * empty top layer and the existing bottom layer. + * + * b. The bottom vnode is a directory, in which case + * create a new directory in the top layer and + * fall through to case 3. + * + * 3. If the top layer succeeded then return a new union * vnode referencing whatever the new top layer and * whatever the bottom layer returned. */ - *ap->a_vpp = NULLVP; - /* case 1. */ if ((uerror != 0) && (lerror != 0)) { error = uerror; @@ -428,59 +508,126 @@ union_lookup(ap) /* case 2. */ if (uerror != 0 /* && (lerror == 0) */ ) { if (lowervp->v_type == VDIR) { /* case 2b. */ - dun->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(upperdvp, 0, p); + KASSERT(uppervp == NULL, ("uppervp unexpectedly non-NULL")); + /* + * oops, uppervp has a problem, we may have to shadow. + */ uerror = union_mkshadow(um, upperdvp, cnp, &uppervp); - vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY, p); - dun->un_flags |= UN_ULOCK; - if (uerror) { - if (lowervp != NULLVP) { - vput(lowervp); - lowervp = NULLVP; - } error = uerror; goto out; } } } - if (lowervp != NULLVP) + /* + * Must call union_allocvp with both the upper and lower vnodes + * referenced and the upper vnode locked. ap->a_vpp is returned + * referenced and locked. lowervp, uppervp, and upperdvp are + * absorbed by union_allocvp() whether it succeeds or fails. + * + * upperdvp is the parent directory of uppervp which may be + * different, depending on the path, from dvp->un_uppervp.
That's + * why it is a separate argument. Note that it must be unlocked. + * + * dvp must be locked on entry to the call and will be locked on + * return. + */ + + if (uppervp && uppervp != upperdvp) + VOP_UNLOCK(uppervp, 0, p); + if (lowervp) VOP_UNLOCK(lowervp, 0, p); + if (upperdvp) + VOP_UNLOCK(upperdvp, 0, p); error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp, uppervp, lowervp, 1); - if (error) { - if (uppervp != NULLVP) - vput(uppervp); - if (lowervp != NULLVP) - vrele(lowervp); - } else { - if (*ap->a_vpp != dvp) - if (!lockparent || !(cnp->cn_flags & ISLASTCN)) - VOP_UNLOCK(dvp, 0, p); -#ifdef DIAGNOSTIC - if (cnp->cn_namelen == 1 && - cnp->cn_nameptr[0] == '.' && - *ap->a_vpp != dvp) { - panic("union_lookup returning . (%p) not same as startdir (%p)", - ap->a_vpp, dvp); - } -#endif - } + UDEBUG(("Create %p = %p %p refs=%d\n", *ap->a_vpp, uppervp, lowervp, (*ap->a_vpp) ? ((*ap->a_vpp)->v_usecount) : -99)); + + uppervp = NULL; + upperdvp = NULL; + lowervp = NULL; + + /* + * Termination Code + * + * - put away any extra junk laying around. Note that lowervp + * (if not NULL) will never be the same as *ap->a_vp and + * neither will uppervp, because when we set that state we + * NULL-out lowervp or uppervp. On the otherhand, upperdvp + * may match uppervp or *ap->a_vpp. + * + * - relock/unlock dvp if appropriate. + */ out: - if (cnp->cn_flags & ISDOTDOT) { - if (upperdvp != NULL) - vrele(upperdvp); - if (lowerdvp != NULL) - vrele(lowerdvp); - } + if (upperdvp) { + if (upperdvp == uppervp || upperdvp == *ap->a_vpp) + vrele(upperdvp); + else + vput(upperdvp); + } + + if (uppervp) + vput(uppervp); + + if (lowervp) + vput(lowervp); + + /* + * Restore LOCKPARENT state + */ + + if (!lockparent) + cnp->cn_flags &= ~LOCKPARENT; + + UDEBUG(("Out %d vpp %p/%d lower %p upper %p\n", error, *ap->a_vpp, + ((*ap->a_vpp) ? (*ap->a_vpp)->v_usecount : -99), + lowervp, uppervp)); + + /* + * dvp lock state, determine whether to relock dvp. 
dvp is expected + * to be locked on return if: + * + * - there was an error (except not EJUSTRETURN), or + * - we hit the last component and lockparent is true + * + * dvp_is_locked is the current state of the dvp lock, not counting + * the possibility that *ap->a_vpp == dvp (in which case it is locked + * anyway). Note that *ap->a_vpp == dvp only if no error occured. + */ + + if (*ap->a_vpp != dvp) { + if ((error == 0 || error == EJUSTRETURN) && + (!lockparent || (cnp->cn_flags & ISLASTCN) == 0)) { + VOP_UNLOCK(dvp, 0, p); + } + } + + /* + * Diagnostics + */ + +#ifdef DIAGNOSTIC + if (cnp->cn_namelen == 1 && + cnp->cn_nameptr[0] == '.' && + *ap->a_vpp != dvp) { + panic("union_lookup returning . (%p) not same as startdir (%p)", ap->a_vpp, dvp); + } +#endif return (error); } +/* + * union_create: + * + * a_dvp is locked on entry and remains locked on return. a_vpp is returned + * locked if no error occurs, otherwise it is garbage. + */ + static int union_create(ap) struct vop_create_args /* { @@ -491,36 +638,27 @@ union_create(ap) } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); - struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + struct vnode *dvp; + int error = EROFS; - if (dvp != NULLVP) { + if ((dvp = union_lock_upper(dun, p)) != NULL) { struct vnode *vp; struct mount *mp; - int error; - FIXUP(dun, p); - - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap); - if (error) { - dun->un_flags |= UN_ULOCK; - return (error); + if (error == 0) { + mp = ap->a_dvp->v_mount; + VOP_UNLOCK(vp, 0, p); + UDEBUG(("ALLOCVP-1 FROM %p REFS %d\n", vp, vp->v_usecount)); + error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, + cnp, vp, NULLVP, 1); + UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vp->v_usecount)); } - - mp = ap->a_dvp->v_mount; - VOP_UNLOCK(dvp, 0, p); - error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp, - NULLVP, 1); - if (error) - 
vput(vp); - vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p); - return (error); + union_unlock_upper(dvp, p); } - - return (EROFS); + return (error); } static int @@ -533,15 +671,23 @@ union_whiteout(ap) { struct union_node *un = VTOUNION(ap->a_dvp); struct componentname *cnp = ap->a_cnp; - struct proc *p = cnp->cn_proc; + struct vnode *uppervp; + int error = EOPNOTSUPP; - if (un->un_uppervp == NULLVP) - return (EOPNOTSUPP); - - FIXUP(un, p); - return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags)); + if ((uppervp = union_lock_upper(un, cnp->cn_proc)) != NULLVP) { + error = VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags); + union_unlock_upper(uppervp, cnp->cn_proc); + } + return(error); } +/* + * union_mknod: + * + * a_dvp is locked on entry and should remain locked on return. + * a_vpp is garbage whether an error occurs or not. + */ + static int union_mknod(ap) struct vop_mknod_args /* { @@ -552,42 +698,28 @@ union_mknod(ap) } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); - struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; - struct proc *p = cnp->cn_proc; + struct vnode *dvp; + int error = EROFS; - if (dvp != NULLVP) { + if ((dvp = union_lock_upper(dun, cnp->cn_proc)) != NULL) { struct vnode *vp; - struct mount *mp; - int error; - - FIXUP(dun, p); - - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap); - if (error) { - dun->un_flags |= UN_ULOCK; - return (error); - } - - if (vp != NULLVP) { - mp = ap->a_dvp->v_mount; - VOP_UNLOCK(dvp, 0, p); - error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, - cnp, vp, NULLVP, 1); - if (error) - vput(vp); - vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p); - } else { - dun->un_flags |= UN_ULOCK; - } - return (error); + /* vp is garbage whether an error occurs or not */ + union_unlock_upper(dvp, cnp->cn_proc); } - - return (EROFS); + return (error); } +/* + * union_open: + * + * run open VOP.
When opening the underlying vnode we have to mimic + * vn_open. What we *really* need to do to avoid screwups if the + * open semantics change is to call vn_open(). For example, ufs blows + * up if you open a file but do not vmio it prior to writing. + */ + static int union_open(ap) struct vop_open_args /* { @@ -603,13 +735,18 @@ union_open(ap) int mode = ap->a_mode; struct ucred *cred = ap->a_cred; struct proc *p = ap->a_p; - int error; + int error = 0; + int tvpisupper = 1; /* * If there is an existing upper vp then simply open that. + * The upper vp takes precedence over the lower vp. When opening + * a lower vp for writing copy it to the uppervp and then open the + * uppervp. + * + * At the end of this section tvp will be left locked. */ - tvp = un->un_uppervp; - if (tvp == NULLVP) { + if ((tvp = union_lock_upper(un, p)) == NULLVP) { /* * If the lower vnode is being opened for writing, then * copy the file contents to the upper vnode and open that, @@ -617,30 +754,50 @@ union_open(ap) */ tvp = un->un_lowervp; if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) { - error = union_copyup(un, (mode&O_TRUNC) == 0, cred, p); - if (error == 0) - error = VOP_OPEN(un->un_uppervp, mode, cred, p); - return (error); + int docopy = !(mode & O_TRUNC); + error = union_copyup(un, docopy, cred, p); + tvp = union_lock_upper(un, p); + } else { + un->un_openl++; + VREF(tvp); + vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); + tvpisupper = 0; } - - /* - * Just open the lower vnode - */ - un->un_openl++; - vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_OPEN(tvp, mode, cred, p); - VOP_UNLOCK(tvp, 0, p); - - return (error); } - FIXUP(un, p); + /* + * We are holding the correct vnode, open it + */ - error = VOP_OPEN(tvp, mode, cred, p); + if (error == 0) + error = VOP_OPEN(tvp, mode, cred, p); + /* + * Absolutely necessary or UFS will blowup + */ + if (error == 0 && vn_canvmio(tvp) == TRUE) { + error = vfs_object_create(tvp, p, cred); + } + + /* + * Release any locks held + */ + if 
(tvpisupper) { + if (tvp) + union_unlock_upper(tvp, p); + } else { + vput(tvp); + } return (error); } +/* + * union_close: + * + * It is unclear whether a_vp is passed locked or unlocked. Whatever + * the case we do not change it. + */ + static int union_close(ap) struct vop_close_args /* { @@ -661,7 +818,6 @@ union_close(ap) --un->un_openl; vp = un->un_lowervp; } - ap->a_vp = vp; return (VCALL(vp, VOFFSET(vop_close), ap)); } @@ -688,12 +844,12 @@ union_access(ap) struct proc *p = ap->a_p; int error = EACCES; struct vnode *vp; - struct vnode *savedvp; /* * Disallow write attempts on filesystems mounted read-only. */ - if (ap->a_mode & VWRITE && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) { + if ((ap->a_mode & VWRITE) && + (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (ap->a_vp->v_type) { case VREG: case VDIR: @@ -703,19 +859,30 @@ union_access(ap) break; } } - if ((vp = un->un_uppervp) != NULLVP) { - FIXUP(un, p); + + if ((vp = union_lock_upper(un, p)) != NULLVP) { ap->a_vp = vp; - return (VCALL(vp, VOFFSET(vop_access), ap)); + error = VCALL(vp, VOFFSET(vop_access), ap); + union_unlock_upper(vp, p); + return(error); } if ((vp = un->un_lowervp) != NULLVP) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - savedvp = ap->a_vp; ap->a_vp = vp; + + /* + * Remove VWRITE from a_mode if our mount point is RW, because + * we want to allow writes and lowervp may be read-only. 
+ */ + if ((un->un_vnode->v_mount->mnt_flag & MNT_RDONLY) == 0) + ap->a_mode &= ~VWRITE; + error = VCALL(vp, VOFFSET(vop_access), ap); if (error == 0) { - struct union_mount *um = MOUNTTOUNIONMOUNT(savedvp->v_mount); + struct union_mount *um; + + um = MOUNTTOUNIONMOUNT(un->un_vnode->v_mount); if (um->um_op == UNMNT_BELOW) { ap->a_cred = um->um_cred; @@ -723,17 +890,26 @@ union_access(ap) } } VOP_UNLOCK(vp, 0, p); - if (error) - return (error); } - - return (error); + return(error); } /* * We handle getattr only to change the fsid and * track object sizes + * + * It's not clear whether VOP_GETATTR is to be + * called with the vnode locked or not. stat() calls + * it with (vp) locked, and fstat calls it with + * (vp) unlocked. + * + * Because of this we cannot use our normal locking functions + * if we do not intend to lock the main a_vp node. At the moment + * we are running without any specific locking at all, but beware + * to any programmer that care must be taken if locking is added + * to this function. */ + static int union_getattr(ap) struct vop_getattr_args /* { @@ -745,12 +921,10 @@ union_getattr(ap) { int error; struct union_node *un = VTOUNION(ap->a_vp); - struct vnode *vp = un->un_uppervp; - struct proc *p = ap->a_p; + struct vnode *vp; struct vattr *vap; struct vattr va; - /* * Some programs walk the filesystem hierarchy by counting * links to directories to avoid stat'ing all the time. @@ -762,22 +936,11 @@ union_getattr(ap) vap = ap->a_vap; - vp = un->un_uppervp; - if (vp != NULLVP) { - /* - * It's not clear whether VOP_GETATTR is to be - * called with the vnode locked or not. stat() calls - * it with (vp) locked, and fstat calls it with - * (vp) unlocked. - * In the mean time, compensate here by checking - * the union_node's lock flag. 
- if (un->un_flags & UN_LOCKED) - FIXUP(un, p); - + if ((vp = un->un_uppervp) != NULLVP) { error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p); if (error) return (error); + /* XXX isn't this dangerous without a lock? */ union_newsize(ap->a_vp, vap->va_size, VNOVAL); } @@ -794,12 +957,12 @@ union_getattr(ap) error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p); if (error) return (error); + /* XXX isn't this dangerous without a lock? */ union_newsize(ap->a_vp, VNOVAL, vap->va_size); } if ((vap != ap->a_vap) && (vap->va_type == VDIR)) ap->a_vap->va_nlink += vap->va_nlink; - return (0); } @@ -815,27 +978,28 @@ union_setattr(ap) struct union_node *un = VTOUNION(ap->a_vp); struct proc *p = ap->a_p; struct vattr *vap = ap->a_vap; + struct vnode *uppervp; int error; /* * Disallow write attempts on filesystems mounted read-only. */ if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) && - (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || - vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || - vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL)) + (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || + vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || + vap->va_mtime.tv_sec != VNOVAL || + vap->va_mode != (mode_t)VNOVAL)) { return (EROFS); + } /* * Handle case of truncating lower object to zero size, * by creating a zero length upper object. This is to * handle the case of open with O_TRUNC and O_CREAT. */ - if ((un->un_uppervp == NULLVP) && - /* assert(un->un_lowervp != NULLVP) */ - (un->un_lowervp->v_type == VREG)) { + if (un->un_uppervp == NULLVP && (un->un_lowervp->v_type == VREG)) { error = union_copyup(un, (ap->a_vap->va_size != 0), - ap->a_cred, ap->a_p); + ap->a_cred, ap->a_p); if (error) return (error); } @@ -844,19 +1008,45 @@ union_setattr(ap) * Try to set attributes in upper layer, * otherwise return read-only filesystem error.
*/ - if (un->un_uppervp != NULLVP) { - FIXUP(un, p); + error = EROFS; + if ((uppervp = union_lock_upper(un, p)) != NULLVP) { error = VOP_SETATTR(un->un_uppervp, ap->a_vap, ap->a_cred, ap->a_p); if ((error == 0) && (ap->a_vap->va_size != VNOVAL)) union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL); - } else { - error = EROFS; + union_unlock_upper(uppervp, p); } - return (error); } +/* + * union_getpages: + */ + +static int +union_getpages(struct vop_getpages_args *ap) +{ + int r; + + r = vnode_pager_generic_getpages(ap->a_vp, ap->a_m, + ap->a_count, ap->a_reqpage); + return(r); +} + +/* + * union_putpages: + */ + +static int +union_putpages(struct vop_putpages_args *ap) +{ + int r; + + r = vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, + ap->a_sync, ap->a_rtvals); + return(r); +} + static int union_read(ap) struct vop_read_args /* { @@ -866,18 +1056,19 @@ union_read(ap) struct ucred *a_cred; } */ *ap; { - int error; + struct union_node *un = VTOUNION(ap->a_vp); struct proc *p = ap->a_uio->uio_procp; - struct vnode *vp = OTHERVP(ap->a_vp); - int dolock = (vp == LOWERVP(ap->a_vp)); + struct vnode *uvp; + int error; - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_vp), p); - error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred); - if (dolock) - VOP_UNLOCK(vp, 0, p); + uvp = union_lock_other(un, p); + KASSERT(uvp != NULL, ("union_read: backing vnode missing!")); + + if (ap->a_vp->v_flag & VOBJBUF) + union_vm_coherency(ap->a_vp, ap->a_uio, 0); + + error = VOP_READ(uvp, ap->a_uio, ap->a_ioflag, ap->a_cred); + union_unlock_other(uvp, p); /* * XXX @@ -889,7 +1080,7 @@ union_read(ap) struct union_node *un = VTOUNION(ap->a_vp); off_t cur = ap->a_uio->uio_offset; - if (vp == un->un_uppervp) { + if (uvp == un->un_uppervp) { if (cur > un->un_uppersz) union_newsize(ap->a_vp, cur, VNOVAL); } else { @@ -897,7 +1088,6 @@ union_read(ap) union_newsize(ap->a_vp, VNOVAL, cur); } } - return (error); } @@ -910,17 +1100,36 @@ 
union_write(ap) struct ucred *a_cred; } */ *ap; { - int error; - struct vnode *vp; struct union_node *un = VTOUNION(ap->a_vp); struct proc *p = ap->a_uio->uio_procp; + struct vnode *uppervp; + int error; - vp = UPPERVP(ap->a_vp); - if (vp == NULLVP) + if ((uppervp = union_lock_upper(un, p)) == NULLVP) panic("union: missing upper layer in write"); - FIXUP(un, p); - error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred); + /* + * Since our VM pages are associated with our vnode rather than + * the real vnode, and since we do not run our reads and writes + * through our own VM cache, we have a VM/VFS coherency problem. + * We solve them by invalidating or flushing the associated VM + * pages prior to allowing a normal read or write to occur. + * + * VM-backed writes (UIO_NOCOPY) have to be converted to normal + * writes because we are not cache-coherent. Normal writes need + * to be made coherent with our VM-backing store, which we do by + * first flushing any dirty VM pages associated with the write + * range, and then destroying any clean VM pages associated with + * the write range.
+ */ + + if (ap->a_uio->uio_segflg == UIO_NOCOPY) { + ap->a_uio->uio_segflg = UIO_SYSSPACE; + } else if (ap->a_vp->v_flag & VOBJBUF) { + union_vm_coherency(ap->a_vp, ap->a_uio, 1); + } + + error = VOP_WRITE(uppervp, ap->a_uio, ap->a_ioflag, ap->a_cred); /* * the size of the underlying object may be changed by the @@ -932,7 +1141,7 @@ union_write(ap) if (cur > un->un_uppersz) union_newsize(ap->a_vp, cur, VNOVAL); } - + union_unlock_upper(uppervp, p); return (error); } @@ -945,7 +1154,7 @@ union_lease(ap) int a_flag; } */ *ap; { - register struct vnode *ovp = OTHERVP(ap->a_vp); + struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_lease), ap)); @@ -962,7 +1171,7 @@ union_ioctl(ap) struct proc *a_p; } */ *ap; { - register struct vnode *ovp = OTHERVP(ap->a_vp); + struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_ioctl), ap)); @@ -977,7 +1186,7 @@ union_poll(ap) struct proc *a_p; } */ *ap; { - register struct vnode *ovp = OTHERVP(ap->a_vp); + struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_poll), ap)); @@ -1010,7 +1219,7 @@ union_mmap(ap) struct proc *a_p; } */ *ap; { - register struct vnode *ovp = OTHERVP(ap->a_vp); + struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_mmap), ap)); @@ -1027,35 +1236,24 @@ union_fsync(ap) { int error = 0; struct proc *p = ap->a_p; - struct vnode *targetvp = OTHERVP(ap->a_vp); - struct union_node *un; - - if (targetvp != NULLVP) { - int dolock = (targetvp == LOWERVP(ap->a_vp)); - - un = VTOUNION(ap->a_vp); - if (dolock) - vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY, p); - else { - un = VTOUNION(ap->a_vp); - if ((un->un_flags & UN_ULOCK) == 0 && - targetvp->v_data != NULL && - ((struct lock *)targetvp->v_data)->lk_lockholder - == curproc->p_pid && - VOP_ISLOCKED(targetvp) != 0) - return 0; /* XXX */ - - FIXUP(un, p); - } + struct vnode *targetvp; + struct union_node *un = VTOUNION(ap->a_vp); + if 
((targetvp = union_lock_other(un, p)) != NULLVP) { error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_waitfor, p); - if (dolock) - VOP_UNLOCK(targetvp, 0, p); + union_unlock_other(targetvp, p); } return (error); } +/* + * union_remove: + * + * Remove the specified cnp. The dvp and vp are passed to us locked + * and must remain locked on return. + */ + static int union_remove(ap) struct vop_remove_args /* { @@ -1068,42 +1266,40 @@ union_remove(ap) struct union_node *un = VTOUNION(ap->a_vp); struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + struct vnode *uppervp; + struct vnode *upperdvp; int error; - if (dun->un_uppervp == NULLVP) + if ((upperdvp = union_lock_upper(dun, p)) == NULLVP) panic("union remove: null upper vnode"); - if (un->un_uppervp != NULLVP) { - struct vnode *dvp = dun->un_uppervp; - struct vnode *vp = un->un_uppervp; - - FIXUP(dun, p); - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); - FIXUP(un, p); - un->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_vp, 0, p); - + if ((uppervp = union_lock_upper(un, p)) != NULLVP) { if (union_dowhiteout(un, cnp->cn_cred, p)) cnp->cn_flags |= DOWHITEOUT; - error = VOP_REMOVE(dvp, vp, cnp); + error = VOP_REMOVE(upperdvp, uppervp, cnp); #if 0 /* XXX */ if (!error) union_removed_upper(un); #endif - dun->un_flags |= UN_ULOCK; - un->un_flags |= UN_ULOCK; + union_unlock_upper(uppervp, p); } else { - FIXUP(dun, p); error = union_mkwhiteout( - MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount), - dun->un_uppervp, ap->a_cnp, un->un_path); + MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount), + upperdvp, ap->a_cnp, un->un_path); } - + union_unlock_upper(upperdvp, p); return (error); } +/* + * union_link: + * + * tdvp will be locked on entry, vp will not be locked on entry. + * tdvp should remain locked on return and vp should remain unlocked + * on return. 
+ */ + static int union_link(ap) struct vop_link_args /* { @@ -1119,43 +1315,56 @@ union_link(ap) struct vnode *tdvp; int error = 0; - if (ap->a_tdvp->v_op != ap->a_vp->v_op) { vp = ap->a_vp; } else { struct union_node *tun = VTOUNION(ap->a_vp); + if (tun->un_uppervp == NULLVP) { vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p); +#if 0 if (dun->un_uppervp == tun->un_dirvp) { - dun->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(dun->un_uppervp, 0, p); + if (dun->un_flags & UN_ULOCK) { + dun->un_flags &= ~UN_ULOCK; + VOP_UNLOCK(dun->un_uppervp, 0, p); + } } +#endif error = union_copyup(tun, 1, cnp->cn_cred, p); +#if 0 if (dun->un_uppervp == tun->un_dirvp) { vn_lock(dun->un_uppervp, - LK_EXCLUSIVE | LK_RETRY, p); + LK_EXCLUSIVE | LK_RETRY, p); dun->un_flags |= UN_ULOCK; } +#endif VOP_UNLOCK(ap->a_vp, 0, p); } vp = tun->un_uppervp; } - tdvp = dun->un_uppervp; - if (tdvp == NULLVP) - error = EROFS; - if (error) return (error); - FIXUP(dun, p); - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_tdvp, 0, p); + /* + * Make sure upper is locked, then unlock the union directory we were + * called with to avoid a deadlock while we are calling VOP_LINK on + * the upper (with tdvp locked and vp not locked). Our ap->a_tdvp + * is expected to be locked on return. + */ - error = VOP_LINK(tdvp, vp, cnp); + if ((tdvp = union_lock_upper(dun, p)) == NULLVP) + return (EROFS); - dun->un_flags |= UN_ULOCK; + VOP_UNLOCK(ap->a_tdvp, 0, p); /* unlock calling node */ + error = VOP_LINK(tdvp, vp, cnp); /* call link on upper */ + /* + * We have to unlock tdvp prior to relocking our calling node in + * order to avoid a deadlock. + */ + union_unlock_upper(tdvp, p); + vn_lock(ap->a_tdvp, LK_EXCLUSIVE | LK_RETRY, p); return (error); } @@ -1171,12 +1380,16 @@ union_rename(ap) } */ *ap; { int error; - struct vnode *fdvp = ap->a_fdvp; struct vnode *fvp = ap->a_fvp; struct vnode *tdvp = ap->a_tdvp; struct vnode *tvp = ap->a_tvp; + /* + * Figure out what fdvp to pass to our upper or lower vnode. 
If we + * replace the fdvp, release the original one and ref the new one. + */ + if (fdvp->v_op == union_vnodeop_p) { /* always true */ struct union_node *un = VTOUNION(fdvp); if (un->un_uppervp == NULLVP) { @@ -1189,30 +1402,77 @@ union_rename(ap) error = EXDEV; goto bad; } - fdvp = un->un_uppervp; VREF(fdvp); vrele(ap->a_fdvp); } + /* + * Figure out what fvp to pass to our upper or lower vnode. If we + * replace the fvp, release the original one and ref the new one. + */ + if (fvp->v_op == union_vnodeop_p) { /* always true */ struct union_node *un = VTOUNION(fvp); +#if 0 + struct union_mount *um = MOUNTTOUNIONMOUNT(fvp->v_mount); +#endif + if (un->un_uppervp == NULLVP) { - /* XXX: should do a copyup */ - error = EXDEV; - goto bad; + switch(fvp->v_type) { + case VREG: + vn_lock(un->un_vnode, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_proc); + error = union_copyup(un, 1, ap->a_fcnp->cn_cred, ap->a_fcnp->cn_proc); + VOP_UNLOCK(un->un_vnode, 0, ap->a_fcnp->cn_proc); + if (error) + goto bad; + break; + case VDIR: + /* + * XXX not yet. + * + * There is only one way to rename a directory + * based in the lowervp, and that is to copy + * the entire directory hierarchy. Otherwise + * it would not last across a reboot. + */ +#if 0 + vrele(fvp); + fvp = NULL; + vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_proc); + error = union_mkshadow(um, fdvp, + ap->a_fcnp, &un->un_uppervp); + VOP_UNLOCK(fdvp, 0, ap->a_fcnp->cn_proc); + if (un->un_uppervp) + VOP_UNLOCK(un->un_uppervp, 0, ap->a_fcnp->cn_proc); + if (error) + goto bad; + break; +#endif + default: + error = EXDEV; + goto bad; + } } if (un->un_lowervp != NULLVP) ap->a_fcnp->cn_flags |= DOWHITEOUT; - fvp = un->un_uppervp; VREF(fvp); vrele(ap->a_fvp); } + /* + * Figure out what tdvp (destination directory) to pass to the + * lower level. If we replace it with uppervp, we need to vput the + * old one. 
The exclusive lock is transferred to what we will pass + * down in the VOP_RENAME and we replace uppervp with a simple + * reference. + */ + if (tdvp->v_op == union_vnodeop_p) { struct union_node *un = VTOUNION(tdvp); + if (un->un_uppervp == NULLVP) { /* * this should never happen in normal @@ -1224,32 +1484,52 @@ union_rename(ap) goto bad; } - tdvp = un->un_uppervp; - VREF(tdvp); - un->un_flags |= UN_KLOCK; + /* + * new tdvp is a lock and reference on uppervp, put away + * the old tdvp. + */ + tdvp = union_lock_upper(un, ap->a_tcnp->cn_proc); vput(ap->a_tdvp); } + /* + * Figure out what tvp (destination file) to pass to the + * lower level. + * + * If the uppervp file does not exist put away the (wrong) + * file and change tvp to NULL. + */ + if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) { struct union_node *un = VTOUNION(tvp); - tvp = un->un_uppervp; - if (tvp != NULLVP) { - VREF(tvp); - un->un_flags |= UN_KLOCK; - } + tvp = union_lock_upper(un, ap->a_tcnp->cn_proc); vput(ap->a_tvp); + /* note: tvp may be NULL */ } + /* + * VOP_RENAME releases/vputs prior to returning, so we have no + * cleanup to do. + */ + return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp)); + /* + * Error. We still have to release / vput the various elements.
+ */ + bad: vrele(fdvp); - vrele(fvp); + if (fvp) + vrele(fvp); vput(tdvp); - if (tvp != NULLVP) - vput(tvp); - + if (tvp != NULLVP) { + if (tvp != tdvp) + vput(tvp); + else + vrele(tvp); + } return (error); } @@ -1263,34 +1543,26 @@ union_mkdir(ap) } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); - struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + struct vnode *upperdvp; + int error = EROFS; - if (dvp != NULLVP) { + if ((upperdvp = union_lock_upper(dun, p)) != NULLVP) { struct vnode *vp; - int error; - FIXUP(dun, p); - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); - error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap); - if (error) { - dun->un_flags |= UN_ULOCK; - return (error); + error = VOP_MKDIR(upperdvp, &vp, cnp, ap->a_vap); + union_unlock_upper(upperdvp, p); + + if (error == 0) { + VOP_UNLOCK(vp, 0, p); + UDEBUG(("ALLOCVP-2 FROM %p REFS %d\n", vp, vp->v_usecount)); + error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, + ap->a_dvp, NULLVP, cnp, vp, NULLVP, 1); + UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vp->v_usecount)); } - - VOP_UNLOCK(dvp, 0, p); - error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp, - NULLVP, cnp, vp, NULLVP, 1); - if (error) - vput(vp); - vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p); - - return (error); } - - return (EROFS); + return (error); } static int @@ -1305,42 +1577,34 @@ union_rmdir(ap) struct union_node *un = VTOUNION(ap->a_vp); struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + struct vnode *upperdvp; + struct vnode *uppervp; int error; - if (dun->un_uppervp == NULLVP) + if ((upperdvp = union_lock_upper(dun, p)) == NULLVP) panic("union rmdir: null upper vnode"); - if (un->un_uppervp != NULLVP) { - struct vnode *dvp = dun->un_uppervp; - struct vnode *vp = un->un_uppervp; - - FIXUP(dun, p); - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); - FIXUP(un, p); - un->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_vp, 
0, p); - + if ((uppervp = union_lock_upper(un, p)) != NULLVP) { if (union_dowhiteout(un, cnp->cn_cred, p)) cnp->cn_flags |= DOWHITEOUT; - error = VOP_RMDIR(dvp, vp, ap->a_cnp); -#if 0 - /* XXX */ - if (!error) - union_removed_upper(un); -#endif - dun->un_flags |= UN_ULOCK; - un->un_flags |= UN_ULOCK; + error = VOP_RMDIR(upperdvp, uppervp, ap->a_cnp); + union_unlock_upper(uppervp, p); } else { - FIXUP(dun, p); error = union_mkwhiteout( - MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount), - dun->un_uppervp, ap->a_cnp, un->un_path); + MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount), + dun->un_uppervp, ap->a_cnp, un->un_path); } - + union_unlock_upper(upperdvp, p); return (error); } +/* + * union_symlink: + * + * dvp is locked on entry and remains locked on return. a_vpp is garbage + * (unused). + */ + static int union_symlink(ap) struct vop_symlink_args /* { @@ -1352,24 +1616,20 @@ union_symlink(ap) } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); - struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + struct vnode *dvp; + int error = EROFS; - if (dvp != NULLVP) { + if ((dvp = union_lock_upper(dun, p)) != NULLVP) { struct vnode *vp; - int error; - FIXUP(dun, p); - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); error = VOP_SYMLINK(dvp, &vp, cnp, ap->a_vap, ap->a_target); - dun->un_flags |= UN_ULOCK; + /* vp is garbage whether an error occurs or not */ *ap->a_vpp = NULLVP; - return (error); + union_unlock_upper(dvp, p); } - - return (EROFS); + return (error); } /* @@ -1391,15 +1651,16 @@ union_readdir(ap) } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); - struct vnode *uvp = un->un_uppervp; struct proc *p = ap->a_uio->uio_procp; + struct vnode *uvp; + int error = 0; - if (uvp == NULLVP) - return (0); - - FIXUP(un, p); - ap->a_vp = uvp; - return (VCALL(uvp, VOFFSET(vop_readdir), ap)); + if ((uvp = union_lock_upper(un, p)) != NULLVP) { + ap->a_vp = uvp; + error = VCALL(uvp, VOFFSET(vop_readdir), ap); + 
union_unlock_upper(uvp, p); + } + return(error); } static int @@ -1411,23 +1672,28 @@ union_readlink(ap) } */ *ap; { int error; + struct union_node *un = VTOUNION(ap->a_vp); struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; - struct vnode *vp = OTHERVP(ap->a_vp); - int dolock = (vp == LOWERVP(ap->a_vp)); + struct vnode *vp; + + vp = union_lock_other(un, p); + KASSERT(vp != NULL, ("union_readlink: backing vnode missing!")); - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_vp), p); ap->a_vp = vp; error = VCALL(vp, VOFFSET(vop_readlink), ap); - if (dolock) - VOP_UNLOCK(vp, 0, p); + union_unlock_other(vp, p); return (error); } +/* + * union_abortop: + * + * dvp is locked on entry and left locked on return + * + */ + static int union_abortop(ap) struct vop_abortop_args /* { @@ -1435,28 +1701,35 @@ union_abortop(ap) struct componentname *a_cnp; } */ *ap; { - int error; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; - struct vnode *vp = OTHERVP(ap->a_dvp); struct union_node *un = VTOUNION(ap->a_dvp); - int islocked = un->un_flags & UN_LOCKED; - int dolock = (vp == LOWERVP(ap->a_dvp)); + int islocked = VOP_ISLOCKED(ap->a_dvp); + struct vnode *vp; + int error; if (islocked) { - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_dvp), p); + vp = union_lock_other(un, p); + } else { + vp = OTHERVP(ap->a_dvp); } + KASSERT(vp != NULL, ("union_abortop: backing vnode missing!")); + ap->a_dvp = vp; error = VCALL(vp, VOFFSET(vop_abortop), ap); - if (islocked && dolock) - VOP_UNLOCK(vp, 0, p); + + if (islocked) + union_unlock_other(vp, p); return (error); } +/* + * union_inactive: + * + * Called with the vnode locked. We are expected to unlock the vnode. 
+ */ + static int union_inactive(ap) struct vop_inactive_args /* { @@ -1485,10 +1758,17 @@ union_inactive(ap) if (un->un_dircache != 0) { for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) vrele(*vpp); - free(un->un_dircache, M_TEMP); + free (un->un_dircache, M_TEMP); un->un_dircache = 0; } +#if 0 + if ((un->un_flags & UN_ULOCK) && un->un_uppervp) { + un->un_flags &= ~UN_ULOCK; + VOP_UNLOCK(un->un_uppervp, 0, p); + } +#endif + VOP_UNLOCK(vp, 0, p); if ((un->un_flags & UN_CACHED) == 0) @@ -1503,7 +1783,6 @@ union_reclaim(ap) struct vnode *a_vp; } */ *ap; { - union_freevp(ap->a_vp); return (0); @@ -1513,75 +1792,47 @@ static int union_lock(ap) struct vop_lock_args *ap; { +#if 0 struct vnode *vp = ap->a_vp; struct proc *p = ap->a_p; int flags = ap->a_flags; struct union_node *un; +#endif int error; - vop_nolock(ap); - /* - * Need to do real lockmgr-style locking here. - * in the mean time, draining won't work quite right, - * which could lead to a few race conditions. - * the following test was here, but is not quite right, we - * still need to take the lock: - if ((flags & LK_TYPE_MASK) == LK_DRAIN) - return (0); - */ - flags &= ~LK_INTERLOCK; - -start: + error = vop_stdlock(ap); +#if 0 un = VTOUNION(vp); - if (un->un_uppervp != NULLVP) { - if (((un->un_flags & UN_ULOCK) == 0) && - (vp->v_usecount != 0)) { - error = vn_lock(un->un_uppervp, flags, p); - if (error) - return (error); - un->un_flags |= UN_ULOCK; + if (error == 0) { + /* + * Lock the upper if it exists and this is an exclusive lock + * request. 
+ */ + if (un->un_uppervp != NULLVP && + (flags & LK_TYPE_MASK) == LK_EXCLUSIVE) { + if ((un->un_flags & UN_ULOCK) == 0 && vp->v_usecount) { + error = vn_lock(un->un_uppervp, flags, p); + if (error) { + struct vop_unlock_args uap = { 0 }; + uap.a_vp = ap->a_vp; + uap.a_flags = ap->a_flags; + uap.a_p = ap->a_p; + vop_stdunlock(&uap); + return (error); + } + un->un_flags |= UN_ULOCK; + } } -#ifdef DIAGNOSTIC - if (un->un_flags & UN_KLOCK) { - vprint("dangling upper lock", vp); - panic("union: dangling upper lock"); - } -#endif } - - if (un->un_flags & UN_LOCKED) { -#ifdef DIAGNOSTIC - if (curproc && un->un_pid == curproc->p_pid && - un->un_pid > -1 && curproc->p_pid > -1) - panic("union: locking against myself"); #endif - un->un_flags |= UN_WANT; - tsleep((caddr_t)&un->un_flags, PINOD, "unionlk2", 0); - goto start; - } - -#ifdef DIAGNOSTIC - if (curproc) - un->un_pid = curproc->p_pid; - else - un->un_pid = -1; -#endif - - un->un_flags |= UN_LOCKED; - return (0); + return (error); } /* - * When operations want to vput() a union node yet retain a lock on - * the upper vnode (say, to do some further operations like link(), - * mkdir(), ...), they set UN_KLOCK on the union node, then call - * vput() which calls VOP_UNLOCK() and comes here. union_unlock() - * unlocks the union node (leaving the upper vnode alone), clears the - * KLOCK flag, and then returns to vput(). The caller then does whatever - * is left to do with the upper vnode, and ensures that it gets unlocked. + * union_unlock: * - * If UN_KLOCK isn't set, then the upper vnode is unlocked here. + * Unlock our union node. This also unlocks uppervp. 
*/ static int union_unlock(ap) @@ -1592,36 +1843,38 @@ union_unlock(ap) } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); - struct proc *p = ap->a_p; + int error; -#ifdef DIAGNOSTIC - if ((un->un_flags & UN_LOCKED) == 0) - panic("union: unlock unlocked node"); - if (curproc && un->un_pid != curproc->p_pid && - curproc->p_pid > -1 && un->un_pid > -1) - panic("union: unlocking other process's union node"); -#endif + KASSERT((un->un_uppervp == NULL || un->un_uppervp->v_usecount > 0), ("uppervp usecount is 0")); - un->un_flags &= ~UN_LOCKED; + error = vop_stdunlock(ap); +#if 0 - if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK) - VOP_UNLOCK(un->un_uppervp, 0, p); + /* + * If no exclusive locks remain and we are holding an uppervp lock, + * remove the uppervp lock. + */ - un->un_flags &= ~(UN_ULOCK|UN_KLOCK); - - if (un->un_flags & UN_WANT) { - un->un_flags &= ~UN_WANT; - wakeup((caddr_t) &un->un_flags); + if ((un->un_flags & UN_ULOCK) && + lockstatus(&un->un_lock) != LK_EXCLUSIVE) { + un->un_flags &= ~UN_ULOCK; + VOP_UNLOCK(un->un_uppervp, LK_EXCLUSIVE, p); } - -#ifdef DIAGNOSTIC - un->un_pid = 0; #endif - vop_nounlock(ap); - - return (0); + return(error); } +/* + * union_bmap: + * + * There isn't much we can do. We cannot push through to the real vnode + * to get to the underlying device because this will bypass data + * cached by the real vnode. + * + * For some reason we cannot return the 'real' vnode either, it seems + * to blow up memory maps. 
+ */ + static int union_bmap(ap) struct vop_bmap_args /* { @@ -1633,21 +1886,7 @@ union_bmap(ap) int *a_runb; } */ *ap; { - int error; - struct proc *p = curproc; /* XXX */ - struct vnode *vp = OTHERVP(ap->a_vp); - int dolock = (vp == LOWERVP(ap->a_vp)); - - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_vp), p); - ap->a_vp = vp; - error = VCALL(vp, VOFFSET(vop_bmap), ap); - if (dolock) - VOP_UNLOCK(vp, 0, p); - - return (error); + return(EOPNOTSUPP); } static int @@ -1668,16 +1907,6 @@ union_print(ap) return (0); } -static int -union_islocked(ap) - struct vop_islocked_args /* { - struct vnode *a_vp; - } */ *ap; -{ - - return ((VTOUNION(ap->a_vp)->un_flags & UN_LOCKED) ? 1 : 0); -} - static int union_pathconf(ap) struct vop_pathconf_args /* { @@ -1688,17 +1917,15 @@ union_pathconf(ap) { int error; struct proc *p = curproc; /* XXX */ - struct vnode *vp = OTHERVP(ap->a_vp); - int dolock = (vp == LOWERVP(ap->a_vp)); + struct union_node *un = VTOUNION(ap->a_vp); + struct vnode *vp; + + vp = union_lock_other(un, p); + KASSERT(vp != NULL, ("union_pathconf: backing vnode missing!")); - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_vp), p); ap->a_vp = vp; error = VCALL(vp, VOFFSET(vop_pathconf), ap); - if (dolock) - VOP_UNLOCK(vp, 0, p); + union_unlock_other(vp, p); return (error); } @@ -1722,6 +1949,8 @@ union_advlock(ap) /* * XXX - vop_strategy must be hand coded because it has no + * YYY - and it is not coherent with anything + * * vnode in its arguments. * This goes away with a merged VM/buffer cache. 
*/ @@ -1742,7 +1971,6 @@ union_strategy(ap) (othervp == LOWERVP(bp->b_vp))) panic("union_strategy: writing to lowervp"); #endif - return (VOP_STRATEGY(othervp, bp)); } @@ -1759,10 +1987,12 @@ static struct vnodeopv_entry_desc union_vnodeop_entries[] = { { &vop_close_desc, (vop_t *) union_close }, { &vop_create_desc, (vop_t *) union_create }, { &vop_fsync_desc, (vop_t *) union_fsync }, + { &vop_getpages_desc, (vop_t *) union_getpages }, + { &vop_putpages_desc, (vop_t *) union_putpages }, { &vop_getattr_desc, (vop_t *) union_getattr }, { &vop_inactive_desc, (vop_t *) union_inactive }, { &vop_ioctl_desc, (vop_t *) union_ioctl }, - { &vop_islocked_desc, (vop_t *) union_islocked }, + { &vop_islocked_desc, (vop_t *) vop_stdislocked }, { &vop_lease_desc, (vop_t *) union_lease }, { &vop_link_desc, (vop_t *) union_link }, { &vop_lock_desc, (vop_t *) union_lock }, diff --git a/sys/miscfs/union/union.h b/sys/miscfs/union/union.h index 1fc5f996cab0..02a897101472 100644 --- a/sys/miscfs/union/union.h +++ b/sys/miscfs/union/union.h @@ -49,8 +49,8 @@ struct union_args { #define UNMNT_OPMASK 0x0003 struct union_mount { - struct vnode *um_uppervp; - struct vnode *um_lowervp; + struct vnode *um_uppervp; /* UN_ULOCK holds locking state */ + struct vnode *um_lowervp; /* Left unlocked */ struct ucred *um_cred; /* Credentials of user calling mount */ int um_cmode; /* cmask from mount process */ int um_op; /* Operation mode */ @@ -58,6 +58,10 @@ struct union_mount { #ifdef KERNEL +#ifndef DIAGNOSTIC +#define DIAGNOSTIC +#endif + /* * DEFDIRMODE is the mode bits used to create a shadow directory. */ @@ -67,9 +71,14 @@ struct union_mount { #define UN_FILEMODE ((VRWMODE)|(VRWMODE>>3)|(VRWMODE>>6)) /* - * A cache of vnode references + * A cache of vnode references (hangs off v_data) + * + * Placing un_lock as the first elements theoretically allows us to + * use the vop_stdlock functions. However, we need to make sure of + * certain side effects so we will still punch in our own code. 
*/ struct union_node { + struct lock un_lock; LIST_ENTRY(union_node) un_cache; /* Hash chain */ struct vnode *un_vnode; /* Back pointer */ struct vnode *un_uppervp; /* overlaying object */ @@ -79,6 +88,7 @@ struct union_node { char *un_path; /* saved component name */ int un_hash; /* saved un_path hash value */ int un_openl; /* # of opens on lowervp */ + int un_exclcnt; /* exclusive count */ unsigned int un_flags; struct vnode **un_dircache; /* cached union stack */ off_t un_uppersz; /* size of upper object */ @@ -88,14 +98,25 @@ struct union_node { #endif }; -#define UN_WANT 0x01 -#define UN_LOCKED 0x02 -#define UN_ULOCK 0x04 /* Upper node is locked */ -#define UN_KLOCK 0x08 /* Keep upper node locked on vput */ -#define UN_CACHED 0x10 /* In union cache */ +/* + * XXX UN_ULOCK - indicates that the uppervp is locked + * + * UN_CACHED - node is in the union cache + */ + +/*#define UN_ULOCK 0x04*/ /* Upper node is locked */ +#define UN_CACHED 0x10 /* In union cache */ + +/* + * Hash table locking flags + */ + +#define UNVP_WANT 0x01 +#define UNVP_LOCKED 0x02 extern int union_allocvp __P((struct vnode **, struct mount *, - struct vnode *, struct vnode *, + struct vnode *, + struct vnode *, struct componentname *, struct vnode *, struct vnode *, int)); extern int union_freevp __P((struct vnode *)); @@ -113,6 +134,7 @@ extern int union_cn_close __P((struct vnode *, int, struct ucred *, extern void union_removed_upper __P((struct union_node *un)); extern struct vnode *union_lowervp __P((struct vnode *)); extern void union_newsize __P((struct vnode *, off_t, off_t)); +extern void union_vm_coherency __P((struct vnode *, struct uio *, int)); extern int (*union_dircheckp) __P((struct proc *, struct vnode **, struct file *)); @@ -124,6 +146,11 @@ extern int (*union_dircheckp) __P((struct proc *, struct vnode **, #define UPPERVP(vp) (VTOUNION(vp)->un_uppervp) #define OTHERVP(vp) (UPPERVP(vp) ? 
UPPERVP(vp) : LOWERVP(vp)) +#define UDEBUG(x) if (uniondebug) printf x +#define UDEBUG_ENABLED 1 + extern vop_t **union_vnodeop_p; extern struct vfsops union_vfsops; +extern int uniondebug; + #endif /* KERNEL */ diff --git a/sys/miscfs/union/union_subr.c b/sys/miscfs/union/union_subr.c index ed09a65fbb4a..c03153c4894b 100644 --- a/sys/miscfs/union/union_subr.c +++ b/sys/miscfs/union/union_subr.c @@ -53,6 +53,7 @@ #include #include /* for vnode_pager_setsize */ #include +#include /* for vm cache coherency */ #include #include @@ -97,7 +98,7 @@ union_init() for (i = 0; i < NHASH; i++) LIST_INIT(&unhead[i]); - bzero((caddr_t) unvplock, sizeof(unvplock)); + bzero((caddr_t)unvplock, sizeof(unvplock)); return (0); } @@ -105,15 +106,12 @@ static int union_list_lock(ix) int ix; { - - if (unvplock[ix] & UN_LOCKED) { - unvplock[ix] |= UN_WANT; + if (unvplock[ix] & UNVP_LOCKED) { + unvplock[ix] |= UNVP_WANT; (void) tsleep((caddr_t) &unvplock[ix], PINOD, "unllck", 0); return (1); } - - unvplock[ix] |= UN_LOCKED; - + unvplock[ix] |= UNVP_LOCKED; return (0); } @@ -121,15 +119,25 @@ static void union_list_unlock(ix) int ix; { + unvplock[ix] &= ~UNVP_LOCKED; - unvplock[ix] &= ~UN_LOCKED; - - if (unvplock[ix] & UN_WANT) { - unvplock[ix] &= ~UN_WANT; + if (unvplock[ix] & UNVP_WANT) { + unvplock[ix] &= ~UNVP_WANT; wakeup((caddr_t) &unvplock[ix]); } } +/* + * union_updatevp: + * + * The uppervp, if not NULL, must be referenced and not locked by us + * The lowervp, if not NULL, must be referenced. + * + * if uppervp and lowervp match pointers already installed, nothing + * happens. The passed vp's (when matching) are not adjusted. This + * routine may only be called by union_newupper() and union_newlower(). 
+ */ + static void union_updatevp(un, uppervp, lowervp) struct union_node *un; @@ -153,9 +161,10 @@ union_updatevp(un, uppervp, lowervp) uhash = nhash; } - if (lhash != uhash) + if (lhash != uhash) { while (union_list_lock(lhash)) continue; + } while (union_list_lock(uhash)) continue; @@ -177,10 +186,6 @@ union_updatevp(un, uppervp, lowervp) free(un->un_path, M_TEMP); un->un_path = 0; } - if (un->un_dirvp) { - vrele(un->un_dirvp); - un->un_dirvp = NULLVP; - } } un->un_lowervp = lowervp; un->un_lowersz = VNOVAL; @@ -189,7 +194,6 @@ union_updatevp(un, uppervp, lowervp) if (un->un_uppervp != uppervp) { if (un->un_uppervp) vrele(un->un_uppervp); - un->un_uppervp = uppervp; un->un_uppersz = VNOVAL; } @@ -202,21 +206,30 @@ union_updatevp(un, uppervp, lowervp) union_list_unlock(nhash); } +/* + * Set a new lowervp. The passed lowervp must be referenced and will be + * stored in the vp in a referenced state. + */ + static void union_newlower(un, lowervp) struct union_node *un; struct vnode *lowervp; { - union_updatevp(un, un->un_uppervp, lowervp); } +/* + * Set a new uppervp. The passed uppervp must be locked and will be + * stored in the vp in a locked state. The caller should not unlock + * uppervp. + */ + static void union_newupper(un, uppervp) struct union_node *un; struct vnode *uppervp; { - union_updatevp(un, uppervp, un->un_lowervp); } @@ -253,27 +266,51 @@ union_newsize(vp, uppersz, lowersz) } if (sz != VNOVAL) { -#ifdef DEBUG - printf("union: %s size now %ld\n", - uppersz != VNOVAL ? "upper" : "lower", (long) sz); -#endif + UDEBUG(("union: %s size now %ld\n", + (uppersz != VNOVAL ? "upper" : "lower"), (long)sz)); vnode_pager_setsize(vp, sz); } } /* - * allocate a union_node/vnode pair. the vnode is - * referenced and locked. the new vnode is returned - * via (vpp). 
(mp) is the mountpoint of the union filesystem, - * (dvp) is the parent directory where the upper layer object - * should exist (but doesn't) and (cnp) is the componentname - * information which is partially copied to allow the upper - * layer object to be created at a later time. (uppervp) - * and (lowervp) reference the upper and lower layer objects - * being mapped. either, but not both, can be nil. - * if supplied, (uppervp) is locked. - * the reference is either maintained in the new union_node - * object which is allocated, or they are vrele'd. + * union_allocvp: allocate a union_node and associate it with a + * parent union_node and one or two vnodes. + * + * vpp Holds the returned vnode locked and referenced if no + * error occurs. + * + * mp Holds the mount point. mp may or may not be busied. + * allocvp makes no changes to mp. + * + * dvp Holds the parent union_node to the one we wish to create. + * XXX may only be used to traverse an uncopied lowervp-based + * tree? XXX + * + * dvp may or may not be locked. allocvp makes no changes + * to dvp. + * + * upperdvp Holds the parent vnode to uppervp, generally used along + * with path component information to create a shadow of + * lowervp when uppervp does not exist. + * + * upperdvp is referenced but unlocked on entry, and will be + * dereferenced on return. + * + * uppervp Holds the new uppervp vnode to be stored in the + * union_node we are allocating. uppervp is referenced but + * not locked, and will be dereferenced on return. + * + * lowervp Holds the new lowervp vnode to be stored in the + * union_node we are allocating. lowervp is referenced but + * not locked, and will be dereferenced on return. + * + * cnp Holds path component information to be coupled with + * lowervp and upperdvp to allow unionfs to create an uppervp + * later on. Only used if lowervp is valid. The contents + * of cnp are only valid for the duration of the call.
+ * + * docache Determine whether this node should be entered in the + * cache or whether it should be destroyed as soon as possible. * * all union_nodes are maintained on a singly-linked * list. new nodes are only allocated when they cannot @@ -292,12 +329,13 @@ union_newsize(vp, uppersz, lowersz) * zero references to it and so it needs to removed from * the vnode free list. */ + int -union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) +union_allocvp(vpp, mp, dvp, upperdvp, cnp, uppervp, lowervp, docache) struct vnode **vpp; struct mount *mp; - struct vnode *undvp; /* parent union vnode */ - struct vnode *dvp; /* may be null */ + struct vnode *dvp; /* parent union vnode */ + struct vnode *upperdvp; /* parent vnode of uppervp */ struct componentname *cnp; /* may be null */ struct vnode *uppervp; /* may be null */ struct vnode *lowervp; /* may be null */ @@ -307,6 +345,7 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) struct union_node *un = 0; struct vnode *xlowervp = NULLVP; struct union_mount *um = MOUNTTOUNIONMOUNT(mp); + struct proc *p = (cnp) ? cnp->cn_proc : curproc; int hash = 0; int vflag; int try; @@ -382,65 +421,76 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) if (un) { /* - * Obtain a lock on the union_node. - * uppervp is locked, though un->un_uppervp - * may not be. this doesn't break the locking - * hierarchy since in the case that un->un_uppervp - * is not yet locked it will be vrele'd and replaced - * with uppervp. + * Obtain a lock on the union_node. Everything is unlocked + * except for dvp, so check that case. If they match, our + * new un is already locked. Otherwise we have to lock our + * new un. + * + * A potential deadlock situation occurs when we are holding + * one lock while trying to get another. We must follow + * strict ordering rules to avoid it. We try to locate dvp + * by scanning up from un_vnode, since the most likely + * scenario is un being under dvp. 
*/ - if ((dvp != NULLVP) && (uppervp == dvp)) { - /* - * Access ``.'', so (un) will already - * be locked. Since this process has - * the lock on (uppervp) no other - * process can hold the lock on (un). - */ -#ifdef DIAGNOSTIC - if ((un->un_flags & UN_LOCKED) == 0) - panic("union: . not locked"); - else if (curproc && un->un_pid != curproc->p_pid && - un->un_pid > -1 && curproc->p_pid > -1) - panic("union: allocvp not lock owner"); -#endif - } else { - if (un->un_flags & UN_LOCKED) { - vrele(UNIONTOV(un)); - un->un_flags |= UN_WANT; - (void) tsleep((caddr_t) &un->un_flags, PINOD, "unalvp", 0); - goto loop; - } - un->un_flags |= UN_LOCKED; + if (dvp && un->un_vnode != dvp) { + struct vnode *scan = un->un_vnode; -#ifdef DIAGNOSTIC - if (curproc) - un->un_pid = curproc->p_pid; - else - un->un_pid = -1; -#endif + do { + scan = VTOUNION(scan)->un_pvp; + } while (scan && scan->v_tag == VT_UNION && scan != dvp); + if (scan != dvp) { + /* + * our new un is above dvp (we never saw dvp + * while moving up the tree). + */ + VREF(dvp); + VOP_UNLOCK(dvp, 0, p); + error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p); + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + vrele(dvp); + } else { + /* + * our new un is under dvp + */ + error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p); + } + } else if (dvp == NULLVP) { + /* + * dvp is NULL, we need to lock un. + */ + error = vn_lock(un->un_vnode, LK_EXCLUSIVE, p); + } else { + /* + * dvp == un->un_vnode, we are already locked. + */ + error = 0; } - /* - * At this point, the union_node is locked, - * un->un_uppervp may not be locked, and uppervp - * is locked or nil. - */ + if (error) + goto loop; /* - * Save information about the upper layer. + * At this point, the union_node is locked and referenced. + * + * uppervp is locked and referenced or NULL, lowervp is + * referenced or NULL. */ + UDEBUG(("Modify existing un %p vn %p upper %p(refs %d) -> %p(refs %d)\n", + un, un->un_vnode, un->un_uppervp, + (un->un_uppervp ? 
un->un_uppervp->v_usecount : -99), + uppervp, + (uppervp ? uppervp->v_usecount : -99) + )); + if (uppervp != un->un_uppervp) { + KASSERT(uppervp == NULL || uppervp->v_usecount > 0, ("union_allocvp: too few refs %d (at least 1 required) on uppervp", uppervp->v_usecount)); union_newupper(un, uppervp); } else if (uppervp) { + KASSERT(uppervp->v_usecount > 1, ("union_allocvp: too few refs %d (at least 2 required) on uppervp", uppervp->v_usecount)); vrele(uppervp); } - if (un->un_uppervp) { - un->un_flags |= UN_ULOCK; - un->un_flags &= ~UN_KLOCK; - } - /* * Save information about the lower layer. * This needs to keep track of pathname @@ -456,12 +506,22 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen); un->un_path[cnp->cn_namelen] = '\0'; - VREF(dvp); - un->un_dirvp = dvp; } } else if (lowervp) { vrele(lowervp); } + + /* + * and upperdvp + */ + if (upperdvp != un->un_dirvp) { + if (un->un_dirvp) + vrele(un->un_dirvp); + un->un_dirvp = upperdvp; + } else if (upperdvp) { + vrele(upperdvp); + } + *vpp = UNIONTOV(un); return (0); } @@ -477,17 +537,22 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) goto loop; } + /* + * Create new node rather then replace old node + */ + error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp); if (error) { - if (uppervp) { - if (dvp == uppervp) - vrele(uppervp); - else - vput(uppervp); - } + /* + * If an error occurs clear out vnodes. 
+ */ if (lowervp) vrele(lowervp); - + if (uppervp) + vrele(uppervp); + if (upperdvp) + vrele(upperdvp); + *vpp = NULL; goto out; } @@ -499,37 +564,34 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) (*vpp)->v_type = uppervp->v_type; else (*vpp)->v_type = lowervp->v_type; + un = VTOUNION(*vpp); + bzero(un, sizeof(*un)); + + lockinit(&un->un_lock, PVFS, "unlock", 0, 0); + vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p); + un->un_vnode = *vpp; un->un_uppervp = uppervp; un->un_uppersz = VNOVAL; un->un_lowervp = lowervp; un->un_lowersz = VNOVAL; - un->un_pvp = undvp; - if (undvp != NULLVP) - VREF(undvp); + un->un_dirvp = upperdvp; + un->un_pvp = dvp; /* only parent dir in new allocation */ + if (dvp != NULLVP) + VREF(dvp); un->un_dircache = 0; un->un_openl = 0; - un->un_flags = UN_LOCKED; - if (un->un_uppervp) - un->un_flags |= UN_ULOCK; -#ifdef DIAGNOSTIC - if (curproc) - un->un_pid = curproc->p_pid; - else - un->un_pid = -1; -#endif + if (cnp && (lowervp != NULLVP)) { un->un_hash = cnp->cn_hash; un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK); bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen); un->un_path[cnp->cn_namelen] = '\0'; - VREF(dvp); - un->un_dirvp = dvp; } else { un->un_hash = 0; un->un_path = 0; - un->un_dirvp = 0; + un->un_dirvp = NULL; } if (docache) { @@ -537,10 +599,10 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) un->un_flags |= UN_CACHED; } +out: if (xlowervp) vrele(xlowervp); -out: if (docache) union_list_unlock(hash); @@ -558,16 +620,26 @@ union_freevp(vp) LIST_REMOVE(un, un_cache); } - if (un->un_pvp != NULLVP) + if (un->un_pvp != NULLVP) { vrele(un->un_pvp); - if (un->un_uppervp != NULLVP) + un->un_pvp = NULL; + } + if (un->un_uppervp != NULLVP) { vrele(un->un_uppervp); - if (un->un_lowervp != NULLVP) + un->un_uppervp = NULL; + } + if (un->un_lowervp != NULLVP) { vrele(un->un_lowervp); - if (un->un_dirvp != NULLVP) + un->un_lowervp = NULL; + } + if (un->un_dirvp != NULLVP) { vrele(un->un_dirvp); 
- if (un->un_path) + un->un_dirvp = NULL; + } + if (un->un_path) { free(un->un_path, M_TEMP); + un->un_path = NULL; + } FREE(vp->v_data, M_TEMP); vp->v_data = 0; @@ -579,6 +651,9 @@ union_freevp(vp) * copyfile. copy the vnode (fvp) to the vnode (tvp) * using a sequence of reads and writes. both (fvp) * and (tvp) are locked on entry and exit. + * + * fvp and tvp are both exclusive locked on call, but their refcount's + * haven't been bumped at all. */ static int union_copyfile(fvp, tvp, cred, p) @@ -600,48 +675,62 @@ union_copyfile(fvp, tvp, cred, p) * give up at the first sign of trouble. */ + bzero(&uio, sizeof(uio)); + uio.uio_procp = p; uio.uio_segflg = UIO_SYSSPACE; uio.uio_offset = 0; - VOP_UNLOCK(fvp, 0, p); /* XXX */ VOP_LEASE(fvp, p, cred, LEASE_READ); - vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ - VOP_UNLOCK(tvp, 0, p); /* XXX */ VOP_LEASE(tvp, p, cred, LEASE_WRITE); - vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK); /* ugly loop follows... */ do { off_t offset = uio.uio_offset; + int count; + int bufoffset; + /* + * Setup for big read + */ uio.uio_iov = &iov; uio.uio_iovcnt = 1; iov.iov_base = buf; iov.iov_len = MAXBSIZE; uio.uio_resid = iov.iov_len; uio.uio_rw = UIO_READ; - error = VOP_READ(fvp, &uio, 0, cred); - if (error == 0) { + if ((error = VOP_READ(fvp, &uio, 0, cred)) != 0) + break; + + /* + * Get bytes read, handle read eof case and setup for + * write loop + */ + if ((count = MAXBSIZE - uio.uio_resid) == 0) + break; + bufoffset = 0; + + /* + * Write until an error occurs or our buffer has been + * exhausted, then update the offset for the next read. 
+ */ + while (bufoffset < count) { uio.uio_iov = &iov; uio.uio_iovcnt = 1; - iov.iov_base = buf; - iov.iov_len = MAXBSIZE - uio.uio_resid; - uio.uio_offset = offset; + iov.iov_base = buf + bufoffset; + iov.iov_len = count - bufoffset; + uio.uio_offset = offset + bufoffset; uio.uio_rw = UIO_WRITE; uio.uio_resid = iov.iov_len; - if (uio.uio_resid == 0) + if ((error = VOP_WRITE(tvp, &uio, 0, cred)) != 0) break; - - do { - error = VOP_WRITE(tvp, &uio, 0, cred); - } while ((uio.uio_resid > 0) && (error == 0)); + bufoffset += (count - bufoffset) - uio.uio_resid; } - + uio.uio_offset = offset + bufoffset; } while (error == 0); free(buf, M_TEMP); @@ -649,9 +738,10 @@ union_copyfile(fvp, tvp, cred, p) } /* - * (un) is assumed to be locked on entry and remains - * locked on exit. + * + * un's vnode is assumed to be locked on entry and remains locked on exit. */ + int union_copyup(un, docopy, cred, p) struct union_node *un; @@ -676,12 +766,9 @@ union_copyup(un, docopy, cred, p) if (error) return (error); - /* at this point, uppervp is locked */ - union_newupper(un, uvp); - un->un_flags |= UN_ULOCK; - lvp = un->un_lowervp; + KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount)); if (docopy) { /* * XX - should not ignore errors @@ -689,23 +776,22 @@ union_copyup(un, docopy, cred, p) */ vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_OPEN(lvp, FREAD, cred, p); + if (error == 0 && vn_canvmio(lvp) == TRUE) + error = vfs_object_create(lvp, p, cred); if (error == 0) { error = union_copyfile(lvp, uvp, cred, p); VOP_UNLOCK(lvp, 0, p); (void) VOP_CLOSE(lvp, FREAD, cred, p); } -#ifdef DEBUG if (error == 0) - uprintf("union: copied up %s\n", un->un_path); -#endif + UDEBUG(("union: copied up %s\n", un->un_path)); } - un->un_flags &= ~UN_ULOCK; VOP_UNLOCK(uvp, 0, p); + union_newupper(un, uvp); + KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount)); union_vn_close(uvp, FWRITE, cred, p); - vn_lock(uvp, LK_EXCLUSIVE | LK_RETRY, p); - 
un->un_flags |= UN_ULOCK; - + KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount)); /* * Subsequent IOs will go to the top layer, so * call close on the lower vnode and open on the @@ -721,6 +807,8 @@ union_copyup(un, docopy, cred, p) (void) VOP_CLOSE(lvp, FREAD, cred, p); (void) VOP_OPEN(uvp, FREAD, cred, p); } + if (vn_canvmio(uvp) == TRUE) + error = vfs_object_create(uvp, p, cred); un->un_openl = 0; } @@ -728,6 +816,17 @@ union_copyup(un, docopy, cred, p) } +/* + * union_relookup: + * + * dvp should be locked on entry and will be locked on return. No + * net change in the ref count will occur. + * + * If an error is returned, *vpp will be invalid, otherwise it + * will hold a locked, referenced vnode. If *vpp == dvp then + * remember that only one exclusive lock is held. + */ + static int union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) struct union_mount *um; @@ -757,7 +856,7 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) cn->cn_pnbuf[cn->cn_namelen] = '\0'; cn->cn_nameiop = CREATE; - cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN); + cn->cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN); cn->cn_proc = cnp->cn_proc; if (um->um_op == UNMNT_ABOVE) cn->cn_cred = cnp->cn_cred; @@ -768,15 +867,30 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) cn->cn_consume = cnp->cn_consume; VREF(dvp); - error = relookup(dvp, vpp, cn); - if (!error) - vrele(dvp); - else { + VOP_UNLOCK(dvp, 0, cnp->cn_proc); + + /* + * Pass dvp unlocked and referenced on call to relookup(). + * + * If an error occurs, dvp will be returned unlocked and dereferenced. + */ + + if ((error = relookup(dvp, vpp, cn)) != 0) { zfree(namei_zone, cn->cn_pnbuf); cn->cn_pnbuf = NULL; + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, cnp->cn_proc); + return(error); } - return (error); + /* + * If no error occurs, dvp will be returned locked with the reference + * left as before, and vpp will be returned referenced and locked. 
+ * + * We want to return with dvp as it was passed to us, so we get + * rid of our reference. + */ + vrele(dvp); + return (0); } /* @@ -785,11 +899,11 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) * * (um) points to the union mount structure for access to the * the mounting process's credentials. - * (dvp) is the directory in which to create the shadow directory. - * it is unlocked on entry and exit. + * (dvp) is the directory in which to create the shadow directory, + * it is locked (but not ref'd) on entry and return. * (cnp) is the componentname to be created. * (vpp) is the returned newly created shadow directory, which - * is returned locked. + * is returned locked and ref'd */ int union_mkshadow(um, dvp, cnp, vpp) @@ -810,8 +924,10 @@ union_mkshadow(um, dvp, cnp, vpp) if (*vpp) { VOP_ABORTOP(dvp, &cn); - VOP_UNLOCK(dvp, 0, p); - vrele(*vpp); + if (dvp == *vpp) + vrele(*vpp); + else + vput(*vpp); *vpp = NULLVP; return (EEXIST); } @@ -832,7 +948,7 @@ union_mkshadow(um, dvp, cnp, vpp) VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE); error = VOP_MKDIR(dvp, vpp, &cn, &va); - vput(dvp); + /*vput(dvp);*/ return (error); } @@ -842,7 +958,7 @@ union_mkshadow(um, dvp, cnp, vpp) * (um) points to the union mount structure for access to the * the mounting process's credentials. * (dvp) is the directory in which to create the whiteout. - * it is locked on entry and exit. + * it is locked on entry and return. * (cnp) is the componentname to be created. 
*/ int @@ -857,17 +973,16 @@ union_mkwhiteout(um, dvp, cnp, path) struct vnode *wvp; struct componentname cn; - VOP_UNLOCK(dvp, 0, p); error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path)); - if (error) { - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + if (error) return (error); - } if (wvp) { VOP_ABORTOP(dvp, &cn); - vrele(dvp); - vrele(wvp); + if (wvp == dvp) + vrele(wvp); + else + vput(wvp); return (EEXIST); } @@ -877,9 +992,6 @@ union_mkwhiteout(um, dvp, cnp, path) error = VOP_WHITEOUT(dvp, &cn, CREATE); if (error) VOP_ABORTOP(dvp, &cn); - - vrele(dvp); - return (error); } @@ -890,6 +1002,12 @@ union_mkwhiteout(um, dvp, cnp, path) * the problem with calling namei is that a) it locks too many * things, and b) it doesn't start at the "right" directory, * whereas relookup is told where to start. + * + * On entry, the vnode associated with un is locked. It remains locked + * on return. + * + * If no error occurs, *vpp contains a locked referenced vnode for your + * use. If an error occurs *vpp is undefined. */ static int union_vn_create(vpp, un, p) @@ -921,26 +1039,34 @@ union_vn_create(vpp, un, p) cn.cn_pnbuf = zalloc(namei_zone); bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1); cn.cn_nameiop = CREATE; - cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN); + cn.cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN); cn.cn_proc = p; cn.cn_cred = p->p_ucred; cn.cn_nameptr = cn.cn_pnbuf; cn.cn_hash = un->un_hash; cn.cn_consume = 0; + /* + * Pass dvp unlocked and referenced on call to relookup(). + * + * If an error occurs, dvp will be returned unlocked and dereferenced. + */ VREF(un->un_dirvp); error = relookup(un->un_dirvp, &vp, &cn); if (error) return (error); - vrele(un->un_dirvp); + /* + * If no error occurs, dvp will be returned locked with the reference + * left as before, and vpp will be returned referenced and locked. 
+ */ if (vp) { + vput(un->un_dirvp); VOP_ABORTOP(un->un_dirvp, &cn); - if (un->un_dirvp == vp) - vrele(un->un_dirvp); + if (vp == un->un_dirvp) + vrele(vp); else - vput(un->un_dirvp); - vrele(vp); + vput(vp); return (EEXIST); } @@ -964,11 +1090,12 @@ union_vn_create(vpp, un, p) return (error); error = VOP_OPEN(vp, fmode, cred, p); + if (error == 0 && vn_canvmio(vp) == TRUE) + error = vfs_object_create(vp, p, cred); if (error) { vput(vp); return (error); } - vp->v_writecount++; *vpp = vp; return (0); @@ -987,6 +1114,14 @@ union_vn_close(vp, fmode, cred, p) return (VOP_CLOSE(vp, fmode, cred, p)); } +#if 0 + +/* + * union_removed_upper: + * + * called with union_node unlocked. XXX + */ + void union_removed_upper(un) struct union_node *un; @@ -999,9 +1134,7 @@ union_removed_upper(un) * union node will have neither uppervp nor lowervp. We remove * the union node from cache, so that it will not be referrenced. */ -#if 0 union_newupper(un, NULLVP); -#endif if (un->un_dircache != 0) { for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) vrele(*vpp); @@ -1013,28 +1146,8 @@ union_removed_upper(un) un->un_flags &= ~UN_CACHED; LIST_REMOVE(un, un_cache); } - - if (un->un_flags & UN_ULOCK) { - un->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(un->un_uppervp, 0, p); - } } -#if 0 -struct vnode * -union_lowervp(vp) - struct vnode *vp; -{ - struct union_node *un = VTOUNION(vp); - - if ((un->un_lowervp != NULLVP) && - (vp->v_type == un->un_lowervp->v_type)) { - if (vget(un->un_lowervp, 0) == 0) - return (un->un_lowervp); - } - - return (NULLVP); -} #endif /* @@ -1104,13 +1217,12 @@ union_dircache(vp, p) nvp = NULLVP; - if (dircache == 0) { + if (dircache == NULL) { cnt = 0; union_dircache_r(vp, 0, &cnt); cnt++; - dircache = (struct vnode **) - malloc(cnt * sizeof(struct vnode *), - M_TEMP, M_WAITOK); + dircache = malloc(cnt * sizeof(struct vnode *), + M_TEMP, M_WAITOK); vpp = dircache; union_dircache_r(vp, &vpp, &cnt); *vpp = NULLVP; @@ -1126,9 +1238,11 @@ union_dircache(vp, p) if (*vpp == 
NULLVP) goto out; - vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p); + /*vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);*/ + UDEBUG(("ALLOCVP-3 %p ref %d\n", *vpp, (*vpp ? (*vpp)->v_usecount : -99))); VREF(*vpp); - error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0); + error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, NULL, *vpp, NULLVP, 0); + UDEBUG(("ALLOCVP-3B %p ref %d\n", nvp, (*vpp ? (*vpp)->v_usecount : -99))); if (error) goto out; @@ -1141,6 +1255,40 @@ union_dircache(vp, p) return (nvp); } +/* + * Guarantee coherency with the VM cache by invalidating any clean VM pages + * associated with this write and updating any dirty VM pages. Since our + * vnode is locked, other processes will not be able to read the pages in + * again until after our write completes. + * + * We also have to be coherent with reads, by flushing any pending dirty + * pages prior to issuing the read. + * + * XXX this is somewhat of a hack at the moment. To support this properly + * we would have to be able to run VOP_READ and VOP_WRITE through the VM + * cache. Then we wouldn't need to worry about coherency. 
+ */ + +void +union_vm_coherency(struct vnode *vp, struct uio *uio, int cleanfls) +{ + vm_object_t object; + vm_pindex_t pstart; + vm_pindex_t pend; + int pgoff; + + if ((object = vp->v_object) == NULL) + return; + + pgoff = uio->uio_offset & PAGE_MASK; + pstart = uio->uio_offset / PAGE_SIZE; + pend = pstart + (uio->uio_resid + pgoff + PAGE_MASK) / PAGE_SIZE; + + vm_object_page_clean(object, pstart, pend, OBJPC_SYNC); + if (cleanfls) + vm_object_page_remove(object, pstart, pend, TRUE); +} + /* * Module glue to remove #ifdef UNION from vfs_syscalls.c */ @@ -1169,6 +1317,8 @@ union_dircheck(struct proc *p, struct vnode **vp, struct file *fp) if (lvp != NULLVP) { error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); + if (error == 0 && vn_canvmio(lvp) == TRUE) + error = vfs_object_create(lvp, p, fp->f_cred); if (error) { vput(lvp); return (error); @@ -1201,9 +1351,11 @@ union_modevent(module_t mod, int type, void *data) } return 0; } + static moduledata_t union_mod = { "union_dircheck", union_modevent, NULL }; + DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY); diff --git a/sys/miscfs/union/union_vfsops.c b/sys/miscfs/union/union_vfsops.c index af828ac64a8c..1a53f88bcc7b 100644 --- a/sys/miscfs/union/union_vfsops.c +++ b/sys/miscfs/union/union_vfsops.c @@ -85,9 +85,7 @@ union_mount(mp, path, data, ndp, p) int len; u_int size; -#ifdef DEBUG - printf("union_mount(mp = %p)\n", (void *)mp); -#endif + UDEBUG(("union_mount(mp = %p)\n", (void *)mp)); /* * Disable clustered write, otherwise system becomes unstable. @@ -114,24 +112,35 @@ union_mount(mp, path, data, ndp, p) if (error) goto bad; + /* + * Obtain lower vnode. Vnode is stored in mp->mnt_vnodecovered. + * We need to reference it but not lock it. + */ + lowerrootvp = mp->mnt_vnodecovered; VREF(lowerrootvp); +#if 0 /* * Unlock lower node to avoid deadlock. */ if (lowerrootvp->v_op == union_vnodeop_p) VOP_UNLOCK(lowerrootvp, 0, p); +#endif /* - * Find upper node. 
+ * Obtain upper vnode by calling namei() on the path. The + * upperrootvp will be returned referenced but not locked. */ NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT, UIO_USERSPACE, args.target, p); error = namei(ndp); + +#if 0 if (lowerrootvp->v_op == union_vnodeop_p) vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY, p); +#endif if (error) goto bad; @@ -139,8 +148,11 @@ union_mount(mp, path, data, ndp, p) vrele(ndp->ni_dvp); ndp->ni_dvp = NULL; + UDEBUG(("mount_root UPPERVP %p locked = %d\n", upperrootvp, VOP_ISLOCKED(upperrootvp))); + /* * Check multi union mount to avoid `lock myself again' panic. + * Also require that it be a directory. */ if (upperrootvp == VTOUNION(lowerrootvp)->un_uppervp) { #ifdef DIAGNOSTIC @@ -155,35 +167,43 @@ union_mount(mp, path, data, ndp, p) goto bad; } - um = (struct union_mount *) malloc(sizeof(struct union_mount), - M_UNIONFSMNT, M_WAITOK); /* XXX */ - /* - * Keep a held reference to the target vnodes. - * They are vrele'd in union_unmount. - * - * Depending on the _BELOW flag, the filesystems are - * viewed in a different order. In effect, this is the - * same as providing a mount under option to the mount syscall. + * Allocate our union_mount structure and populate the fields. + * The vnode references are stored in the union_mount as held, + * unlocked references. Depending on the _BELOW flag, the + * filesystems are viewed in a different order. In effect this + * is the same as providing a mount-under option to the mount + * syscall. 
*/ + um = (struct union_mount *) malloc(sizeof(struct union_mount), + M_UNIONFSMNT, M_WAITOK); + + bzero(um, sizeof(struct union_mount)); + um->um_op = args.mntflags & UNMNT_OPMASK; + switch (um->um_op) { case UNMNT_ABOVE: um->um_lowervp = lowerrootvp; um->um_uppervp = upperrootvp; + upperrootvp = NULL; + lowerrootvp = NULL; break; case UNMNT_BELOW: um->um_lowervp = upperrootvp; um->um_uppervp = lowerrootvp; + upperrootvp = NULL; + lowerrootvp = NULL; break; case UNMNT_REPLACE: vrele(lowerrootvp); - lowerrootvp = NULLVP; + lowerrootvp = NULL; um->um_uppervp = upperrootvp; um->um_lowervp = lowerrootvp; + upperrootvp = NULL; break; default: @@ -196,7 +216,7 @@ union_mount(mp, path, data, ndp, p) * supports whiteout operations */ if ((mp->mnt_flag & MNT_RDONLY) == 0) { - error = VOP_WHITEOUT(um->um_uppervp, (struct componentname *) 0, LOOKUP); + error = VOP_WHITEOUT(um->um_uppervp, NULL, LOOKUP); if (error) goto bad; } @@ -258,15 +278,19 @@ union_mount(mp, path, data, ndp, p) (void)union_statfs(mp, &mp->mnt_stat, p); -#ifdef DEBUG - printf("union_mount: from %s, on %s\n", - mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname); -#endif + UDEBUG(("union_mount: from %s, on %s\n", + mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname)); return (0); bad: - if (um) + if (um) { + if (um->um_uppervp) + vrele(um->um_uppervp); + if (um->um_lowervp) + vrele(um->um_lowervp); + /* XXX other fields */ free(um, M_UNIONFSMNT); + } if (cred) crfree(cred); if (upperrootvp) @@ -291,9 +315,7 @@ union_unmount(mp, mntflags, p) int freeing; int flags = 0; -#ifdef DEBUG - printf("union_unmount(mp = %p)\n", (void *)mp); -#endif + UDEBUG(("union_unmount(mp = %p)\n", (void *)mp)); if (mntflags & MNT_FORCE) flags |= FORCECLOSE; @@ -365,55 +387,25 @@ union_root(mp, vpp) struct mount *mp; struct vnode **vpp; { - struct proc *p = curproc; /* XXX */ struct union_mount *um = MOUNTTOUNIONMOUNT(mp); int error; - int loselock; - int lockadj = 0; - - if (um->um_lowervp && um->um_op != UNMNT_BELOW && - 
VOP_ISLOCKED(um->um_lowervp)) { - VREF(um->um_lowervp); - VOP_UNLOCK(um->um_lowervp, 0, p); - lockadj = 1; - } /* - * Return locked reference to root. + * Supply an unlocked reference to um_uppervp and to um_lowervp. It + * is possible for um_uppervp to be locked without the associated + * root union_node being locked. We let union_allocvp() deal with + * it. */ + UDEBUG(("union_root UPPERVP %p locked = %d\n", um->um_uppervp, VOP_ISLOCKED(um->um_uppervp))); + VREF(um->um_uppervp); - if ((um->um_op == UNMNT_BELOW) && - VOP_ISLOCKED(um->um_uppervp)) { - loselock = 1; - } else { - vn_lock(um->um_uppervp, LK_EXCLUSIVE | LK_RETRY, p); - loselock = 0; - } if (um->um_lowervp) VREF(um->um_lowervp); - error = union_allocvp(vpp, mp, - (struct vnode *) 0, - (struct vnode *) 0, - (struct componentname *) 0, - um->um_uppervp, - um->um_lowervp, - 1); - if (error) { - if (loselock) - vrele(um->um_uppervp); - else - vput(um->um_uppervp); - if (um->um_lowervp) - vrele(um->um_lowervp); - } else { - if (loselock) - VTOUNION(*vpp)->un_flags &= ~UN_ULOCK; - } - if (lockadj) { - vn_lock(um->um_lowervp, LK_EXCLUSIVE | LK_RETRY, p); - vrele(um->um_lowervp); - } + error = union_allocvp(vpp, mp, NULLVP, NULLVP, NULL, + um->um_uppervp, um->um_lowervp, 1); + UDEBUG(("error %d\n", error)); + UDEBUG(("union_root2 UPPERVP %p locked = %d\n", um->um_uppervp, VOP_ISLOCKED(um->um_uppervp))); return (error); } @@ -429,10 +421,8 @@ union_statfs(mp, sbp, p) struct statfs mstat; int lbsize; -#ifdef DEBUG - printf("union_statfs(mp = %p, lvp = %p, uvp = %p)\n", - (void *)mp, (void *)um->um_lowervp, (void *)um->um_uppervp); -#endif + UDEBUG(("union_statfs(mp = %p, lvp = %p, uvp = %p)\n", + (void *)mp, (void *)um->um_lowervp, (void *)um->um_uppervp)); bzero(&mstat, sizeof(mstat)); diff --git a/sys/miscfs/union/union_vnops.c b/sys/miscfs/union/union_vnops.c index 145f8ca6f0ad..128e59ebaa21 100644 --- a/sys/miscfs/union/union_vnops.c +++ b/sys/miscfs/union/union_vnops.c @@ -50,13 +50,25 @@ #include #include 
#include +#include #include -#define FIXUP(un, p) { \ - if (((un)->un_flags & UN_ULOCK) == 0) { \ - union_fixup(un, p); \ - } \ -} +#include +#include + +#include +#include +#include +#include +#include + +int uniondebug = 0; + +#if UDEBUG_ENABLED +SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RW, &uniondebug, 0, ""); +#else +SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RD, &uniondebug, 0, ""); +#endif static int union_abortop __P((struct vop_abortop_args *ap)); static int union_access __P((struct vop_access_args *ap)); @@ -64,17 +76,15 @@ static int union_advlock __P((struct vop_advlock_args *ap)); static int union_bmap __P((struct vop_bmap_args *ap)); static int union_close __P((struct vop_close_args *ap)); static int union_create __P((struct vop_create_args *ap)); -static void union_fixup __P((struct union_node *un, struct proc *p)); static int union_fsync __P((struct vop_fsync_args *ap)); static int union_getattr __P((struct vop_getattr_args *ap)); static int union_inactive __P((struct vop_inactive_args *ap)); static int union_ioctl __P((struct vop_ioctl_args *ap)); -static int union_islocked __P((struct vop_islocked_args *ap)); static int union_lease __P((struct vop_lease_args *ap)); static int union_link __P((struct vop_link_args *ap)); static int union_lock __P((struct vop_lock_args *ap)); static int union_lookup __P((struct vop_lookup_args *ap)); -static int union_lookup1 __P((struct vnode *udvp, struct vnode **dvpp, +static int union_lookup1 __P((struct vnode *udvp, struct vnode **dvp, struct vnode **vpp, struct componentname *cnp)); static int union_mkdir __P((struct vop_mkdir_args *ap)); @@ -94,36 +104,89 @@ static int union_rmdir __P((struct vop_rmdir_args *ap)); static int union_poll __P((struct vop_poll_args *ap)); static int union_setattr __P((struct vop_setattr_args *ap)); static int union_strategy __P((struct vop_strategy_args *ap)); +static int union_getpages __P((struct vop_getpages_args *ap)); +static int union_putpages __P((struct 
vop_putpages_args *ap)); static int union_symlink __P((struct vop_symlink_args *ap)); static int union_unlock __P((struct vop_unlock_args *ap)); static int union_whiteout __P((struct vop_whiteout_args *ap)); static int union_write __P((struct vop_read_args *ap)); -static void -union_fixup(un, p) - struct union_node *un; - struct proc *p; +static __inline +struct vnode * +union_lock_upper(struct union_node *un, struct proc *p) { + struct vnode *uppervp; - vn_lock(un->un_uppervp, LK_EXCLUSIVE | LK_RETRY, p); - un->un_flags |= UN_ULOCK; + if ((uppervp = un->un_uppervp) != NULL) { + VREF(uppervp); + vn_lock(uppervp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, p); + } + KASSERT((uppervp == NULL || uppervp->v_usecount > 0), ("uppervp usecount is 0")); + return(uppervp); } +static __inline +void +union_unlock_upper(struct vnode *uppervp, struct proc *p) +{ + vput(uppervp); +} + +static __inline +struct vnode * +union_lock_other(struct union_node *un, struct proc *p) +{ + struct vnode *vp; + + if (un->un_uppervp != NULL) { + vp = union_lock_upper(un, p); + } else if ((vp = un->un_lowervp) != NULL) { + VREF(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, p); + } + return(vp); +} + +static __inline +void +union_unlock_other(struct vnode *vp, struct proc *p) +{ + vput(vp); +} + +/* + * union_lookup1: + * + * udvp must be exclusively locked on call and will remain + * exclusively locked on return. This is the mount point + * for our filesystem. + * + * dvp Our base directory, locked and referenced. + * The passed dvp will be dereferenced and unlocked on return + * and a new dvp will be returned which is locked and + * referenced in the same variable. + * + * vpp is filled in with the result if no error occurred, + * locked and ref'd. + * + * If an error is returned, *vpp is set to NULLVP. If no + * error occurs, *vpp is returned with a reference and an + * exclusive lock. 
+ */ + static int -union_lookup1(udvp, dvpp, vpp, cnp) +union_lookup1(udvp, pdvp, vpp, cnp) struct vnode *udvp; - struct vnode **dvpp; + struct vnode **pdvp; struct vnode **vpp; struct componentname *cnp; { int error; struct proc *p = cnp->cn_proc; + struct vnode *dvp = *pdvp; struct vnode *tdvp; - struct vnode *dvp; struct mount *mp; - dvp = *dvpp; - /* * If stepping up the directory tree, check for going * back across the mount point, in which case do what @@ -139,49 +202,79 @@ union_lookup1(udvp, dvpp, vpp, cnp) * filesystems. */ tdvp = dvp; - *dvpp = dvp = dvp->v_mount->mnt_vnodecovered; - vput(tdvp); + dvp = dvp->v_mount->mnt_vnodecovered; VREF(dvp); + vput(tdvp); vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); } } + /* + * Set return dvp to be the upperdvp 'parent directory. + */ + *pdvp = dvp; + + /* + * If the VOP_LOOKUP call generates an error, tdvp is invalid and no + * changes will have been made to dvp, so we are set to return. + */ + error = VOP_LOOKUP(dvp, &tdvp, cnp); - if (error) + if (error) { + UDEBUG(("dvp %p error %d flags %lx\n", dvp, error, cnp->cn_flags)); + *vpp = NULL; return (error); + } /* * The parent directory will have been unlocked, unless lookup - * found the last component. In which case, re-lock the node - * here to allow it to be unlocked again (phew) in union_lookup. + * found the last component or if dvp == tdvp (tdvp must be locked). + * + * We want our dvp to remain locked and ref'd. We also want tdvp + * to remain locked and ref'd. */ - if (dvp != tdvp && !(cnp->cn_flags & ISLASTCN)) - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + UDEBUG(("parentdir %p result %p flag %lx\n", dvp, tdvp, cnp->cn_flags)); - dvp = tdvp; + if (dvp != tdvp && (cnp->cn_flags & ISLASTCN) == 0) + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); /* * Lastly check if the current node is a mount point in * which case walk up the mount hierarchy making sure not to * bump into the root of the mount tree (ie. dvp != udvp). 
+ * + * We use dvp as a temporary variable here, it is no longer related + * to the dvp above. However, we have to ensure that both *pdvp and + * tdvp are locked on return. */ - while (dvp != udvp && (dvp->v_type == VDIR) && - (mp = dvp->v_mountedhere)) { + + dvp = tdvp; + while ( + dvp != udvp && + (dvp->v_type == VDIR) && + (mp = dvp->v_mountedhere) + ) { + int relock_pdvp = 0; if (vfs_busy(mp, 0, 0, p)) continue; - error = VFS_ROOT(mp, &tdvp); + if (dvp == *pdvp) + relock_pdvp = 1; + vput(dvp); + dvp = NULL; + error = VFS_ROOT(mp, &dvp); + vfs_unbusy(mp, p); + + if (relock_pdvp) + vn_lock(*pdvp, LK_EXCLUSIVE | LK_RETRY, p); + if (error) { - vput(dvp); + *vpp = NULL; return (error); } - - vput(dvp); - dvp = tdvp; } - *vpp = dvp; return (0); } @@ -199,8 +292,8 @@ union_lookup(ap) int uerror, lerror; struct vnode *uppervp, *lowervp; struct vnode *upperdvp, *lowerdvp; - struct vnode *dvp = ap->a_dvp; - struct union_node *dun = VTOUNION(dvp); + struct vnode *dvp = ap->a_dvp; /* starting dir */ + struct union_node *dun = VTOUNION(dvp); /* associated union node */ struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; int lockparent = cnp->cn_flags & LOCKPARENT; @@ -209,44 +302,38 @@ union_lookup(ap) int iswhiteout; struct vattr va; + *ap->a_vpp = NULLVP; /* * Disallow write attemps to the filesystem mounted read-only. */ - if ((cnp->cn_flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && - (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) + if ((cnp->cn_flags & ISLASTCN) && + (dvp->v_mount->mnt_flag & MNT_RDONLY) && + (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { return (EROFS); - -#ifdef notyet - if (cnp->cn_namelen == 3 && - cnp->cn_nameptr[2] == '.' && - cnp->cn_nameptr[1] == '.' 
&& - cnp->cn_nameptr[0] == '.') { - dvp = *ap->a_vpp = LOWERVP(ap->a_dvp); - if (dvp == NULLVP) - return (ENOENT); - VREF(dvp); - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); - if (!lockparent || !(cnp->cn_flags & ISLASTCN)) - VOP_UNLOCK(ap->a_dvp, 0, p); - return (0); } -#endif + /* + * For any lookup's we do, always return with the parent locked + */ cnp->cn_flags |= LOCKPARENT; - upperdvp = dun->un_uppervp; lowerdvp = dun->un_lowervp; uppervp = NULLVP; lowervp = NULLVP; iswhiteout = 0; - if (cnp->cn_flags & ISDOTDOT) { - if (upperdvp != NULL) - VREF(upperdvp); - if (lowerdvp != NULL) - VREF(lowerdvp); - } + uerror = ENOENT; + lerror = ENOENT; + + /* + * Get a private lock on uppervp and a reference, effectively + * taking it out of the union_node's control. + * + * We must lock upperdvp while holding our lock on dvp + * to avoid a deadlock. + */ + upperdvp = union_lock_upper(dun, p); /* * do the lookup in the upper level. @@ -255,62 +342,64 @@ union_lookup(ap) * on and just return that vnode. */ if (upperdvp != NULLVP) { - FIXUP(dun, p); /* - * If we're doing `..' in the underlying filesystem, - * we must drop our lock on the union node before - * going up the tree in the lower file system--if we block - * on the lowervp lock, and that's held by someone else - * coming down the tree and who's waiting for our lock, - * we would be hosed. + * We do not have to worry about the DOTDOT case, we've + * already unlocked dvp. */ - if (cnp->cn_flags & ISDOTDOT) { - /* retain lock on underlying VP: */ - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(dvp, 0, p); - } - uerror = union_lookup1(um->um_uppervp, &upperdvp, - &uppervp, cnp); + UDEBUG(("A %p\n", upperdvp)); + + /* + * Do the lookup. We must supply a locked and referenced + * upperdvp to the function and will get a new locked and + * referenced upperdvp back with the old having been + * dereferenced. + * + * If an error is returned, uppervp will be NULLVP. 
If no + * error occurs, uppervp will be the locked and referenced + * return vnode or possibly NULL, depending on what is being + * requested. It is possible that the returned uppervp + * will be the same as upperdvp. + */ + uerror = union_lookup1(um->um_uppervp, &upperdvp, &uppervp, cnp); + UDEBUG(( + "uerror %d upperdvp %p %d/%d, uppervp %p ref=%d/lck=%d\n", + uerror, + upperdvp, + upperdvp->v_usecount, + VOP_ISLOCKED(upperdvp), + uppervp, + (uppervp ? uppervp->v_usecount : -99), + (uppervp ? VOP_ISLOCKED(uppervp) : -99) + )); + /* * Disallow write attemps to the filesystem mounted read-only. */ if (uerror == EJUSTRETURN && (cnp->cn_flags & ISLASTCN) && - (dvp->v_mount->mnt_flag & MNT_RDONLY) && - (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) { - if (!lockparent) - cnp->cn_flags &= ~LOCKPARENT; - return (EROFS); - } - - if (cnp->cn_flags & ISDOTDOT) { - if (dun->un_uppervp == upperdvp) { - /* - * We got the underlying bugger back locked... - * now take back the union node lock. Since we - * hold the uppervp lock, we can diddle union - * locking flags at will. :) - */ - dun->un_flags |= UN_ULOCK; - } - /* - * If upperdvp got swapped out, it means we did - * some mount point magic, and we do not have - * dun->un_uppervp locked currently--so we get it - * locked here (don't set the UN_ULOCK flag). - */ - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); - } - - /*if (uppervp == upperdvp) - dun->un_flags |= UN_KLOCK;*/ - - if (cnp->cn_consume != 0) { - *ap->a_vpp = uppervp; - if (!lockparent) - cnp->cn_flags &= ~LOCKPARENT; - error = uerror; + (dvp->v_mount->mnt_flag & MNT_RDONLY) && + (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) { + error = EROFS; goto out; } + + /* + * Special case. If cn_consume != 0 skip out. The result + * of the lookup is transfered to our return variable. If + * an error occured we have to throw away the results. 
+ */ + + if (cnp->cn_consume != 0) { + if ((error = uerror) == 0) { + *ap->a_vpp = uppervp; + uppervp = NULL; + } + goto out; + } + + /* + * Calculate whiteout, fall through + */ + if (uerror == ENOENT || uerror == EJUSTRETURN) { if (cnp->cn_flags & ISWHITEOUT) { iswhiteout = 1; @@ -321,8 +410,6 @@ union_lookup(ap) iswhiteout = 1; } } - } else { - uerror = ENOENT; } /* @@ -332,13 +419,14 @@ union_lookup(ap) * back from the upper layer and return the lower vnode * instead. */ + if (lowerdvp != NULLVP && !iswhiteout) { int nameiop; - vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, p); + UDEBUG(("B %p\n", lowerdvp)); /* - * Only do a LOOKUP on the bottom node, since + * Force only LOOKUPs on the lower node, since * we won't be making changes to it anyway. */ nameiop = cnp->cn_nameiop; @@ -347,42 +435,42 @@ union_lookup(ap) saved_cred = cnp->cn_cred; cnp->cn_cred = um->um_cred; } + /* * We shouldn't have to worry about locking interactions * between the lower layer and our union layer (w.r.t. * `..' processing) because we don't futz with lowervp * locks in the union-node instantiation code path. + * + * union_lookup1() requires lowervp to be locked on entry, + * and it will be unlocked on return. The ref count will + * not change. On return lowervp doesn't represent anything + * to us so we NULL it out. 
*/ - lerror = union_lookup1(um->um_lowervp, &lowerdvp, - &lowervp, cnp); + VREF(lowerdvp); + vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, p); + lerror = union_lookup1(um->um_lowervp, &lowerdvp, &lowervp, cnp); + if (lowerdvp == lowervp) + vrele(lowerdvp); + else + vput(lowerdvp); + lowerdvp = NULL; /* lowerdvp invalid after vput */ + if (um->um_op == UNMNT_BELOW) cnp->cn_cred = saved_cred; cnp->cn_nameiop = nameiop; - if (lowervp != lowerdvp) - VOP_UNLOCK(lowerdvp, 0, p); - if (cnp->cn_consume != 0 || lerror == EACCES) { - if (lerror == EACCES) - lowervp = NULLVP; - if (uppervp != NULLVP) { - if (uppervp == upperdvp) - vrele(uppervp); - else - vput(uppervp); - uppervp = NULLVP; + if ((error = lerror) == 0) { + *ap->a_vpp = lowervp; + lowervp = NULL; } - *ap->a_vpp = lowervp; - if (!lockparent) - cnp->cn_flags &= ~LOCKPARENT; - error = lerror; goto out; } } else { - lerror = ENOENT; + UDEBUG(("C %p\n", lowerdvp)); if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) { - lowervp = LOWERVP(dun->un_pvp); - if (lowervp != NULLVP) { + if ((lowervp = LOWERVP(dun->un_pvp)) != NULL) { VREF(lowervp); vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY, p); lerror = 0; @@ -390,35 +478,27 @@ union_lookup(ap) } } - if (!lockparent) - cnp->cn_flags &= ~LOCKPARENT; - /* - * at this point, we have uerror and lerror indicating - * possible errors with the lookups in the upper and lower - * layers. additionally, uppervp and lowervp are (locked) - * references to existing vnodes in the upper and lower layers. + * Ok. Now we have uerror, uppervp, upperdvp, lerror, and lowervp. * - * there are now three cases to consider. - * 1. if both layers returned an error, then return whatever - * error the upper layer generated. + * 1. If both layers returned an error, select the upper layer. * - * 2. if the top layer failed and the bottom layer succeeded - * then two subcases occur. - * a. 
the bottom vnode is not a directory, in which - * case just return a new union vnode referencing - * an empty top layer and the existing bottom layer. - * b. the bottom vnode is a directory, in which case - * create a new directory in the top-level and - * continue as in case 3. + * 2. If the upper layer failed and the bottom layer succeeded, + * two subcases occur: * - * 3. if the top layer succeeded then return a new union + * a. The bottom vnode is not a directory, in which case + * just return a new union vnode referencing an + * empty top layer and the existing bottom layer. + * + * b. The bottom vnode is a directory, in which case + * create a new directory in the top layer and + * fall through to case 3. + * + * 3. If the top layer succeeded then return a new union * vnode referencing whatever the new top layer and * whatever the bottom layer returned. */ - *ap->a_vpp = NULLVP; - /* case 1. */ if ((uerror != 0) && (lerror != 0)) { error = uerror; @@ -428,59 +508,126 @@ union_lookup(ap) /* case 2. */ if (uerror != 0 /* && (lerror == 0) */ ) { if (lowervp->v_type == VDIR) { /* case 2b. */ - dun->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(upperdvp, 0, p); + KASSERT(uppervp == NULL, ("uppervp unexpectedly non-NULL")); + /* + * oops, uppervp has a problem, we may have to shadow. + */ uerror = union_mkshadow(um, upperdvp, cnp, &uppervp); - vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY, p); - dun->un_flags |= UN_ULOCK; - if (uerror) { - if (lowervp != NULLVP) { - vput(lowervp); - lowervp = NULLVP; - } error = uerror; goto out; } } } - if (lowervp != NULLVP) + /* + * Must call union_allocvp with both the upper and lower vnodes + * referenced and the upper vnode locked. ap->a_vpp is returned + * referenced and locked. lowervp, uppervp, and upperdvp are + * absorbed by union_allocvp() whether it succeeds or fails. + * + * upperdvp is the parent directory of uppervp which may be + * different, depending on the path, from dvp->un_uppervp. 
That's + * why it is a separate argument. Note that it must be unlocked. + * + * dvp must be locked on entry to the call and will be locked on + * return. + */ + + if (uppervp && uppervp != upperdvp) + VOP_UNLOCK(uppervp, 0, p); + if (lowervp) VOP_UNLOCK(lowervp, 0, p); + if (upperdvp) + VOP_UNLOCK(upperdvp, 0, p); error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp, uppervp, lowervp, 1); - if (error) { - if (uppervp != NULLVP) - vput(uppervp); - if (lowervp != NULLVP) - vrele(lowervp); - } else { - if (*ap->a_vpp != dvp) - if (!lockparent || !(cnp->cn_flags & ISLASTCN)) - VOP_UNLOCK(dvp, 0, p); -#ifdef DIAGNOSTIC - if (cnp->cn_namelen == 1 && - cnp->cn_nameptr[0] == '.' && - *ap->a_vpp != dvp) { - panic("union_lookup returning . (%p) not same as startdir (%p)", - ap->a_vpp, dvp); - } -#endif - } + UDEBUG(("Create %p = %p %p refs=%d\n", *ap->a_vpp, uppervp, lowervp, (*ap->a_vpp) ? ((*ap->a_vpp)->v_usecount) : -99)); + + uppervp = NULL; + upperdvp = NULL; + lowervp = NULL; + + /* + * Termination Code + * + * - put away any extra junk laying around. Note that lowervp + * (if not NULL) will never be the same as *ap->a_vpp and + * neither will uppervp, because when we set that state we + * NULL-out lowervp or uppervp. On the other hand, upperdvp + * may match uppervp or *ap->a_vpp. + * + * - relock/unlock dvp if appropriate. + */ out: - if (cnp->cn_flags & ISDOTDOT) { - if (upperdvp != NULL) - vrele(upperdvp); - if (lowerdvp != NULL) - vrele(lowerdvp); - } + if (upperdvp) { + if (upperdvp == uppervp || upperdvp == *ap->a_vpp) + vrele(upperdvp); + else + vput(upperdvp); + } + + if (uppervp) + vput(uppervp); + + if (lowervp) + vput(lowervp); + + /* + * Restore LOCKPARENT state + */ + + if (!lockparent) + cnp->cn_flags &= ~LOCKPARENT; + + UDEBUG(("Out %d vpp %p/%d lower %p upper %p\n", error, *ap->a_vpp, + ((*ap->a_vpp) ? (*ap->a_vpp)->v_usecount : -99), + lowervp, uppervp)); + + /* + * dvp lock state, determine whether to relock dvp. 
dvp is expected + * to be locked on return if: + * + * - there was an error (except not EJUSTRETURN), or + * - we hit the last component and lockparent is true + * + * dvp_is_locked is the current state of the dvp lock, not counting + * the possibility that *ap->a_vpp == dvp (in which case it is locked + * anyway). Note that *ap->a_vpp == dvp only if no error occured. + */ + + if (*ap->a_vpp != dvp) { + if ((error == 0 || error == EJUSTRETURN) && + (!lockparent || (cnp->cn_flags & ISLASTCN) == 0)) { + VOP_UNLOCK(dvp, 0, p); + } + } + + /* + * Diagnostics + */ + +#ifdef DIAGNOSTIC + if (cnp->cn_namelen == 1 && + cnp->cn_nameptr[0] == '.' && + *ap->a_vpp != dvp) { + panic("union_lookup returning . (%p) not same as startdir (%p)", ap->a_vpp, dvp); + } +#endif return (error); } +/* + * union_create: + * + * a_dvp is locked on entry and remains locked on return. a_vpp is returned + * locked if no error occurs, otherwise it is garbage. + */ + static int union_create(ap) struct vop_create_args /* { @@ -491,36 +638,27 @@ union_create(ap) } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); - struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + struct vnode *dvp; + int error = EROFS; - if (dvp != NULLVP) { + if ((dvp = union_lock_upper(dun, p)) != NULL) { struct vnode *vp; struct mount *mp; - int error; - FIXUP(dun, p); - - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap); - if (error) { - dun->un_flags |= UN_ULOCK; - return (error); + if (error == 0) { + mp = ap->a_dvp->v_mount; + VOP_UNLOCK(vp, 0, p); + UDEBUG(("ALLOCVP-1 FROM %p REFS %d\n", vp, vp->v_usecount)); + error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, + cnp, vp, NULLVP, 1); + UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vp->v_usecount)); } - - mp = ap->a_dvp->v_mount; - VOP_UNLOCK(dvp, 0, p); - error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp, - NULLVP, 1); - if (error) - 
vput(vp); - vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p); - return (error); + union_unlock_upper(dvp, p); } - - return (EROFS); + return (error); } static int @@ -533,15 +671,23 @@ union_whiteout(ap) { struct union_node *un = VTOUNION(ap->a_dvp); struct componentname *cnp = ap->a_cnp; - struct proc *p = cnp->cn_proc; + struct vnode *uppervp; + int error = EOPNOTSUPP; - if (un->un_uppervp == NULLVP) - return (EOPNOTSUPP); - - FIXUP(un, p); - return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags)); + if ((uppervp = union_lock_upper(un, cnp->cn_proc)) != NULLVP) { + error = VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags); + union_unlock_upper(uppervp, cnp->cn_proc); + } + return(error); } +/* + * union_mknod: + * + * a_dvp is locked on entry and should remain locked on return. + * a_vpp is garbage whether an error occurs or not. + */ + static int union_mknod(ap) struct vop_mknod_args /* { @@ -552,42 +698,28 @@ union_mknod(ap) } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); - struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; - struct proc *p = cnp->cn_proc; + struct vnode *dvp; + int error = EROFS; - if (dvp != NULLVP) { + if ((dvp = union_lock_upper(dun, cnp->cn_proc)) != NULL) { struct vnode *vp; - struct mount *mp; - int error; - - FIXUP(dun, p); - - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap); - if (error) { - dun->un_flags |= UN_ULOCK; - return (error); - } - - if (vp != NULLVP) { - mp = ap->a_dvp->v_mount; - VOP_UNLOCK(dvp, 0, p); - error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, - cnp, vp, NULLVP, 1); - if (error) - vput(vp); - vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p); - } else { - dun->un_flags |= UN_ULOCK; - } - return (error); + /* vp is garbage whether an error occurs or not */ + union_unlock_upper(dvp, cnp->cn_proc); } - - return (EROFS); + return (error); } +/* + * union_open: + * + * run open VOP. 
When opening the underlying vnode we have to mimic + * vn_open. What we *really* need to do to avoid screwups if the + * open semantics change is to call vn_open(). For example, ufs blows + * up if you open a file but do not vmio it prior to writing. + */ + static int union_open(ap) struct vop_open_args /* { @@ -603,13 +735,18 @@ union_open(ap) int mode = ap->a_mode; struct ucred *cred = ap->a_cred; struct proc *p = ap->a_p; - int error; + int error = 0; + int tvpisupper = 1; /* * If there is an existing upper vp then simply open that. + * The upper vp takes precedence over the lower vp. When opening + * a lower vp for writing copy it to the uppervp and then open the + * uppervp. + * + * At the end of this section tvp will be left locked. */ - tvp = un->un_uppervp; - if (tvp == NULLVP) { + if ((tvp = union_lock_upper(un, p)) == NULLVP) { /* * If the lower vnode is being opened for writing, then * copy the file contents to the upper vnode and open that, @@ -617,30 +754,50 @@ union_open(ap) */ tvp = un->un_lowervp; if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) { - error = union_copyup(un, (mode&O_TRUNC) == 0, cred, p); - if (error == 0) - error = VOP_OPEN(un->un_uppervp, mode, cred, p); - return (error); + int docopy = !(mode & O_TRUNC); + error = union_copyup(un, docopy, cred, p); + tvp = union_lock_upper(un, p); + } else { + un->un_openl++; + VREF(tvp); + vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); + tvpisupper = 0; } - - /* - * Just open the lower vnode - */ - un->un_openl++; - vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_OPEN(tvp, mode, cred, p); - VOP_UNLOCK(tvp, 0, p); - - return (error); } - FIXUP(un, p); + /* + * We are holding the correct vnode, open it + */ - error = VOP_OPEN(tvp, mode, cred, p); + if (error == 0) + error = VOP_OPEN(tvp, mode, cred, p); + /* + * Absolutely necessary or UFS will blowup + */ + if (error == 0 && vn_canvmio(tvp) == TRUE) { + error = vfs_object_create(tvp, p, cred); + } + + /* + * Release any locks held + */ + if 
(tvpisupper) { + if (tvp) + union_unlock_upper(tvp, p); + } else { + vput(tvp); + } return (error); } +/* + * union_close: + * + * It is unclear whether a_vp is passed locked or unlocked. Whatever + * the case we do not change it. + */ + static int union_close(ap) struct vop_close_args /* { @@ -661,7 +818,6 @@ union_close(ap) --un->un_openl; vp = un->un_lowervp; } - ap->a_vp = vp; return (VCALL(vp, VOFFSET(vop_close), ap)); } @@ -688,12 +844,12 @@ union_access(ap) struct proc *p = ap->a_p; int error = EACCES; struct vnode *vp; - struct vnode *savedvp; /* * Disallow write attempts on filesystems mounted read-only. */ - if (ap->a_mode & VWRITE && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) { + if ((ap->a_mode & VWRITE) && + (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (ap->a_vp->v_type) { case VREG: case VDIR: @@ -703,19 +859,30 @@ union_access(ap) break; } } - if ((vp = un->un_uppervp) != NULLVP) { - FIXUP(un, p); + + if ((vp = union_lock_upper(un, p)) != NULLVP) { ap->a_vp = vp; - return (VCALL(vp, VOFFSET(vop_access), ap)); + error = VCALL(vp, VOFFSET(vop_access), ap); + union_unlock_upper(vp, p); + return(error); } if ((vp = un->un_lowervp) != NULLVP) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - savedvp = ap->a_vp; ap->a_vp = vp; + + /* + * Remove VWRITE from a_mode if our mount point is RW, because + * we want to allow writes and lowervp may be read-only. 
+ */ + if ((un->un_vnode->v_mount->mnt_flag & MNT_RDONLY) == 0) + ap->a_mode &= ~VWRITE; + error = VCALL(vp, VOFFSET(vop_access), ap); if (error == 0) { - struct union_mount *um = MOUNTTOUNIONMOUNT(savedvp->v_mount); + struct union_mount *um; + + um = MOUNTTOUNIONMOUNT(un->un_vnode->v_mount); if (um->um_op == UNMNT_BELOW) { ap->a_cred = um->um_cred; @@ -723,17 +890,26 @@ union_access(ap) } } VOP_UNLOCK(vp, 0, p); - if (error) - return (error); } - - return (error); + return(error); } /* * We handle getattr only to change the fsid and * track object sizes + * + * It's not clear whether VOP_GETATTR is to be + * called with the vnode locked or not. stat() calls + * it with (vp) locked, and fstat calls it with + * (vp) unlocked. + * + * Because of this we cannot use our normal locking functions + * if we do not intend to lock the main a_vp node. At the moment + * we are running without any specific locking at all, but beware + * to any programmer that care must be taken if locking is added + * to this function. */ + static int union_getattr(ap) struct vop_getattr_args /* { @@ -745,12 +921,10 @@ union_getattr(ap) { int error; struct union_node *un = VTOUNION(ap->a_vp); - struct vnode *vp = un->un_uppervp; - struct proc *p = ap->a_p; + struct vnode *vp; struct vattr *vap; struct vattr va; - /* * Some programs walk the filesystem hierarchy by counting * links to directories to avoid stat'ing all the time. @@ -762,22 +936,11 @@ union_getattr(ap) vap = ap->a_vap; - vp = un->un_uppervp; - if (vp != NULLVP) { - /* - * It's not clear whether VOP_GETATTR is to be - * called with the vnode locked or not. stat() calls - * it with (vp) locked, and fstat calls it with - * (vp) unlocked. - * In the mean time, compensate here by checking - * the union_node's lock flag. 
- if (un->un_flags & UN_LOCKED) - FIXUP(un, p); - error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p); if (error) return (error); + /* XXX isn't this dangerous without a lock? */ union_newsize(ap->a_vp, vap->va_size, VNOVAL); } @@ -794,12 +957,12 @@ union_getattr(ap) error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p); if (error) return (error); + /* XXX isn't this dangerous without a lock? */ union_newsize(ap->a_vp, VNOVAL, vap->va_size); } if ((vap != ap->a_vap) && (vap->va_type == VDIR)) ap->a_vap->va_nlink += vap->va_nlink; - return (0); } @@ -815,27 +978,28 @@ union_setattr(ap) struct union_node *un = VTOUNION(ap->a_vp); struct proc *p = ap->a_p; struct vattr *vap = ap->a_vap; + struct vnode *uppervp; int error; /* * Disallow write attempts on filesystems mounted read-only. */ if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) && - (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || - vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || - vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL)) + (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || + vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || + vap->va_mtime.tv_sec != VNOVAL || + vap->va_mode != (mode_t)VNOVAL)) { return (EROFS); + } /* * Handle case of truncating lower object to zero size, * by creating a zero length upper object. This is to * handle the case of open with O_TRUNC and O_CREAT. */ - if ((un->un_uppervp == NULLVP) && - /* assert(un->un_lowervp != NULLVP) */ - (un->un_lowervp->v_type == VREG)) { + if (un->un_uppervp == NULLVP && (un->un_lowervp->v_type == VREG)) { error = union_copyup(un, (ap->a_vap->va_size != 0), - ap->a_cred, ap->a_p); + ap->a_cred, ap->a_p); if (error) return (error); } @@ -844,19 +1008,45 @@ union_setattr(ap) * Try to set attributes in upper layer, * otherwise return read-only filesystem error. 
*/ - if (un->un_uppervp != NULLVP) { - FIXUP(un, p); + error = EROFS; + if ((uppervp = union_lock_upper(un, p)) != NULLVP) { error = VOP_SETATTR(un->un_uppervp, ap->a_vap, ap->a_cred, ap->a_p); if ((error == 0) && (ap->a_vap->va_size != VNOVAL)) union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL); - } else { - error = EROFS; + union_unlock_upper(uppervp, p); } - return (error); } +/* + * union_getpages: + */ + +static int +union_getpages(struct vop_getpages_args *ap) +{ + int r; + + r = vnode_pager_generic_getpages(ap->a_vp, ap->a_m, + ap->a_count, ap->a_reqpage); + return(r); +} + +/* + * union_putpages: + */ + +static int +union_putpages(struct vop_putpages_args *ap) +{ + int r; + + r = vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, + ap->a_sync, ap->a_rtvals); + return(r); +} + static int union_read(ap) struct vop_read_args /* { @@ -866,18 +1056,19 @@ union_read(ap) struct ucred *a_cred; } */ *ap; { - int error; + struct union_node *un = VTOUNION(ap->a_vp); struct proc *p = ap->a_uio->uio_procp; - struct vnode *vp = OTHERVP(ap->a_vp); - int dolock = (vp == LOWERVP(ap->a_vp)); + struct vnode *uvp; + int error; - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_vp), p); - error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred); - if (dolock) - VOP_UNLOCK(vp, 0, p); + uvp = union_lock_other(un, p); + KASSERT(uvp != NULL, ("union_read: backing vnode missing!")); + + if (ap->a_vp->v_flag & VOBJBUF) + union_vm_coherency(ap->a_vp, ap->a_uio, 0); + + error = VOP_READ(uvp, ap->a_uio, ap->a_ioflag, ap->a_cred); + union_unlock_other(uvp, p); /* * XXX @@ -889,7 +1080,7 @@ union_read(ap) struct union_node *un = VTOUNION(ap->a_vp); off_t cur = ap->a_uio->uio_offset; - if (vp == un->un_uppervp) { + if (uvp == un->un_uppervp) { if (cur > un->un_uppersz) union_newsize(ap->a_vp, cur, VNOVAL); } else { @@ -897,7 +1088,6 @@ union_read(ap) union_newsize(ap->a_vp, VNOVAL, cur); } } - return (error); } @@ -910,17 +1100,36 @@ 
union_write(ap) struct ucred *a_cred; } */ *ap; { - int error; - struct vnode *vp; struct union_node *un = VTOUNION(ap->a_vp); struct proc *p = ap->a_uio->uio_procp; + struct vnode *uppervp; + int error; - vp = UPPERVP(ap->a_vp); - if (vp == NULLVP) + if ((uppervp = union_lock_upper(un, p)) == NULLVP) panic("union: missing upper layer in write"); - FIXUP(un, p); - error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred); + /* + * Since our VM pages are associated with our vnode rather then + * the real vnode, and since we do not run our reads and writes + * through our own VM cache, we have a VM/VFS coherency problem. + * We solve them by invalidating or flushing the associated VM + * pages prior to allowing a normal read or write to occur. + * + * VM-backed writes (UIO_NOCOPY) have to be converted to normal + * writes because we are not cache-coherent. Normal writes need + * to be made coherent with our VM-backing store, which we do by + * first flushing any dirty VM pages associated with the write + * range, and then destroying any clean VM pages associated with + * the write range. 
+ */ + + if (ap->a_uio->uio_segflg == UIO_NOCOPY) { + ap->a_uio->uio_segflg = UIO_SYSSPACE; + } else if (ap->a_vp->v_flag & VOBJBUF) { + union_vm_coherency(ap->a_vp, ap->a_uio, 1); + } + + error = VOP_WRITE(uppervp, ap->a_uio, ap->a_ioflag, ap->a_cred); /* * the size of the underlying object may be changed by the @@ -932,7 +1141,7 @@ union_write(ap) if (cur > un->un_uppersz) union_newsize(ap->a_vp, cur, VNOVAL); } - + union_unlock_upper(uppervp, p); return (error); } @@ -945,7 +1154,7 @@ union_lease(ap) int a_flag; } */ *ap; { - register struct vnode *ovp = OTHERVP(ap->a_vp); + struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_lease), ap)); @@ -962,7 +1171,7 @@ union_ioctl(ap) struct proc *a_p; } */ *ap; { - register struct vnode *ovp = OTHERVP(ap->a_vp); + struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_ioctl), ap)); @@ -977,7 +1186,7 @@ union_poll(ap) struct proc *a_p; } */ *ap; { - register struct vnode *ovp = OTHERVP(ap->a_vp); + struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_poll), ap)); @@ -1010,7 +1219,7 @@ union_mmap(ap) struct proc *a_p; } */ *ap; { - register struct vnode *ovp = OTHERVP(ap->a_vp); + struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_mmap), ap)); @@ -1027,35 +1236,24 @@ union_fsync(ap) { int error = 0; struct proc *p = ap->a_p; - struct vnode *targetvp = OTHERVP(ap->a_vp); - struct union_node *un; - - if (targetvp != NULLVP) { - int dolock = (targetvp == LOWERVP(ap->a_vp)); - - un = VTOUNION(ap->a_vp); - if (dolock) - vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY, p); - else { - un = VTOUNION(ap->a_vp); - if ((un->un_flags & UN_ULOCK) == 0 && - targetvp->v_data != NULL && - ((struct lock *)targetvp->v_data)->lk_lockholder - == curproc->p_pid && - VOP_ISLOCKED(targetvp) != 0) - return 0; /* XXX */ - - FIXUP(un, p); - } + struct vnode *targetvp; + struct union_node *un = VTOUNION(ap->a_vp); + if 
((targetvp = union_lock_other(un, p)) != NULLVP) { error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_waitfor, p); - if (dolock) - VOP_UNLOCK(targetvp, 0, p); + union_unlock_other(targetvp, p); } return (error); } +/* + * union_remove: + * + * Remove the specified cnp. The dvp and vp are passed to us locked + * and must remain locked on return. + */ + static int union_remove(ap) struct vop_remove_args /* { @@ -1068,42 +1266,40 @@ union_remove(ap) struct union_node *un = VTOUNION(ap->a_vp); struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + struct vnode *uppervp; + struct vnode *upperdvp; int error; - if (dun->un_uppervp == NULLVP) + if ((upperdvp = union_lock_upper(dun, p)) == NULLVP) panic("union remove: null upper vnode"); - if (un->un_uppervp != NULLVP) { - struct vnode *dvp = dun->un_uppervp; - struct vnode *vp = un->un_uppervp; - - FIXUP(dun, p); - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); - FIXUP(un, p); - un->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_vp, 0, p); - + if ((uppervp = union_lock_upper(un, p)) != NULLVP) { if (union_dowhiteout(un, cnp->cn_cred, p)) cnp->cn_flags |= DOWHITEOUT; - error = VOP_REMOVE(dvp, vp, cnp); + error = VOP_REMOVE(upperdvp, uppervp, cnp); #if 0 /* XXX */ if (!error) union_removed_upper(un); #endif - dun->un_flags |= UN_ULOCK; - un->un_flags |= UN_ULOCK; + union_unlock_upper(uppervp, p); } else { - FIXUP(dun, p); error = union_mkwhiteout( - MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount), - dun->un_uppervp, ap->a_cnp, un->un_path); + MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount), + upperdvp, ap->a_cnp, un->un_path); } - + union_unlock_upper(upperdvp, p); return (error); } +/* + * union_link: + * + * tdvp will be locked on entry, vp will not be locked on entry. + * tdvp should remain locked on return and vp should remain unlocked + * on return. 
+ */ + static int union_link(ap) struct vop_link_args /* { @@ -1119,43 +1315,56 @@ union_link(ap) struct vnode *tdvp; int error = 0; - if (ap->a_tdvp->v_op != ap->a_vp->v_op) { vp = ap->a_vp; } else { struct union_node *tun = VTOUNION(ap->a_vp); + if (tun->un_uppervp == NULLVP) { vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p); +#if 0 if (dun->un_uppervp == tun->un_dirvp) { - dun->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(dun->un_uppervp, 0, p); + if (dun->un_flags & UN_ULOCK) { + dun->un_flags &= ~UN_ULOCK; + VOP_UNLOCK(dun->un_uppervp, 0, p); + } } +#endif error = union_copyup(tun, 1, cnp->cn_cred, p); +#if 0 if (dun->un_uppervp == tun->un_dirvp) { vn_lock(dun->un_uppervp, - LK_EXCLUSIVE | LK_RETRY, p); + LK_EXCLUSIVE | LK_RETRY, p); dun->un_flags |= UN_ULOCK; } +#endif VOP_UNLOCK(ap->a_vp, 0, p); } vp = tun->un_uppervp; } - tdvp = dun->un_uppervp; - if (tdvp == NULLVP) - error = EROFS; - if (error) return (error); - FIXUP(dun, p); - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_tdvp, 0, p); + /* + * Make sure upper is locked, then unlock the union directory we were + * called with to avoid a deadlock while we are calling VOP_LINK on + * the upper (with tdvp locked and vp not locked). Our ap->a_tdvp + * is expected to be locked on return. + */ - error = VOP_LINK(tdvp, vp, cnp); + if ((tdvp = union_lock_upper(dun, p)) == NULLVP) + return (EROFS); - dun->un_flags |= UN_ULOCK; + VOP_UNLOCK(ap->a_tdvp, 0, p); /* unlock calling node */ + error = VOP_LINK(tdvp, vp, cnp); /* call link on upper */ + /* + * We have to unlock tdvp prior to relocking our calling node in + * order to avoid a deadlock. + */ + union_unlock_upper(tdvp, p); + vn_lock(ap->a_tdvp, LK_EXCLUSIVE | LK_RETRY, p); return (error); } @@ -1171,12 +1380,16 @@ union_rename(ap) } */ *ap; { int error; - struct vnode *fdvp = ap->a_fdvp; struct vnode *fvp = ap->a_fvp; struct vnode *tdvp = ap->a_tdvp; struct vnode *tvp = ap->a_tvp; + /* + * Figure out what fdvp to pass to our upper or lower vnode. 
If we + * replace the fdvp, release the original one and ref the new one. + */ + if (fdvp->v_op == union_vnodeop_p) { /* always true */ struct union_node *un = VTOUNION(fdvp); if (un->un_uppervp == NULLVP) { @@ -1189,30 +1402,77 @@ union_rename(ap) error = EXDEV; goto bad; } - fdvp = un->un_uppervp; VREF(fdvp); vrele(ap->a_fdvp); } + /* + * Figure out what fvp to pass to our upper or lower vnode. If we + * replace the fvp, release the original one and ref the new one. + */ + if (fvp->v_op == union_vnodeop_p) { /* always true */ struct union_node *un = VTOUNION(fvp); +#if 0 + struct union_mount *um = MOUNTTOUNIONMOUNT(fvp->v_mount); +#endif + if (un->un_uppervp == NULLVP) { - /* XXX: should do a copyup */ - error = EXDEV; - goto bad; + switch(fvp->v_type) { + case VREG: + vn_lock(un->un_vnode, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_proc); + error = union_copyup(un, 1, ap->a_fcnp->cn_cred, ap->a_fcnp->cn_proc); + VOP_UNLOCK(un->un_vnode, 0, ap->a_fcnp->cn_proc); + if (error) + goto bad; + break; + case VDIR: + /* + * XXX not yet. + * + * There is only one way to rename a directory + * based in the lowervp, and that is to copy + * the entire directory hierarchy. Otherwise + * it would not last across a reboot. + */ +#if 0 + vrele(fvp); + fvp = NULL; + vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_proc); + error = union_mkshadow(um, fdvp, + ap->a_fcnp, &un->un_uppervp); + VOP_UNLOCK(fdvp, 0, ap->a_fcnp->cn_proc); + if (un->un_uppervp) + VOP_UNLOCK(un->un_uppervp, 0, ap->a_fcnp->cn_proc); + if (error) + goto bad; + break; +#endif + default: + error = EXDEV; + goto bad; + } } if (un->un_lowervp != NULLVP) ap->a_fcnp->cn_flags |= DOWHITEOUT; - fvp = un->un_uppervp; VREF(fvp); vrele(ap->a_fvp); } + /* + * Figure out what tdvp (destination directory) to pass to the + * lower level. If we replace it with uppervp, we need to vput the + * old one. 
The exclusive lock is transferred to what we will pass + * down in the VOP_RENAME and we replace uppervp with a simple + * reference. + */ + if (tdvp->v_op == union_vnodeop_p) { struct union_node *un = VTOUNION(tdvp); + if (un->un_uppervp == NULLVP) { /* * this should never happen in normal @@ -1224,32 +1484,52 @@ union_rename(ap) goto bad; } - tdvp = un->un_uppervp; - VREF(tdvp); - un->un_flags |= UN_KLOCK; + /* + * new tdvp is a lock and reference on uppervp, put away + * the old tdvp. + */ + tdvp = union_lock_upper(un, ap->a_tcnp->cn_proc); vput(ap->a_tdvp); } + /* + * Figure out what tvp (destination file) to pass to the + * lower level. + * + * If the uppervp file does not exist put away the (wrong) + * file and change tvp to NULL. + */ + if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) { struct union_node *un = VTOUNION(tvp); - tvp = un->un_uppervp; - if (tvp != NULLVP) { - VREF(tvp); - un->un_flags |= UN_KLOCK; - } + tvp = union_lock_upper(un, ap->a_tcnp->cn_proc); vput(ap->a_tvp); + /* note: tvp may be NULL */ } + /* + * VOP_RENAME releases/vputs prior to returning, so we have no + * cleanup to do. + */ + return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp)); + /* + * Error. We still have to release / vput the various elements. 
+ */ + bad: vrele(fdvp); - vrele(fvp); + if (fvp) + vrele(fvp); vput(tdvp); - if (tvp != NULLVP) - vput(tvp); - + if (tvp != NULLVP) { + if (tvp != tdvp) + vput(tvp); + else + vrele(tvp); + } return (error); } @@ -1263,34 +1543,26 @@ union_mkdir(ap) } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); - struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + struct vnode *upperdvp; + int error = EROFS; - if (dvp != NULLVP) { + if ((upperdvp = union_lock_upper(dun, p)) != NULLVP) { struct vnode *vp; - int error; - FIXUP(dun, p); - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); - error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap); - if (error) { - dun->un_flags |= UN_ULOCK; - return (error); + error = VOP_MKDIR(upperdvp, &vp, cnp, ap->a_vap); + union_unlock_upper(upperdvp, p); + + if (error == 0) { + VOP_UNLOCK(vp, 0, p); + UDEBUG(("ALLOCVP-2 FROM %p REFS %d\n", vp, vp->v_usecount)); + error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, + ap->a_dvp, NULLVP, cnp, vp, NULLVP, 1); + UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vp->v_usecount)); } - - VOP_UNLOCK(dvp, 0, p); - error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp, - NULLVP, cnp, vp, NULLVP, 1); - if (error) - vput(vp); - vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p); - - return (error); } - - return (EROFS); + return (error); } static int @@ -1305,42 +1577,34 @@ union_rmdir(ap) struct union_node *un = VTOUNION(ap->a_vp); struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + struct vnode *upperdvp; + struct vnode *uppervp; int error; - if (dun->un_uppervp == NULLVP) + if ((upperdvp = union_lock_upper(dun, p)) == NULLVP) panic("union rmdir: null upper vnode"); - if (un->un_uppervp != NULLVP) { - struct vnode *dvp = dun->un_uppervp; - struct vnode *vp = un->un_uppervp; - - FIXUP(dun, p); - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); - FIXUP(un, p); - un->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_vp, 
0, p); - + if ((uppervp = union_lock_upper(un, p)) != NULLVP) { if (union_dowhiteout(un, cnp->cn_cred, p)) cnp->cn_flags |= DOWHITEOUT; - error = VOP_RMDIR(dvp, vp, ap->a_cnp); -#if 0 - /* XXX */ - if (!error) - union_removed_upper(un); -#endif - dun->un_flags |= UN_ULOCK; - un->un_flags |= UN_ULOCK; + error = VOP_RMDIR(upperdvp, uppervp, ap->a_cnp); + union_unlock_upper(uppervp, p); } else { - FIXUP(dun, p); error = union_mkwhiteout( - MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount), - dun->un_uppervp, ap->a_cnp, un->un_path); + MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount), + dun->un_uppervp, ap->a_cnp, un->un_path); } - + union_unlock_upper(upperdvp, p); return (error); } +/* + * union_symlink: + * + * dvp is locked on entry and remains locked on return. a_vpp is garbage + * (unused). + */ + static int union_symlink(ap) struct vop_symlink_args /* { @@ -1352,24 +1616,20 @@ union_symlink(ap) } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); - struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + struct vnode *dvp; + int error = EROFS; - if (dvp != NULLVP) { + if ((dvp = union_lock_upper(dun, p)) != NULLVP) { struct vnode *vp; - int error; - FIXUP(dun, p); - dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); error = VOP_SYMLINK(dvp, &vp, cnp, ap->a_vap, ap->a_target); - dun->un_flags |= UN_ULOCK; + /* vp is garbage whether an error occurs or not */ *ap->a_vpp = NULLVP; - return (error); + union_unlock_upper(dvp, p); } - - return (EROFS); + return (error); } /* @@ -1391,15 +1651,16 @@ union_readdir(ap) } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); - struct vnode *uvp = un->un_uppervp; struct proc *p = ap->a_uio->uio_procp; + struct vnode *uvp; + int error = 0; - if (uvp == NULLVP) - return (0); - - FIXUP(un, p); - ap->a_vp = uvp; - return (VCALL(uvp, VOFFSET(vop_readdir), ap)); + if ((uvp = union_lock_upper(un, p)) != NULLVP) { + ap->a_vp = uvp; + error = VCALL(uvp, VOFFSET(vop_readdir), ap); + 
union_unlock_upper(uvp, p); + } + return(error); } static int @@ -1411,23 +1672,28 @@ union_readlink(ap) } */ *ap; { int error; + struct union_node *un = VTOUNION(ap->a_vp); struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; - struct vnode *vp = OTHERVP(ap->a_vp); - int dolock = (vp == LOWERVP(ap->a_vp)); + struct vnode *vp; + + vp = union_lock_other(un, p); + KASSERT(vp != NULL, ("union_readlink: backing vnode missing!")); - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_vp), p); ap->a_vp = vp; error = VCALL(vp, VOFFSET(vop_readlink), ap); - if (dolock) - VOP_UNLOCK(vp, 0, p); + union_unlock_other(vp, p); return (error); } +/* + * union_abortop: + * + * dvp is locked on entry and left locked on return + * + */ + static int union_abortop(ap) struct vop_abortop_args /* { @@ -1435,28 +1701,35 @@ union_abortop(ap) struct componentname *a_cnp; } */ *ap; { - int error; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; - struct vnode *vp = OTHERVP(ap->a_dvp); struct union_node *un = VTOUNION(ap->a_dvp); - int islocked = un->un_flags & UN_LOCKED; - int dolock = (vp == LOWERVP(ap->a_dvp)); + int islocked = VOP_ISLOCKED(ap->a_dvp); + struct vnode *vp; + int error; if (islocked) { - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_dvp), p); + vp = union_lock_other(un, p); + } else { + vp = OTHERVP(ap->a_dvp); } + KASSERT(vp != NULL, ("union_abortop: backing vnode missing!")); + ap->a_dvp = vp; error = VCALL(vp, VOFFSET(vop_abortop), ap); - if (islocked && dolock) - VOP_UNLOCK(vp, 0, p); + + if (islocked) + union_unlock_other(vp, p); return (error); } +/* + * union_inactive: + * + * Called with the vnode locked. We are expected to unlock the vnode. 
+ */ + static int union_inactive(ap) struct vop_inactive_args /* { @@ -1485,10 +1758,17 @@ union_inactive(ap) if (un->un_dircache != 0) { for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) vrele(*vpp); - free(un->un_dircache, M_TEMP); + free (un->un_dircache, M_TEMP); un->un_dircache = 0; } +#if 0 + if ((un->un_flags & UN_ULOCK) && un->un_uppervp) { + un->un_flags &= ~UN_ULOCK; + VOP_UNLOCK(un->un_uppervp, 0, p); + } +#endif + VOP_UNLOCK(vp, 0, p); if ((un->un_flags & UN_CACHED) == 0) @@ -1503,7 +1783,6 @@ union_reclaim(ap) struct vnode *a_vp; } */ *ap; { - union_freevp(ap->a_vp); return (0); @@ -1513,75 +1792,47 @@ static int union_lock(ap) struct vop_lock_args *ap; { +#if 0 struct vnode *vp = ap->a_vp; struct proc *p = ap->a_p; int flags = ap->a_flags; struct union_node *un; +#endif int error; - vop_nolock(ap); - /* - * Need to do real lockmgr-style locking here. - * in the mean time, draining won't work quite right, - * which could lead to a few race conditions. - * the following test was here, but is not quite right, we - * still need to take the lock: - if ((flags & LK_TYPE_MASK) == LK_DRAIN) - return (0); - */ - flags &= ~LK_INTERLOCK; - -start: + error = vop_stdlock(ap); +#if 0 un = VTOUNION(vp); - if (un->un_uppervp != NULLVP) { - if (((un->un_flags & UN_ULOCK) == 0) && - (vp->v_usecount != 0)) { - error = vn_lock(un->un_uppervp, flags, p); - if (error) - return (error); - un->un_flags |= UN_ULOCK; + if (error == 0) { + /* + * Lock the upper if it exists and this is an exclusive lock + * request. 
+ */ + if (un->un_uppervp != NULLVP && + (flags & LK_TYPE_MASK) == LK_EXCLUSIVE) { + if ((un->un_flags & UN_ULOCK) == 0 && vp->v_usecount) { + error = vn_lock(un->un_uppervp, flags, p); + if (error) { + struct vop_unlock_args uap = { 0 }; + uap.a_vp = ap->a_vp; + uap.a_flags = ap->a_flags; + uap.a_p = ap->a_p; + vop_stdunlock(&uap); + return (error); + } + un->un_flags |= UN_ULOCK; + } } -#ifdef DIAGNOSTIC - if (un->un_flags & UN_KLOCK) { - vprint("dangling upper lock", vp); - panic("union: dangling upper lock"); - } -#endif } - - if (un->un_flags & UN_LOCKED) { -#ifdef DIAGNOSTIC - if (curproc && un->un_pid == curproc->p_pid && - un->un_pid > -1 && curproc->p_pid > -1) - panic("union: locking against myself"); #endif - un->un_flags |= UN_WANT; - tsleep((caddr_t)&un->un_flags, PINOD, "unionlk2", 0); - goto start; - } - -#ifdef DIAGNOSTIC - if (curproc) - un->un_pid = curproc->p_pid; - else - un->un_pid = -1; -#endif - - un->un_flags |= UN_LOCKED; - return (0); + return (error); } /* - * When operations want to vput() a union node yet retain a lock on - * the upper vnode (say, to do some further operations like link(), - * mkdir(), ...), they set UN_KLOCK on the union node, then call - * vput() which calls VOP_UNLOCK() and comes here. union_unlock() - * unlocks the union node (leaving the upper vnode alone), clears the - * KLOCK flag, and then returns to vput(). The caller then does whatever - * is left to do with the upper vnode, and ensures that it gets unlocked. + * union_unlock: * - * If UN_KLOCK isn't set, then the upper vnode is unlocked here. + * Unlock our union node. This also unlocks uppervp. 
*/ static int union_unlock(ap) @@ -1592,36 +1843,38 @@ union_unlock(ap) } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); - struct proc *p = ap->a_p; + int error; -#ifdef DIAGNOSTIC - if ((un->un_flags & UN_LOCKED) == 0) - panic("union: unlock unlocked node"); - if (curproc && un->un_pid != curproc->p_pid && - curproc->p_pid > -1 && un->un_pid > -1) - panic("union: unlocking other process's union node"); -#endif + KASSERT((un->un_uppervp == NULL || un->un_uppervp->v_usecount > 0), ("uppervp usecount is 0")); - un->un_flags &= ~UN_LOCKED; + error = vop_stdunlock(ap); +#if 0 - if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK) - VOP_UNLOCK(un->un_uppervp, 0, p); + /* + * If no exclusive locks remain and we are holding an uppervp lock, + * remove the uppervp lock. + */ - un->un_flags &= ~(UN_ULOCK|UN_KLOCK); - - if (un->un_flags & UN_WANT) { - un->un_flags &= ~UN_WANT; - wakeup((caddr_t) &un->un_flags); + if ((un->un_flags & UN_ULOCK) && + lockstatus(&un->un_lock) != LK_EXCLUSIVE) { + un->un_flags &= ~UN_ULOCK; + VOP_UNLOCK(un->un_uppervp, LK_EXCLUSIVE, p); } - -#ifdef DIAGNOSTIC - un->un_pid = 0; #endif - vop_nounlock(ap); - - return (0); + return(error); } +/* + * union_bmap: + * + * There isn't much we can do. We cannot push through to the real vnode + * to get to the underlying device because this will bypass data + * cached by the real vnode. + * + * For some reason we cannot return the 'real' vnode either, it seems + * to blow up memory maps. 
+ */ + static int union_bmap(ap) struct vop_bmap_args /* { @@ -1633,21 +1886,7 @@ union_bmap(ap) int *a_runb; } */ *ap; { - int error; - struct proc *p = curproc; /* XXX */ - struct vnode *vp = OTHERVP(ap->a_vp); - int dolock = (vp == LOWERVP(ap->a_vp)); - - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_vp), p); - ap->a_vp = vp; - error = VCALL(vp, VOFFSET(vop_bmap), ap); - if (dolock) - VOP_UNLOCK(vp, 0, p); - - return (error); + return(EOPNOTSUPP); } static int @@ -1668,16 +1907,6 @@ union_print(ap) return (0); } -static int -union_islocked(ap) - struct vop_islocked_args /* { - struct vnode *a_vp; - } */ *ap; -{ - - return ((VTOUNION(ap->a_vp)->un_flags & UN_LOCKED) ? 1 : 0); -} - static int union_pathconf(ap) struct vop_pathconf_args /* { @@ -1688,17 +1917,15 @@ union_pathconf(ap) { int error; struct proc *p = curproc; /* XXX */ - struct vnode *vp = OTHERVP(ap->a_vp); - int dolock = (vp == LOWERVP(ap->a_vp)); + struct union_node *un = VTOUNION(ap->a_vp); + struct vnode *vp; + + vp = union_lock_other(un, p); + KASSERT(vp != NULL, ("union_pathconf: backing vnode missing!")); - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_vp), p); ap->a_vp = vp; error = VCALL(vp, VOFFSET(vop_pathconf), ap); - if (dolock) - VOP_UNLOCK(vp, 0, p); + union_unlock_other(vp, p); return (error); } @@ -1722,6 +1949,8 @@ union_advlock(ap) /* * XXX - vop_strategy must be hand coded because it has no + * YYY - and it is not coherent with anything + * * vnode in its arguments. * This goes away with a merged VM/buffer cache. 
*/ @@ -1742,7 +1971,6 @@ union_strategy(ap) (othervp == LOWERVP(bp->b_vp))) panic("union_strategy: writing to lowervp"); #endif - return (VOP_STRATEGY(othervp, bp)); } @@ -1759,10 +1987,12 @@ static struct vnodeopv_entry_desc union_vnodeop_entries[] = { { &vop_close_desc, (vop_t *) union_close }, { &vop_create_desc, (vop_t *) union_create }, { &vop_fsync_desc, (vop_t *) union_fsync }, + { &vop_getpages_desc, (vop_t *) union_getpages }, + { &vop_putpages_desc, (vop_t *) union_putpages }, { &vop_getattr_desc, (vop_t *) union_getattr }, { &vop_inactive_desc, (vop_t *) union_inactive }, { &vop_ioctl_desc, (vop_t *) union_ioctl }, - { &vop_islocked_desc, (vop_t *) union_islocked }, + { &vop_islocked_desc, (vop_t *) vop_stdislocked }, { &vop_lease_desc, (vop_t *) union_lease }, { &vop_link_desc, (vop_t *) union_link }, { &vop_lock_desc, (vop_t *) union_lock },