
When the experimental NFS client is handling an NFSv4 server reboot
with delegations enabled, recovery could fail while the renew thread
is trying to return a delegation, since the renew thread is also the
thread that performs the recovery and cannot do so while it is blocked.
This patch fixes the problem by having nfscl_recalldeleg() fail in that
case, with the underlying I/O operations returning EIO so that they
will be retried later. Most of the patch consists of adding an argument
to various functions to indicate the delegation-recall case where this
needs to be done.

MFC after:	1 week
Rick Macklem 2010-04-22 23:51:01 +00:00
parent b834123032
commit 67c5c2d2d8
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=207082
8 changed files with 91 additions and 36 deletions
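
The shape of the fix, reduced to a minimal standalone sketch: the renew
thread is the thread that performs recovery, so any flush it starts while
recalling a delegation must fail fast with EIO rather than sleep until
recovery finishes. The helpers below (write_back_one_buffer(),
recovery_in_progress(), wait_for_recovery()) are hypothetical stand-ins
for the buffer cache and recovery machinery, not the real kernel API.

#include <errno.h>

/* Hypothetical stand-ins; each would touch real kernel state. */
static int write_back_one_buffer(void) { return (0); }
static int recovery_in_progress(void) { return (0); }
static void wait_for_recovery(void) { }

/*
 * A flush running on behalf of the renew thread must not sleep waiting
 * for recovery, because recovery is performed by that same thread.
 * Returning EIO leaves the buffer dirty so a later flush retries it.
 */
static int
flush_dirty_buffers(int called_from_renewthread)
{
    int error;

    for (;;) {
        error = write_back_one_buffer();
        if (error == 0)
            return (0);
        if (!recovery_in_progress())
            return (error);
        if (called_from_renewthread != 0)
            return (EIO);       /* fail fast; retried later */
        wait_for_recovery();    /* safe only off the renew thread */
    }
}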

View File

@@ -369,7 +369,7 @@ int nfsrpc_readlink(vnode_t, struct uio *, struct ucred *,
 int nfsrpc_read(vnode_t, struct uio *, struct ucred *, NFSPROC_T *,
     struct nfsvattr *, int *, void *);
 int nfsrpc_write(vnode_t, struct uio *, int *, u_char *,
-    struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *);
+    struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *, int);
 int nfsrpc_mknod(vnode_t, char *, int, struct vattr *, u_int32_t,
     enum vtype, struct ucred *, NFSPROC_T *, struct nfsvattr *,
     struct nfsvattr *, struct nfsfh **, int *, int *, void *);
@@ -502,7 +502,7 @@ int nfscl_maperr(NFSPROC_T *, int, uid_t, gid_t);
 void nfscl_init(void);
 
 /* nfs_clbio.c */
-int ncl_flush(vnode_t, int, struct ucred *, NFSPROC_T *, int);
+int ncl_flush(vnode_t, int, struct ucred *, NFSPROC_T *, int, int);
 
 /* nfs_clnode.c */
 void ncl_invalcaches(vnode_t);

View File

@@ -79,14 +79,16 @@ int ncl_biowrite(struct vnode *, struct uio *, int, struct ucred *);
 int ncl_vinvalbuf(struct vnode *, int, struct thread *, int);
 int ncl_asyncio(struct nfsmount *, struct buf *, struct ucred *,
     struct thread *);
-int ncl_doio(struct vnode *, struct buf *, struct ucred *, struct thread *);
+int ncl_doio(struct vnode *, struct buf *, struct ucred *, struct thread *,
+    int);
 void ncl_nhinit(void);
 void ncl_nhuninit(void);
 void ncl_nodelock(struct nfsnode *);
 void ncl_nodeunlock(struct nfsnode *);
 int ncl_getattrcache(struct vnode *, struct vattr *);
 int ncl_readrpc(struct vnode *, struct uio *, struct ucred *);
-int ncl_writerpc(struct vnode *, struct uio *, struct ucred *, int *, int *);
+int ncl_writerpc(struct vnode *, struct uio *, struct ucred *, int *, int *,
+    int);
 int ncl_readlinkrpc(struct vnode *, struct uio *, struct ucred *);
 int ncl_readdirrpc(struct vnode *, struct uio *, struct ucred *,
     struct thread *);

View File

@@ -336,7 +336,7 @@ ncl_putpages(struct vop_putpages_args *ap)
     else
         iomode = NFSWRITE_FILESYNC;
 
-    error = ncl_writerpc(vp, &uio, cred, &iomode, &must_commit);
+    error = ncl_writerpc(vp, &uio, cred, &iomode, &must_commit, 0);
 
     pmap_qremove(kva, npages);
     relpbuf(bp, &ncl_pbuf_freecnt);
@@ -554,7 +554,7 @@ ncl_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
         if ((bp->b_flags & B_CACHE) == 0) {
             bp->b_iocmd = BIO_READ;
             vfs_busy_pages(bp, 0);
-            error = ncl_doio(vp, bp, cred, td);
+            error = ncl_doio(vp, bp, cred, td, 0);
             if (error) {
                 brelse(bp);
                 return (error);
@@ -583,7 +583,7 @@ ncl_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
         if ((bp->b_flags & B_CACHE) == 0) {
             bp->b_iocmd = BIO_READ;
             vfs_busy_pages(bp, 0);
-            error = ncl_doio(vp, bp, cred, td);
+            error = ncl_doio(vp, bp, cred, td, 0);
             if (error) {
                 bp->b_ioflags |= BIO_ERROR;
                 brelse(bp);
@@ -609,7 +609,7 @@ ncl_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
         if ((bp->b_flags & B_CACHE) == 0) {
             bp->b_iocmd = BIO_READ;
             vfs_busy_pages(bp, 0);
-            error = ncl_doio(vp, bp, cred, td);
+            error = ncl_doio(vp, bp, cred, td, 0);
             if (error) {
                 brelse(bp);
             }
@@ -638,7 +638,7 @@ ncl_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
         if ((bp->b_flags & B_CACHE) == 0) {
             bp->b_iocmd = BIO_READ;
             vfs_busy_pages(bp, 0);
-            error = ncl_doio(vp, bp, cred, td);
+            error = ncl_doio(vp, bp, cred, td, 0);
             /*
              * no error + B_INVAL == directory EOF,
              * use the block.
@@ -771,7 +771,7 @@ nfs_directio_write(vp, uiop, cred, ioflag)
             uio.uio_td = td;
             iomode = NFSWRITE_FILESYNC;
             error = ncl_writerpc(vp, &uio, cred, &iomode,
-                &must_commit);
+                &must_commit, 0);
             KASSERT((must_commit == 0),
                 ("ncl_directio_write: Did not commit write"));
             if (error)
@@ -1122,7 +1122,7 @@ ncl_write(struct vop_write_args *ap)
         if ((bp->b_flags & B_CACHE) == 0) {
             bp->b_iocmd = BIO_READ;
             vfs_busy_pages(bp, 0);
-            error = ncl_doio(vp, bp, cred, td);
+            error = ncl_doio(vp, bp, cred, td, 0);
             if (error) {
                 brelse(bp);
                 break;
@@ -1523,7 +1523,7 @@ ncl_doio_directwrite(struct buf *bp)
     iomode = NFSWRITE_FILESYNC;
     uiop->uio_td = NULL; /* NULL since we're in nfsiod */
-    ncl_writerpc(bp->b_vp, uiop, bp->b_wcred, &iomode, &must_commit);
+    ncl_writerpc(bp->b_vp, uiop, bp->b_wcred, &iomode, &must_commit, 0);
     KASSERT((must_commit == 0), ("ncl_doio_directwrite: Did not commit write"));
     free(iov_base, M_NFSDIRECTIO);
     free(uiop->uio_iov, M_NFSDIRECTIO);
@@ -1550,7 +1550,8 @@ ncl_doio_directwrite(struct buf *bp)
  * synchronously or from an nfsiod.
  */
 int
-ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td)
+ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td,
+    int called_from_strategy)
 {
     struct uio *uiop;
     struct nfsnode *np;
@@ -1695,7 +1696,8 @@ ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td)
         else
             iomode = NFSWRITE_FILESYNC;
-        error = ncl_writerpc(vp, uiop, cr, &iomode, &must_commit);
+        error = ncl_writerpc(vp, uiop, cr, &iomode, &must_commit,
+            called_from_strategy);
 
         /*
          * When setting B_NEEDCOMMIT also set B_CLUSTEROK to try
@@ -1732,6 +1734,12 @@ ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td)
          * the block is reused. This is indicated by setting
          * the B_DELWRI and B_NEEDCOMMIT flags.
          *
+         * EIO is returned by ncl_writerpc() to indicate a recoverable
+         * write error and is handled as above, except that
+         * B_EINTR isn't set. One cause of this is a stale stateid
+         * error for the RPC that indicates recovery is required,
+         * when called with called_from_strategy != 0.
+         *
          * If the buffer is marked B_PAGING, it does not reside on
          * the vp's paging queues so we cannot call bdirty(). The
          * bp in this case is not an NFS cache block so we should
@@ -1760,7 +1768,8 @@ ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td)
             bdirty(bp);
             bp->b_flags &= ~B_DONE;
         }
-        if (error && (bp->b_flags & B_ASYNC) == 0)
+        if ((error == EINTR || error == ETIMEDOUT) &&
+            (bp->b_flags & B_ASYNC) == 0)
             bp->b_flags |= B_EINTR;
         splx(s);
     } else {
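
The key behavioural change in this file is the error disposition at the
end of ncl_doio(): a failed write is still redirtied, but B_EINTR is now
set only for a genuine interrupt or timeout, so the EIO that signals
pending recovery leaves a plain dirty buffer behind. A condensed sketch
of that decision, with the buffer reduced to a bare struct (illustration
only, not the kernel code):

#include <errno.h>

#define B_ASYNC 0x01
#define B_EINTR 0x02

struct sketchbuf {
    int b_flags;
};

/*
 * Disposition of a failed buffer write, mirroring the hunk above: the
 * data always stays queued (the bdirty() path), while B_EINTR is
 * reserved for EINTR/ETIMEDOUT.  EIO from a write blocked on recovery
 * therefore leaves an ordinary dirty buffer to be flushed later.
 */
static void
write_error_disposition(struct sketchbuf *bp, int error)
{

    /* ... bdirty(bp): keep the buffer on the dirty queue ... */
    if ((error == EINTR || error == ETIMEDOUT) &&
        (bp->b_flags & B_ASYNC) == 0)
        bp->b_flags |= B_EINTR;
}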

View File

@@ -278,9 +278,11 @@ nfssvc_iod(void *instance)
             (void)ncl_doio_directwrite(bp);
         } else {
             if (bp->b_iocmd == BIO_READ)
-                (void) ncl_doio(bp->b_vp, bp, bp->b_rcred, NULL);
+                (void) ncl_doio(bp->b_vp, bp, bp->b_rcred,
+                    NULL, 0);
             else
-                (void) ncl_doio(bp->b_vp, bp, bp->b_wcred, NULL);
+                (void) ncl_doio(bp->b_vp, bp, bp->b_wcred,
+                    NULL, 0);
         }
         mtx_lock(&ncl_iod_mutex);
         /*

View File

@@ -199,7 +199,7 @@ ncl_inactive(struct vop_inactive_args *ap)
          * available for the writes.
          */
         if (nfscl_mustflush(vp))
-            (void) ncl_flush(vp, MNT_WAIT, NULL, ap->a_td, 1);
+            (void) ncl_flush(vp, MNT_WAIT, NULL, ap->a_td, 1, 0);
         (void) nfsrpc_close(vp, 1, ap->a_td);
     }

View File

@@ -1346,11 +1346,16 @@ nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred,
 
 /*
  * nfs write operation
+ * When called_from_strategy != 0, it should return EIO for an error that
+ * indicates recovery is in progress, so that the buffer will be left
+ * dirty and be written back to the server later. If it loops around,
+ * the recovery thread could get stuck waiting for the buffer and recovery
+ * will then deadlock.
  */
 APPLESTATIC int
 nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, u_char *verfp,
     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
-    void *stuff)
+    void *stuff, int called_from_strategy)
 {
     int error, expireret = 0, retrycnt, nostateid;
     u_int32_t clidrev = 0;
@@ -1410,12 +1415,15 @@ nfscl_dumpstate(nmp, 1, 1, 0, 0);
             expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
         }
         retrycnt++;
-    } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
-        error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
+    } while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
+        ((error == NFSERR_STALESTATEID ||
+          error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) ||
         (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
         ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
          expireret == 0 && clidrev != 0 && retrycnt < 4));
-    if (error && retrycnt >= 4)
+    if (error != 0 && (retrycnt >= 4 ||
+        ((error == NFSERR_STALESTATEID ||
+          error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0)))
         error = EIO;
     if (NFSHASNFSV4(nmp) && p == NULL)
         NFSFREECRED(newcred);
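
The retry loop in nfsrpc_write() now treats stale-stateid errors
differently depending on the caller. Restated as a pure predicate (the
NFSERR_* constants are given placeholder values here purely so the
sketch compiles; the real definitions live in the NFS protocol headers):

/* Placeholder values, for illustration only. */
#define NFSERR_GRACE            1
#define NFSERR_DELAY            2
#define NFSERR_STALESTATEID     3
#define NFSERR_STALEDONTRECOVER 4
#define NFSERR_OLDSTATEID       5
#define NFSERR_EXPIRED          6
#define NFSERR_BADSTATEID       7

/*
 * Retry policy of the do/while loop above: stale-stateid errors spin
 * waiting for recovery only when the write did NOT come through
 * nfs_strategy(); otherwise the caller maps them to EIO so the buffer
 * stays dirty and the recovery thread is never blocked behind it.
 */
static int
should_retry_write(int error, int retrycnt, int expireret,
    unsigned int clidrev, int called_from_strategy)
{

    if (error == NFSERR_GRACE || error == NFSERR_DELAY)
        return (1);
    if ((error == NFSERR_STALESTATEID ||
        error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0)
        return (1);
    if (error == NFSERR_OLDSTATEID && retrycnt < 20)
        return (1);
    if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
        expireret == 0 && clidrev != 0 && retrycnt < 4)
        return (1);
    return (0);
}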

View File

@@ -139,7 +139,7 @@ static void nfscl_freedeleg(struct nfscldeleghead *, struct nfscldeleg *);
 static int nfscl_errmap(struct nfsrv_descript *);
 static void nfscl_cleanup_common(struct nfsclclient *, u_int8_t *);
 static int nfscl_recalldeleg(struct nfsclclient *, struct nfsmount *,
-    struct nfscldeleg *, vnode_t, struct ucred *, NFSPROC_T *);
+    struct nfscldeleg *, vnode_t, struct ucred *, NFSPROC_T *, int);
 static void nfscl_freeopenowner(struct nfsclowner *, int);
 static void nfscl_cleandeleg(struct nfscldeleg *);
 static int nfscl_trydelegreturn(struct nfscldeleg *, struct ucred *,
@@ -2469,7 +2469,7 @@ nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p)
             NFSUNLOCKCLSTATE();
             newnfs_copycred(&dp->nfsdl_cred, cred);
             ret = nfscl_recalldeleg(clp, clp->nfsc_nmp, dp,
-                NULL, cred, p);
+                NULL, cred, p, 1);
             if (!ret) {
                 nfscl_cleandeleg(dp);
                 TAILQ_REMOVE(&clp->nfsc_deleg, dp,
@@ -3309,7 +3309,8 @@ nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off,
  */
 static int
 nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp,
-    struct nfscldeleg *dp, vnode_t vp, struct ucred *cred, NFSPROC_T *p)
+    struct nfscldeleg *dp, vnode_t vp, struct ucred *cred, NFSPROC_T *p,
+    int called_from_renewthread)
 {
     struct nfsclowner *owp, *lowp, *nowp;
     struct nfsclopen *op, *lop;
@@ -3343,6 +3344,7 @@ nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp,
      * Ok, if it's a write delegation, flush data to the server, so
      * that close/open consistency is retained.
      */
+    ret = 0;
     NFSLOCKNODE(np);
     if ((dp->nfsdl_flags & NFSCLDL_WRITE) && (np->n_flag & NMODIFIED)) {
 #ifdef APPLE
@@ -3351,7 +3353,8 @@ nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp,
         np->n_flag |= NDELEGRECALL;
 #endif
         NFSUNLOCKNODE(np);
-        (void) ncl_flush(vp, MNT_WAIT, cred, p, 1);
+        ret = ncl_flush(vp, MNT_WAIT, cred, p, 1,
+            called_from_renewthread);
         NFSLOCKNODE(np);
 #ifdef APPLE
         OSBitAndAtomic((int32_t)~(NMODIFIED | NDELEGRECALL), (UInt32 *)&np->n_flag);
@@ -3360,6 +3363,16 @@ nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp,
 #endif
     }
     NFSUNLOCKNODE(np);
+    if (ret == EIO && called_from_renewthread != 0) {
+        /*
+         * If the flush failed with EIO for the renew thread,
+         * return now, so that the dirty buffer will be flushed
+         * later.
+         */
+        if (gotvp != 0)
+            vrele(vp);
+        return (ret);
+    }
 
     /*
      * Now, for each openowner with opens issued locally, move them
@@ -3857,7 +3870,7 @@ nfscl_removedeleg(vnode_t vp, NFSPROC_T *p, nfsv4stateid_t *stp)
             NFSUNLOCKCLSTATE();
             cred = newnfs_getcred();
             newnfs_copycred(&dp->nfsdl_cred, cred);
-            (void) nfscl_recalldeleg(clp, nmp, dp, vp, cred, p);
+            (void) nfscl_recalldeleg(clp, nmp, dp, vp, cred, p, 0);
             NFSFREECRED(cred);
             triedrecall = 1;
             NFSLOCKCLSTATE();
@@ -3955,7 +3968,7 @@ nfscl_renamedeleg(vnode_t fvp, nfsv4stateid_t *fstp, int *gotfdp, vnode_t tvp,
             NFSUNLOCKCLSTATE();
             cred = newnfs_getcred();
             newnfs_copycred(&dp->nfsdl_cred, cred);
-            (void) nfscl_recalldeleg(clp, nmp, dp, fvp, cred, p);
+            (void) nfscl_recalldeleg(clp, nmp, dp, fvp, cred, p, 0);
             NFSFREECRED(cred);
             triedrecall = 1;
             NFSLOCKCLSTATE();
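
Putting the pieces of nfscl_recalldeleg() together: the renew thread
passes called_from_renewthread = 1 down into ncl_flush() and abandons
the recall the moment that flush reports EIO. A simplified outline of
the control flow, with the flush reduced to a hypothetical stand-in
rather than the real kernel call:

#include <errno.h>

/* Hypothetical stand-in for the ncl_flush() call in the hunk above. */
static int flush_write_delegation_data(int called_from_renewthread)
{
    (void)called_from_renewthread;
    return (0);
}

/*
 * Outline of the recall path: on EIO the renew thread returns at once,
 * leaving the delegation on the list so the recall (and the dirty
 * buffers behind it) are retried on a later pass.
 */
static int
recall_delegation(int called_from_renewthread)
{
    int ret;

    ret = flush_write_delegation_data(called_from_renewthread);
    if (ret == EIO && called_from_renewthread != 0)
        return (ret);
    /* ... move local opens to the server, then return the delegation ... */
    return (0);
}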

View File

@@ -670,13 +670,13 @@ nfs_close(struct vop_close_args *ap)
              * traditional vnode locking implemented for Vnode Ops.
              */
             int cm = newnfs_commit_on_close ? 1 : 0;
-            error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td, cm);
+            error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td, cm, 0);
             /* np->n_flag &= ~NMODIFIED; */
         } else if (NFS_ISV4(vp)) {
             if (nfscl_mustflush(vp)) {
                 int cm = newnfs_commit_on_close ? 1 : 0;
                 error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td,
-                    cm);
+                    cm, 0);
                 /*
                  * as above w.r.t races when clearing
                  * NMODIFIED.
@@ -1306,7 +1306,7 @@ ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
  */
 int
 ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
-    int *iomode, int *must_commit)
+    int *iomode, int *must_commit, int called_from_strategy)
 {
     struct nfsvattr nfsva;
     int error = 0, attrflag, ret;
@@ -1315,7 +1315,7 @@ ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
 
     *must_commit = 0;
     error = nfsrpc_write(vp, uiop, iomode, verf, cred,
-        uiop->uio_td, &nfsva, &attrflag, NULL);
+        uiop->uio_td, &nfsva, &attrflag, NULL, called_from_strategy);
     NFSLOCKMNT(nmp);
     if (!error && NFSHASWRITEVERF(nmp) &&
         NFSBCMP(verf, nmp->nm_verf, NFSX_VERF)) {
@@ -2473,7 +2473,7 @@ nfs_strategy(struct vop_strategy_args *ap)
      */
     if ((bp->b_flags & B_ASYNC) == 0 ||
         ncl_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread))
-        (void)ncl_doio(ap->a_vp, bp, cr, curthread);
+        (void) ncl_doio(ap->a_vp, bp, cr, curthread, 1);
     return (0);
 }
@@ -2484,17 +2484,20 @@
 static int
 nfs_fsync(struct vop_fsync_args *ap)
 {
-    return (ncl_flush(ap->a_vp, ap->a_waitfor, NULL, ap->a_td, 1));
+    return (ncl_flush(ap->a_vp, ap->a_waitfor, NULL, ap->a_td, 1, 0));
 }
 
 /*
  * Flush all the blocks associated with a vnode.
  * Walk through the buffer pool and push any dirty pages
  * associated with the vnode.
+ * If the called_from_renewthread argument is TRUE, it has been called
+ * from the NFSv4 renew thread and, as such, cannot block indefinitely
+ * waiting for a buffer write to complete.
  */
 int
 ncl_flush(struct vnode *vp, int waitfor, struct ucred *cred, struct thread *td,
-    int commit)
+    int commit, int called_from_renewthread)
 {
     struct nfsnode *np = VTONFS(vp);
     struct buf *bp;
@@ -2513,6 +2516,8 @@ ncl_flush(struct vnode *vp, int waitfor, struct ucred *cred, struct thread *td,
     struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
     int bvecsize = 0, bveccount;
 
+    if (called_from_renewthread != 0)
+        slptimeo = hz;
     if (nmp->nm_flag & NFSMNT_INT)
         slpflag = NFS_PCATCH;
     if (!commit)
@@ -2708,6 +2713,14 @@ ncl_flush(struct vnode *vp, int waitfor, struct ucred *cred, struct thread *td,
                 error = 0;
                 goto loop;
             }
+            if (called_from_renewthread != 0) {
+                /*
+                 * Return EIO so the flush will be retried
+                 * later.
+                 */
+                error = EIO;
+                goto done;
+            }
             if (newnfs_sigintr(nmp, td)) {
                 error = EINTR;
                 goto done;
@@ -2747,6 +2760,14 @@ ncl_flush(struct vnode *vp, int waitfor, struct ucred *cred, struct thread *td,
         error = bufobj_wwait(bo, slpflag, slptimeo);
         if (error) {
             BO_UNLOCK(bo);
+            if (called_from_renewthread != 0) {
+                /*
+                 * Return EIO so that the flush will be
+                 * retried later.
+                 */
+                error = EIO;
+                goto done;
+            }
             error = newnfs_sigintr(nmp, td);
             if (error)
                 goto done;
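
Both EIO bail-outs above pair with the slptimeo = hz cap set at the top
of ncl_flush(): instead of sleeping indefinitely on a busy buffer, the
renew thread sleeps at most about a second and then converts the timeout
into EIO. A sketch of that pattern, with the kernel sleep
(msleep/bufobj_wwait) replaced by a hypothetical helper:

#include <errno.h>

#define SKETCH_HZ   1000    /* stand-in for the kernel's hz */

/* Hypothetical sleep: returns EWOULDBLOCK if the timeout expires. */
static int sleep_on_busy_buffer(int slptimeo)
{
    return (slptimeo != 0 ? EWOULDBLOCK : 0);
}

/*
 * Bounded wait, as in ncl_flush(): the renew thread caps its sleep at
 * roughly one second and turns a timeout into EIO so the whole flush
 * is retried later instead of blocking recovery.
 */
static int
wait_for_buffer(int called_from_renewthread)
{
    int error, slptimeo;

    slptimeo = 0;               /* 0: sleep until woken */
    if (called_from_renewthread != 0)
        slptimeo = SKETCH_HZ;   /* wake up after ~1 second */
    error = sleep_on_busy_buffer(slptimeo);
    if (error != 0 && called_from_renewthread != 0)
        return (EIO);
    return (error);
}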
@@ -2838,7 +2859,7 @@ nfs_advlock(struct vop_advlock_args *ap)
          */
         if (ap->a_op == F_UNLCK &&
             nfscl_checkwritelocked(vp, ap->a_fl, cred, td))
-            (void) ncl_flush(vp, MNT_WAIT, cred, td, 1);
+            (void) ncl_flush(vp, MNT_WAIT, cred, td, 1, 0);
 
         /*
          * Loop around doing the lock op, while a blocking lock