From 1f60bfd8227f0972d61f5977ba62452335b757e0 Mon Sep 17 00:00:00 2001 From: Rick Macklem Date: Sat, 8 Dec 2012 22:52:39 +0000 Subject: [PATCH] Move the NFSv4.1 client patches over from projects/nfsv4.1-client to head. I don't think the NFS client behaviour will change unless the new "minorversion=1" mount option is used. It includes basic NFSv4.1 support plus support for pNFS using the Files Layout only. All problems detected during an NFSv4.1 Bakeathon testing event in June 2012 have been resolved in this code and it has been tested against the NFSv4.1 server available to me. Although not reviewed, I believe that kib@ has looked at it. --- sys/fs/nfs/nfs.h | 17 + sys/fs/nfs/nfs_commonkrpc.c | 155 ++- sys/fs/nfs/nfs_commonport.c | 6 + sys/fs/nfs/nfs_commonsubs.c | 346 +++++- sys/fs/nfs/nfs_var.h | 68 +- sys/fs/nfs/nfscl.h | 2 +- sys/fs/nfs/nfsclstate.h | 208 +++- sys/fs/nfs/nfsport.h | 87 +- sys/fs/nfs/nfsproto.h | 128 ++- sys/fs/nfsclient/nfs_clbio.c | 2 + sys/fs/nfsclient/nfs_clcomsubs.c | 90 +- sys/fs/nfsclient/nfs_clkdtrace.c | 26 +- sys/fs/nfsclient/nfs_clkrpc.c | 19 +- sys/fs/nfsclient/nfs_clport.c | 9 +- sys/fs/nfsclient/nfs_clrpcops.c | 1845 ++++++++++++++++++++++++++++-- sys/fs/nfsclient/nfs_clstate.c | 1166 +++++++++++++++++-- sys/fs/nfsclient/nfs_clvfsops.c | 120 +- sys/fs/nfsclient/nfs_clvnops.c | 56 +- sys/fs/nfsclient/nfsmount.h | 8 + sys/fs/nfsclient/nfsnode.h | 2 + sys/fs/nfsserver/nfs_nfsdstate.c | 3 +- sys/nfsclient/nfsargs.h | 1 + 22 files changed, 3978 insertions(+), 386 deletions(-) diff --git a/sys/fs/nfs/nfs.h b/sys/fs/nfs/nfs.h index 3d588a5dbf90..3cc8c8fc86fa 100644 --- a/sys/fs/nfs/nfs.h +++ b/sys/fs/nfs/nfs.h @@ -50,6 +50,7 @@ #define NFS_MAXREXMIT 100 /* Stop counting after this many */ #define NFSV4_CALLBACKTIMEO (2 * NFS_HZ) /* Timeout in ticks */ #define NFSV4_CALLBACKRETRY 5 /* Number of retries before failure */ +#define NFSV4_CBSLOTS 8 /* Number of slots for session */ #define NFSV4_CBRETRYCNT 4 /* # of CBRecall retries upon err 
*/ #define NFSV4_UPCALLTIMEO (15 * NFS_HZ) /* Timeout in ticks for upcalls */ /* to gssd or nfsuserd */ @@ -100,6 +101,9 @@ #ifndef NFSCLDELEGHIGHWATER #define NFSCLDELEGHIGHWATER 10000 /* limit for client delegations */ #endif +#ifndef NFSCLLAYOUTHIGHWATER +#define NFSCLLAYOUTHIGHWATER 10000 /* limit for client pNFS layouts */ +#endif #ifndef NFSNOOPEN /* Inactive open owner (sec) */ #define NFSNOOPEN 120 #endif @@ -529,6 +533,7 @@ struct nfsrv_descript { nfsquad_t nd_clientid; /* Implied clientid */ int nd_gssnamelen; /* principal name length */ char *nd_gssname; /* principal name */ + uint32_t *nd_slotseq; /* ptr to slot seq# in req */ }; #define nd_princlen nd_gssnamelen @@ -560,6 +565,8 @@ struct nfsrv_descript { #define ND_EXGSSPRIVACY 0x00400000 #define ND_INCRSEQID 0x00800000 #define ND_NFSCL 0x01000000 +#define ND_NFSV41 0x02000000 +#define ND_HASSEQUENCE 0x04000000 /* * ND_GSS should be the "or" of all GSS type authentications. @@ -572,6 +579,7 @@ struct nfsv4_opflag { int savereply; int modifyfs; int lktype; + int needsseq; }; /* @@ -645,6 +653,15 @@ struct nfsv4lock { #define NFSACCCHK_VPNOTLOCKED 0 #define NFSACCCHK_VPISLOCKED 1 +/* + * Slot for the NFSv4.1 Sequence Op. + */ +struct nfsslot { + int nfssl_inprog; + uint32_t nfssl_seq; + struct mbuf *nfssl_reply; +}; + #endif /* _KERNEL */ #endif /* _NFS_NFS_H */ diff --git a/sys/fs/nfs/nfs_commonkrpc.c b/sys/fs/nfs/nfs_commonkrpc.c index 7b72ced680ad..6640c1f0cbf8 100644 --- a/sys/fs/nfs/nfs_commonkrpc.c +++ b/sys/fs/nfs/nfs_commonkrpc.c @@ -76,23 +76,27 @@ dtrace_nfsclient_nfs23_done_probe_func_t /* * Registered probes by RPC type. 
*/ -uint32_t nfscl_nfs2_start_probes[NFS_NPROCS + 1]; -uint32_t nfscl_nfs2_done_probes[NFS_NPROCS + 1]; +uint32_t nfscl_nfs2_start_probes[NFSV41_NPROCS + 1]; +uint32_t nfscl_nfs2_done_probes[NFSV41_NPROCS + 1]; -uint32_t nfscl_nfs3_start_probes[NFS_NPROCS + 1]; -uint32_t nfscl_nfs3_done_probes[NFS_NPROCS + 1]; +uint32_t nfscl_nfs3_start_probes[NFSV41_NPROCS + 1]; +uint32_t nfscl_nfs3_done_probes[NFSV41_NPROCS + 1]; -uint32_t nfscl_nfs4_start_probes[NFS_NPROCS + 1]; -uint32_t nfscl_nfs4_done_probes[NFS_NPROCS + 1]; +uint32_t nfscl_nfs4_start_probes[NFSV41_NPROCS + 1]; +uint32_t nfscl_nfs4_done_probes[NFSV41_NPROCS + 1]; #endif NFSSTATESPINLOCK; NFSREQSPINLOCK; +NFSDLOCKMUTEX; extern struct nfsstats newnfsstats; extern struct nfsreqhead nfsd_reqq; extern int nfscl_ticks; extern void (*ncl_call_invalcaches)(struct vnode *); +extern int nfs_numnfscbd; +extern int nfscl_debuglevel; +SVCPOOL *nfscbd_pool; static int nfsrv_gsscallbackson = 0; static int nfs_bufpackets = 4; static int nfs_reconnects; @@ -167,6 +171,7 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp, struct socket *so; int one = 1, retries, error = 0; struct thread *td = curthread; + SVCXPRT *xprt; struct timeval timo; /* @@ -277,6 +282,24 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp, retries = nmp->nm_retry; } else retries = INT_MAX; + if (NFSHASNFSV4N(nmp)) { + /* + * Make sure the nfscbd_pool doesn't get destroyed + * while doing this. 
+ */ + NFSD_LOCK(); + if (nfs_numnfscbd > 0) { + nfs_numnfscbd++; + NFSD_UNLOCK(); + xprt = svc_vc_create_backchannel(nfscbd_pool); + CLNT_CONTROL(client, CLSET_BACKCHANNEL, xprt); + NFSD_LOCK(); + nfs_numnfscbd--; + if (nfs_numnfscbd == 0) + wakeup(&nfs_numnfscbd); + } + NFSD_UNLOCK(); + } } else { /* * Three cases: @@ -468,12 +491,13 @@ int newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, struct nfsclient *clp, struct nfssockreq *nrp, vnode_t vp, struct thread *td, struct ucred *cred, u_int32_t prog, u_int32_t vers, - u_char *retsum, int toplevel, u_int64_t *xidp) + u_char *retsum, int toplevel, u_int64_t *xidp, struct nfsclsession *sep) { - u_int32_t *tl; + u_int32_t retseq, retval, *tl; time_t waituntil; - int i, j, set_sigset = 0, timeo; + int i = 0, j = 0, opcnt, set_sigset = 0, slot; int trycnt, error = 0, usegssname = 0, secflavour = AUTH_SYS; + int freeslot, timeo; u_int16_t procnum; u_int trylater_delay = 1; struct nfs_feedback_arg nf; @@ -670,7 +694,9 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, #endif } trycnt = 0; + freeslot = -1; /* Set to slot that needs to be free'd */ tryagain: + slot = -1; /* Slot that needs a sequence# increment. */ /* * This timeout specifies when a new socket should be created, * along with new xid values. For UDP, this should be done @@ -772,11 +798,66 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, nd->nd_dpos = NFSMTOD(nd->nd_md, caddr_t); nd->nd_repstat = 0; if (nd->nd_procnum != NFSPROC_NULL) { + /* If sep == NULL, set it to the default in nmp. */ + if (sep == NULL && nmp != NULL) + sep = NFSMNT_MDSSESSION(nmp); /* * and now the actual NFS xdr. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nd->nd_repstat = fxdr_unsigned(u_int32_t, *tl); + if (nd->nd_repstat >= 10000) + NFSCL_DEBUG(1, "proc=%d reps=%d\n", (int)nd->nd_procnum, + (int)nd->nd_repstat); + + /* + * Get rid of the tag, return count and SEQUENCE result for + * NFSv4. 
+ */ + if ((nd->nd_flag & ND_NFSV4) != 0) { + NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); + i = fxdr_unsigned(int, *tl); + error = nfsm_advance(nd, NFSM_RNDUP(i), -1); + if (error) + goto nfsmout; + NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); + opcnt = fxdr_unsigned(int, *tl++); + i = fxdr_unsigned(int, *tl++); + j = fxdr_unsigned(int, *tl); + if (j >= 10000) + NFSCL_DEBUG(1, "fop=%d fst=%d\n", i, j); + /* + * If the first op is Sequence, free up the slot. + */ + if (nmp != NULL && i == NFSV4OP_SEQUENCE && j != 0) + NFSCL_DEBUG(1, "failed seq=%d\n", j); + if (nmp != NULL && i == NFSV4OP_SEQUENCE && j == 0) { + NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + + 5 * NFSX_UNSIGNED); + mtx_lock(&sep->nfsess_mtx); + tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; + retseq = fxdr_unsigned(uint32_t, *tl++); + slot = fxdr_unsigned(int, *tl++); + freeslot = slot; + if (retseq != sep->nfsess_slotseq[slot]) + printf("retseq diff 0x%x\n", retseq); + retval = fxdr_unsigned(uint32_t, *++tl); + if ((retval + 1) < sep->nfsess_foreslots) + sep->nfsess_foreslots = (retval + 1); + else if ((retval + 1) > sep->nfsess_foreslots) + sep->nfsess_foreslots = (retval < 64) ? + (retval + 1) : 64; + mtx_unlock(&sep->nfsess_mtx); + + /* Grab the op and status for the next one. 
*/ + if (opcnt > 1) { + NFSM_DISSECT(tl, uint32_t *, + 2 * NFSX_UNSIGNED); + i = fxdr_unsigned(int, *tl++); + j = fxdr_unsigned(int, *tl); + } + } + } if (nd->nd_repstat != 0) { if (((nd->nd_repstat == NFSERR_DELAY || nd->nd_repstat == NFSERR_GRACE) && @@ -784,7 +865,9 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, nd->nd_procnum != NFSPROC_DELEGRETURN && nd->nd_procnum != NFSPROC_SETATTR && nd->nd_procnum != NFSPROC_READ && + nd->nd_procnum != NFSPROC_READDS && nd->nd_procnum != NFSPROC_WRITE && + nd->nd_procnum != NFSPROC_WRITEDS && nd->nd_procnum != NFSPROC_OPEN && nd->nd_procnum != NFSPROC_CREATE && nd->nd_procnum != NFSPROC_OPENCONFIRM && @@ -801,6 +884,13 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, while (NFSD_MONOSEC < waituntil) (void) nfs_catnap(PZERO, 0, "nfstry"); trylater_delay *= 2; + if (slot != -1) { + mtx_lock(&sep->nfsess_mtx); + sep->nfsess_slotseq[slot]++; + *nd->nd_slotseq = txdr_unsigned( + sep->nfsess_slotseq[slot]); + mtx_unlock(&sep->nfsess_mtx); + } m_freem(nd->nd_mrep); nd->nd_mrep = NULL; goto tryagain; @@ -817,34 +907,22 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, (*ncl_call_invalcaches)(vp); } } - - /* - * Get rid of the tag, return count, and PUTFH result for V4. - */ - if (nd->nd_flag & ND_NFSV4) { - NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); - i = fxdr_unsigned(int, *tl); - error = nfsm_advance(nd, NFSM_RNDUP(i), -1); - if (error) - goto nfsmout; - NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); - i = fxdr_unsigned(int, *++tl); - + if ((nd->nd_flag & ND_NFSV4) != 0) { + /* Free the slot, as required. */ + if (freeslot != -1) + nfsv4_freeslot(sep, freeslot); /* - * If the first op's status is non-zero, mark that - * there is no more data to process. + * If this op is Putfh, throw its results away. */ - if (*++tl) - nd->nd_flag |= ND_NOMOREDATA; - - /* - * If the first op is Putfh, throw its results away - * and toss the op# and status for the first op. 
- */ - if (nmp != NULL && i == NFSV4OP_PUTFH && *tl == 0) { + if (j >= 10000) + NFSCL_DEBUG(1, "nop=%d nst=%d\n", i, j); + if (nmp != NULL && i == NFSV4OP_PUTFH && j == 0) { NFSM_DISSECT(tl,u_int32_t *,2 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl++); j = fxdr_unsigned(int, *tl); + if (j >= 10000) + NFSCL_DEBUG(1, "n2op=%d n2st=%d\n", i, + j); /* * All Compounds that do an Op that must * be in sequence consist of NFSV4OP_PUTFH @@ -867,19 +945,20 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, j != NFSERR_RESOURCE && j != NFSERR_NOFILEHANDLE))) nd->nd_flag |= ND_INCRSEQID; - /* - * If the first op's status is non-zero, mark - * that there is no more data to process. - */ - if (j) - nd->nd_flag |= ND_NOMOREDATA; } + /* + * If this op's status is non-zero, mark + * that there is no more data to process. + */ + if (j) + nd->nd_flag |= ND_NOMOREDATA; /* * If R_DONTRECOVER is set, replace the stale error * reply, so that recovery isn't initiated. */ if ((nd->nd_repstat == NFSERR_STALECLIENTID || + nd->nd_repstat == NFSERR_BADSESSION || nd->nd_repstat == NFSERR_STALESTATEID) && rep != NULL && (rep->r_flags & R_DONTRECOVER)) nd->nd_repstat = NFSERR_STALEDONTRECOVER; diff --git a/sys/fs/nfs/nfs_commonport.c b/sys/fs/nfs/nfs_commonport.c index fe9b94ab487f..545b995c075c 100644 --- a/sys/fs/nfs/nfs_commonport.c +++ b/sys/fs/nfs/nfs_commonport.c @@ -106,6 +106,12 @@ MALLOC_DEFINE(M_NEWNFSDIROFF, "NFSCL diroffdiroff", "New NFS directory offset data"); MALLOC_DEFINE(M_NEWNFSDROLLBACK, "NFSD rollback", "New NFS local lock rollback"); +MALLOC_DEFINE(M_NEWNFSLAYOUT, "NFSCL layout", "NFSv4.1 Layout"); +MALLOC_DEFINE(M_NEWNFSFLAYOUT, "NFSCL flayout", "NFSv4.1 File Layout"); +MALLOC_DEFINE(M_NEWNFSDEVINFO, "NFSCL devinfo", "NFSv4.1 Device Info"); +MALLOC_DEFINE(M_NEWNFSSOCKREQ, "NFSCL sockreq", "NFS Sock Req"); +MALLOC_DEFINE(M_NEWNFSCLDS, "NFSCL session", "NFSv4.1 Session"); +MALLOC_DEFINE(M_NEWNFSLAYRECALL, "NFSCL layrecall", "NFSv4.1 Layout Recall"); /* * 
Definition of mutex locks. diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c index 5fe831528de0..95aa7bd30ceb 100644 --- a/sys/fs/nfs/nfs_commonsubs.c +++ b/sys/fs/nfs/nfs_commonsubs.c @@ -85,47 +85,66 @@ NFSSOCKMUTEX; * non-idempotent Ops. * Define it here, since it is used by both the client and server. */ -struct nfsv4_opflag nfsv4_opflag[NFSV4OP_NOPS] = { - { 0, 0, 0, 0, LK_EXCLUSIVE }, /* undef */ - { 0, 0, 0, 0, LK_EXCLUSIVE }, /* undef */ - { 0, 0, 0, 0, LK_EXCLUSIVE }, /* undef */ - { 0, 1, 0, 0, LK_SHARED }, /* Access */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* Close */ - { 0, 2, 0, 1, LK_EXCLUSIVE }, /* Commit */ - { 1, 2, 1, 1, LK_EXCLUSIVE }, /* Create */ - { 0, 0, 0, 0, LK_EXCLUSIVE }, /* Delegpurge */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* Delegreturn */ - { 0, 1, 0, 0, LK_SHARED }, /* Getattr */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* GetFH */ - { 2, 1, 1, 1, LK_EXCLUSIVE }, /* Link */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* Lock */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* LockT */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* LockU */ - { 1, 1, 0, 0, LK_EXCLUSIVE }, /* Lookup */ - { 1, 1, 0, 0, LK_EXCLUSIVE }, /* Lookupp */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* NVerify */ - { 1, 1, 0, 1, LK_EXCLUSIVE }, /* Open */ - { 1, 1, 0, 0, LK_EXCLUSIVE }, /* OpenAttr */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* OpenConfirm */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* OpenDowngrade */ - { 1, 0, 0, 0, LK_EXCLUSIVE }, /* PutFH */ - { 1, 0, 0, 0, LK_EXCLUSIVE }, /* PutPubFH */ - { 1, 0, 0, 0, LK_EXCLUSIVE }, /* PutRootFH */ - { 0, 1, 0, 0, LK_SHARED }, /* Read */ - { 0, 1, 0, 0, LK_SHARED }, /* Readdir */ - { 0, 1, 0, 0, LK_SHARED }, /* ReadLink */ - { 0, 2, 1, 1, LK_EXCLUSIVE }, /* Remove */ - { 2, 1, 1, 1, LK_EXCLUSIVE }, /* Rename */ - { 0, 0, 0, 0, LK_EXCLUSIVE }, /* Renew */ - { 0, 0, 0, 0, LK_EXCLUSIVE }, /* RestoreFH */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* SaveFH */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* SecInfo */ - { 0, 2, 1, 1, LK_EXCLUSIVE }, /* Setattr */ - { 0, 0, 0, 0, LK_EXCLUSIVE }, /* 
SetClientID */ - { 0, 0, 0, 0, LK_EXCLUSIVE }, /* SetClientIDConfirm */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* Verify */ - { 0, 2, 1, 1, LK_EXCLUSIVE }, /* Write */ - { 0, 0, 0, 0, LK_EXCLUSIVE }, /* ReleaseLockOwner */ +struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS] = { + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* undef */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* undef */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* undef */ + { 0, 1, 0, 0, LK_SHARED, 1 }, /* Access */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* Close */ + { 0, 2, 0, 1, LK_EXCLUSIVE, 1 }, /* Commit */ + { 1, 2, 1, 1, LK_EXCLUSIVE, 1 }, /* Create */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Delegpurge */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* Delegreturn */ + { 0, 1, 0, 0, LK_SHARED, 1 }, /* Getattr */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* GetFH */ + { 2, 1, 1, 1, LK_EXCLUSIVE, 1 }, /* Link */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* Lock */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* LockT */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* LockU */ + { 1, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* Lookup */ + { 1, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* Lookupp */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* NVerify */ + { 1, 1, 0, 1, LK_EXCLUSIVE, 1 }, /* Open */ + { 1, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* OpenAttr */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* OpenConfirm */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* OpenDowngrade */ + { 1, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* PutFH */ + { 1, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* PutPubFH */ + { 1, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* PutRootFH */ + { 0, 1, 0, 0, LK_SHARED, 1 }, /* Read */ + { 0, 1, 0, 0, LK_SHARED, 1 }, /* Readdir */ + { 0, 1, 0, 0, LK_SHARED, 1 }, /* ReadLink */ + { 0, 2, 1, 1, LK_EXCLUSIVE, 1 }, /* Remove */ + { 2, 1, 1, 1, LK_EXCLUSIVE, 1 }, /* Rename */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Renew */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* RestoreFH */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* SaveFH */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* SecInfo */ + { 0, 2, 1, 1, LK_EXCLUSIVE, 1 }, /* Setattr */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* 
SetClientID */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* SetClientIDConfirm */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* Verify */ + { 0, 2, 1, 1, LK_EXCLUSIVE, 1 }, /* Write */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* ReleaseLockOwner */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Backchannel Ctrl */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Bind Conn to Sess */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 0 }, /* Exchange ID */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 0 }, /* Create Session */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 0 }, /* Destroy Session */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Free StateID */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Get Dir Deleg */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Get Device Info */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Get Device List */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* Layout Commit */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* Layout Get */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* Layout Return */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Secinfo No name */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Sequence */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Set SSV */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Test StateID */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Want Delegation */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 0 }, /* Destroy ClientID */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Reclaim Complete */ }; #endif /* !APPLEKEXT */ @@ -147,9 +166,9 @@ static struct nfsuserlruhead nfsuserlruhead; * marked 0 in this array, the code will still work, just not quite as * efficiently.) 
*/ -static int nfs_bigreply[NFS_NPROCS] = { 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, +int nfs_bigreply[NFSV41_NPROCS] = { 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0 }; + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 }; /* local functions */ static int nfsrv_skipace(struct nfsrv_descript *nd, int *acesizep); @@ -1857,7 +1876,7 @@ nfsv4_getref(struct nfsv4lock *lp, int *isleptp, void *mutex, if (isleptp) *isleptp = 1; (void) nfsmsleep(&lp->nfslock_lock, mutex, - PZERO - 1, "nfsv4lck", NULL); + PZERO - 1, "nfsv4gr", NULL); } if (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) return; @@ -3016,7 +3035,7 @@ nfsrv_getuser(int procnum, uid_t uid, gid_t gid, char *name, NFSPROC_T *p) (void) nfsm_strtom(nd, name, len); } error = newnfs_request(nd, NULL, NULL, &nfsrv_nfsuserdsock, NULL, NULL, - cred, RPCPROG_NFSUSERD, RPCNFSUSERD_VERS, NULL, 0, NULL); + cred, RPCPROG_NFSUSERD, RPCNFSUSERD_VERS, NULL, 0, NULL, NULL); NFSFREECRED(cred); if (!error) { mbuf_freem(nd->nd_mrep); @@ -3510,3 +3529,240 @@ newnfs_sndunlock(int *flagp) NFSUNLOCKSOCK(); } +APPLESTATIC int +nfsv4_getipaddr(struct nfsrv_descript *nd, struct sockaddr_storage *sa, + int *isudp) +{ + struct sockaddr_in *sad; + struct sockaddr_in6 *sad6; + struct in_addr saddr; + uint32_t portnum, *tl; + int af = 0, i, j, k; + char addr[64], protocol[5], *cp; + int cantparse = 0, error = 0; + uint16_t portv; + + NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); + i = fxdr_unsigned(int, *tl); + if (i >= 3 && i <= 4) { + error = nfsrv_mtostr(nd, protocol, i); + if (error) + goto nfsmout; + if (strcmp(protocol, "tcp") == 0) { + af = AF_INET; + *isudp = 0; + } else if (strcmp(protocol, "udp") == 0) { + af = AF_INET; + *isudp = 1; + } else if (strcmp(protocol, "tcp6") == 0) { + af = AF_INET6; + *isudp = 0; + } else if (strcmp(protocol, "udp6") == 0) { + af = AF_INET6; + *isudp = 1; + } else + cantparse = 1; + } else { + cantparse = 1; + if (i > 0) 
{ + error = nfsm_advance(nd, NFSM_RNDUP(i), -1); + if (error) + goto nfsmout; + } + } + NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); + i = fxdr_unsigned(int, *tl); + if (i < 0) { + error = NFSERR_BADXDR; + goto nfsmout; + } else if (cantparse == 0 && i >= 11 && i < 64) { + /* + * The shortest address is 11chars and the longest is < 64. + */ + error = nfsrv_mtostr(nd, addr, i); + if (error) + goto nfsmout; + + /* Find the port# at the end and extract that. */ + i = strlen(addr); + k = 0; + cp = &addr[i - 1]; + /* Count back two '.'s from end to get port# field. */ + for (j = 0; j < i; j++) { + if (*cp == '.') { + k++; + if (k == 2) + break; + } + cp--; + } + if (k == 2) { + /* + * The NFSv4 port# is appended as .N.N, where N is + * a decimal # in the range 0-255, just like an inet4 + * address. Cheat and use inet_aton(), which will + * return a Class A address and then shift the high + * order 8bits over to convert it to the port#. + */ + *cp++ = '\0'; + if (inet_aton(cp, &saddr) == 1) { + portnum = ntohl(saddr.s_addr); + portv = (uint16_t)((portnum >> 16) | + (portnum & 0xff)); + } else + cantparse = 1; + } else + cantparse = 1; + if (cantparse == 0) { + if (af == AF_INET) { + sad = (struct sockaddr_in *)sa; + if (inet_pton(af, addr, &sad->sin_addr) == 1) { + sad->sin_len = sizeof(*sad); + sad->sin_family = AF_INET; + sad->sin_port = htons(portv); + return (0); + } + } else { + sad6 = (struct sockaddr_in6 *)sa; + if (inet_pton(af, addr, &sad6->sin6_addr) + == 1) { + sad6->sin6_len = sizeof(*sad6); + sad6->sin6_family = AF_INET6; + sad6->sin6_port = htons(portv); + return (0); + } + } + } + } else { + if (i > 0) { + error = nfsm_advance(nd, NFSM_RNDUP(i), -1); + if (error) + goto nfsmout; + } + } + error = EPERM; +nfsmout: + return (error); +} + +/* + * Handle an NFSv4.1 Sequence request for the session. 
+ */ +int +nfsv4_seqsession(uint32_t seqid, uint32_t slotid, uint32_t highslot, + struct nfsslot *slots, struct mbuf **reply, uint16_t maxslot) +{ + int error; + + error = 0; + *reply = NULL; + if (slotid > maxslot) + return (NFSERR_BADSLOT); + if (seqid == slots[slotid].nfssl_seq) { + /* A retry. */ + if (slots[slotid].nfssl_inprog != 0) + error = NFSERR_DELAY; + else if (slots[slotid].nfssl_reply != NULL) { + *reply = slots[slotid].nfssl_reply; + slots[slotid].nfssl_reply = NULL; + slots[slotid].nfssl_inprog = 1; + } else + error = NFSERR_SEQMISORDERED; + } else if ((slots[slotid].nfssl_seq + 1) == seqid) { + m_freem(slots[slotid].nfssl_reply); + slots[slotid].nfssl_reply = NULL; + slots[slotid].nfssl_inprog = 1; + slots[slotid].nfssl_seq++; + } else + error = NFSERR_SEQMISORDERED; + return (error); +} + +/* + * Cache this reply for the slot. + */ +void +nfsv4_seqsess_cacherep(uint32_t slotid, struct nfsslot *slots, struct mbuf *rep) +{ + + slots[slotid].nfssl_reply = rep; + slots[slotid].nfssl_inprog = 0; +} + +/* + * Generate the xdr for an NFSv4.1 Sequence Operation. + */ +APPLESTATIC void +nfsv4_setsequence(struct nfsrv_descript *nd, struct nfsclsession *sep, + int dont_replycache) +{ + uint32_t *tl, slotseq = 0; + int i, maxslot, slotpos; + uint64_t bitval; + uint8_t sessionid[NFSX_V4SESSIONID]; + + /* Find an unused slot. */ + slotpos = -1; + maxslot = -1; + mtx_lock(&sep->nfsess_mtx); + do { + bitval = 1; + for (i = 0; i < sep->nfsess_foreslots; i++) { + if ((bitval & sep->nfsess_slots) == 0) { + slotpos = i; + sep->nfsess_slots |= bitval; + sep->nfsess_slotseq[i]++; + slotseq = sep->nfsess_slotseq[i]; + break; + } + bitval <<= 1; + } + if (slotpos == -1) + (void)mtx_sleep(&sep->nfsess_slots, &sep->nfsess_mtx, + PZERO, "nfsclseq", 0); + } while (slotpos == -1); + /* Now, find the highest slot in use. 
(nfsc_slots is 64bits) */ + bitval = 1; + for (i = 0; i < 64; i++) { + if ((bitval & sep->nfsess_slots) != 0) + maxslot = i; + bitval <<= 1; + } + bcopy(sep->nfsess_sessionid, sessionid, NFSX_V4SESSIONID); + mtx_unlock(&sep->nfsess_mtx); + KASSERT(maxslot >= 0, ("nfscl_setsequence neg maxslot")); + + /* Build the Sequence arguments. */ + NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED); + bcopy(sessionid, tl, NFSX_V4SESSIONID); + tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; + nd->nd_slotseq = tl; + *tl++ = txdr_unsigned(slotseq); + *tl++ = txdr_unsigned(slotpos); + *tl++ = txdr_unsigned(maxslot); + if (dont_replycache == 0) + *tl = newnfs_true; + else + *tl = newnfs_false; + nd->nd_flag |= ND_HASSEQUENCE; +} + +/* + * Free a session slot. + */ +APPLESTATIC void +nfsv4_freeslot(struct nfsclsession *sep, int slot) +{ + uint64_t bitval; + + bitval = 1; + if (slot > 0) + bitval <<= slot; + mtx_lock(&sep->nfsess_mtx); + if ((bitval & sep->nfsess_slots) == 0) + printf("freeing free slot!!\n"); + sep->nfsess_slots &= ~bitval; + wakeup(&sep->nfsess_slots); + mtx_unlock(&sep->nfsess_mtx); +} + diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h index 8e68d424db36..a13f880b8c3a 100644 --- a/sys/fs/nfs/nfs_var.h +++ b/sys/fs/nfs/nfs_var.h @@ -69,9 +69,12 @@ struct nfsclowner; struct nfsclopen; struct nfsclopenhead; struct nfsclclient; +struct nfsclsession; struct nfscllockowner; struct nfscllock; struct nfscldeleg; +struct nfscllayout; +struct nfscldevinfo; struct nfsv4lock; struct nfsvattr; struct nfs_vattr; @@ -257,11 +260,18 @@ int nfsrv_mtostr(struct nfsrv_descript *, char *, int); int nfsrv_checkutf8(u_int8_t *, int); int newnfs_sndlock(int *); void newnfs_sndunlock(int *); +int nfsv4_getipaddr(struct nfsrv_descript *, struct sockaddr_storage *, + int *); +int nfsv4_seqsession(uint32_t, uint32_t, uint32_t, struct nfsslot *, + struct mbuf **, uint16_t); +void nfsv4_seqsess_cacherep(uint32_t, struct nfsslot *, struct mbuf *); +void nfsv4_setsequence(struct 
nfsrv_descript *, struct nfsclsession *, int); +void nfsv4_freeslot(struct nfsclsession *, int); /* nfs_clcomsubs.c */ void nfsm_uiombuf(struct nfsrv_descript *, struct uio *, int); void nfscl_reqstart(struct nfsrv_descript *, int, struct nfsmount *, - u_int8_t *, int, u_int32_t **); + u_int8_t *, int, u_int32_t **, struct nfsclsession *); nfsuint64 *nfscl_getcookie(struct nfsnode *, off_t off, int); void nfscl_fillsattr(struct nfsrv_descript *, struct vattr *, vnode_t, int, u_int32_t); @@ -360,12 +370,12 @@ int nfsrpc_closerpc(struct nfsrv_descript *, struct nfsmount *, struct nfsclopen *, struct ucred *, NFSPROC_T *, int); int nfsrpc_openconfirm(vnode_t, u_int8_t *, int, struct nfsclopen *, struct ucred *, NFSPROC_T *); -int nfsrpc_setclient(struct nfsmount *, struct nfsclclient *, +int nfsrpc_setclient(struct nfsmount *, struct nfsclclient *, int, struct ucred *, NFSPROC_T *); int nfsrpc_getattr(vnode_t, struct ucred *, NFSPROC_T *, struct nfsvattr *, void *); int nfsrpc_getattrnovp(struct nfsmount *, u_int8_t *, int, int, - struct ucred *, NFSPROC_T *, struct nfsvattr *, u_int64_t *); + struct ucred *, NFSPROC_T *, struct nfsvattr *, u_int64_t *, uint32_t *); int nfsrpc_setattr(vnode_t, struct vattr *, NFSACL_T *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_lookup(vnode_t, char *, int, struct ucred *, NFSPROC_T *, @@ -404,7 +414,7 @@ int nfsrpc_readdir(vnode_t, struct uio *, nfsuint64 *, struct ucred *, int nfsrpc_readdirplus(vnode_t, struct uio *, nfsuint64 *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, int *, void *); int nfsrpc_commit(vnode_t, u_quad_t, int, struct ucred *, - NFSPROC_T *, u_char *, struct nfsvattr *, int *, void *); + NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_advlock(vnode_t, off_t, int, struct flock *, int, struct ucred *, NFSPROC_T *, void *, int); int nfsrpc_lockt(struct nfsrv_descript *, vnode_t, @@ -419,7 +429,7 @@ int nfsrpc_fsinfo(vnode_t, struct nfsfsinfo *, struct ucred *, 
NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_pathconf(vnode_t, struct nfsv3_pathconf *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); -int nfsrpc_renew(struct nfsclclient *, struct ucred *, +int nfsrpc_renew(struct nfsclclient *, struct nfsclds *, struct ucred *, NFSPROC_T *); int nfsrpc_rellockown(struct nfsmount *, struct nfscllockowner *, uint8_t *, int, struct ucred *, NFSPROC_T *); @@ -429,16 +439,42 @@ int nfsrpc_delegreturn(struct nfscldeleg *, struct ucred *, struct nfsmount *, NFSPROC_T *, int); int nfsrpc_getacl(vnode_t, struct ucred *, NFSPROC_T *, NFSACL_T *, void *); int nfsrpc_setacl(vnode_t, struct ucred *, NFSPROC_T *, NFSACL_T *, void *); +int nfsrpc_exchangeid(struct nfsmount *, struct nfsclclient *, + struct nfssockreq *, uint32_t, struct nfsclds **, struct ucred *, + NFSPROC_T *); +int nfsrpc_createsession(struct nfsmount *, struct nfsclsession *, + struct nfssockreq *, uint32_t, int, struct ucred *, NFSPROC_T *); +int nfsrpc_destroysession(struct nfsmount *, struct nfsclclient *, + struct ucred *, NFSPROC_T *); +int nfsrpc_destroyclient(struct nfsmount *, struct nfsclclient *, + struct ucred *, NFSPROC_T *); +int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t, uint64_t, + uint64_t, int, nfsv4stateid_t *, int *, struct nfsclflayouthead *, + struct ucred *, NFSPROC_T *, void *); +int nfsrpc_getdeviceinfo(struct nfsmount *, uint8_t *, int, uint32_t *, + struct nfscldevinfo **, struct ucred *, NFSPROC_T *); +int nfsrpc_layoutcommit(struct nfsmount *, uint8_t *, int, int, + uint64_t, uint64_t, uint64_t, nfsv4stateid_t *, int, int, uint8_t *, + struct ucred *, NFSPROC_T *, void *); +int nfsrpc_layoutreturn(struct nfsmount *, uint8_t *, int, int, int, uint32_t, + int, uint64_t, uint64_t, nfsv4stateid_t *, int, uint32_t *, struct ucred *, + NFSPROC_T *, void *); +int nfsrpc_reclaimcomplete(struct nfsmount *, struct ucred *, NFSPROC_T *); +int nfscl_doiods(vnode_t, struct uio *, int *, int *, uint32_t, 
+ struct ucred *, NFSPROC_T *); +int nfscl_findlayoutforio(struct nfscllayout *, uint64_t, uint32_t, + struct nfsclflayout **); +void nfscl_freenfsclds(struct nfsclds *); /* nfs_clstate.c */ int nfscl_open(vnode_t, u_int8_t *, int, u_int32_t, int, struct ucred *, NFSPROC_T *, struct nfsclowner **, struct nfsclopen **, int *, int *, int); -int nfscl_getstateid(vnode_t, u_int8_t *, int, u_int32_t, struct ucred *, +int nfscl_getstateid(vnode_t, u_int8_t *, int, u_int32_t, int, struct ucred *, NFSPROC_T *, nfsv4stateid_t *, void **); void nfscl_ownerrelease(struct nfsclowner *, int, int, int); void nfscl_openrelease(struct nfsclopen *, int, int); -int nfscl_getcl(vnode_t, struct ucred *, NFSPROC_T *, +int nfscl_getcl(struct mount *, struct ucred *, NFSPROC_T *, int, struct nfsclclient **); struct nfsclclient *nfscl_findcl(struct nfsmount *); void nfscl_clientrelease(struct nfsclclient *); @@ -490,6 +526,21 @@ void nfscl_deleggetmodtime(vnode_t, struct timespec *); int nfscl_tryclose(struct nfsclopen *, struct ucred *, struct nfsmount *, NFSPROC_T *); void nfscl_cleanup(NFSPROC_T *); +int nfscl_layout(struct nfsmount *, vnode_t, u_int8_t *, int, nfsv4stateid_t *, + int, struct nfsclflayouthead *, struct nfscllayout **, struct ucred *, + NFSPROC_T *); +struct nfscllayout *nfscl_getlayout(struct nfsclclient *, uint8_t *, int, + uint64_t, struct nfsclflayout **, int *); +void nfscl_rellayout(struct nfscllayout *, int); +struct nfscldevinfo *nfscl_getdevinfo(struct nfsclclient *, uint8_t *, + struct nfscldevinfo *); +void nfscl_reldevinfo(struct nfscldevinfo *); +int nfscl_adddevinfo(struct nfsmount *, struct nfscldevinfo *, + struct nfsclflayout *); +void nfscl_freelayout(struct nfscllayout *); +void nfscl_freeflayout(struct nfsclflayout *); +void nfscl_freedevinfo(struct nfscldevinfo *); +int nfscl_layoutcommit(vnode_t, NFSPROC_T *); /* nfs_clport.c */ int nfscl_nget(mount_t, vnode_t, struct nfsfh *, @@ -588,7 +639,8 @@ void newnfs_restore_sigmask(struct thread *, 
sigset_t *); int newnfs_msleep(struct thread *, void *, struct mtx *, int, char *, int); int newnfs_request(struct nfsrv_descript *, struct nfsmount *, struct nfsclient *, struct nfssockreq *, vnode_t, NFSPROC_T *, - struct ucred *, u_int32_t, u_int32_t, u_char *, int, u_int64_t *); + struct ucred *, u_int32_t, u_int32_t, u_char *, int, u_int64_t *, + struct nfsclsession *); int newnfs_connect(struct nfsmount *, struct nfssockreq *, struct ucred *, NFSPROC_T *, int); void newnfs_disconnect(struct nfssockreq *); diff --git a/sys/fs/nfs/nfscl.h b/sys/fs/nfs/nfscl.h index 1ff4d8bf78cd..bda8d4812d3f 100644 --- a/sys/fs/nfs/nfscl.h +++ b/sys/fs/nfs/nfscl.h @@ -49,7 +49,7 @@ struct nfsv4node { */ #define NFSCL_REQSTART(n, p, v) \ nfscl_reqstart((n), (p), VFSTONFS((v)->v_mount), \ - VTONFS(v)->n_fhp->nfh_fh, VTONFS(v)->n_fhp->nfh_len, NULL) + VTONFS(v)->n_fhp->nfh_fh, VTONFS(v)->n_fhp->nfh_len, NULL, NULL) /* * These two macros convert between a lease duration and renew interval. diff --git a/sys/fs/nfs/nfsclstate.h b/sys/fs/nfs/nfsclstate.h index 868e7cfb5db1..aa2bfee90586 100644 --- a/sys/fs/nfs/nfsclstate.h +++ b/sys/fs/nfs/nfsclstate.h @@ -40,26 +40,75 @@ LIST_HEAD(nfsclhead, nfsclclient); LIST_HEAD(nfsclownerhead, nfsclowner); TAILQ_HEAD(nfscldeleghead, nfscldeleg); LIST_HEAD(nfscldeleghash, nfscldeleg); +TAILQ_HEAD(nfscllayouthead, nfscllayout); +LIST_HEAD(nfscllayouthash, nfscllayout); +LIST_HEAD(nfsclflayouthead, nfsclflayout); +LIST_HEAD(nfscldevinfohead, nfscldevinfo); +LIST_HEAD(nfsclrecalllayouthead, nfsclrecalllayout); #define NFSCLDELEGHASHSIZE 256 -#define NFSCLDELEGHASH(c, f, l) \ +#define NFSCLDELEGHASH(c, f, l) \ (&((c)->nfsc_deleghash[ncl_hash((f), (l)) % NFSCLDELEGHASHSIZE])) +#define NFSCLLAYOUTHASHSIZE 256 +#define NFSCLLAYOUTHASH(c, f, l) \ + (&((c)->nfsc_layouthash[ncl_hash((f), (l)) % NFSCLLAYOUTHASHSIZE])) + +/* Structure for NFSv4.1 session stuff. 
*/ +struct nfsclsession { + struct mtx nfsess_mtx; + struct nfsslot nfsess_cbslots[NFSV4_CBSLOTS]; + nfsquad_t nfsess_clientid; + uint32_t nfsess_slotseq[64]; /* Max for 64bit nm_slots */ + uint64_t nfsess_slots; + uint32_t nfsess_sequenceid; + uint32_t nfsess_maxcache; /* Max size for cached reply. */ + uint16_t nfsess_foreslots; + uint16_t nfsess_backslots; + uint8_t nfsess_sessionid[NFSX_V4SESSIONID]; +}; + +/* + * This structure holds the session, clientid and related information + * needed for an NFSv4.1 Meta Data Server (MDS) or Data Server (DS). + * It is malloc'd to the correct length. + */ +struct nfsclds { + TAILQ_ENTRY(nfsclds) nfsclds_list; + struct nfsclsession nfsclds_sess; + struct mtx nfsclds_mtx; + struct nfssockreq *nfsclds_sockp; + time_t nfsclds_expire; + uint16_t nfsclds_flags; + uint16_t nfsclds_servownlen; + uint8_t nfsclds_verf[NFSX_VERF]; + uint8_t nfsclds_serverown[0]; +}; + +/* + * Flags for nfsclds_flags. + */ +#define NFSCLDS_HASWRITEVERF 0x0001 +#define NFSCLDS_MDS 0x0002 +#define NFSCLDS_DS 0x0004 struct nfsclclient { LIST_ENTRY(nfsclclient) nfsc_list; struct nfsclownerhead nfsc_owner; struct nfscldeleghead nfsc_deleg; struct nfscldeleghash nfsc_deleghash[NFSCLDELEGHASHSIZE]; - struct nfsv4lock nfsc_lock; - struct proc *nfsc_renewthread; - struct nfsmount *nfsc_nmp; - nfsquad_t nfsc_clientid; - time_t nfsc_expire; - u_int32_t nfsc_clientidrev; - u_int32_t nfsc_renew; - u_int32_t nfsc_cbident; - u_int16_t nfsc_flags; - u_int16_t nfsc_idlen; - u_int8_t nfsc_id[1]; /* Malloc'd to correct length */ + struct nfscllayouthead nfsc_layout; + struct nfscllayouthash nfsc_layouthash[NFSCLLAYOUTHASHSIZE]; + struct nfscldevinfohead nfsc_devinfo; + struct nfsv4lock nfsc_lock; + struct proc *nfsc_renewthread; + struct nfsmount *nfsc_nmp; + time_t nfsc_expire; + u_int32_t nfsc_clientidrev; + u_int32_t nfsc_rev; + u_int32_t nfsc_renew; + u_int32_t nfsc_cbident; + u_int16_t nfsc_flags; + u_int16_t nfsc_idlen; + u_int8_t nfsc_id[1]; /* Malloc'd to 
correct length */ }; /* @@ -175,6 +224,141 @@ struct nfscllockownerfh { uint8_t nfslfh_fh[NFSX_V4FHMAX]; }; +/* + * MALLOC'd to the correct length to accommodate the file handle. + */ +struct nfscllayout { + TAILQ_ENTRY(nfscllayout) nfsly_list; + LIST_ENTRY(nfscllayout) nfsly_hash; + nfsv4stateid_t nfsly_stateid; + struct nfsv4lock nfsly_lock; + uint64_t nfsly_filesid[2]; + uint64_t nfsly_lastbyte; + struct nfsclflayouthead nfsly_flayread; + struct nfsclflayouthead nfsly_flayrw; + struct nfsclrecalllayouthead nfsly_recall; + time_t nfsly_timestamp; + struct nfsclclient *nfsly_clp; + uint16_t nfsly_flags; + uint16_t nfsly_fhlen; + uint8_t nfsly_fh[1]; +}; + +/* + * Flags for nfsly_flags. + */ +#define NFSLY_FILES 0x0001 +#define NFSLY_BLOCK 0x0002 +#define NFSLY_OBJECT 0x0004 +#define NFSLY_RECALL 0x0008 +#define NFSLY_RECALLFILE 0x0010 +#define NFSLY_RECALLFSID 0x0020 +#define NFSLY_RECALLALL 0x0040 +#define NFSLY_RETONCLOSE 0x0080 +#define NFSLY_WRITTEN 0x0100 /* Has been used to write to a DS. */ + +/* + * MALLOC'd to the correct length to accommodate the file handle list. + * These hang off of nfsly_flayread and nfsly_flayrw, sorted in increasing + * offset order. + * The nfsly_flayread list holds the ones with iomode == NFSLAYOUTIOMODE_READ, + * whereas the nfsly_flayrw holds the ones with iomode == NFSLAYOUTIOMODE_RW. + */ +struct nfsclflayout { + LIST_ENTRY(nfsclflayout) nfsfl_list; + uint8_t nfsfl_dev[NFSX_V4DEVICEID]; + uint64_t nfsfl_off; + uint64_t nfsfl_end; + uint64_t nfsfl_patoff; + struct nfscldevinfo *nfsfl_devp; + uint32_t nfsfl_iomode; + uint32_t nfsfl_util; + uint32_t nfsfl_stripe1; + uint16_t nfsfl_flags; + uint16_t nfsfl_fhcnt; + struct nfsfh *nfsfl_fh[1]; /* FH list for DS */ +}; + +/* + * Flags for nfsfl_flags. + */ +#define NFSFL_RECALL 0x0001 /* File layout has been recalled */ + +/* + * Structure that is used to store a LAYOUTRECALL. 
+ */ +struct nfsclrecalllayout { + LIST_ENTRY(nfsclrecalllayout) nfsrecly_list; + uint64_t nfsrecly_off; + uint64_t nfsrecly_len; + int nfsrecly_recalltype; + uint32_t nfsrecly_iomode; + uint32_t nfsrecly_stateseqid; +}; + +/* + * Stores the NFSv4.1 Device Info. Malloc'd to the correct length to + * store the list of network connections and list of indices. + * nfsdi_data[] is allocated the following way: + * - nfsdi_addrcnt * (struct nfsclds *) pointers + * - stripe indices, each stored as one byte, since there can be many + * of them. (This implies a limit of 256 on nfsdi_addrcnt, since the + * indices select which address.) + */ +struct nfscldevinfo { + LIST_ENTRY(nfscldevinfo) nfsdi_list; + uint8_t nfsdi_deviceid[NFSX_V4DEVICEID]; + struct nfsclclient *nfsdi_clp; + uint32_t nfsdi_refcnt; + uint32_t nfsdi_layoutrefs; + uint16_t nfsdi_stripecnt; + uint16_t nfsdi_addrcnt; + struct nfsclds *nfsdi_data[0]; +}; + +/* These inline functions return values from nfsdi_data[]. */ +/* + * Return a pointer to the address at "pos", or NULL if out of range. + */ +static __inline struct nfsclds ** +nfsfldi_addr(struct nfscldevinfo *ndi, int pos) +{ + + if (pos < 0 || pos >= ndi->nfsdi_addrcnt) + return (NULL); + return (&ndi->nfsdi_data[pos]); +} + +/* + * Return the Nth ("pos") stripe index, or -1 if "pos" is out of range. + */ +static __inline int +nfsfldi_stripeindex(struct nfscldevinfo *ndi, int pos) +{ + uint8_t *valp; + + if (pos < 0 || pos >= ndi->nfsdi_stripecnt) + return (-1); + valp = (uint8_t *)&ndi->nfsdi_data[ndi->nfsdi_addrcnt]; + valp += pos; + return ((int)*valp); +} + +/* + * Set the Nth ("pos") stripe index to "val"; no-op if out of range. + */ +static __inline void +nfsfldi_setstripeindex(struct nfscldevinfo *ndi, int pos, uint8_t val) +{ + uint8_t *valp; + + if (pos < 0 || pos >= ndi->nfsdi_stripecnt) + return; + valp = (uint8_t *)&ndi->nfsdi_data[ndi->nfsdi_addrcnt]; + valp += pos; + *valp = val; +} + /* * Macro for incrementing the seqid#.
*/ diff --git a/sys/fs/nfs/nfsport.h b/sys/fs/nfs/nfsport.h index eb026bc4cbf2..3f22b7adea9f 100644 --- a/sys/fs/nfs/nfsport.h +++ b/sys/fs/nfs/nfsport.h @@ -228,6 +228,34 @@ */ #define NFSV4OP_NOPS 40 +/* + * Additional Ops for NFSv4.1. + */ +#define NFSV4OP_BACKCHANNELCTL 40 +#define NFSV4OP_BINDCONNTOSESS 41 +#define NFSV4OP_EXCHANGEID 42 +#define NFSV4OP_CREATESESSION 43 +#define NFSV4OP_DESTROYSESSION 44 +#define NFSV4OP_FREESTATEID 45 +#define NFSV4OP_GETDIRDELEG 46 +#define NFSV4OP_GETDEVINFO 47 +#define NFSV4OP_GETDEVLIST 48 +#define NFSV4OP_LAYOUTCOMMIT 49 +#define NFSV4OP_LAYOUTGET 50 +#define NFSV4OP_LAYOUTRETURN 51 +#define NFSV4OP_SECINFONONAME 52 +#define NFSV4OP_SEQUENCE 53 +#define NFSV4OP_SETSSV 54 +#define NFSV4OP_TESTSTATEID 55 +#define NFSV4OP_WANTDELEG 56 +#define NFSV4OP_DESTROYCLIENTID 57 +#define NFSV4OP_RECLAIMCOMPL 58 + +/* + * Must be one more than last op#. + */ +#define NFSV41_NOPS 59 + /* Quirky case if the illegal op code */ #define NFSV4OP_OPILLEGAL 10044 @@ -260,6 +288,20 @@ */ #define NFSV4OP_CBNOPS 5 +/* + * Additional Callback Ops for NFSv4.1 only. Not yet in nfsstats. + */ +#define NFSV4OP_CBLAYOUTRECALL 5 +#define NFSV4OP_CBNOTIFY 6 +#define NFSV4OP_CBPUSHDELEG 7 +#define NFSV4OP_CBRECALLANY 8 +#define NFSV4OP_CBRECALLOBJAVAIL 9 +#define NFSV4OP_CBRECALLSLOT 10 +#define NFSV4OP_CBSEQUENCE 11 +#define NFSV4OP_CBWANTCANCELLED 12 +#define NFSV4OP_CBNOTIFYLOCK 13 +#define NFSV4OP_CBNOTIFYDEVID 14 + /* * The lower numbers -> 21 are used by NFSv2 and v3. These define higher * numbers used by NFSv4. @@ -293,6 +335,27 @@ * Must be defined as one higher than the last Proc# above. */ #define NFSV4_NPROCS 41 + +/* Additional procedures for NFSv4.1. 
*/ +#define NFSPROC_EXCHANGEID 41 +#define NFSPROC_CREATESESSION 42 +#define NFSPROC_DESTROYSESSION 43 +#define NFSPROC_DESTROYCLIENT 44 +#define NFSPROC_FREESTATEID 45 +#define NFSPROC_LAYOUTGET 46 +#define NFSPROC_GETDEVICEINFO 47 +#define NFSPROC_LAYOUTCOMMIT 48 +#define NFSPROC_LAYOUTRETURN 49 +#define NFSPROC_RECLAIMCOMPL 50 +#define NFSPROC_WRITEDS 51 +#define NFSPROC_READDS 52 +#define NFSPROC_COMMITDS 53 + +/* + * Must be defined as one higher than the last NFSv4.1 Proc# above. + */ +#define NFSV41_NPROCS 54 + #endif /* NFS_V3NPROCS */ /* @@ -368,13 +431,13 @@ struct ext_nfsstats { #include #include #include +#include #include #include #include #include #include #include -#include #include #include @@ -583,6 +646,8 @@ void nfsrvd_rcv(struct socket *, void *, int); #define NFSPROCLISTUNLOCK() sx_sunlock(&allproc_lock) #define NFSLOCKSOCKREQ(r) mtx_lock(&((r)->nr_mtx)) #define NFSUNLOCKSOCKREQ(r) mtx_unlock(&((r)->nr_mtx)) +#define NFSLOCKDS(d) mtx_lock(&((d)->nfsclds_mtx)) +#define NFSUNLOCKDS(d) mtx_unlock(&((d)->nfsclds_mtx)) /* * Use these macros to initialize/free a mutex. 
@@ -672,6 +737,12 @@ MALLOC_DECLARE(M_NEWNFSV4NODE); MALLOC_DECLARE(M_NEWNFSDIRECTIO); MALLOC_DECLARE(M_NEWNFSMNT); MALLOC_DECLARE(M_NEWNFSDROLLBACK); +MALLOC_DECLARE(M_NEWNFSLAYOUT); +MALLOC_DECLARE(M_NEWNFSFLAYOUT); +MALLOC_DECLARE(M_NEWNFSDEVINFO); +MALLOC_DECLARE(M_NEWNFSSOCKREQ); +MALLOC_DECLARE(M_NEWNFSCLDS); +MALLOC_DECLARE(M_NEWNFSLAYRECALL); #define M_NFSRVCACHE M_NEWNFSRVCACHE #define M_NFSDCLIENT M_NEWNFSDCLIENT #define M_NFSDSTATE M_NEWNFSDSTATE @@ -691,6 +762,12 @@ MALLOC_DECLARE(M_NEWNFSDROLLBACK); #define M_NFSV4NODE M_NEWNFSV4NODE #define M_NFSDIRECTIO M_NEWNFSDIRECTIO #define M_NFSDROLLBACK M_NEWNFSDROLLBACK +#define M_NFSLAYOUT M_NEWNFSLAYOUT +#define M_NFSFLAYOUT M_NEWNFSFLAYOUT +#define M_NFSDEVINFO M_NEWNFSDEVINFO +#define M_NFSSOCKREQ M_NEWNFSSOCKREQ +#define M_NFSCLDS M_NEWNFSCLDS +#define M_NFSLAYRECALL M_NEWNFSLAYRECALL #define NFSINT_SIGMASK(set) \ (SIGISMEMBER(set, SIGINT) || SIGISMEMBER(set, SIGTERM) || \ @@ -759,12 +836,16 @@ void newnfs_realign(struct mbuf **); */ #define NFSSTA_HASWRITEVERF 0x00040000 /* Has write verifier */ #define NFSSTA_GOTFSINFO 0x00100000 /* Got the fsinfo */ +#define NFSSTA_NOLAYOUTCOMMIT 0x04000000 /* Don't do LayoutCommit */ +#define NFSSTA_SESSPERSIST 0x08000000 /* Has a persistent session */ #define NFSSTA_TIMEO 0x10000000 /* Experiencing a timeout */ #define NFSSTA_LOCKTIMEO 0x20000000 /* Experiencing a lockd timeout */ #define NFSSTA_HASSETFSID 0x40000000 /* Has set the fsid */ +#define NFSSTA_PNFS 0x80000000 /* pNFS is enabled */ #define NFSHASNFSV3(n) ((n)->nm_flag & NFSMNT_NFSV3) #define NFSHASNFSV4(n) ((n)->nm_flag & NFSMNT_NFSV4) +#define NFSHASNFSV4N(n) ((n)->nm_minorvers > 0) #define NFSHASNFSV3OR4(n) ((n)->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) #define NFSHASGOTFSINFO(n) ((n)->nm_state & NFSSTA_GOTFSINFO) #define NFSHASHASSETFSID(n) ((n)->nm_state & NFSSTA_HASSETFSID) @@ -781,6 +862,10 @@ void newnfs_realign(struct mbuf **); #define NFSHASPRIVACY(n) ((n)->nm_flag & NFSMNT_PRIVACY) #define 
NFSSETWRITEVERF(n) ((n)->nm_state |= NFSSTA_HASWRITEVERF) #define NFSSETHASSETFSID(n) ((n)->nm_state |= NFSSTA_HASSETFSID) +#define NFSHASPNFSOPT(n) ((n)->nm_flag & NFSMNT_PNFS) +#define NFSHASNOLAYOUTCOMMIT(n) ((n)->nm_state & NFSSTA_NOLAYOUTCOMMIT) +#define NFSHASSESSPERSIST(n) ((n)->nm_state & NFSSTA_SESSPERSIST) +#define NFSHASPNFS(n) ((n)->nm_state & NFSSTA_PNFS) /* * Gets the stats field out of the mount structure. diff --git a/sys/fs/nfs/nfsproto.h b/sys/fs/nfs/nfsproto.h index 5ae2e3d15799..6836661ee823 100644 --- a/sys/fs/nfs/nfsproto.h +++ b/sys/fs/nfs/nfsproto.h @@ -62,7 +62,9 @@ #define NFS_MINPACKET 20 #define NFS_FABLKSIZE 512 /* Size in bytes of a block wrt fa_blocks */ #define NFSV4_MINORVERSION 0 /* V4 Minor version */ +#define NFSV41_MINORVERSION 1 /* V4 Minor version */ #define NFSV4_CBVERS 1 /* V4 CB Version */ +#define NFSV41_CBVERS 4 /* V4.1 CB Version */ #define NFSV4_SMALLSTR 50 /* Strings small enough for stack */ /* Stat numbers for rpc returns (version 2, 3 and 4) */ @@ -145,6 +147,46 @@ #define NFSERR_ADMINREVOKED 10047 #define NFSERR_CBPATHDOWN 10048 +/* NFSv4.1 specific errors. 
*/ +#define NFSERR_BADIOMODE 10049 +#define NFSERR_BADLAYOUT 10050 +#define NFSERR_BADSESSIONDIGEST 10051 +#define NFSERR_BADSESSION 10052 +#define NFSERR_BADSLOT 10053 +#define NFSERR_COMPLETEALREADY 10054 +#define NFSERR_NOTBNDTOSESS 10055 +#define NFSERR_DELEGALREADYWANT 10056 +#define NFSERR_BACKCHANBUSY 10057 +#define NFSERR_LAYOUTTRYLATER 10058 +#define NFSERR_LAYOUTUNAVAIL 10059 +#define NFSERR_NOMATCHLAYOUT 10060 +#define NFSERR_RECALLCONFLICT 10061 +#define NFSERR_UNKNLAYOUTTYPE 10062 +#define NFSERR_SEQMISORDERED 10063 +#define NFSERR_SEQUENCEPOS 10064 +#define NFSERR_REQTOOBIG 10065 +#define NFSERR_REPTOOBIG 10066 +#define NFSERR_REPTOOBIGTOCACHE 10067 +#define NFSERR_RETRYUNCACHEDREP 10068 +#define NFSERR_UNSAFECOMPOUND 10069 +#define NFSERR_TOOMANYOPS 10070 +#define NFSERR_OPNOTINSESS 10071 +#define NFSERR_HASHALGUNSUPP 10072 +#define NFSERR_CLIENTIDBUSY 10074 +#define NFSERR_PNFSIOHOLE 10075 +#define NFSERR_SEQFALSERETRY 10076 +#define NFSERR_BADHIGHSLOT 10077 +#define NFSERR_DEADSESSION 10078 +#define NFSERR_ENCRALGUNSUPP 10079 +#define NFSERR_PNFSNOLAYOUT 10080 +#define NFSERR_NOTONLYOP 10081 +#define NFSERR_WRONGCRED 10082 +#define NFSERR_WRONGTYPE 10083 +#define NFSERR_DIRDELEGUNAVAIL 10084 +#define NFSERR_REJECTDELEG 10085 +#define NFSERR_RETURNCONFLICT 10086 +#define NFSERR_DELEGREVOKED 10087 + #define NFSERR_STALEWRITEVERF 30001 /* Fake return for nfs_commit() */ #define NFSERR_DONTREPLY 30003 /* Don't process request */ #define NFSERR_RETVOID 30004 /* Return void, not error */ @@ -189,6 +231,8 @@ #define NFSX_V4SPECDATA (2 * NFSX_UNSIGNED) #define NFSX_V4TIME (NFSX_HYPER + NFSX_UNSIGNED) #define NFSX_V4SETTIME (NFSX_UNSIGNED + NFSX_V4TIME) +#define NFSX_V4SESSIONID 16 +#define NFSX_V4DEVICEID 16 /* sizes common to multiple NFS versions */ #define NFSX_FHMAX (NFSX_V4FHMAX) @@ -258,6 +302,27 @@ * Must be defined as one higher than the last Proc# above. */ #define NFSV4_NPROCS 41 + +/* Additional procedures for NFSv4.1. 
*/ +#define NFSPROC_EXCHANGEID 41 +#define NFSPROC_CREATESESSION 42 +#define NFSPROC_DESTROYSESSION 43 +#define NFSPROC_DESTROYCLIENT 44 +#define NFSPROC_FREESTATEID 45 +#define NFSPROC_LAYOUTGET 46 +#define NFSPROC_GETDEVICEINFO 47 +#define NFSPROC_LAYOUTCOMMIT 48 +#define NFSPROC_LAYOUTRETURN 49 +#define NFSPROC_RECLAIMCOMPL 50 +#define NFSPROC_WRITEDS 51 +#define NFSPROC_READDS 52 +#define NFSPROC_COMMITDS 53 + +/* + * Must be defined as one higher than the last NFSv4.1 Proc# above. + */ +#define NFSV41_NPROCS 54 + #endif /* NFS_V3NPROCS */ /* @@ -269,10 +334,10 @@ /* * NFSPROC_NOOP is a fake op# that can't be the same as any V2/3/4 Procedure - * or Operation#. Since the NFS V4 Op #s go higher, use NFSV4OP_NOPS, which + * or Operation#. Since the NFS V4 Op #s go higher, use NFSV41_NOPS, which * is one greater than the highest Op#. */ -#define NFSPROC_NOOP NFSV4OP_NOPS +#define NFSPROC_NOOP NFSV41_NOPS /* Actual Version 2 procedure numbers */ #define NFSV2PROC_NULL 0 @@ -406,6 +471,7 @@ #define NFSSTATEID_PUTALLZERO 0 #define NFSSTATEID_PUTALLONE 1 #define NFSSTATEID_PUTSTATEID 2 +#define NFSSTATEID_PUTSEQIDZERO 3 /* * Bits for share access and deny. 
@@ -462,12 +528,70 @@ #define NFSCREATE_UNCHECKED 0 #define NFSCREATE_GUARDED 1 #define NFSCREATE_EXCLUSIVE 2 +#define NFSCREATE_EXCLUSIVE41 3 #define NFSV3FSINFO_LINK 0x01 #define NFSV3FSINFO_SYMLINK 0x02 #define NFSV3FSINFO_HOMOGENEOUS 0x08 #define NFSV3FSINFO_CANSETTIME 0x10 +/* Flags for Exchange ID */ +#define NFSV4EXCH_SUPPMOVEDREFER 0x00000001 +#define NFSV4EXCH_SUPPMOVEDMIGR 0x00000002 +#define NFSV4EXCH_BINDPRINCSTATEID 0x00000100 +#define NFSV4EXCH_USENONPNFS 0x00010000 +#define NFSV4EXCH_USEPNFSMDS 0x00020000 +#define NFSV4EXCH_USEPNFSDS 0x00040000 +#define NFSV4EXCH_MASKPNFS 0x00070000 +#define NFSV4EXCH_UPDCONFIRMEDRECA 0x40000000 +#define NFSV4EXCH_CONFIRMEDR 0x80000000 + +/* State Protects */ +#define NFSV4EXCH_SP4NONE 0 +#define NFSV4EXCH_SP4MACHCRED 1 +#define NFSV4EXCH_SP4SSV 2 + +/* Flags for Create Session */ +#define NFSV4CRSESS_PERSIST 0x00000001 +#define NFSV4CRSESS_CONNBACKCHAN 0x00000002 +#define NFSV4CRSESS_CONNRDMA 0x00000004 + +/* Flags for Sequence */ +#define NFSV4SEQ_CBPATHDOWN 0x00000001 +#define NFSV4SEQ_CBGSSCONTEXPIRING 0x00000002 +#define NFSV4SEQ_CBGSSCONTEXPIRED 0x00000004 +#define NFSV4SEQ_EXPIREDALLSTATEREVOKED 0x00000008 +#define NFSV4SEQ_EXPIREDSOMESTATEREVOKED 0x00000010 +#define NFSV4SEQ_ADMINSTATEREVOKED 0x00000020 +#define NFSV4SEQ_RECALLABLESTATEREVOKED 0x00000040 +#define NFSV4SEQ_LEASEMOVED 0x00000080 +#define NFSV4SEQ_RESTARTRECLAIMNEEDED 0x00000100 +#define NFSV4SEQ_CBPATHDOWNSESSION 0x00000200 +#define NFSV4SEQ_BACKCHANNELFAULT 0x00000400 +#define NFSV4SEQ_DEVIDCHANGED 0x00000800 +#define NFSV4SEQ_DEVIDDELETED 0x00001000 + +/* Flags for Layout. */ +#define NFSLAYOUTRETURN_FILE 1 +#define NFSLAYOUTRETURN_FSID 2 +#define NFSLAYOUTRETURN_ALL 3 + +#define NFSLAYOUT_NFSV4_1_FILES 0x1 +#define NFSLAYOUT_OSD2_OBJECTS 0x2 +#define NFSLAYOUT_BLOCK_VOLUME 0x3 + +#define NFSLAYOUTIOMODE_READ 1 +#define NFSLAYOUTIOMODE_RW 2 +#define NFSLAYOUTIOMODE_ANY 3 + +/* Flags for Get Device Info. 
*/ +#define NFSDEVICEIDNOTIFY_CHANGEBIT 0x1 +#define NFSDEVICEIDNOTIFY_DELETEBIT 0x2 + +/* Flags for File Layout. */ +#define NFSFLAYUTIL_DENSE 0x1 +#define NFSFLAYUTIL_COMMIT_THRU_MDS 0x2 + /* Conversion macros */ #define vtonfsv2_mode(t,m) \ txdr_unsigned(((t) == VFIFO) ? MAKEIMODE(VCHR, (m)) : \ diff --git a/sys/fs/nfsclient/nfs_clbio.c b/sys/fs/nfsclient/nfs_clbio.c index 26286c5a758a..dba3bc9328f1 100644 --- a/sys/fs/nfsclient/nfs_clbio.c +++ b/sys/fs/nfsclient/nfs_clbio.c @@ -1371,6 +1371,8 @@ ncl_vinvalbuf(struct vnode *vp, int flags, struct thread *td, int intrflg) goto out; error = vinvalbuf(vp, flags, 0, slptimeo); } + if (NFSHASPNFS(nmp)) + nfscl_layoutcommit(vp, td); mtx_lock(&np->n_mtx); if (np->n_directio_asyncwr == 0) np->n_flag &= ~NMODIFIED; diff --git a/sys/fs/nfsclient/nfs_clcomsubs.c b/sys/fs/nfsclient/nfs_clcomsubs.c index b68e5bd07fd5..073c6cceea16 100644 --- a/sys/fs/nfsclient/nfs_clcomsubs.c +++ b/sys/fs/nfsclient/nfs_clcomsubs.c @@ -43,10 +43,11 @@ __FBSDID("$FreeBSD$"); #include extern struct nfsstats newnfsstats; -extern struct nfsv4_opflag nfsv4_opflag[NFSV4OP_NOPS]; +extern struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS]; extern int ncl_mbuf_mlen; extern enum vtype newnv2tov_type[8]; extern enum vtype nv34tov_type[8]; +extern int nfs_bigreply[NFSV41_NPROCS]; NFSCLSTATEMUTEX; #endif /* !APPLEKEXT */ @@ -56,7 +57,7 @@ static struct { int opcnt; const u_char *tag; int taglen; -} nfsv4_opmap[NFS_NPROCS] = { +} nfsv4_opmap[NFSV41_NPROCS] = { { 0, 1, "Null", 4 }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_SETATTR, 2, "Setattr", 7, }, @@ -98,15 +99,28 @@ static struct { { NFSV4OP_DELEGRETURN, 9, "DelegRename2", 12, }, { NFSV4OP_GETATTR, 1, "Getacl", 6, }, { NFSV4OP_SETATTR, 1, "Setacl", 6, }, + { NFSV4OP_EXCHANGEID, 1, "ExchangeID", 10, }, + { NFSV4OP_CREATESESSION, 1, "CreateSession", 13, }, + { NFSV4OP_DESTROYSESSION, 1, "DestroySession", 14, }, + { NFSV4OP_DESTROYCLIENTID, 1, "DestroyClient", 13, }, + { NFSV4OP_FREESTATEID, 1, 
"FreeStateID", 11, }, + { NFSV4OP_LAYOUTGET, 1, "LayoutGet", 9, }, + { NFSV4OP_GETDEVINFO, 1, "GetDeviceInfo", 13, }, + { NFSV4OP_LAYOUTCOMMIT, 1, "LayoutCommit", 12, }, + { NFSV4OP_LAYOUTRETURN, 1, "LayoutReturn", 12, }, + { NFSV4OP_RECLAIMCOMPL, 1, "ReclaimComplete", 15, }, + { NFSV4OP_WRITE, 1, "WriteDS", 7, }, + { NFSV4OP_READ, 1, "ReadDS", 6, }, + { NFSV4OP_COMMIT, 1, "CommitDS", 8, }, }; - /* * NFS RPCS that have large request message size. */ -static int nfs_bigrequest[NFS_NPROCS] = { +static int nfs_bigrequest[NFSV41_NPROCS] = { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 0 }; /* @@ -115,7 +129,7 @@ static int nfs_bigrequest[NFS_NPROCS] = { */ APPLESTATIC void nfscl_reqstart(struct nfsrv_descript *nd, int procnum, struct nfsmount *nmp, - u_int8_t *nfhp, int fhlen, u_int32_t **opcntpp) + u_int8_t *nfhp, int fhlen, u_int32_t **opcntpp, struct nfsclsession *sep) { struct mbuf *mb; u_int32_t *tl; @@ -125,9 +139,12 @@ nfscl_reqstart(struct nfsrv_descript *nd, int procnum, struct nfsmount *nmp, /* * First, fill in some of the fields of nd. */ - if (NFSHASNFSV4(nmp)) + nd->nd_slotseq = NULL; + if (NFSHASNFSV4(nmp)) { nd->nd_flag = ND_NFSV4 | ND_NFSCL; - else if (NFSHASNFSV3(nmp)) + if (NFSHASNFSV4N(nmp)) + nd->nd_flag |= ND_NFSV41; + } else if (NFSHASNFSV3(nmp)) nd->nd_flag = ND_NFSV3 | ND_NFSCL; else nd->nd_flag = ND_NFSV2 | ND_NFSCL; @@ -151,33 +168,71 @@ nfscl_reqstart(struct nfsrv_descript *nd, int procnum, struct nfsmount *nmp, if (nd->nd_flag & ND_NFSV4) { opcnt = nfsv4_opmap[procnum].opcnt + nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh; + if ((nd->nd_flag & ND_NFSV41) != 0) { + opcnt += nfsv4_opflag[nfsv4_opmap[procnum].op].needsseq; + if (procnum == NFSPROC_RENEW) + /* + * For the special case of Renew, just do a + * Sequence Op. 
+ */ + opcnt = 1; + else if (procnum == NFSPROC_WRITEDS || + procnum == NFSPROC_COMMITDS) + /* + * For the special case of a Write or Commit to + * a DS, the opcnt == 3, for Sequence, PutFH, + * Write/Commit. + */ + opcnt = 3; + } /* * What should the tag really be? */ (void) nfsm_strtom(nd, nfsv4_opmap[procnum].tag, nfsv4_opmap[procnum].taglen); - NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); - *tl++ = txdr_unsigned(NFSV4_MINORVERSION); + NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + if ((nd->nd_flag & ND_NFSV41) != 0) + *tl++ = txdr_unsigned(NFSV41_MINORVERSION); + else + *tl++ = txdr_unsigned(NFSV4_MINORVERSION); if (opcntpp != NULL) *opcntpp = tl; - *tl++ = txdr_unsigned(opcnt); + *tl = txdr_unsigned(opcnt); + if ((nd->nd_flag & ND_NFSV41) != 0 && + nfsv4_opflag[nfsv4_opmap[procnum].op].needsseq > 0) { + NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4OP_SEQUENCE); + if (sep == NULL) + nfsv4_setsequence(nd, NFSMNT_MDSSESSION(nmp), + nfs_bigreply[procnum]); + else + nfsv4_setsequence(nd, sep, + nfs_bigreply[procnum]); + } if (nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh > 0) { + NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); (void) nfsm_fhtom(nd, nfhp, fhlen, 0); - if (nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh==2){ + if (nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh + == 2 && procnum != NFSPROC_WRITEDS && + procnum != NFSPROC_COMMITDS) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSWCCATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_V4WCCATTR; } - NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); } - *tl = txdr_unsigned(nfsv4_opmap[procnum].op); + if (procnum != NFSPROC_RENEW || + (nd->nd_flag & ND_NFSV41) == 0) { + NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(nfsv4_opmap[procnum].op); + } } else { (void) nfsm_fhtom(nd, nfhp, fhlen, 0); } - NFSINCRGLOBAL(newnfsstats.rpccnt[procnum]); + if (procnum < 
NFSV4_NPROCS) + NFSINCRGLOBAL(newnfsstats.rpccnt[procnum]); } #ifndef APPLE @@ -453,6 +508,11 @@ nfsm_stateidtom(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, int flag) st->other[0] = 0xffffffff; st->other[1] = 0xffffffff; st->other[2] = 0xffffffff; + } else if (flag == NFSSTATEID_PUTSEQIDZERO) { + st->seqid = 0; + st->other[0] = stateidp->other[0]; + st->other[1] = stateidp->other[1]; + st->other[2] = stateidp->other[2]; } else { st->seqid = stateidp->seqid; st->other[0] = stateidp->other[0]; diff --git a/sys/fs/nfsclient/nfs_clkdtrace.c b/sys/fs/nfsclient/nfs_clkdtrace.c index c7db3a4913e5..cf3d8b09f10f 100644 --- a/sys/fs/nfsclient/nfs_clkdtrace.c +++ b/sys/fs/nfsclient/nfs_clkdtrace.c @@ -92,7 +92,7 @@ struct dtnfsclient_rpc { * This table is indexed by NFSv3 procedure number, but also used for NFSv2 * procedure names and NFSv4 operations. */ -static struct dtnfsclient_rpc dtnfsclient_rpcs[NFS_NPROCS + 1] = { +static struct dtnfsclient_rpc dtnfsclient_rpcs[NFSV41_NPROCS + 1] = { { "null", "null", "null" }, { "getattr", "getattr", "getattr" }, { "setattr", "setattr", "setattr" }, @@ -196,17 +196,17 @@ extern uint32_t nfscl_attrcache_load_done_id; * stored in one of these two NFS client-allocated arrays; 0 indicates that * the event is not being traced so probes should not be called. * - * For simplicity, we allocate both v2, v3 and v4 arrays as NFS_NPROCS + 1, and - * the v2, v3 arrays are simply sparse. + * For simplicity, we allocate both v2, v3 and v4 arrays as NFSV41_NPROCS + 1, + * and the v2, v3 arrays are simply sparse. 
*/ -extern uint32_t nfscl_nfs2_start_probes[NFS_NPROCS + 1]; -extern uint32_t nfscl_nfs2_done_probes[NFS_NPROCS + 1]; +extern uint32_t nfscl_nfs2_start_probes[NFSV41_NPROCS + 1]; +extern uint32_t nfscl_nfs2_done_probes[NFSV41_NPROCS + 1]; -extern uint32_t nfscl_nfs3_start_probes[NFS_NPROCS + 1]; -extern uint32_t nfscl_nfs3_done_probes[NFS_NPROCS + 1]; +extern uint32_t nfscl_nfs3_start_probes[NFSV41_NPROCS + 1]; +extern uint32_t nfscl_nfs3_done_probes[NFSV41_NPROCS + 1]; -extern uint32_t nfscl_nfs4_start_probes[NFS_NPROCS + 1]; -extern uint32_t nfscl_nfs4_done_probes[NFS_NPROCS + 1]; +extern uint32_t nfscl_nfs4_start_probes[NFSV41_NPROCS + 1]; +extern uint32_t nfscl_nfs4_done_probes[NFSV41_NPROCS + 1]; /* * Look up a DTrace probe ID to see if it's associated with a "done" event -- @@ -217,7 +217,7 @@ dtnfs234_isdoneprobe(dtrace_id_t id) { int i; - for (i = 0; i < NFS_NPROCS + 1; i++) { + for (i = 0; i < NFSV41_NPROCS + 1; i++) { if (dtnfsclient_rpcs[i].nr_v4_id_done == id || dtnfsclient_rpcs[i].nr_v3_id_done == id || dtnfsclient_rpcs[i].nr_v2_id_done == id) @@ -401,7 +401,7 @@ dtnfsclient_provide(void *arg, dtrace_probedesc_t *desc) * Register NFSv2 RPC procedures; note sparseness check for each slot * in the NFSv3, NFSv4 procnum-indexed array. */ - for (i = 0; i < NFS_NPROCS + 1; i++) { + for (i = 0; i < NFSV41_NPROCS + 1; i++) { if (dtnfsclient_rpcs[i].nr_v2_name != NULL && dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs2_str, dtnfsclient_rpcs[i].nr_v2_name, dtnfsclient_start_str) == @@ -430,7 +430,7 @@ dtnfsclient_provide(void *arg, dtrace_probedesc_t *desc) * Register NFSv3 RPC procedures; note sparseness check for each slot * in the NFSv4 procnum-indexed array. 
*/ - for (i = 0; i < NFS_NPROCS + 1; i++) { + for (i = 0; i < NFSV41_NPROCS + 1; i++) { if (dtnfsclient_rpcs[i].nr_v3_name != NULL && dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs3_str, dtnfsclient_rpcs[i].nr_v3_name, dtnfsclient_start_str) == @@ -458,7 +458,7 @@ dtnfsclient_provide(void *arg, dtrace_probedesc_t *desc) /* * Register NFSv4 RPC procedures. */ - for (i = 0; i < NFS_NPROCS + 1; i++) { + for (i = 0; i < NFSV41_NPROCS + 1; i++) { if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs4_str, dtnfsclient_rpcs[i].nr_v4_name, dtnfsclient_start_str) == 0) { diff --git a/sys/fs/nfsclient/nfs_clkrpc.c b/sys/fs/nfsclient/nfs_clkrpc.c index fceb36d173ea..71889fc985d9 100644 --- a/sys/fs/nfsclient/nfs_clkrpc.c +++ b/sys/fs/nfsclient/nfs_clkrpc.c @@ -45,12 +45,13 @@ __FBSDID("$FreeBSD$"); NFSDLOCKMUTEX; -SVCPOOL *nfscbd_pool; +extern SVCPOOL *nfscbd_pool; static int nfs_cbproc(struct nfsrv_descript *, u_int32_t); extern u_long sb_max_adj; extern int nfs_numnfscbd; +extern int nfscl_debuglevel; /* * NFS client system calls for handling callbacks. @@ -90,6 +91,7 @@ nfscb_program(struct svc_req *rqst, SVCXPRT *xprt) nd.nd_mreq = NULL; nd.nd_cred = NULL; + NFSCL_DEBUG(1, "cbproc=%d\n",nd.nd_procnum); if (nd.nd_procnum != NFSPROC_NULL) { if (!svc_getcred(rqst, &nd.nd_cred, &credflavor)) { svcerr_weakauth(rqst); @@ -133,9 +135,10 @@ nfscb_program(struct svc_req *rqst, SVCXPRT *xprt) svcerr_auth(rqst, nd.nd_repstat & ~NFSERR_AUTHERR); if (nd.nd_mreq != NULL) m_freem(nd.nd_mreq); - } else if (!svc_sendreply_mbuf(rqst, nd.nd_mreq)) { + } else if (!svc_sendreply_mbuf(rqst, nd.nd_mreq)) svcerr_systemerr(rqst); - } + else + NFSCL_DEBUG(1, "cbrep sent\n"); svc_freereq(rqst); } @@ -271,13 +274,15 @@ nfsrvd_cbinit(int terminating) NFSD_LOCK_ASSERT(); if (terminating) { + /* Wait for any xprt registrations to complete. 
*/ + while (nfs_numnfscbd > 0) + msleep(&nfs_numnfscbd, NFSDLOCKMUTEXPTR, PZERO, + "nfscbdt", 0); NFSD_UNLOCK(); svcpool_destroy(nfscbd_pool); nfscbd_pool = NULL; - NFSD_LOCK(); - } - - NFSD_UNLOCK(); + } else + NFSD_UNLOCK(); nfscbd_pool = svcpool_create("nfscbd", NULL); nfscbd_pool->sp_rcache = NULL; diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c index fe28975c919d..145ff63b4e6d 100644 --- a/sys/fs/nfsclient/nfs_clport.c +++ b/sys/fs/nfsclient/nfs_clport.c @@ -853,7 +853,7 @@ nfscl_request(struct nfsrv_descript *nd, struct vnode *vp, NFSPROC_T *p, else vers = NFS_VER2; ret = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, - NFS_PROG, vers, NULL, 1, NULL); + NFS_PROG, vers, NULL, 1, NULL, NULL); return (ret); } @@ -1112,10 +1112,15 @@ nfscl_maperr(struct thread *td, int error, uid_t uid, gid_t gid) "No name and/or group mapping for uid,gid:(%d,%d)\n", uid, gid); return (EPERM); + case NFSERR_BADNAME: + case NFSERR_BADCHAR: + printf("nfsv4 char/name not handled by server\n"); + return (ENOENT); case NFSERR_STALECLIENTID: case NFSERR_STALESTATEID: case NFSERR_EXPIRED: case NFSERR_BADSTATEID: + case NFSERR_BADSESSION: printf("nfsv4 recover err returned %d\n", error); return (EIO); case NFSERR_BADHANDLE: @@ -1131,8 +1136,6 @@ nfscl_maperr(struct thread *td, int error, uid_t uid, gid_t gid) case NFSERR_LEASEMOVED: case NFSERR_RECLAIMBAD: case NFSERR_BADXDR: - case NFSERR_BADCHAR: - case NFSERR_BADNAME: case NFSERR_OPILLEGAL: printf("nfsv4 client/server protocol prob err=%d\n", error); diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c index 7da93cf6c6a2..be0476a41273 100644 --- a/sys/fs/nfsclient/nfs_clrpcops.c +++ b/sys/fs/nfsclient/nfs_clrpcops.c @@ -67,6 +67,19 @@ int nfstest_openallsetattr = 0; #define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) +/* + * nfscl_getsameserver() can return one of three values: + * NFSDSP_USETHISSESSION - Use this session for the DS. 
+ * NFSDSP_SEQTHISSESSION - Use the nfsclds_sequence field of this dsp for new + * session. + * NFSDSP_NOTFOUND - No matching server was found. + */ +enum nfsclds_state { + NFSDSP_USETHISSESSION = 0, + NFSDSP_SEQTHISSESSION = 1, + NFSDSP_NOTFOUND = 2, +}; + static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *, @@ -86,6 +99,27 @@ static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *, u_int32_t, struct ucred *, NFSPROC_T *, int); static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *, struct acl *, nfsv4stateid_t *, void *); +static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int, + uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **, + struct ucred *, NFSPROC_T *); +static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_storage *, + struct nfsclds **, NFSPROC_T *); +static void nfscl_initsessionslots(struct nfsclsession *); +static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *, + nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *, + struct nfsclflayout *, uint64_t, uint64_t, struct ucred *, NFSPROC_T *); +static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *, + struct nfsclds *, uint64_t, int, struct nfsfh *, struct ucred *, + NFSPROC_T *); +static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *, + nfsv4stateid_t *, struct nfsclds *, uint64_t, int, + struct nfsfh *, int, struct ucred *, NFSPROC_T *); +static enum nfsclds_state nfscl_getsameserver(struct nfsmount *, + struct nfsclds *, struct nfsclds **); +#ifdef notyet +static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *, + struct nfsfh *, struct ucred *, NFSPROC_T *, void *); +#endif /* * nfs null call from vfs. 
@@ -308,7 +342,8 @@ else printf(" fhl=0\n"); op->nfso_opencnt++; nfscl_openrelease(op, error, newone); if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || - error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY) { + error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_open"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { @@ -317,6 +352,7 @@ else printf(" fhl=0\n"); } } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_BADSESSION || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) @@ -344,13 +380,13 @@ nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, dp = *dpp; *dpp = NULL; - nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL); + nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH); *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH); - *tl++ = op->nfso_own->nfsow_clp->nfsc_clientid.lval[0]; - *tl = op->nfso_own->nfsow_clp->nfsc_clientid.lval[1]; + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; (void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE); @@ -362,7 +398,10 @@ nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, if (dp != NULL) { *tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); - *tl++ = dp->nfsdl_stateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = dp->nfsdl_stateid.seqid; *tl++ = 
dp->nfsdl_stateid.other[0]; *tl++ = dp->nfsdl_stateid.other[1]; *tl = dp->nfsdl_stateid.other[2]; @@ -380,7 +419,7 @@ nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); @@ -501,14 +540,15 @@ nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, if (ndp != NULL) FREE((caddr_t)ndp, M_NFSCLDELEG); if (ret == NFSERR_STALECLIENTID || - ret == NFSERR_STALEDONTRECOVER) + ret == NFSERR_STALEDONTRECOVER || + ret == NFSERR_BADSESSION) error = ret; } } } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; - if (error == NFSERR_STALECLIENTID) + if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: if (!error) @@ -532,7 +572,10 @@ nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op, NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED); - *tl++ = op->nfso_stateid.seqid; + if (NFSHASNFSV4N(VFSTONFS(vnode_mount(vp)))) + *tl++ = 0; + else + *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; *tl++ = op->nfso_stateid.other[1]; *tl++ = op->nfso_stateid.other[2]; @@ -552,7 +595,7 @@ nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op, } if (nd->nd_repstat && error == 0) error = nd->nd_repstat; - if (error == NFSERR_STALESTATEID) + if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); @@ -690,24 +733,27 @@ nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp, int error; nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh, - op->nfso_fhlen, NULL); + op->nfso_fhlen, NULL, NULL); 
NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); - *tl++ = op->nfso_stateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; *tl++ = op->nfso_stateid.other[1]; *tl = op->nfso_stateid.other[2]; if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (nd->nd_repstat == 0) NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); error = nd->nd_repstat; - if (error == NFSERR_STALESTATEID) + if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); @@ -723,10 +769,13 @@ nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen, { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; + struct nfsmount *nmp; int error; - nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, VFSTONFS(vnode_mount(vp)), - nfhp, fhlen, NULL); + nmp = VFSTONFS(vnode_mount(vp)); + if (NFSHASNFSV4N(nmp)) + return (0); /* No confirmation for NFSv4.1. */ + nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; @@ -745,7 +794,7 @@ nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen, op->nfso_stateid.other[2] = *tl; } error = nd->nd_repstat; - if (error == NFSERR_STALESTATEID) + if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); @@ -757,7 +806,7 @@ nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen, * when a mount has just occurred and when the server replies NFSERR_EXPIRED. 
*/ APPLESTATIC int -nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, +nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; @@ -770,13 +819,58 @@ nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, nfsquad_t confirm; u_int32_t lease; static u_int32_t rev = 0; + struct nfsclds *dsp, *ndsp, *tdsp; if (nfsboottime.tv_sec == 0) NFSSETBOOTTIME(nfsboottime); - nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL); + clp->nfsc_rev = rev++; + if (NFSHASNFSV4N(nmp)) { + error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, + NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp, cred, p); + NFSCL_DEBUG(1, "aft exch=%d\n", error); + if (error == 0) { + error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess, + &nmp->nm_sockreq, + dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p); + if (error == 0) { + NFSLOCKMNT(nmp); + TAILQ_FOREACH_SAFE(tdsp, &nmp->nm_sess, + nfsclds_list, ndsp) + nfscl_freenfsclds(tdsp); + TAILQ_INIT(&nmp->nm_sess); + TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, + nfsclds_list); + NFSUNLOCKMNT(nmp); + } else + nfscl_freenfsclds(dsp); + NFSCL_DEBUG(1, "aft createsess=%d\n", error); + } + if (error == 0 && reclaim == 0) { + error = nfsrpc_reclaimcomplete(nmp, cred, p); + NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error); + if (error == NFSERR_COMPLETEALREADY || + error == NFSERR_NOTSUPP) + /* Ignore this error. */ + error = 0; + } + return (error); + } + + /* + * Allocate a single session structure for NFSv4.0, because some of + * the fields are used by NFSv4.0 although it doesn't do a session. 
+ */ + dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO); + mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF); + mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF); + NFSLOCKMNT(nmp); + TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list); + NFSUNLOCKMNT(nmp); + + nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(nfsboottime.tv_sec); - *tl = txdr_unsigned(rev++); + *tl = txdr_unsigned(clp->nfsc_rev); (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen); /* @@ -827,13 +921,13 @@ nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, *tl = txdr_unsigned(clp->nfsc_cbident); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); - clp->nfsc_clientid.lval[0] = *tl++; - clp->nfsc_clientid.lval[1] = *tl++; + NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0] = *tl++; + NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1] = *tl++; confirm.lval[0] = *tl++; confirm.lval[1] = *tl; mbuf_freem(nd->nd_mrep); @@ -842,28 +936,29 @@ nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, /* * and confirm it. 
*/ - nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL); + nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL, + NULL); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); - *tl++ = clp->nfsc_clientid.lval[0]; - *tl++ = clp->nfsc_clientid.lval[1]; + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; *tl++ = confirm.lval[0]; *tl = confirm.lval[1]; nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, - cred, NFS_PROG, NFS_VER4, NULL, 1, NULL); + cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; if (nd->nd_repstat == 0) { nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, nmp->nm_fh, - nmp->nm_fhsize, NULL); + nmp->nm_fhsize, NULL, NULL); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, - cred, NFS_PROG, NFS_VER4, NULL, 1, NULL); + cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { @@ -917,16 +1012,18 @@ nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p, */ APPLESTATIC int nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred, - struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp) + struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp, + uint32_t *leasep) { struct nfsrv_descript nfsd, *nd = &nfsd; int error, vers = NFS_VER2; nfsattrbit_t attrbits; - nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL); + nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL); if (nd->nd_flag & ND_NFSV4) { vers = NFS_VER4; NFSGETATTR_ATTRBIT(&attrbits); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME); (void) nfsrv_putattrbit(nd, &attrbits); } else if (nd->nd_flag & ND_NFSV3) { vers = NFS_VER3; 
@@ -934,12 +1031,17 @@ nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred, if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, vers, NULL, 1, xidp); + NFS_PROG, vers, NULL, 1, xidp, NULL); if (error) return (error); - if (!nd->nd_repstat) - error = nfsm_loadattr(nd, nap); - else + if (nd->nd_repstat == 0) { + if ((nd->nd_flag & ND_NFSV4) != 0) + error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, + NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL, + NULL, NULL); + else + error = nfsm_loadattr(nd, nap); + } else error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); @@ -973,7 +1075,7 @@ nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp, if (NFSHASNFSV4(nmp)) { nfhp = VTONFS(vp)->n_fhp; error = nfscl_getstateid(vp, nfhp->nfh_fh, - nfhp->nfh_len, mode, cred, p, &stateid, &lckp); + nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp); if (error && vnode_vtype(vp) == VREG && (mode == NFSV4OPEN_ACCESSWRITE || nfstest_openallsetattr)) { @@ -990,7 +1092,7 @@ nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp, if (!openerr) (void) nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, - mode, cred, p, &stateid, &lckp); + mode, 0, cred, p, &stateid, &lckp); } } if (vap != NULL) @@ -999,7 +1101,7 @@ nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp, else error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid, stuff); - if (error == NFSERR_STALESTATEID) + if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) nfscl_lockderef(lckp); @@ -1007,7 +1109,7 @@ nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp, (void) nfsrpc_close(vp, 0, p); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || - error == NFSERR_OLDSTATEID) { + error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { (void) 
nfs_catnap(PZERO, error, "nfs_setattr"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { @@ -1016,6 +1118,7 @@ nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp, retrycnt++; } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_BADSESSION || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); @@ -1242,16 +1345,17 @@ nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred, lckp = NULL; if (NFSHASNFSV4(nmp)) (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, - NFSV4OPEN_ACCESSREAD, newcred, p, &stateid, &lckp); + NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid, + &lckp); error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap, attrflagp, stuff); - if (error == NFSERR_STALESTATEID) + if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) nfscl_lockderef(lckp); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || - error == NFSERR_OLDSTATEID) { + error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_read"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { @@ -1260,6 +1364,7 @@ nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred, retrycnt++; } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_BADSESSION || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); @@ -1395,7 +1500,8 @@ nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, nostateid = 0; if (NFSHASNFSV4(nmp)) { 
(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, - NFSV4OPEN_ACCESSWRITE, newcred, p, &stateid, &lckp); + NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid, + &lckp); if (stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { nostateid = 1; @@ -1413,13 +1519,13 @@ nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, else error = nfsrpc_writerpc(vp, uiop, iomode, must_commit, newcred, &stateid, p, nap, attrflagp, stuff); - if (error == NFSERR_STALESTATEID) + if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) nfscl_lockderef(lckp); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || - error == NFSERR_OLDSTATEID) { + error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_write"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { @@ -1427,13 +1533,13 @@ nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, } retrycnt++; } while (error == NFSERR_GRACE || error == NFSERR_DELAY || - ((error == NFSERR_STALESTATEID || + ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error != 0 && (retrycnt >= 4 || - ((error == NFSERR_STALESTATEID || + ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0))) error = EIO; if (NFSHASNFSV4(nmp)) @@ -1747,7 +1853,8 @@ nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap, (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp); nfscl_ownerrelease(owp, error, newone, unlocked); if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || - 
error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY) { + error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_open"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { @@ -1756,6 +1863,7 @@ nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap, } } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_BADSESSION || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) @@ -1836,7 +1944,9 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsattrbit_t attrbits; nfsv4stateid_t stateid; u_int32_t rflags; + struct nfsmount *nmp; + nmp = VFSTONFS(dvp->v_mount); *unlockedp = 0; *nfhpp = NULL; *dpp = NULL; @@ -1853,16 +1963,32 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap, *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD); *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE); - *tl++ = owp->nfsow_clp->nfsc_clientid.lval[0]; - *tl = owp->nfsow_clp->nfsc_clientid.lval[1]; + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; (void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_CREATE); if (fmode & O_EXCL) { - *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE); - NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); - *tl++ = cverf.lval[0]; - *tl = cverf.lval[1]; + if (NFSHASNFSV4N(nmp)) { + if (NFSHASSESSPERSIST(nmp)) { + /* Use GUARDED for persistent sessions. */ + *tl = txdr_unsigned(NFSCREATE_GUARDED); + nfscl_fillsattr(nd, vap, dvp, 0, 0); + } else { + /* Otherwise, use EXCLUSIVE4_1. 
*/ + *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41); + NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); + *tl++ = cverf.lval[0]; + *tl = cverf.lval[1]; + nfscl_fillsattr(nd, vap, dvp, 0, 0); + } + } else { + /* NFSv4.0 */ + *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE); + NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); + *tl++ = cverf.lval[0]; + *tl = cverf.lval[1]; + } } else { *tl = txdr_unsigned(NFSCREATE_UNCHECKED); nfscl_fillsattr(nd, vap, dvp, 0, 0); @@ -2009,7 +2135,8 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap, if (dp != NULL) FREE((caddr_t)dp, M_NFSCLDELEG); if (ret == NFSERR_STALECLIENTID || - ret == NFSERR_STALEDONTRECOVER) + ret == NFSERR_STALEDONTRECOVER || + ret == NFSERR_BADSESSION) error = ret; } } @@ -2018,7 +2145,7 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap, } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; - if (error == NFSERR_STALECLIENTID) + if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(owp->nfsow_clp); nfsmout: if (!error) @@ -2055,7 +2182,10 @@ nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp, NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); - *tl++ = dstateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = dstateid.seqid; *tl++ = dstateid.other[0]; *tl++ = dstateid.other[1]; *tl++ = dstateid.other[2]; @@ -2138,7 +2268,10 @@ nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen, } if (gotfd) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); - *tl++ = fdstateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = fdstateid.seqid; *tl++ = fdstateid.other[0]; *tl++ = fdstateid.other[1]; *tl = fdstateid.other[2]; @@ -2154,7 +2287,10 @@ nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen, } if (gottd) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); - *tl++ = tdstateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = 
tdstateid.seqid; *tl++ = tdstateid.other[0]; *tl++ = tdstateid.other[1]; *tl = tdstateid.other[2]; @@ -3421,13 +3557,13 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep, */ APPLESTATIC int nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred, - NFSPROC_T *p, u_char *verfp, struct nfsvattr *nap, int *attrflagp, - void *stuff) + NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; int error; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp); @@ -3450,7 +3586,12 @@ nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred, error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, stuff); if (!error && !nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); - NFSBCOPY((caddr_t)tl, verfp, NFSX_VERF); + NFSLOCKMNT(nmp); + if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) { + NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); + nd->nd_repstat = NFSERR_STALEWRITEVERF; + } + NFSUNLOCKMNT(nmp); if (nd->nd_flag & ND_NFSV4) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); } @@ -3516,7 +3657,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, do { nd->nd_repstat = 0; if (op == F_GETLK) { - error = nfscl_getcl(vp, cred, p, &clp); + error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (error) return (error); error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags); @@ -3533,7 +3674,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, * We must loop around for all lockowner cases. 
*/ callcnt = 0; - error = nfscl_getcl(vp, cred, p, &clp); + error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (error) return (error); do { @@ -3610,7 +3751,8 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, error = nd->nd_repstat; if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || - error == NFSERR_STALECLIENTID || error == NFSERR_DELAY) { + error == NFSERR_STALECLIENTID || error == NFSERR_DELAY || + error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_advlock"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { @@ -3620,6 +3762,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_DELAY || error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID || + error == NFSERR_BADSESSION || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) @@ -3639,7 +3782,9 @@ nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp, int error, type, size; uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX]; struct nfsnode *np; + struct nfsmount *nmp; + nmp = VFSTONFS(vp->v_mount); NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp); NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); if (fl->l_type == F_RDLCK) @@ -3650,8 +3795,8 @@ nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp, tl += 2; txdr_hyper(len, tl); tl += 2; - *tl++ = clp->nfsc_clientid.lval[0]; - *tl = clp->nfsc_clientid.lval[1]; + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; nfscl_filllockowner(id, own, flags); np = VTONFS(vp); NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN], @@ -3691,7 +3836,8 @@ nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp, error = EBADRPC; if (!error) error = nfsm_advance(nd, NFSM_RNDUP(size), -1); - } else if (nd->nd_repstat == 
NFSERR_STALECLIENTID) + } else if (nd->nd_repstat == NFSERR_STALECLIENTID || + nd->nd_repstat == NFSERR_BADSESSION) nfscl_initiate_recovery(clp); nfsmout: mbuf_freem(nd->nd_mrep); @@ -3710,7 +3856,7 @@ nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp, int error; nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh, - lp->nfsl_open->nfso_fhlen, NULL); + lp->nfsl_open->nfso_fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(type); *tl = txdr_unsigned(lp->nfsl_seqid); @@ -3718,7 +3864,10 @@ nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp, (arc4random() % nfstest_outofseq) == 0) *tl = txdr_unsigned(lp->nfsl_seqid + 1); tl++; - *tl++ = lp->nfsl_stateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = lp->nfsl_stateid.seqid; *tl++ = lp->nfsl_stateid.other[0]; *tl++ = lp->nfsl_stateid.other[1]; *tl++ = lp->nfsl_stateid.other[2]; @@ -3728,7 +3877,7 @@ nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp, if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); NFSCL_INCRSEQID(lp->nfsl_seqid, nd); if (error) return (error); @@ -3738,7 +3887,8 @@ nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp, lp->nfsl_stateid.other[0] = *tl++; lp->nfsl_stateid.other[1] = *tl++; lp->nfsl_stateid.other[2] = *tl; - } else if (nd->nd_repstat == NFSERR_STALESTATEID) + } else if (nd->nd_repstat == NFSERR_STALESTATEID || + nd->nd_repstat == NFSERR_BADSESSION) nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); @@ -3758,7 +3908,7 @@ nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp, int error, size; uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX]; - nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL); + nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL); 
NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); if (type == F_RDLCK) *tl++ = txdr_unsigned(NFSV4LOCKT_READ); @@ -3774,20 +3924,26 @@ nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp, NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 2 * NFSX_UNSIGNED + NFSX_HYPER); *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid); - *tl++ = lp->nfsl_open->nfso_stateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = lp->nfsl_open->nfso_stateid.seqid; *tl++ = lp->nfsl_open->nfso_stateid.other[0]; *tl++ = lp->nfsl_open->nfso_stateid.other[1]; *tl++ = lp->nfsl_open->nfso_stateid.other[2]; *tl++ = txdr_unsigned(lp->nfsl_seqid); - *tl++ = lp->nfsl_open->nfso_own->nfsow_clp->nfsc_clientid.lval[0]; - *tl = lp->nfsl_open->nfso_own->nfsow_clp->nfsc_clientid.lval[1]; + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN); NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen); (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen); } else { *tl = newnfs_false; NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); - *tl++ = lp->nfsl_stateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = lp->nfsl_stateid.seqid; *tl++ = lp->nfsl_stateid.other[0]; *tl++ = lp->nfsl_stateid.other[1]; *tl++ = lp->nfsl_stateid.other[2]; @@ -3799,7 +3955,7 @@ nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp, if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (newone) @@ -3818,7 +3974,8 @@ nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp, error = EBADRPC; if (!error) error = nfsm_advance(nd, NFSM_RNDUP(size), -1); - } else if (nd->nd_repstat == NFSERR_STALESTATEID) + } else if (nd->nd_repstat == NFSERR_STALESTATEID || + nd->nd_repstat == 
NFSERR_BADSESSION) nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); @@ -4009,24 +4166,34 @@ nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred, * This function performs the Renew RPC. */ APPLESTATIC int -nfsrpc_renew(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) +nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred, + NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct nfsmount *nmp; int error; + struct nfssockreq *nrp; nmp = clp->nfsc_nmp; if (nmp == NULL) return (0); - nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL); - NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); - *tl++ = clp->nfsc_clientid.lval[0]; - *tl = clp->nfsc_clientid.lval[1]; + nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, + &dsp->nfsclds_sess); + if (!NFSHASNFSV4N(nmp)) { + /* NFSv4.1 just uses a Sequence Op and not a Renew. */ + NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; + } + nrp = dsp->nfsclds_sockp; + if (nrp == NULL) + /* If NULL, use the MDS socket. 
*/ + nrp = &nmp->nm_sockreq; nd->nd_flag |= ND_USEGSSNAME; - error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); if (error) return (error); error = nd->nd_repstat; @@ -4046,16 +4213,24 @@ nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp, int error; uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX]; - nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL); - NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); - *tl++ = nmp->nm_clp->nfsc_clientid.lval[0]; - *tl = nmp->nm_clp->nfsc_clientid.lval[1]; - NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN); - NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen); - (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen); + if (NFSHASNFSV4N(nmp)) { + /* For NFSv4.1, do a FreeStateID. */ + nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL, + NULL); + nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID); + } else { + nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL, + NULL); + NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; + NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN); + NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen); + (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen); + } nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); error = nd->nd_repstat; @@ -4077,7 +4252,7 @@ nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred, int error, cnt, len, setnil; u_int32_t *opcntp; - nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp); + nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL); cp = 
dirpath; cnt = 0; do { @@ -4101,12 +4276,16 @@ nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred, *cp2++ = '/'; cp = cp2; } while (*cp != '\0'); - *opcntp = txdr_unsigned(2 + cnt); + if (NFSHASNFSV4N(nmp)) + /* Has a Sequence Op done by nfscl_reqstart(). */ + *opcntp = txdr_unsigned(3 + cnt); + else + *opcntp = txdr_unsigned(2 + cnt); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETFH); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { @@ -4140,16 +4319,19 @@ nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred, int error; nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh, - dp->nfsdl_fhlen, NULL); + dp->nfsdl_fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); - *tl++ = dp->nfsdl_stateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = dp->nfsdl_stateid.seqid; *tl++ = dp->nfsdl_stateid.other[0]; *tl++ = dp->nfsdl_stateid.other[1]; *tl = dp->nfsdl_stateid.other[2]; if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); error = nd->nd_repstat; @@ -4230,3 +4412,1466 @@ nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p, mbuf_freem(nd->nd_mrep); return (nd->nd_repstat); } + +/* + * Do the NFSv4.1 Exchange ID. 
+ * Sends the client owner (boot time, nfsc_rev, nfsc_id), the exchange
+ * flags and an implementation id.  On success a zeroed struct nfsclds,
+ * sized to hold the server owner major id, is allocated, filled in from
+ * the reply and returned via *dspp; otherwise *dspp stays NULL.
+ */
+int
+nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp,
+    struct nfssockreq *nrp, uint32_t exchflags, struct nfsclds **dspp,
+    struct ucred *cred, NFSPROC_T *p)
+{
+	uint32_t *tl, v41flags;
+	struct nfsrv_descript nfsd;
+	struct nfsrv_descript *nd = &nfsd;
+	struct nfsclds *dsp;
+	struct timespec verstime;
+	int error, len;
+
+	*dspp = NULL;
+	nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL);
+	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+	*tl++ = txdr_unsigned(nfsboottime.tv_sec);	/* Client owner */
+	*tl = txdr_unsigned(clp->nfsc_rev);
+	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
+
+	NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
+	*tl++ = txdr_unsigned(exchflags);
+	*tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);
+
+	/* Set the implementation id4 */
+	*tl = txdr_unsigned(1);
+	(void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
+	(void) nfsm_strtom(nd, version, strlen(version));
+	NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
+	verstime.tv_sec = 1293840000;		/* Jan 1, 2011 */
+	verstime.tv_nsec = 0;
+	txdr_nfsv4time(&verstime, tl);
+	nd->nd_flag |= ND_USEGSSNAME;
+	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+	NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error,
+	    (int)nd->nd_repstat);
+	if (error != 0)
+		return (error);
+	if (nd->nd_repstat == 0) {
+		NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER);
+		/* Peek at the last dissected word to size the allocation. */
+		len = fxdr_unsigned(int, *(tl + 7));
+		if (len < 0 || len > NFSV4_OPAQUELIMIT) {
+			error = NFSERR_BADXDR;
+			goto nfsmout;
+		}
+		dsp = malloc(sizeof(struct nfsclds) + len, M_NFSCLDS,
+		    M_WAITOK | M_ZERO);
+		dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
+		dsp->nfsclds_servownlen = len;
+		dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++;
+		dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++;
+		dsp->nfsclds_sess.nfsess_sequenceid =
+		    fxdr_unsigned(uint32_t, *tl++);
+		v41flags = fxdr_unsigned(uint32_t, *tl);
+		if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 &&
+		    NFSHASPNFSOPT(nmp)) {
+			NFSCL_DEBUG(1, "set PNFS\n");
+			NFSLOCKMNT(nmp);
+			nmp->nm_state |= NFSSTA_PNFS;
+			NFSUNLOCKMNT(nmp);
+			dsp->nfsclds_flags |= NFSCLDS_MDS;
+		}
+		if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0)
+			dsp->nfsclds_flags |= NFSCLDS_DS;
+		if (len > 0)
+			nd->nd_repstat = nfsrv_mtostr(nd,
+			    dsp->nfsclds_serverown, len);
+		if (nd->nd_repstat == 0) {
+			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
+			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
+			    NULL, MTX_DEF);
+			nfscl_initsessionslots(&dsp->nfsclds_sess);
+			*dspp = dsp;
+		} else
+			free(dsp, M_NFSCLDS);
+	}
+	error = nd->nd_repstat;
+nfsmout:
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
+/*
+ * Do the NFSv4.1 Create Session.
+ * The request is built from the clientid stored in *sep and the given
+ * sequenceid.  On success the session id, sequence id, maximum cached
+ * response size and the fore/back channel slot counts from the reply
+ * are stored in *sep.  NFSSTA_SESSPERSIST is set on the mount only when
+ * "mds" is non-zero and the server granted a persistent session.
+ */
+int
+nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
+    struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred,
+    NFSPROC_T *p)
+{
+	uint32_t crflags, *tl;
+	struct nfsrv_descript nfsd;
+	struct nfsrv_descript *nd = &nfsd;
+	int error, irdcnt;
+
+	nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL);
+	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
+	*tl++ = sep->nfsess_clientid.lval[0];
+	*tl++ = sep->nfsess_clientid.lval[1];
+	*tl++ = txdr_unsigned(sequenceid);
+	crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST);
+	if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0)
+		crflags |= NFSV4CRSESS_CONNBACKCHAN;
+	*tl = txdr_unsigned(crflags);
+
+	/* Fill in fore channel attributes. */
+	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
+	*tl++ = 0;				/* Header pad size */
+	*tl++ = txdr_unsigned(100000);		/* Max request size */
+	*tl++ = txdr_unsigned(100000);		/* Max response size */
+	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
+	*tl++ = txdr_unsigned(20);		/* Max operations */
+	*tl++ = txdr_unsigned(64);		/* Max slots */
+	*tl = 0;				/* No rdma ird */
+
+	/* Fill in back channel attributes. */
+	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
+	*tl++ = 0;				/* Header pad size */
+	*tl++ = txdr_unsigned(10000);		/* Max request size */
+	*tl++ = txdr_unsigned(10000);		/* Max response size */
+	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
+	*tl++ = txdr_unsigned(4);		/* Max operations */
+	*tl++ = txdr_unsigned(NFSV4_CBSLOTS);	/* Max slots */
+	*tl = 0;				/* No rdma ird */
+
+	NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED);
+	*tl++ = txdr_unsigned(NFS_CALLBCKPROG);	/* Call back prog # */
+
+	/* Allow AUTH_SYS callbacks as uid, gid == 0. */
+	*tl++ = txdr_unsigned(1);		/* Auth_sys only */
+	*tl++ = txdr_unsigned(AUTH_SYS);	/* AUTH_SYS type */
+	*tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */
+	*tl++ = 0;				/* Null machine name */
+	*tl++ = 0;				/* Uid == 0 */
+	*tl++ = 0;				/* Gid == 0 */
+	*tl = 0;				/* No additional gids */
+	nd->nd_flag |= ND_USEGSSNAME;
+	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG,
+	    NFS_VER4, NULL, 1, NULL, NULL);
+	if (error != 0)
+		return (error);
+	if (nd->nd_repstat == 0) {
+		NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
+		    2 * NFSX_UNSIGNED);
+		bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID);
+		tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
+		sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++);
+		crflags = fxdr_unsigned(uint32_t, *tl);
+		if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) {
+			NFSLOCKMNT(nmp);
+			nmp->nm_state |= NFSSTA_SESSPERSIST;
+			NFSUNLOCKMNT(nmp);
+		}
+
+		/* Get the fore channel slot count. */
+		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
+		tl += 3;		/* Skip the other counts. */
+		sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
+		tl++;
+		sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
+		NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots);
+		irdcnt = fxdr_unsigned(int, *tl);
+		/*
+		 * The rdma ird array is declared with at most one element
+		 * (ca_rdma_ird<1>), so reject any other count instead of
+		 * letting a server supplied value scale the dissect length.
+		 */
+		if (irdcnt < 0 || irdcnt > 1) {
+			error = NFSERR_BADXDR;
+			goto nfsmout;
+		}
+		if (irdcnt > 0)
+			NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED);
+
+		/* and the back channel slot count. */
+		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
+		tl += 5;
+		sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl);
+		NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots);
+	}
+	error = nd->nd_repstat;
+nfsmout:
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
+/*
+ * Do the NFSv4.1 Destroy Session.
+ * Destroys the session whose id is held in the mount's MDS session.
+ * The clp argument is not referenced.
+ */
+int
+nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclclient *clp,
+    struct ucred *cred, NFSPROC_T *p)
+{
+	uint32_t *tl;
+	struct nfsrv_descript nfsd;
+	struct nfsrv_descript *nd = &nfsd;
+	int error;
+
+	nfscl_reqstart(nd, NFSPROC_DESTROYSESSION, nmp, NULL, 0, NULL, NULL);
+	NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID);
+	bcopy(NFSMNT_MDSSESSION(nmp)->nfsess_sessionid, tl, NFSX_V4SESSIONID);
+	nd->nd_flag |= ND_USEGSSNAME;
+	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+	if (error != 0)
+		return (error);
+	error = nd->nd_repstat;
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
+/*
+ * Do the NFSv4.1 Destroy Client.
+ * Sends the clientid held in the mount's MDS session.
+ * The clp argument is not referenced.
+ */
+int
+nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp,
+    struct ucred *cred, NFSPROC_T *p)
+{
+	uint32_t *tl;
+	struct nfsrv_descript nfsd;
+	struct nfsrv_descript *nd = &nfsd;
+	int error;
+
+	nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL);
+	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+	*tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0];
+	*tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1];
+	nd->nd_flag |= ND_USEGSSNAME;
+	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+	if (error != 0)
+		return (error);
+	error = nd->nd_repstat;
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
+/*
+ * Do the NFSv4.1 LayoutGet.
+ * Requests a Files layout for the byte range (offset, len, minlen) of
+ * the file handle fhp/fhlen.  On success *stateidp is overwritten with
+ * the layout stateid from the reply, *retonclosep is set to 0 or 1 and
+ * every returned segment whose iomode matches the first segment's is
+ * linked into flhp in increasing offset order.  Returns 0, an errno or
+ * the NFS status of the reply.  The "stuff" argument is not referenced.
+ */
+int
+nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode,
+    uint64_t offset, uint64_t len, uint64_t minlen, int layoutlen,
+    nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp,
+    struct ucred *cred, NFSPROC_T *p, void *stuff)
+{
+	uint32_t *tl;
+	struct nfsrv_descript nfsd, *nd = &nfsd;
+	struct nfsfh *nfhp;
+	struct nfsclflayout *flp, *prevflp, *tflp;
+	int cnt, error, gotiomode, fhcnt, nfhlen, i, j;
+	uint8_t *cp;
+	uint64_t retlen;
+
+	flp = NULL;
+	gotiomode = -1;		/* -1 until the first segment is parsed. */
+	nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL);
+	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
+	    NFSX_STATEID);
+	*tl++ = newnfs_false;		/* Don't signal availability. */
+	*tl++ = txdr_unsigned(NFSLAYOUT_NFSV4_1_FILES);
+	*tl++ = txdr_unsigned(iomode);
+	txdr_hyper(offset, tl);
+	tl += 2;
+	txdr_hyper(len, tl);
+	tl += 2;
+	txdr_hyper(minlen, tl);
+	tl += 2;
+	*tl++ = txdr_unsigned(stateidp->seqid);
+	NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid);
+	*tl++ = stateidp->other[0];
+	*tl++ = stateidp->other[1];
+	*tl++ = stateidp->other[2];
+	*tl = txdr_unsigned(layoutlen);
+	nd->nd_flag |= ND_USEGSSNAME;
+	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+	if (error != 0)
+		return (error);
+	if (nd->nd_repstat == 0) {
+		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID);
+		if (*tl++ != 0)
+			*retonclosep = 1;
+		else
+			*retonclosep = 0;
+		stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
+		NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep,
+		    (int)stateidp->seqid);
+		stateidp->other[0] = *tl++;
+		stateidp->other[1] = *tl++;
+		stateidp->other[2] = *tl++;
+		cnt = fxdr_unsigned(int, *tl);	/* Number of segments. */
+		NFSCL_DEBUG(4, "layg cnt=%d\n", cnt);
+		if (cnt <= 0 || cnt > 10000) {
+			/* Don't accept more than 10000 layouts in reply. */
+			error = NFSERR_BADXDR;
+			goto nfsmout;
+		}
+		for (i = 0; i < cnt; i++) {
+			/* Dissect all the way to the file handle cnt. */
+			NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_HYPER +
+			    6 * NFSX_UNSIGNED + NFSX_V4DEVICEID);
+			fhcnt = fxdr_unsigned(int, *(tl + 11 +
+			    NFSX_V4DEVICEID / NFSX_UNSIGNED));
+			NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
+			if (fhcnt < 0 || fhcnt > 100) {
+				/* Don't accept more than 100 file handles. */
+				error = NFSERR_BADXDR;
+				goto nfsmout;
+			}
+			if (fhcnt > 1)
+				flp = malloc(sizeof(*flp) + (fhcnt - 1) *
+				    sizeof(struct nfsfh *),
+				    M_NFSFLAYOUT, M_WAITOK);
+			else
+				flp = malloc(sizeof(*flp),
+				    M_NFSFLAYOUT, M_WAITOK);
+			flp->nfsfl_flags = 0;
+			flp->nfsfl_fhcnt = 0;	/* Counts handles stored so far. */
+			flp->nfsfl_devp = NULL;
+			flp->nfsfl_off = fxdr_hyper(tl); tl += 2;
+			retlen = fxdr_hyper(tl); tl += 2;
+			if (flp->nfsfl_off + retlen < flp->nfsfl_off)	/* Wrapped. */
+				flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
+			else
+				flp->nfsfl_end = flp->nfsfl_off + retlen;
+			flp->nfsfl_iomode = fxdr_unsigned(int, *tl++);
+			if (gotiomode == -1)
+				gotiomode = flp->nfsfl_iomode;
+			NFSCL_DEBUG(4, "layg reqiom=%d retiom=%d\n", iomode,
+			    (int)flp->nfsfl_iomode);
+			if (fxdr_unsigned(int, *tl++) !=
+			    NFSLAYOUT_NFSV4_1_FILES) {
+				printf("NFSv4.1: got non-files layout\n");
+				error = NFSERR_BADXDR;
+				goto nfsmout;
+			}
+			NFSBCOPY(++tl, flp->nfsfl_dev, NFSX_V4DEVICEID);	/* ++tl skips one word. */
+			tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
+			flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++);
+			NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util);
+			flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++);
+			flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2;
+			if (fxdr_unsigned(int, *tl) != fhcnt) {	/* Same word fhcnt was peeked from. */
+				printf("EEK! bad fhcnt\n");
+				error = NFSERR_BADXDR;
+				goto nfsmout;
+			}
+			for (j = 0; j < fhcnt; j++) {
+				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+				nfhlen = fxdr_unsigned(int, *tl);
+				if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) {
+					error = NFSERR_BADXDR;
+					goto nfsmout;
+				}
+				nfhp = malloc(sizeof(*nfhp) + nfhlen - 1,
+				    M_NFSFH, M_WAITOK);
+				flp->nfsfl_fh[j] = nfhp;
+				flp->nfsfl_fhcnt++;
+				nfhp->nfh_len = nfhlen;
+				NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen));
+				NFSBCOPY(cp, nfhp->nfh_fh, nfhlen);
+			}
+			if (flp->nfsfl_iomode == gotiomode) {
+				/* Keep the list in increasing offset order. */
+				tflp = LIST_FIRST(flhp);
+				prevflp = NULL;
+				while (tflp != NULL &&
+				    tflp->nfsfl_off < flp->nfsfl_off) {
+					prevflp = tflp;
+					tflp = LIST_NEXT(tflp, nfsfl_list);
+				}
+				if (prevflp == NULL)
+					LIST_INSERT_HEAD(flhp, flp, nfsfl_list);
+				else
+					LIST_INSERT_AFTER(prevflp, flp,
+					    nfsfl_list);
+			} else {
+				printf("nfscl_layoutget(): got wrong iomode\n");
+				nfscl_freeflayout(flp);
+			}
+			flp = NULL;	/* Linked or freed; nfsmout must not free it. */
+		}
+	}
+	if (nd->nd_repstat != 0 && error == 0)
+		error = nd->nd_repstat;
+nfsmout:
+	if (error != 0 && flp != NULL)
+		nfscl_freeflayout(flp);
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
+/*
+ * Do the NFSv4.1 Get Device Info.
+ * Fetches the device info for "deviceid", validates the stripe index and
+ * address list counts and, for each device address list, connects to one
+ * usable (non-UDP) address via nfsrpc_fillsa().  On success the newly
+ * allocated nfscldevinfo is returned via *ndip; on any failure it is
+ * freed and *ndip is left NULL.  If notifybitsp is non-NULL and the reply
+ * carries notify bits, the first word of them is stored in *notifybitsp.
+ */
+int
+nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype,
+    uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred,
+    NFSPROC_T *p)
+{
+	uint32_t cnt, stripeindex, *tl;
+	struct nfsrv_descript nfsd;
+	struct nfsrv_descript *nd = &nfsd;
+	struct sockaddr_storage ss;
+	struct nfsclds *dsp = NULL, **dspp;
+	struct nfscldevinfo *ndi;
+	int addrcnt, bitcnt, error, i, isudp, j, pos, safilled, stripecnt;
+
+	*ndip = NULL;
+	ndi = NULL;
+	nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL);
+	NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
+	NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID);
+	tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
+	*tl++ = txdr_unsigned(layouttype);
+	*tl++ = txdr_unsigned(100000);
+	if (notifybitsp != NULL && *notifybitsp != 0) {
+		*tl = txdr_unsigned(1);		/* One word of bits. */
+		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+		*tl = txdr_unsigned(*notifybitsp);
+	} else
+		*tl = txdr_unsigned(0);
+	nd->nd_flag |= ND_USEGSSNAME;
+	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+	if (error != 0)
+		return (error);
+	if (nd->nd_repstat == 0) {
+		NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED);
+		if (layouttype != fxdr_unsigned(int, *tl++))
+			printf("EEK! devinfo layout type not same!\n");
+		/* The pre-increment skips the word after the layout type. */
+		stripecnt = fxdr_unsigned(int, *++tl);
+		NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
+		if (stripecnt < 1 || stripecnt > 4096) {
+			printf("NFS devinfo stripecnt %d: out of range\n",
+			    stripecnt);
+			error = NFSERR_BADXDR;
+			goto nfsmout;
+		}
+		NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) * NFSX_UNSIGNED);
+		addrcnt = fxdr_unsigned(int, *(tl + stripecnt));
+		NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt);
+		if (addrcnt < 1 || addrcnt > 128) {
+			printf("NFS devinfo addrcnt %d: out of range\n",
+			    addrcnt);
+			error = NFSERR_BADXDR;
+			goto nfsmout;
+		}
+
+		/*
+		 * Now we know how many stripe indices and addresses, so
+		 * we can allocate the structure the correct size.
+		 */
+		i = (stripecnt * sizeof(uint8_t)) / sizeof(struct nfsclds *)
+		    + 1;
+		NFSCL_DEBUG(4, "stripeindices=%d\n", i);
+		ndi = malloc(sizeof(*ndi) + (addrcnt + i) *
+		    sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK | M_ZERO);
+		NFSBCOPY(deviceid, ndi->nfsdi_deviceid, NFSX_V4DEVICEID);
+		ndi->nfsdi_refcnt = 0;
+		ndi->nfsdi_stripecnt = stripecnt;
+		ndi->nfsdi_addrcnt = addrcnt;
+		/* Fill in the stripe indices. */
+		for (i = 0; i < stripecnt; i++) {
+			/*
+			 * Decode the full 32 bit value before range checking
+			 * it.  Truncating to 8 bits first would let an out of
+			 * range index (e.g. 256) wrap to a valid looking one.
+			 */
+			stripeindex = fxdr_unsigned(uint32_t, *tl++);
+			NFSCL_DEBUG(4, "stripeind=%d\n", (int)stripeindex);
+			if (stripeindex >= (uint32_t)addrcnt) {
+				printf("NFS devinfo stripeindex %d: too big\n",
+				    (int)stripeindex);
+				error = NFSERR_BADXDR;
+				goto nfsmout;
+			}
+			/* Fits in 8 bits, since addrcnt <= 128. */
+			nfsfldi_setstripeindex(ndi, i, (uint8_t)stripeindex);
+		}
+
+		/* Now, dissect the server address(es). */
+		safilled = 0;
+		for (i = 0; i < addrcnt; i++) {
+			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+			cnt = fxdr_unsigned(uint32_t, *tl);
+			if (cnt == 0) {
+				printf("NFS devinfo 0 len addrlist\n");
+				error = NFSERR_BADXDR;
+				goto nfsmout;
+			}
+			dspp = nfsfldi_addr(ndi, i);
+			pos = arc4random() % cnt;	/* Choose one. */
+			safilled = 0;
+			for (j = 0; j < cnt; j++) {
+				error = nfsv4_getipaddr(nd, &ss, &isudp);
+				if (error != 0 && error != EPERM) {
+					error = NFSERR_BADXDR;
+					goto nfsmout;
+				}
+				if (error == 0 && isudp == 0) {
+					/*
+					 * The algorithm is:
+					 * - use "pos" entry if it is of the
+					 *   same af_family or none of them
+					 *   is of the same af_family
+					 * else
+					 * - use the first one of the same
+					 *   af_family.
+					 */
+					if ((safilled == 0 && ss.ss_family ==
+					     nmp->nm_nam->sa_family) ||
+					    (j == pos &&
+					     (safilled == 0 || ss.ss_family ==
+					      nmp->nm_nam->sa_family)) ||
+					    (safilled == 1 && ss.ss_family ==
+					     nmp->nm_nam->sa_family)) {
+						error = nfsrpc_fillsa(nmp, &ss,
+						    &dsp, p);
+						if (error == 0) {
+							*dspp = dsp;
+							if (ss.ss_family ==
+							    nmp->nm_nam->sa_family)
+								safilled = 2;
+							else
+								safilled = 1;
+						}
+					}
+				}
+			}
+			if (safilled == 0)
+				break;
+		}
+
+		/* And the notify bits. */
+		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+		if (safilled != 0) {
+			bitcnt = fxdr_unsigned(int, *tl);
+			if (bitcnt > 0) {
+				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+				if (notifybitsp != NULL)
+					*notifybitsp =
+					    fxdr_unsigned(uint32_t, *tl);
+			}
+			/*
+			 * Every address list yielded a usable DS.  Discard
+			 * any error left over from an alternative address
+			 * that was unsupported or could not be connected,
+			 * so that the result is not freed below.
+			 */
+			error = 0;
+			*ndip = ndi;
+		} else
+			error = EPERM;
+	}
+	if (nd->nd_repstat != 0)
+		error = nd->nd_repstat;
+nfsmout:
+	if (error != 0 && ndi != NULL)
+		nfscl_freedevinfo(ndi);
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
+/*
+ * Do the NFSv4.1 LayoutCommit.
+ * Commits the range (off, len) for the file handle fh/fhlen under the
+ * given layout stateid.  lastbyte is clamped into the range before it is
+ * sent as the new last byte written.  A non-zero layoutupdatecnt appends
+ * that many bytes from layp, zero padded to an XDR boundary.  Returns 0,
+ * an errno or the NFS status of the reply.  "stuff" is not referenced.
+ */
+int
+nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
+    uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp,
+    int layouttype, int layoutupdatecnt, uint8_t *layp, struct ucred *cred,
+    NFSPROC_T *p, void *stuff)
+{
+	struct nfsrv_descript nfsd, *nd = &nfsd;
+	uint32_t *tl;
+	uint8_t *bytep;
+	int error, padcnt, rndlen;
+
+	nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL);
+	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
+	    NFSX_STATEID);
+
+	/* Byte range and reclaim flag. */
+	txdr_hyper(off, tl);
+	tl += 2;
+	txdr_hyper(len, tl);
+	tl += 2;
+	*tl++ = (reclaim != 0) ? newnfs_true : newnfs_false;
+
+	/* Layout stateid. */
+	*tl++ = txdr_unsigned(stateidp->seqid);
+	*tl++ = stateidp->other[0];
+	*tl++ = stateidp->other[1];
+	*tl++ = stateidp->other[2];
+
+	/* A new last byte written is always supplied, kept within range. */
+	*tl++ = newnfs_true;
+	if (lastbyte < off) {
+		lastbyte = off;
+	} else if (lastbyte >= (off + len)) {
+		lastbyte = off + len - 1;
+	}
+	txdr_hyper(lastbyte, tl);
+	tl += 2;
+
+	/* No time modify, then the layout type and update byte count. */
+	*tl++ = newnfs_false;
+	*tl++ = txdr_unsigned(layouttype);
+	*tl = txdr_unsigned(layoutupdatecnt);
+	if (layoutupdatecnt > 0) {
+		KASSERT(layouttype != NFSLAYOUT_NFSV4_1_FILES,
+		    ("Must be nil for Files Layout"));
+		rndlen = NFSM_RNDUP(layoutupdatecnt);
+		NFSM_BUILD(bytep, uint8_t *, rndlen);
+		NFSBCOPY(layp, bytep, layoutupdatecnt);
+		bytep += layoutupdatecnt;
+		/* Zero the XDR pad bytes. */
+		padcnt = rndlen - layoutupdatecnt;
+		while (padcnt-- > 0)
+			*bytep++ = 0x0;
+	}
+
+	nd->nd_flag |= ND_USEGSSNAME;
+	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+	if (error == 0) {
+		error = nd->nd_repstat;
+		mbuf_freem(nd->nd_mrep);
+	}
+	return (error);
+}
+
+/*
+ * Do the NFSv4.1 LayoutReturn.
+ * The byte range, layout stateid and layoutcnt bytes from layp are only
+ * sent when layoutreturn is NFSLAYOUTRETURN_FILE.  If the reply carries
+ * a stateid, it is stored in *stateidp.  Returns 0, an errno or the NFS
+ * status of the reply.  The "stuff" argument is not referenced.
+ */
+int
+nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
+    int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset,
+    uint64_t len, nfsv4stateid_t *stateidp, int layoutcnt, uint32_t *layp,
+    struct ucred *cred, NFSPROC_T *p, void *stuff)
+{
+	struct nfsrv_descript nfsd, *nd = &nfsd;
+	uint32_t *tl;
+	uint8_t *bytep;
+	int error, padcnt, rndlen;
+
+	nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL);
+	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
+	*tl++ = (reclaim != 0) ? newnfs_true : newnfs_false;
+	*tl++ = txdr_unsigned(layouttype);
+	*tl++ = txdr_unsigned(iomode);
+	*tl = txdr_unsigned(layoutreturn);
+
+	if (layoutreturn == NFSLAYOUTRETURN_FILE) {
+		/* Per-file return: range, stateid and optional body. */
+		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
+		    NFSX_UNSIGNED);
+		txdr_hyper(offset, tl);
+		tl += 2;
+		txdr_hyper(len, tl);
+		tl += 2;
+		NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid);
+		*tl++ = txdr_unsigned(stateidp->seqid);
+		*tl++ = stateidp->other[0];
+		*tl++ = stateidp->other[1];
+		*tl++ = stateidp->other[2];
+		*tl = txdr_unsigned(layoutcnt);
+		if (layoutcnt > 0) {
+			rndlen = NFSM_RNDUP(layoutcnt);
+			NFSM_BUILD(bytep, uint8_t *, rndlen);
+			NFSBCOPY(layp, bytep, layoutcnt);
+			bytep += layoutcnt;
+			/* Zero the XDR pad bytes. */
+			padcnt = rndlen - layoutcnt;
+			while (padcnt-- > 0)
+				*bytep++ = 0x0;
+		}
+	}
+
+	nd->nd_flag |= ND_USEGSSNAME;
+	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+	if (error != 0)
+		return (error);
+
+	if (nd->nd_repstat != 0) {
+		error = nd->nd_repstat;
+		goto nfsmout;
+	}
+
+	/* A non-zero word means a stateid follows in the reply. */
+	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+	if (*tl != 0) {
+		NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
+		stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
+		stateidp->other[0] = *tl++;
+		stateidp->other[1] = *tl++;
+		stateidp->other[2] = *tl;
+	}
+nfsmout:
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
+/*
+ * Acquire a layout and devinfo, if possible.
The caller must have acquired
+ * a reference count on the nfsclclient structure before calling this.
+ * Return the layout in lypp with a reference count on it, if successful.
+ * When no cached layout covers "off", a LayoutGet is done, followed by a
+ * GetDeviceInfo for each returned segment whose device is not yet known.
+ */
+static int
+nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp,
+    int iomode, uint32_t *notifybitsp, nfsv4stateid_t *stateidp, uint64_t off,
+    struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p)
+{
+	struct nfscllayout *lyp;
+	struct nfsclflayout *flp, *tflp;
+	struct nfscldevinfo *dip;
+	struct nfsclflayouthead flh;
+	int error = 0, islocked, layoutlen, recalled, retonclose;
+	nfsv4stateid_t stateid;
+
+	*lypp = NULL;
+	/*
+	 * If lyp is returned non-NULL, there will be a refcnt (shared lock)
+	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
+	 * flp == NULL.
+	 */
+	lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len,
+	    off, &flp, &recalled);
+	islocked = 0;
+	if (lyp == NULL || flp == NULL) {
+		if (recalled != 0)
+			return (EIO);
+		LIST_INIT(&flh);
+		layoutlen = NFSMNT_MDSSESSION(nmp)->nfsess_maxcache -
+		    (NFSX_STATEID + 3 * NFSX_UNSIGNED);
+		if (lyp == NULL) {
+			/* No layout yet: use the caller's stateid, seqid 0. */
+			stateid.seqid = 0;
+			stateid.other[0] = stateidp->other[0];
+			stateid.other[1] = stateidp->other[1];
+			stateid.other[2] = stateidp->other[2];
+			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
+			    nfhp->nfh_len, iomode, (uint64_t)0, INT64_MAX,
+			    (uint64_t)0, layoutlen, &stateid, &retonclose,
+			    &flh, cred, p, NULL);
+		} else {
+			/* Extend the existing layout, using its stateid. */
+			islocked = 1;
+			stateid.seqid = lyp->nfsly_stateid.seqid;
+			stateid.other[0] = lyp->nfsly_stateid.other[0];
+			stateid.other[1] = lyp->nfsly_stateid.other[1];
+			stateid.other[2] = lyp->nfsly_stateid.other[2];
+			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
+			    nfhp->nfh_len, iomode, off, INT64_MAX,
+			    (uint64_t)0, layoutlen, &stateid, &retonclose,
+			    &flh, cred, p, NULL);
+		}
+		if (error == 0)
+			LIST_FOREACH(tflp, &flh, nfsfl_list) {
+				error = nfscl_adddevinfo(nmp, NULL, tflp);
+				if (error != 0) {
+					error = nfsrpc_getdeviceinfo(nmp,
+					    tflp->nfsfl_dev,
+					    NFSLAYOUT_NFSV4_1_FILES,
+					    notifybitsp, &dip, cred, p);
+					if (error != 0)
+						break;
+					error = nfscl_adddevinfo(nmp, dip,
+					    tflp);
+					if (error != 0)
+						printf(
+						    "getlayout: cannot add\n");
+				}
+			}
+		if (error == 0) {
+			/*
+			 * nfscl_layout() always returns with the nfsly_lock
+			 * set to a refcnt (shared lock).
+			 */
+			error = nfscl_layout(nmp, vp, nfhp->nfh_fh,
+			    nfhp->nfh_len, &stateid, retonclose, &flh, &lyp,
+			    cred, p);
+			if (error == 0)
+				*lypp = lyp;
+		} else if (islocked != 0)
+			nfsv4_unlock(&lyp->nfsly_lock, 0);
+	} else
+		*lypp = lyp;
+	return (error);
+}
+
+/*
+ * Do a TCP connection plus exchange id and create session.
+ * If successful, a "struct nfsclds" is linked into the list for the
+ * mount point and a pointer to it is returned.
+ * If a session usable for a DS already exists for the address or for the
+ * same server, that one is returned via *dspp and no new session is made.
+ * Only AF_INET and AF_INET6 addresses are handled; others return EPERM.
+ */
+static int
+nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_storage *ssp,
+    struct nfsclds **dspp, NFSPROC_T *p)
+{
+	struct sockaddr_in *msad, *sad, *ssd;
+	struct sockaddr_in6 *msad6, *sad6, *ssd6;
+	struct nfsclclient *clp;
+	struct nfssockreq *nrp;
+	struct nfsclds *dsp, *tdsp;
+	int error;
+	enum nfsclds_state retv;
+	uint32_t sequenceid;
+
+	KASSERT(nmp->nm_sockreq.nr_cred != NULL,
+	    ("nfsrpc_fillsa: NULL nr_cred"));
+	/*
+	 * dsp is only assigned by nfsrpc_exchangeid(), which is skipped
+	 * when the connect fails, yet it is tested on the way out.
+	 */
+	dsp = NULL;
+	NFSLOCKCLSTATE();
+	clp = nmp->nm_clp;
+	NFSUNLOCKCLSTATE();
+	if (clp == NULL)
+		return (EPERM);
+	if (ssp->ss_family == AF_INET) {
+		ssd = (struct sockaddr_in *)ssp;
+		NFSLOCKMNT(nmp);
+
+		/*
+		 * Check to see if we already have a session for this
+		 * address that is usable for a DS.
+		 * Note that the MDS's address is in a different place
+		 * than the sessions already acquired for DS's.
+		 */
+		msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam;
+		tdsp = TAILQ_FIRST(&nmp->nm_sess);
+		while (tdsp != NULL) {
+			if (msad != NULL && msad->sin_family == AF_INET &&
+			    ssd->sin_addr.s_addr == msad->sin_addr.s_addr &&
+			    ssd->sin_port == msad->sin_port &&
+			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0) {
+				*dspp = tdsp;
+				NFSUNLOCKMNT(nmp);
+				NFSCL_DEBUG(4, "fnd same addr\n");
+				return (0);
+			}
+			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
+			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
+				msad = (struct sockaddr_in *)
+				    tdsp->nfsclds_sockp->nr_nam;
+			else
+				msad = NULL;
+		}
+		NFSUNLOCKMNT(nmp);
+
+		/* No IP address match, so look for new/trunked one. */
+		sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO);
+		sad->sin_len = sizeof(*sad);
+		sad->sin_family = AF_INET;
+		sad->sin_port = ssd->sin_port;
+		sad->sin_addr.s_addr = ssd->sin_addr.s_addr;
+		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
+		nrp->nr_nam = (struct sockaddr *)sad;
+	} else if (ssp->ss_family == AF_INET6) {
+		ssd6 = (struct sockaddr_in6 *)ssp;
+		NFSLOCKMNT(nmp);
+
+		/*
+		 * Check to see if we already have a session for this
+		 * address that is usable for a DS.
+		 * Note that the MDS's address is in a different place
+		 * than the sessions already acquired for DS's.
+		 */
+		msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam;
+		tdsp = TAILQ_FIRST(&nmp->nm_sess);
+		while (tdsp != NULL) {
+			if (msad6 != NULL && msad6->sin6_family == AF_INET6 &&
+			    IN6_ARE_ADDR_EQUAL(&ssd6->sin6_addr,
+			    &msad6->sin6_addr) &&
+			    ssd6->sin6_port == msad6->sin6_port &&
+			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0) {
+				*dspp = tdsp;
+				NFSUNLOCKMNT(nmp);
+				return (0);
+			}
+			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
+			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
+				msad6 = (struct sockaddr_in6 *)
+				    tdsp->nfsclds_sockp->nr_nam;
+			else
+				msad6 = NULL;
+		}
+		NFSUNLOCKMNT(nmp);
+
+		/* No IP address match, so look for new/trunked one. */
+		sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO);
+		sad6->sin6_len = sizeof(*sad6);
+		sad6->sin6_family = AF_INET6;
+		sad6->sin6_port = ssd6->sin6_port;
+		NFSBCOPY(&ssd6->sin6_addr, &sad6->sin6_addr,
+		    sizeof(struct in6_addr));
+		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
+		nrp->nr_nam = (struct sockaddr *)sad6;
+	} else
+		return (EPERM);
+
+	nrp->nr_sotype = SOCK_STREAM;
+	mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF);
+	nrp->nr_prog = NFS_PROG;
+	nrp->nr_vers = NFS_VER4;
+
+	/*
+	 * Use the credentials that were used for the mount, which are
+	 * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc.
+	 * Ref. counting the credentials with crhold() is probably not
+	 * necessary, since nm_sockreq.nr_cred won't be crfree()'d until
+	 * unmount, but I did it anyhow.
+	 */
+	nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred);
+	error = newnfs_connect(nmp, nrp, NULL, p, 0);
+	NFSCL_DEBUG(3, "DS connect=%d\n", error);
+
+	/* Now, do the exchangeid and create session. */
+	if (error == 0)
+		error = nfsrpc_exchangeid(nmp, clp, nrp, NFSV4EXCH_USEPNFSDS,
+		    &dsp, nrp->nr_cred, p);
+	NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
+	if (error == 0) {
+		dsp->nfsclds_sockp = nrp;
+		NFSLOCKMNT(nmp);
+		retv = nfscl_getsameserver(nmp, dsp, &tdsp);
+		NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
+		if (retv == NFSDSP_USETHISSESSION) {
+			NFSUNLOCKMNT(nmp);
+			/*
+			 * If there is already a session for this server,
+			 * use it.
+			 */
+			(void)newnfs_disconnect(nrp);
+			nfscl_freenfsclds(dsp);
+			*dspp = tdsp;
+			return (0);
+		}
+		if (retv == NFSDSP_SEQTHISSESSION)
+			sequenceid = tdsp->nfsclds_sess.nfsess_sequenceid;
+		else
+			sequenceid = dsp->nfsclds_sess.nfsess_sequenceid;
+		NFSUNLOCKMNT(nmp);
+		error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
+		    nrp, sequenceid, 0, nrp->nr_cred, p);
+		NFSCL_DEBUG(3, "DS createsess=%d\n", error);
+	} else {
+		/* Connect or exchangeid failed: undo the socket setup. */
+		NFSFREECRED(nrp->nr_cred);
+		NFSFREEMUTEX(&nrp->nr_mtx);
+		free(nrp->nr_nam, M_SONAME);
+		free(nrp, M_NFSSOCKREQ);
+	}
+	if (error == 0) {
+		NFSCL_DEBUG(3, "add DS session\n");
+		/*
+		 * Put it at the end of the list. That way the list
+		 * is ordered by when the entry was added. This matters
+		 * since the one done first is the one that should be
+		 * used for sequencing any subsequent create sessions.
+		 */
+		NFSLOCKMNT(nmp);
+		TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list);
+		NFSUNLOCKMNT(nmp);
+		*dspp = dsp;
+	} else if (dsp != NULL)
+		nfscl_freenfsclds(dsp);
+	return (error);
+}
+
+/*
+ * Do the NFSv4.1 Reclaim Complete.
+ * Sends a single false boolean argument over the mount's MDS socket.
+ */
+int
+nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
+{
+	uint32_t *tl;
+	struct nfsrv_descript nfsd;
+	struct nfsrv_descript *nd = &nfsd;
+	int error;
+
+	nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL);
+	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+	*tl = newnfs_false;
+	nd->nd_flag |= ND_USEGSSNAME;
+	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+	if (error != 0)
+		return (error);
+	error = nd->nd_repstat;
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
+/*
+ * Initialize the slot tables for a session.
+ * Any cached callback slot reply is freed, each callback slot is zeroed
+ * and the 64 fore channel slot sequence numbers and the slot bitmap are
+ * reset to zero.
+ */
+static void
+nfscl_initsessionslots(struct nfsclsession *sep)
+{
+	int i;
+
+	for (i = 0; i < NFSV4_CBSLOTS; i++) {
+		if (sep->nfsess_cbslots[i].nfssl_reply != NULL)
+			m_freem(sep->nfsess_cbslots[i].nfssl_reply);
+		NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot));
+	}
+	for (i = 0; i < 64; i++)
+		sep->nfsess_slotseq[i] = 0;
+	sep->nfsess_slots = 0;
+}
+
+/*
+ * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS).
+ * Returns EIO without doing any I/O when pNFS is not in use for the
+ * mount, callbacks are not enabled, the file is marked NNOLAYOUT or the
+ * layout has been recalled.  A failure to get a layout marks the file
+ * NNOLAYOUT.  Every exit taken after nfscl_getstateid() succeeds must
+ * drop the lock reference, the copied credential and the clientid
+ * reference that were acquired.
+ */
+int
+nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
+    uint32_t rwaccess, struct ucred *cred, NFSPROC_T *p)
+{
+	struct nfsnode *np = VTONFS(vp);
+	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
+	struct nfscllayout *layp;
+	struct nfscldevinfo *dip;
+	struct nfsclflayout *rflp;
+	nfsv4stateid_t stateid;
+	struct ucred *newcred;
+	uint64_t lastbyte, len, off, oresid, xfer;
+	int eof, error, iolaymode, recalled;
+	void *lckp;
+
+	if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
+	    (np->n_flag & NNOLAYOUT) != 0)
+		return (EIO);
+	/* Now, get a reference cnt on the clientid for this mount. */
+	if (nfscl_getref(nmp) == 0)
+		return (EIO);
+
+	/* Find an appropriate stateid. */
+	newcred = NFSNEWCRED(cred);
+	error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
+	    rwaccess, 1, newcred, p, &stateid, &lckp);
+	if (error != 0) {
+		NFSFREECRED(newcred);
+		nfscl_relref(nmp);
+		return (error);
+	}
+	/* Search for a layout for this file. */
+	off = uiop->uio_offset;
+	layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh,
+	    np->n_fhp->nfh_len, off, &rflp, &recalled);
+	if (layp == NULL || rflp == NULL) {
+		if (recalled != 0) {
+			/*
+			 * Drop the lock reference here as well, as is
+			 * done on every other exit path below.
+			 */
+			if (lckp != NULL)
+				nfscl_lockderef(lckp);
+			NFSFREECRED(newcred);
+			nfscl_relref(nmp);
+			return (EIO);
+		}
+		if (layp != NULL) {
+			nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0);
+			layp = NULL;
+		}
+		/* Try and get a Layout, if it is supported. */
+		if (rwaccess == NFSV4OPEN_ACCESSWRITE ||
+		    (np->n_flag & NWRITEOPENED) != 0)
+			iolaymode = NFSLAYOUTIOMODE_RW;
+		else
+			iolaymode = NFSLAYOUTIOMODE_READ;
+		error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode,
+		    NULL, &stateid, off, &layp, newcred, p);
+		if (error != 0) {
+			NFSLOCKNODE(np);
+			np->n_flag |= NNOLAYOUT;
+			NFSUNLOCKNODE(np);
+			if (lckp != NULL)
+				nfscl_lockderef(lckp);
+			NFSFREECRED(newcred);
+			if (layp != NULL)
+				nfscl_rellayout(layp, 0);
+			nfscl_relref(nmp);
+			return (error);
+		}
+	}
+
+	/*
+	 * Loop around finding a layout that works for the first part of
+	 * this I/O operation, and then call the function that actually
+	 * does the RPC.
+	 */
+	eof = 0;
+	len = (uint64_t)uiop->uio_resid;
+	while (len > 0 && error == 0 && eof == 0) {
+		off = uiop->uio_offset;
+		error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp);
+		if (error == 0) {
+			oresid = xfer = (uint64_t)uiop->uio_resid;
+			if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off))
+				xfer = rflp->nfsfl_end - rflp->nfsfl_off;
+			dip = nfscl_getdevinfo(nmp->nm_clp, rflp->nfsfl_dev,
+			    rflp->nfsfl_devp);
+			if (dip != NULL) {
+				error = nfscl_doflayoutio(vp, uiop, iomode,
+				    must_commit, &eof, &stateid, rwaccess, dip,
+				    layp, rflp, off, xfer, newcred, p);
+				nfscl_reldevinfo(dip);
+				lastbyte = off + xfer - 1;
+				if (error == 0) {
+					/* Remember the highest byte done. */
+					NFSLOCKCLSTATE();
+					if (lastbyte > layp->nfsly_lastbyte)
+						layp->nfsly_lastbyte = lastbyte;
+					NFSUNLOCKCLSTATE();
+				}
+			} else
+				error = EIO;
+			if (error == 0)
+				len -= (oresid - (uint64_t)uiop->uio_resid);
+		}
+	}
+	if (lckp != NULL)
+		nfscl_lockderef(lckp);
+	NFSFREECRED(newcred);
+	nfscl_rellayout(layp, 0);
+	nfscl_relref(nmp);
+	return (error);
+}
+
+/*
+ * Find a file layout that will handle the first bytes of the requested
+ * range and return the information from it needed to do the I/O operation.
+ */ +int +nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess, + struct nfsclflayout **retflpp) +{ + struct nfsclflayout *flp, *nflp, *rflp; + uint32_t rw; + + rflp = NULL; + rw = rwaccess; + /* For reading, do the Read list first and then the Write list. */ + do { + if (rw == NFSV4OPEN_ACCESSREAD) + flp = LIST_FIRST(&lyp->nfsly_flayread); + else + flp = LIST_FIRST(&lyp->nfsly_flayrw); + while (flp != NULL) { + nflp = LIST_NEXT(flp, nfsfl_list); + if (flp->nfsfl_off > off) + break; + if (flp->nfsfl_end > off && + (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end)) + rflp = flp; + flp = nflp; + } + if (rw == NFSV4OPEN_ACCESSREAD) + rw = NFSV4OPEN_ACCESSWRITE; + else + rw = 0; + } while (rw != 0); + if (rflp != NULL) { + /* This one covers the most bytes starting at off. */ + *retflpp = rflp; + return (0); + } + return (EIO); +} + +/* + * Do I/O using an NFSv4.1 file layout. + * The request [off, off + len) is split at stripe unit boundaries and + * each piece is sent to the DS selected by the current stripe position. + * Returns 0 on success, EIO when the layout's stripe geometry is unusable, + * or the error from the read/write RPC. + */ +static int +nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, + int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp, + struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off, + uint64_t len, struct ucred *cred, NFSPROC_T *p) +{ + uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer; + int commit_thru_mds, error = 0, stripe_index, stripe_pos; + struct nfsnode *np; + struct nfsfh *fhp; + struct nfsclds **dspp; + + np = VTONFS(vp); + rel_off = off - flp->nfsfl_patoff; + /* + * Per RFC 5661, the stripe unit size is nfl_util with the low six + * flag bits masked off (NFL4_UFLG_STRIPE_UNIT_SIZE_MASK, 0xffffffc0). + * It is a byte count in place, not a right-shifted field. + */ + stripe_unit_size = flp->nfsfl_util & 0xffffffc0; + /* A zero size or stripe count from the server would divide by zero. */ + if (stripe_unit_size == 0 || dp->nfsdi_stripecnt == 0) + return (EIO); + stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) % + dp->nfsdi_stripecnt; + /* The first transfer only runs to the end of the current stripe unit. */ + transfer = stripe_unit_size - (rel_off % stripe_unit_size); + + /* Loop around, doing I/O for each stripe unit. */ + while (len > 0 && error == 0) { + stripe_index = nfsfldi_stripeindex(dp, stripe_pos); + dspp = nfsfldi_addr(dp, stripe_index); + if (len > transfer) + xfer = transfer; + else + xfer = len; + if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) { + /* Dense layout. 
*/ + if (stripe_pos >= flp->nfsfl_fhcnt) + return (EIO); + fhp = flp->nfsfl_fh[stripe_pos]; + io_off = (rel_off / (stripe_unit_size * + dp->nfsdi_stripecnt)) * stripe_unit_size + + rel_off % stripe_unit_size; + } else { + /* Sparse layout. */ + if (flp->nfsfl_fhcnt > 1) { + if (stripe_index >= flp->nfsfl_fhcnt) + return (EIO); + fhp = flp->nfsfl_fh[stripe_index]; + } else if (flp->nfsfl_fhcnt == 1) + fhp = flp->nfsfl_fh[0]; + else + fhp = np->n_fhp; + io_off = off; + } + if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) + commit_thru_mds = 1; + else + commit_thru_mds = 0; + if (rwflag == FREAD) + error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp, + io_off, xfer, fhp, cred, p); + else { + error = nfsrpc_writeds(vp, uiop, iomode, must_commit, + stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds, + cred, p); + if (error == 0) { + NFSLOCKCLSTATE(); + lyp->nfsly_flags |= NFSLY_WRITTEN; + NFSUNLOCKCLSTATE(); + } + } + if (error == 0) { + transfer = stripe_unit_size; + stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt; + len -= xfer; + off += xfer; + } + } + return (error); +} + +/* + * The actual read RPC done to a DS. + */ +static int +nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp, + struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, + struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + int error, retlen; + struct nfsrv_descript nfsd; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + struct nfsrv_descript *nd = &nfsd; + struct nfssockreq *nrp; + + nd->nd_mrep = NULL; + nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh, fhp->nfh_len, + NULL, &dsp->nfsclds_sess); + nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO); + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3); + txdr_hyper(io_off, tl); + *(tl + 2) = txdr_unsigned(len); + nrp = dsp->nfsclds_sockp; + if (nrp == NULL) + /* If NULL, use the MDS socket. 
*/ + nrp = &nmp->nm_sockreq; + error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); + if (error != 0) + return (error); + if (nd->nd_repstat != 0) { + error = nd->nd_repstat; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + *eofp = fxdr_unsigned(int, *tl); + NFSM_STRSIZ(retlen, len); + error = nfsm_mbufuio(nd, uiop, retlen); +nfsmout: + if (nd->nd_mrep != NULL) + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * The actual write RPC done to a DS. + * Unless newnfs_request() itself fails, *iomode is overwritten on return + * with the commit level taken from the reply (NFSWRITE_FILESYNC when no + * reply body was dissected). *must_commit is set to 1 when the write + * verifier in the reply differs from the one already recorded for the + * mount (commit_thru_mds != 0) or for the DS (commit_thru_mds == 0). + */ +static int +nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, + nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len, + struct nfsfh *fhp, int commit_thru_mds, struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + int error, rlen, commit, committed = NFSWRITE_FILESYNC; + int32_t backup; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + struct nfssockreq *nrp; + + KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1")); + nd->nd_mrep = NULL; + nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh, fhp->nfh_len, + NULL, &dsp->nfsclds_sess); + nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO); + NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); + txdr_hyper(io_off, tl); + tl += 2; + *tl++ = txdr_unsigned(*iomode); + *tl = txdr_unsigned(len); + nfsm_uiombuf(nd, uiop, len); + nrp = dsp->nfsclds_sockp; + if (nrp == NULL) + /* If NULL, use the MDS socket. */ + nrp = &nmp->nm_sockreq; + error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); + if (error != 0) + return (error); + if (nd->nd_repstat != 0) { + /* + * In case the rpc gets retried, roll + * the uio fields changed by nfsm_uiombuf() + * back. 
+ */ + uiop->uio_offset -= len; + uio_uio_resid_add(uiop, len); + uio_iov_base_add(uiop, -len); + uio_iov_len_add(uiop, len); + error = nd->nd_repstat; + } else { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); + rlen = fxdr_unsigned(int, *tl++); + if (rlen == 0) { + error = NFSERR_IO; + goto nfsmout; + } else if (rlen < len) { + backup = len - rlen; + uio_iov_base_add(uiop, -(backup)); + uio_iov_len_add(uiop, backup); + uiop->uio_offset -= backup; + uio_uio_resid_add(uiop, backup); + len = rlen; + } + commit = fxdr_unsigned(int, *tl++); + + /* + * Return the lowest commitment level + * obtained by any of the RPCs. + */ + if (committed == NFSWRITE_FILESYNC) + committed = commit; + else if (committed == NFSWRITE_DATASYNC && + commit == NFSWRITE_UNSTABLE) + committed = commit; + if (commit_thru_mds != 0) { + NFSLOCKMNT(nmp); + if (!NFSHASWRITEVERF(nmp)) { + NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); + NFSSETWRITEVERF(nmp); + } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) { + *must_commit = 1; + NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); + } + NFSUNLOCKMNT(nmp); + } else { + NFSLOCKDS(dsp); + if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) { + NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); + dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF; + } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) { + *must_commit = 1; + NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); + } + NFSUNLOCKDS(dsp); + } + } +nfsmout: + if (nd->nd_mrep != NULL) + mbuf_freem(nd->nd_mrep); + *iomode = committed; + if (nd->nd_repstat != 0 && error == 0) + error = nd->nd_repstat; + return (error); +} + +/* + * Free up the nfsclds structure. 
+ */ +void +nfscl_freenfsclds(struct nfsclds *dsp) +{ + int i; + + if (dsp == NULL) + return; + if (dsp->nfsclds_sockp != NULL) { + NFSFREECRED(dsp->nfsclds_sockp->nr_cred); + NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx); + free(dsp->nfsclds_sockp->nr_nam, M_SONAME); + free(dsp->nfsclds_sockp, M_NFSSOCKREQ); + } + NFSFREEMUTEX(&dsp->nfsclds_mtx); + NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx); + for (i = 0; i < NFSV4_CBSLOTS; i++) { + if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL) + m_freem( + dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply); + } + free(dsp, M_NFSCLDS); +} + +static enum nfsclds_state +nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp, + struct nfsclds **retdspp) +{ + struct nfsclds *dsp, *cur_dsp; + + /* + * Search the list of nfsclds structures for one with the same + * server. + */ + cur_dsp = NULL; + TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) { + if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen && + dsp->nfsclds_servownlen != 0 && + !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown, + dsp->nfsclds_servownlen)) { + NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n", + TAILQ_FIRST(&nmp->nm_sess), dsp, + dsp->nfsclds_flags); + /* Server major id matches. */ + if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) { + *retdspp = dsp; + return (NFSDSP_USETHISSESSION); + } + + /* + * Note the first match, so it can be used for + * sequence'ing new sessions. + */ + if (cur_dsp == NULL) + cur_dsp = dsp; + } + } + if (cur_dsp != NULL) { + *retdspp = cur_dsp; + return (NFSDSP_SEQTHISSESSION); + } + return (NFSDSP_NOTFOUND); +} + +#ifdef notyet +/* + * NFS commit rpc to a DS. 
+ */ +static int +nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp, + struct nfsfh *fhp, struct ucred *cred, NFSPROC_T *p, void *stuff) +{ + uint32_t *tl; + struct nfsrv_descript nfsd, *nd = &nfsd; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + struct nfssockreq *nrp; + int error; + + nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh, fhp->nfh_len, + NULL, &dsp->nfsclds_sess); + NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); + txdr_hyper(offset, tl); + tl += 2; + *tl = txdr_unsigned(cnt); + nrp = dsp->nfsclds_sockp; + if (nrp == NULL) + /* If NULL, use the MDS socket. */ + nrp = &nmp->nm_sockreq; + error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); + if (error) + return (error); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); + NFSLOCKDS(dsp); + if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) { + NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); + error = NFSERR_STALEWRITEVERF; + } + NFSUNLOCKDS(dsp); + } +nfsmout: + if (error == 0 && nd->nd_repstat != 0) + error = nd->nd_repstat; + mbuf_freem(nd->nd_mrep); + return (error); +} +#endif + diff --git a/sys/fs/nfsclient/nfs_clstate.c b/sys/fs/nfsclient/nfs_clstate.c index b54805db8d05..8b5acb93e454 100644 --- a/sys/fs/nfsclient/nfs_clstate.c +++ b/sys/fs/nfsclient/nfs_clstate.c @@ -86,14 +86,18 @@ __FBSDID("$FreeBSD$"); */ extern struct nfsstats newnfsstats; extern struct nfsreqhead nfsd_reqq; +extern u_int32_t newnfs_false, newnfs_true; +extern int nfscl_debuglevel; NFSREQSPINLOCK; NFSCLSTATEMUTEX; int nfscl_inited = 0; struct nfsclhead nfsclhead; /* Head of clientid list */ int nfscl_deleghighwater = NFSCLDELEGHIGHWATER; +int nfscl_layouthighwater = NFSCLLAYOUTHIGHWATER; #endif /* !APPLEKEXT */ static int nfscl_delegcnt = 0; +static int nfscl_layoutcnt = 0; static int nfscl_getopen(struct nfsclownerhead *, u_int8_t *, int, u_int8_t *, u_int8_t *, u_int32_t, struct nfscllockowner **, struct 
nfsclopen **); static void nfscl_clrelease(struct nfsclclient *); @@ -109,9 +113,16 @@ static int nfscl_updatelock(struct nfscllockowner *, struct nfscllock **, struct nfscllock **, int); static void nfscl_delegreturnall(struct nfsclclient *, NFSPROC_T *); static u_int32_t nfscl_nextcbident(void); -static mount_t nfscl_getmnt(u_int32_t); +static mount_t nfscl_getmnt(int, uint8_t *, u_int32_t, struct nfsclclient **); +static struct nfsclclient *nfscl_getclnt(u_int32_t); +static struct nfsclclient *nfscl_getclntsess(uint8_t *); static struct nfscldeleg *nfscl_finddeleg(struct nfsclclient *, u_int8_t *, int); +static void nfscl_retoncloselayout(struct nfsclclient *, uint8_t *, int); +static void nfscl_reldevinfo_locked(struct nfscldevinfo *); +static struct nfscllayout *nfscl_findlayout(struct nfsclclient *, u_int8_t *, + int); +static struct nfscldevinfo *nfscl_finddevinfo(struct nfsclclient *, uint8_t *); static int nfscl_checkconflict(struct nfscllockownerhead *, struct nfscllock *, u_int8_t *, struct nfscllock **); static void nfscl_freealllocks(struct nfscllockownerhead *, int); @@ -145,6 +156,15 @@ static int nfscl_trydelegreturn(struct nfscldeleg *, struct ucred *, struct nfsmount *, NFSPROC_T *); static void nfscl_emptylockowner(struct nfscllockowner *, struct nfscllockownerfhhead *); +static void nfscl_mergeflayouts(struct nfsclflayouthead *, + struct nfsclflayouthead *); +static int nfscl_layoutrecall(int, struct nfscllayout *, uint32_t, uint64_t, + uint64_t, uint32_t, struct nfsclrecalllayout *); +static int nfscl_seq(uint32_t, uint32_t); +static void nfscl_layoutreturn(struct nfsmount *, struct nfscllayout *, + struct ucred *, NFSPROC_T *); +static void nfscl_dolayoutcommit(struct nfsmount *, struct nfscllayout *, + struct ucred *, NFSPROC_T *); static short nfscberr_null[] = { 0, @@ -214,7 +234,7 @@ nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg, if (nfhp != NULL) MALLOC(nop, struct nfsclopen *, sizeof (struct nfsclopen) + 
fhlen - 1, M_NFSCLOPEN, M_WAITOK); - ret = nfscl_getcl(vp, cred, p, &clp); + ret = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (ret != 0) { FREE((caddr_t)nowp, M_NFSCLOWNER); if (nop != NULL) @@ -451,7 +471,7 @@ nfscl_finddeleg(struct nfsclclient *clp, u_int8_t *fhp, int fhlen) */ APPLESTATIC int nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode, - struct ucred *cred, NFSPROC_T *p, nfsv4stateid_t *stateidp, + int fords, struct ucred *cred, NFSPROC_T *p, nfsv4stateid_t *stateidp, void **lckpp) { struct nfsclclient *clp; @@ -466,11 +486,14 @@ nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode, *lckpp = NULL; /* * Initially, just set the special stateid of all zeros. + * (Don't do this for a DS, since the special stateid can't be used.) */ - stateidp->seqid = 0; - stateidp->other[0] = 0; - stateidp->other[1] = 0; - stateidp->other[2] = 0; + if (fords == 0) { + stateidp->seqid = 0; + stateidp->other[0] = 0; + stateidp->other[1] = 0; + stateidp->other[2] = 0; + } if (vnode_vtype(vp) != VREG) return (EISDIR); np = VTONFS(vp); @@ -526,7 +549,8 @@ nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode, lp = NULL; error = nfscl_getopen(&clp->nfsc_owner, nfhp, fhlen, own, own, mode, &lp, &op); - if (error == 0 && lp != NULL) { + if (error == 0 && lp != NULL && fords == 0) { + /* Don't return a lock stateid for a DS. */ stateidp->seqid = lp->nfsl_stateid.seqid; stateidp->other[0] = @@ -697,21 +721,21 @@ nfscl_openrelease(struct nfsclopen *op, int error, int candelete) * If the "cred" argument is NULL, a new clientid should not be created. * If the "p" argument is NULL, a SetClientID/SetClientIDConfirm cannot * be done. + * The start_renewthread argument tells nfscl_getcl() to start a renew + * thread if this creates a new clp. * It always clpp with a reference count on it, unless returning an error. 
*/ APPLESTATIC int -nfscl_getcl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, - struct nfsclclient **clpp) +nfscl_getcl(struct mount *mp, struct ucred *cred, NFSPROC_T *p, + int start_renewthread, struct nfsclclient **clpp) { struct nfsclclient *clp; struct nfsclclient *newclp = NULL; - struct mount *mp; struct nfsmount *nmp; char uuid[HOSTUUIDLEN]; int igotlock = 0, error, trystalecnt, clidinusedelay, i; u_int16_t idlen = 0; - mp = vnode_mount(vp); nmp = VFSTONFS(mp); if (cred != NULL) { getcredhostuuid(cred, uuid, sizeof uuid); @@ -722,7 +746,7 @@ nfscl_getcl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, idlen += sizeof (u_int64_t) + 16; /* 16 random bytes */ MALLOC(newclp, struct nfsclclient *, sizeof (struct nfsclclient) + idlen - 1, M_NFSCLCLIENT, - M_WAITOK); + M_WAITOK | M_ZERO); } NFSLOCKCLSTATE(); /* @@ -743,12 +767,15 @@ nfscl_getcl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, return (EACCES); } clp = newclp; - NFSBZERO((caddr_t)clp, sizeof(struct nfsclclient) + idlen - 1); clp->nfsc_idlen = idlen; LIST_INIT(&clp->nfsc_owner); TAILQ_INIT(&clp->nfsc_deleg); + TAILQ_INIT(&clp->nfsc_layout); + LIST_INIT(&clp->nfsc_devinfo); for (i = 0; i < NFSCLDELEGHASHSIZE; i++) LIST_INIT(&clp->nfsc_deleghash[i]); + for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++) + LIST_INIT(&clp->nfsc_layouthash[i]); clp->nfsc_flags = NFSCLFLAGS_INITED; clp->nfsc_clientidrev = 1; clp->nfsc_cbident = nfscl_nextcbident(); @@ -758,11 +785,12 @@ nfscl_getcl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, nmp->nm_clp = clp; clp->nfsc_nmp = nmp; NFSUNLOCKCLSTATE(); - nfscl_start_renewthread(clp); + if (start_renewthread != 0) + nfscl_start_renewthread(clp); } else { NFSUNLOCKCLSTATE(); if (newclp != NULL) - FREE((caddr_t)newclp, M_NFSCLCLIENT); + free(newclp, M_NFSCLCLIENT); } NFSLOCKCLSTATE(); while ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0 && !igotlock && @@ -818,14 +846,15 @@ nfscl_getcl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, clidinusedelay = 120; trystalecnt = 3; do { - error = 
nfsrpc_setclient(VFSTONFS(vnode_mount(vp)), - clp, cred, p); + error = nfsrpc_setclient(nmp, clp, 0, cred, p); if (error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || + error == NFSERR_BADSESSION || error == NFSERR_CLIDINUSE) { (void) nfs_catnap(PZERO, error, "nfs_setcl"); } } while (((error == NFSERR_STALECLIENTID || + error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && --trystalecnt > 0) || (error == NFSERR_CLIDINUSE && --clidinusedelay > 0)); if (error) { @@ -942,7 +971,7 @@ nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len, if (recovery) clp = rclp; else - error = nfscl_getcl(vp, cred, p, &clp); + error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); } if (error) { FREE((caddr_t)nlp, M_NFSCLLOCKOWNER); @@ -1277,7 +1306,7 @@ nfscl_checkwritelocked(vnode_t vp, struct flock *fl, end = NFS64BITSSET; } - error = nfscl_getcl(vp, cred, p, &clp); + error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (error) return (1); nfscl_filllockowner(id, own, flags); @@ -1825,19 +1854,24 @@ nfscl_umount(struct nfsmount *nmp, NFSPROC_T *p) LIST_REMOVE(clp, nfsc_list); nfscl_delegreturnall(clp, p); cred = newnfs_getcred(); - (void) nfsrpc_setclient(nmp, clp, cred, p); + if (NFSHASNFSV4N(nmp)) { + (void)nfsrpc_destroysession(nmp, clp, cred, p); + (void)nfsrpc_destroyclient(nmp, clp, cred, p); + } else + (void)nfsrpc_setclient(nmp, clp, 0, cred, p); nfscl_cleanclient(clp); nmp->nm_clp = NULL; NFSFREECRED(cred); - FREE((caddr_t)clp, M_NFSCLCLIENT); + free(clp, M_NFSCLCLIENT); } else NFSUNLOCKCLSTATE(); } /* * This function is called when a server replies with NFSERR_STALECLIENTID - * or NFSERR_STALESTATEID. It traverses the clientid lists, doing Opens - * and Locks with reclaim. If these fail, it deletes the corresponding state. + * NFSERR_STALESTATEID or NFSERR_BADSESSION. It traverses the clientid lists, + * doing Opens and Locks with reclaim. If these fail, it deletes the + * corresponding state. 
*/ static void nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) @@ -1854,7 +1888,8 @@ nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) struct nfsreq *rep; u_int64_t len; u_int32_t delegtype = NFSV4OPEN_DELEGATEWRITE, mode; - int igotlock = 0, error, trycnt, firstlock, s; + int i, igotlock = 0, error, trycnt, firstlock, s; + struct nfscllayout *lyp, *nlyp; /* * First, lock the client structure, so everyone else will @@ -1871,10 +1906,22 @@ nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) nmp = clp->nfsc_nmp; if (nmp == NULL) panic("nfscl recover"); + + /* + * For now, just get rid of all layouts. There may be a need + * to do LayoutCommit Ops with reclaim == true later. + */ + TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) + nfscl_freelayout(lyp); + TAILQ_INIT(&clp->nfsc_layout); + for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++) + LIST_INIT(&clp->nfsc_layouthash[i]); + trycnt = 5; do { - error = nfsrpc_setclient(nmp, clp, cred, p); + error = nfsrpc_setclient(nmp, clp, 1, cred, p); } while ((error == NFSERR_STALECLIENTID || + error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && --trycnt > 0); if (error) { nfscl_cleanclient(clp); @@ -1893,9 +1940,10 @@ nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) * Mark requests already queued on the server, so that they don't * initiate another recovery cycle. Any requests already in the * queue that handle state information will have the old stale - * clientid/stateid and will get a NFSERR_STALESTATEID or - * NFSERR_STALECLIENTID reply from the server. This will be - * translated to NFSERR_STALEDONTRECOVER when R_DONTRECOVER is set. + * clientid/stateid and will get a NFSERR_STALESTATEID, + * NFSERR_STALECLIENTID or NFSERR_BADSESSION reply from the server. + * This will be translated to NFSERR_STALEDONTRECOVER when + * R_DONTRECOVER is set. 
*/ s = splsoftclock(); NFSLOCKREQ(); @@ -2136,6 +2184,10 @@ nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) FREE((caddr_t)dp, M_NFSCLDELEG); } + /* For NFSv4.1 or later, do a RECLAIM_COMPLETE. */ + if (NFSHASNFSV4N(nmp)) + (void)nfsrpc_reclaimcomplete(nmp, cred, p); + NFSLOCKCLSTATE(); clp->nfsc_flags &= ~NFSCLFLAGS_RECVRINPROG; wakeup(&clp->nfsc_flags); @@ -2190,8 +2242,9 @@ nfscl_hasexpired(struct nfsclclient *clp, u_int32_t clidrev, NFSPROC_T *p) cred = newnfs_getcred(); trycnt = 5; do { - error = nfsrpc_setclient(nmp, clp, cred, p); + error = nfsrpc_setclient(nmp, clp, 0, cred, p); } while ((error == NFSERR_STALECLIENTID || + error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && --trycnt > 0); if (error) { /* @@ -2398,6 +2451,11 @@ nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p) static time_t prevsec = 0; struct nfscllockownerfh *lfhp, *nlfhp; struct nfscllockownerfhhead lfh; + struct nfscllayout *lyp, *nlyp; + struct nfscldevinfo *dip, *ndip; + struct nfscllayouthead rlh; + struct nfsclrecalllayout *recallp; + struct nfsclds *dsp; cred = newnfs_getcred(); NFSLOCKCLSTATE(); @@ -2425,10 +2483,12 @@ nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p) (clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID)) { clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew; clidrev = clp->nfsc_clientidrev; - error = nfsrpc_renew(clp, cred, p); + error = nfsrpc_renew(clp, + TAILQ_FIRST(&clp->nfsc_nmp->nm_sess), cred, p); if (error == NFSERR_CBPATHDOWN) cbpathdown = 1; - else if (error == NFSERR_STALECLIENTID) { + else if (error == NFSERR_STALECLIENTID || + error == NFSERR_BADSESSION) { NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_RECOVER; NFSUNLOCKCLSTATE(); @@ -2436,6 +2496,25 @@ nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p) (void) nfscl_hasexpired(clp, clidrev, p); } + /* Do renews for any DS sessions. */ +checkdsrenew: + NFSLOCKMNT(clp->nfsc_nmp); + /* Skip first entry, since the MDS is handled above. 
*/ + dsp = TAILQ_FIRST(&clp->nfsc_nmp->nm_sess); + if (dsp != NULL) + dsp = TAILQ_NEXT(dsp, nfsclds_list); + while (dsp != NULL) { + if (dsp->nfsclds_expire <= NFSD_MONOSEC) { + dsp->nfsclds_expire = NFSD_MONOSEC + + clp->nfsc_renew; + NFSUNLOCKMNT(clp->nfsc_nmp); + (void)nfsrpc_renew(clp, dsp, cred, p); + goto checkdsrenew; + } + dsp = TAILQ_NEXT(dsp, nfsclds_list); + } + NFSUNLOCKMNT(clp->nfsc_nmp); + TAILQ_INIT(&dh); NFSLOCKCLSTATE(); if (cbpathdown) @@ -2542,8 +2621,90 @@ nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p) } if (igotlock) nfsv4_unlock(&clp->nfsc_lock, 0); + + /* + * Do the recall on any layouts. To avoid trouble, always + * come back up here after having slept. + */ + TAILQ_INIT(&rlh); +tryagain2: + TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) { + if ((lyp->nfsly_flags & NFSLY_RECALL) != 0) { + /* + * Wait for outstanding I/O ops to be done. + */ + if (lyp->nfsly_lock.nfslock_usecnt > 0 || + (lyp->nfsly_lock.nfslock_lock & + NFSV4LOCK_LOCK) != 0) { + lyp->nfsly_lock.nfslock_lock |= + NFSV4LOCK_WANTED; + (void)nfsmsleep(&lyp->nfsly_lock, + NFSCLSTATEMUTEXPTR, PZERO, "nfslyp", + NULL); + goto tryagain2; + } + /* Move the layout to the recall list. */ + TAILQ_REMOVE(&clp->nfsc_layout, lyp, + nfsly_list); + LIST_REMOVE(lyp, nfsly_hash); + TAILQ_INSERT_HEAD(&rlh, lyp, nfsly_list); + + /* Handle any layout commits. */ + if (!NFSHASNOLAYOUTCOMMIT(clp->nfsc_nmp) && + (lyp->nfsly_flags & NFSLY_WRITTEN) != 0) { + lyp->nfsly_flags &= ~NFSLY_WRITTEN; + NFSUNLOCKCLSTATE(); + NFSCL_DEBUG(3, "do layoutcommit\n"); + nfscl_dolayoutcommit(clp->nfsc_nmp, lyp, + cred, p); + NFSLOCKCLSTATE(); + goto tryagain2; + } + } + } + + /* Now, look for stale layouts. 
*/ + lyp = TAILQ_LAST(&clp->nfsc_layout, nfscllayouthead); + while (lyp != NULL) { + nlyp = TAILQ_PREV(lyp, nfscllayouthead, nfsly_list); + if (lyp->nfsly_timestamp < NFSD_MONOSEC && + (lyp->nfsly_flags & NFSLY_RECALL) == 0 && + lyp->nfsly_lock.nfslock_usecnt == 0 && + lyp->nfsly_lock.nfslock_lock == 0) { + NFSCL_DEBUG(4, "ret stale lay=%d\n", + nfscl_layoutcnt); + recallp = malloc(sizeof(*recallp), + M_NFSLAYRECALL, M_NOWAIT); + if (recallp == NULL) + break; + (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, + lyp, NFSLAYOUTIOMODE_ANY, 0, UINT64_MAX, + lyp->nfsly_stateid.seqid, recallp); + } + lyp = nlyp; + } + + /* + * Free up any unreferenced device info structures. + */ + LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip) { + if (dip->nfsdi_layoutrefs == 0 && + dip->nfsdi_refcnt == 0) { + NFSCL_DEBUG(4, "freeing devinfo\n"); + LIST_REMOVE(dip, nfsdi_list); + nfscl_freedevinfo(dip); + } + } NFSUNLOCKCLSTATE(); + /* Do layout return(s), as required. */ + TAILQ_FOREACH_SAFE(lyp, &rlh, nfsly_list, nlyp) { + TAILQ_REMOVE(&rlh, lyp, nfsly_list); + NFSCL_DEBUG(4, "ret layout\n"); + nfscl_layoutreturn(clp->nfsc_nmp, lyp, cred, p); + nfscl_freelayout(lyp); + } + /* * Delegreturn any delegations cleaned out or recalled. */ @@ -2599,8 +2760,8 @@ nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p) } /* - * Initiate state recovery. Called when NFSERR_STALECLIENTID or - * NFSERR_STALESTATEID is received. + * Initiate state recovery. Called when NFSERR_STALECLIENTID, + * NFSERR_STALESTATEID or NFSERR_BADSESSION is received. 
*/ APPLESTATIC void nfscl_initiate_recovery(struct nfsclclient *clp) @@ -2832,7 +2993,7 @@ nfscl_getclose(vnode_t vp, struct nfsclclient **clpp) struct nfsfh *nfhp; int error, notdecr; - error = nfscl_getcl(vp, NULL, NULL, &clp); + error = nfscl_getcl(vnode_mount(vp), NULL, NULL, 1, &clp); if (error) return (error); *clpp = clp; @@ -2906,7 +3067,7 @@ nfscl_doclose(vnode_t vp, struct nfsclclient **clpp, NFSPROC_T *p) struct nfsfh *nfhp; int error; - error = nfscl_getcl(vp, NULL, NULL, &clp); + error = nfscl_getcl(vnode_mount(vp), NULL, NULL, 1, &clp); if (error) return (error); *clpp = clp; @@ -2930,6 +3091,9 @@ nfscl_doclose(vnode_t vp, struct nfsclclient **clpp, NFSPROC_T *p) } } + /* Return any layouts marked return on close. */ + nfscl_retoncloselayout(clp, nfhp->nfh_fh, nfhp->nfh_len); + /* Now process the opens against the server. */ lookformore: LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { @@ -2979,11 +3143,11 @@ nfscl_delegreturnall(struct nfsclclient *clp, NFSPROC_T *p) APPLESTATIC void nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) { - int i, op; + int clist, gotseq_ok, i, j, k, op, rcalls; u_int32_t *tl; struct nfsclclient *clp; struct nfscldeleg *dp = NULL; - int numops, taglen = -1, error = 0, trunc, ret = 0; + int numops, taglen = -1, error = 0, trunc; u_int32_t minorvers, retops = 0, *retopsp = NULL, *repp, cbident; u_char tag[NFSV4_SMALLSTR + 1], *tagstr; vnode_t vp = NULL; @@ -2993,7 +3157,16 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) mount_t mp; nfsattrbit_t attrbits, rattrbits; nfsv4stateid_t stateid; + uint32_t seqid, slotid = 0, highslot, cachethis; + uint8_t sessionid[NFSX_V4SESSIONID]; + struct mbuf *rep; + struct nfscllayout *lyp; + uint64_t filesid[2], len, off; + int changed, gotone, laytype, recalltype; + uint32_t iomode; + struct nfsclrecalllayout *recallp = NULL; + gotseq_ok = 0; nfsrvd_rephead(nd); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); taglen = fxdr_unsigned(int, *tl); @@ -3019,7 +3192,7 @@ 
nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED); NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); minorvers = fxdr_unsigned(u_int32_t, *tl++); - if (minorvers != NFSV4_MINORVERSION) + if (minorvers != NFSV4_MINORVERSION && minorvers != NFSV41_MINORVERSION) nd->nd_repstat = NFSERR_MINORVERMISMATCH; cbident = fxdr_unsigned(u_int32_t, *tl++); if (nd->nd_repstat) @@ -3034,73 +3207,85 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) NFSM_BUILD(repp, u_int32_t *, 2 * NFSX_UNSIGNED); *repp++ = *tl; op = fxdr_unsigned(int, *tl); - if (op < NFSV4OP_CBGETATTR || op > NFSV4OP_CBRECALL) { + if (op < NFSV4OP_CBGETATTR || + (op > NFSV4OP_CBRECALL && minorvers == NFSV4_MINORVERSION) || + (op > NFSV4OP_CBNOTIFYDEVID && + minorvers == NFSV41_MINORVERSION)) { nd->nd_repstat = NFSERR_OPILLEGAL; *repp = nfscl_errmap(nd); retops++; break; } nd->nd_procnum = op; - newnfsstats.cbrpccnt[nd->nd_procnum]++; + if (op < NFSV4OP_CBNOPS) + newnfsstats.cbrpccnt[nd->nd_procnum]++; switch (op) { case NFSV4OP_CBGETATTR: - clp = NULL; + NFSCL_DEBUG(4, "cbgetattr\n"); + mp = NULL; + vp = NULL; error = nfsm_getfh(nd, &nfhp); if (!error) error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); + if (error == 0 && i == 0 && + minorvers != NFSV4_MINORVERSION) + error = NFSERR_OPNOTINSESS; if (!error) { - mp = nfscl_getmnt(cbident); + mp = nfscl_getmnt(minorvers, sessionid, cbident, + &clp); if (mp == NULL) error = NFSERR_SERVERFAULT; } if (!error) { - dp = NULL; - NFSLOCKCLSTATE(); - clp = nfscl_findcl(VFSTONFS(mp)); - if (clp != NULL) - dp = nfscl_finddeleg(clp, nfhp->nfh_fh, - nfhp->nfh_len); - NFSUNLOCKCLSTATE(); - if (dp == NULL) - error = NFSERR_SERVERFAULT; - } - if (!error) { - ret = nfscl_ngetreopen(mp, nfhp->nfh_fh, + error = nfscl_ngetreopen(mp, nfhp->nfh_fh, nfhp->nfh_len, p, &np); - if (!ret) + if (!error) vp = NFSTOV(np); } - if (nfhp != NULL) - FREE((caddr_t)nfhp, M_NFSFH); if (!error) { NFSZERO_ATTRBIT(&rattrbits); - if 
(NFSISSET_ATTRBIT(&attrbits, - NFSATTRBIT_SIZE)) { - if (!ret) - va.va_size = np->n_size; - else - va.va_size = dp->nfsdl_size; - NFSSETBIT_ATTRBIT(&rattrbits, - NFSATTRBIT_SIZE); - } - if (NFSISSET_ATTRBIT(&attrbits, - NFSATTRBIT_CHANGE)) { - va.va_filerev = dp->nfsdl_change; - if (ret || (np->n_flag & NDELEGMOD)) - va.va_filerev++; - NFSSETBIT_ATTRBIT(&rattrbits, - NFSATTRBIT_CHANGE); - } + NFSLOCKCLSTATE(); + dp = nfscl_finddeleg(clp, nfhp->nfh_fh, + nfhp->nfh_len); + if (dp != NULL) { + if (NFSISSET_ATTRBIT(&attrbits, + NFSATTRBIT_SIZE)) { + if (vp != NULL) + va.va_size = np->n_size; + else + va.va_size = + dp->nfsdl_size; + NFSSETBIT_ATTRBIT(&rattrbits, + NFSATTRBIT_SIZE); + } + if (NFSISSET_ATTRBIT(&attrbits, + NFSATTRBIT_CHANGE)) { + va.va_filerev = + dp->nfsdl_change; + if (vp == NULL || + (np->n_flag & NDELEGMOD)) + va.va_filerev++; + NFSSETBIT_ATTRBIT(&rattrbits, + NFSATTRBIT_CHANGE); + } + } else + error = NFSERR_SERVERFAULT; + NFSUNLOCKCLSTATE(); + } + if (vp != NULL) + vrele(vp); + if (mp != NULL) + vfs_unbusy(mp); + if (nfhp != NULL) + FREE((caddr_t)nfhp, M_NFSFH); + if (!error) (void) nfsv4_fillattr(nd, NULL, NULL, NULL, &va, NULL, 0, &rattrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0); - if (!ret) - vrele(vp); - } break; case NFSV4OP_CBRECALL: - clp = NULL; + NFSCL_DEBUG(4, "cbrecall\n"); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); stateid.seqid = *tl++; @@ -3109,14 +3294,15 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); trunc = fxdr_unsigned(int, *tl); error = nfsm_getfh(nd, &nfhp); - if (!error) { - mp = nfscl_getmnt(cbident); - if (mp == NULL) - error = NFSERR_SERVERFAULT; - } + if (error == 0 && i == 0 && + minorvers != NFSV4_MINORVERSION) + error = NFSERR_OPNOTINSESS; if (!error) { NFSLOCKCLSTATE(); - clp = nfscl_findcl(VFSTONFS(mp)); + if (minorvers == NFSV4_MINORVERSION) + clp = nfscl_getclnt(cbident); + else + clp = nfscl_getclntsess(sessionid); if (clp != NULL) { dp = 
nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len); @@ -3134,6 +3320,195 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) if (nfhp != NULL) FREE((caddr_t)nfhp, M_NFSFH); break; + case NFSV4OP_CBLAYOUTRECALL: + NFSCL_DEBUG(4, "cblayrec\n"); + nfhp = NULL; + NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED); + laytype = fxdr_unsigned(int, *tl++); + iomode = fxdr_unsigned(uint32_t, *tl++); + if (newnfs_true == *tl++) + changed = 1; + else + changed = 0; + recalltype = fxdr_unsigned(int, *tl); + recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, + M_WAITOK); + if (laytype != NFSLAYOUT_NFSV4_1_FILES) + error = NFSERR_NOMATCHLAYOUT; + else if (recalltype == NFSLAYOUTRETURN_FILE) { + error = nfsm_getfh(nd, &nfhp); + NFSCL_DEBUG(4, "retfile getfh=%d\n", error); + if (error != 0) + goto nfsmout; + NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER + + NFSX_STATEID); + off = fxdr_hyper(tl); tl += 2; + len = fxdr_hyper(tl); tl += 2; + stateid.seqid = fxdr_unsigned(uint32_t, *tl++); + NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); + if (minorvers == NFSV4_MINORVERSION) + error = NFSERR_NOTSUPP; + else if (i == 0) + error = NFSERR_OPNOTINSESS; + if (error == 0) { + NFSLOCKCLSTATE(); + clp = nfscl_getclntsess(sessionid); + NFSCL_DEBUG(4, "cbly clp=%p\n", clp); + if (clp != NULL) { + lyp = nfscl_findlayout(clp, + nfhp->nfh_fh, + nfhp->nfh_len); + NFSCL_DEBUG(4, "cblyp=%p\n", + lyp); + if (lyp != NULL && + (lyp->nfsly_flags & + NFSLY_FILES) != 0 && + !NFSBCMP(stateid.other, + lyp->nfsly_stateid.other, + NFSX_STATEIDOTHER)) { + error = + nfscl_layoutrecall( + recalltype, + lyp, iomode, off, + len, stateid.seqid, + recallp); + recallp = NULL; + wakeup(clp); + NFSCL_DEBUG(4, + "aft layrcal=%d\n", + error); + } else + error = + NFSERR_NOMATCHLAYOUT; + } else + error = NFSERR_NOMATCHLAYOUT; + NFSUNLOCKCLSTATE(); + } + free(nfhp, M_NFSFH); + } else if (recalltype == NFSLAYOUTRETURN_FSID) { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER); + filesid[0] = fxdr_hyper(tl); tl += 2; + filesid[1] 
= fxdr_hyper(tl); tl += 2; + gotone = 0; + NFSLOCKCLSTATE(); + clp = nfscl_getclntsess(sessionid); + if (clp != NULL) { + TAILQ_FOREACH(lyp, &clp->nfsc_layout, + nfsly_list) { + if (lyp->nfsly_filesid[0] == + filesid[0] && + lyp->nfsly_filesid[1] == + filesid[1]) { + error = + nfscl_layoutrecall( + recalltype, + lyp, iomode, 0, + UINT64_MAX, + lyp->nfsly_stateid.seqid, + recallp); + recallp = NULL; + gotone = 1; + } + } + if (gotone != 0) + wakeup(clp); + else + error = NFSERR_NOMATCHLAYOUT; + } else + error = NFSERR_NOMATCHLAYOUT; + NFSUNLOCKCLSTATE(); + } else if (recalltype == NFSLAYOUTRETURN_ALL) { + gotone = 0; + NFSLOCKCLSTATE(); + clp = nfscl_getclntsess(sessionid); + if (clp != NULL) { + TAILQ_FOREACH(lyp, &clp->nfsc_layout, + nfsly_list) { + error = nfscl_layoutrecall( + recalltype, lyp, iomode, 0, + UINT64_MAX, + lyp->nfsly_stateid.seqid, + recallp); + recallp = NULL; + gotone = 1; + } + if (gotone != 0) + wakeup(clp); + else + error = NFSERR_NOMATCHLAYOUT; + } else + error = NFSERR_NOMATCHLAYOUT; + NFSUNLOCKCLSTATE(); + } else + error = NFSERR_NOMATCHLAYOUT; + if (recallp != NULL) { + free(recallp, M_NFSLAYRECALL); + recallp = NULL; + } + break; + case NFSV4OP_CBSEQUENCE: + NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + + 5 * NFSX_UNSIGNED); + bcopy(tl, sessionid, NFSX_V4SESSIONID); + tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; + seqid = fxdr_unsigned(uint32_t, *tl++); + slotid = fxdr_unsigned(uint32_t, *tl++); + highslot = fxdr_unsigned(uint32_t, *tl++); + cachethis = *tl++; + /* Throw away the referring call stuff. 
*/ + clist = fxdr_unsigned(int, *tl); + for (j = 0; j < clist; j++) { + NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + + NFSX_UNSIGNED); + tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; + rcalls = fxdr_unsigned(int, *tl); + for (k = 0; k < rcalls; k++) { + NFSM_DISSECT(tl, uint32_t *, + 2 * NFSX_UNSIGNED); + } + } + NFSLOCKCLSTATE(); + if (i == 0) { + clp = nfscl_getclntsess(sessionid); + if (clp == NULL) + error = NFSERR_SERVERFAULT; + } else + error = NFSERR_SEQUENCEPOS; + if (error == 0) + error = nfsv4_seqsession(seqid, slotid, + highslot, + NFSMNT_MDSSESSION(clp->nfsc_nmp)-> + nfsess_cbslots, &rep, + NFSMNT_MDSSESSION(clp->nfsc_nmp)-> + nfsess_backslots); + NFSUNLOCKCLSTATE(); + if (error == 0) { + gotseq_ok = 1; + if (rep != NULL) { + NFSCL_DEBUG(4, "Got cbretry\n"); + m_freem(nd->nd_mreq); + nd->nd_mreq = rep; + rep = NULL; + goto out; + } + NFSM_BUILD(tl, uint32_t *, + NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED); + bcopy(sessionid, tl, NFSX_V4SESSIONID); + tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; + *tl++ = txdr_unsigned(seqid); + *tl++ = txdr_unsigned(slotid); + *tl++ = txdr_unsigned(NFSV4_CBSLOTS - 1); + *tl = txdr_unsigned(NFSV4_CBSLOTS - 1); + } + break; + default: + if (i == 0 && minorvers == NFSV41_MINORVERSION) + error = NFSERR_OPNOTINSESS; + else { + NFSCL_DEBUG(1, "unsupp callback %d\n", op); + error = NFSERR_NOTSUPP; + } + break; }; if (error) { if (error == EBADRPC || error == NFSERR_BADXDR) { @@ -3151,6 +3526,8 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) *repp = 0; /* NFS4_OK */ } nfsmout: + if (recallp != NULL) + free(recallp, M_NFSLAYRECALL); if (error) { if (error == EBADRPC || error == NFSERR_BADXDR) nd->nd_repstat = NFSERR_BADXDR; @@ -3165,6 +3542,21 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) *retopsp = txdr_unsigned(retops); } *nd->nd_errp = nfscl_errmap(nd); +out: + if (gotseq_ok != 0) { + rep = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK); + NFSLOCKCLSTATE(); + clp = nfscl_getclntsess(sessionid); + if (clp != NULL) { + 
nfsv4_seqsess_cacherep(slotid, + NFSMNT_MDSSESSION(clp->nfsc_nmp)->nfsess_cbslots, + rep); + NFSUNLOCKCLSTATE(); + } else { + NFSUNLOCKCLSTATE(); + m_freem(rep); + } + } } /* @@ -3204,26 +3596,72 @@ nfscl_nextcbident(void) } /* - * Get the mount point related to a given cbident. + * Get the mount point related to a given cbident or session and busy it. + * A mount that has no session never matches a lookup by sessionid. */ static mount_t -nfscl_getmnt(u_int32_t cbident) +nfscl_getmnt(int minorvers, uint8_t *sessionid, u_int32_t cbident, + struct nfsclclient **clpp) { struct nfsclclient *clp; - struct nfsmount *nmp; + mount_t mp; + int error; + *clpp = NULL; NFSLOCKCLSTATE(); LIST_FOREACH(clp, &nfsclhead, nfsc_list) { - if (clp->nfsc_cbident == cbident) + if (minorvers == NFSV4_MINORVERSION) { + if (clp->nfsc_cbident == cbident) + break; + } else if (!TAILQ_EMPTY(&clp->nfsc_nmp->nm_sess) && + !NFSBCMP(NFSMNT_MDSSESSION(clp->nfsc_nmp)-> + nfsess_sessionid, sessionid, NFSX_V4SESSIONID)) break; } if (clp == NULL) { NFSUNLOCKCLSTATE(); return (NULL); } - nmp = clp->nfsc_nmp; + mp = clp->nfsc_nmp->nm_mountp; + vfs_ref(mp); NFSUNLOCKCLSTATE(); - return (nmp->nm_mountp); + error = vfs_busy(mp, 0); + vfs_rel(mp); + if (error != 0) + return (NULL); + *clpp = clp; + return (mp); +} + +/* + * Get the clientid pointer related to a given cbident. + */ +static struct nfsclclient * +nfscl_getclnt(u_int32_t cbident) +{ + struct nfsclclient *clp; + + LIST_FOREACH(clp, &nfsclhead, nfsc_list) + if (clp->nfsc_cbident == cbident) + break; + return (clp); +} + +/* + * Get the clientid pointer related to a given sessionid. 
+ * A mount that has no session is skipped; NULL is returned on no match. + */ +static struct nfsclclient * +nfscl_getclntsess(uint8_t *sessionid) +{ + struct nfsclclient *clp; + + LIST_FOREACH(clp, &nfsclhead, nfsc_list) + if (!TAILQ_EMPTY(&clp->nfsc_nmp->nm_sess) && + !NFSBCMP(NFSMNT_MDSSESSION(clp->nfsc_nmp)-> + nfsess_sessionid, sessionid, NFSX_V4SESSIONID)) + break; + return (clp); } /* @@ -3420,7 +3858,8 @@ nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp, ret = nfscl_moveopen(vp, clp, nmp, lop, owp, dp, cred, p); if (ret == NFSERR_STALECLIENTID || - ret == NFSERR_STALEDONTRECOVER) { + ret == NFSERR_STALEDONTRECOVER || + ret == NFSERR_BADSESSION) { if (gotvp) vrele(vp); return (ret); @@ -3451,7 +3890,8 @@ nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp, if (ret) { nfscl_freeopenowner(owp, 0); if (ret == NFSERR_STALECLIENTID || - ret == NFSERR_STALEDONTRECOVER) { + ret == NFSERR_STALEDONTRECOVER || + ret == NFSERR_BADSESSION) { if (gotvp) vrele(vp); return (ret); @@ -3475,7 +3915,8 @@ nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp, ret = nfscl_relock(vp, clp, nmp, lp, lckp, cred, p); if (ret == NFSERR_STALESTATEID || ret == NFSERR_STALEDONTRECOVER || - ret == NFSERR_STALECLIENTID) { + ret == NFSERR_STALECLIENTID || + ret == NFSERR_BADSESSION) { if (gotvp) vrele(vp); return (ret); @@ -4223,10 +4664,549 @@ nfscl_errmap(struct nfsrv_descript *nd) if (nd->nd_repstat == NFSERR_MINORVERMISMATCH || nd->nd_repstat == NFSERR_OPILLEGAL) return (txdr_unsigned(nd->nd_repstat)); - errp = defaulterrp = nfscl_cberrmap[nd->nd_procnum]; + if (nd->nd_procnum < NFSV4OP_CBNOPS) + errp = defaulterrp = nfscl_cberrmap[nd->nd_procnum]; + else + return (txdr_unsigned(nd->nd_repstat)); while (*++errp) if (*errp == (short)nd->nd_repstat) return (txdr_unsigned(nd->nd_repstat)); return (txdr_unsigned(*defaulterrp)); } +/* + * Called to find/add a layout to a client. + * This function returns the layout with a refcnt (shared lock) upon + * success (returns 0) or with no lock/refcnt on the layout when an + * error is returned. 
+ * If a layout is passed in via lypp, it is locked (exclusively locked). + */ +APPLESTATIC int +nfscl_layout(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen, + nfsv4stateid_t *stateidp, int retonclose, + struct nfsclflayouthead *fhlp, struct nfscllayout **lypp, + struct ucred *cred, NFSPROC_T *p) +{ + struct nfsclclient *clp; + struct nfscllayout *lyp, *tlyp; + struct nfsclflayout *flp; + struct nfsnode *np = VTONFS(vp); + mount_t mp; + int layout_passed_in; + + mp = nmp->nm_mountp; + layout_passed_in = 1; + tlyp = NULL; + lyp = *lypp; + if (lyp == NULL) { + layout_passed_in = 0; + tlyp = malloc(sizeof(*tlyp) + fhlen - 1, M_NFSLAYOUT, + M_WAITOK | M_ZERO); + } + + NFSLOCKCLSTATE(); + clp = nmp->nm_clp; + if (clp == NULL) { + if (layout_passed_in != 0) + nfsv4_unlock(&lyp->nfsly_lock, 0); + NFSUNLOCKCLSTATE(); + if (tlyp != NULL) + free(tlyp, M_NFSLAYOUT); + return (EPERM); + } + if (lyp == NULL) { + /* + * Although no lyp was passed in, another thread might have + * allocated one. If one is found, just increment it's ref + * count and return it. + */ + lyp = nfscl_findlayout(clp, fhp, fhlen); + if (lyp == NULL) { + lyp = tlyp; + tlyp = NULL; + lyp->nfsly_stateid.seqid = stateidp->seqid; + lyp->nfsly_stateid.other[0] = stateidp->other[0]; + lyp->nfsly_stateid.other[1] = stateidp->other[1]; + lyp->nfsly_stateid.other[2] = stateidp->other[2]; + lyp->nfsly_lastbyte = 0; + LIST_INIT(&lyp->nfsly_flayread); + LIST_INIT(&lyp->nfsly_flayrw); + LIST_INIT(&lyp->nfsly_recall); + lyp->nfsly_filesid[0] = np->n_vattr.na_filesid[0]; + lyp->nfsly_filesid[1] = np->n_vattr.na_filesid[1]; + lyp->nfsly_clp = clp; + lyp->nfsly_flags = (retonclose != 0) ? 
+ (NFSLY_FILES | NFSLY_RETONCLOSE) : NFSLY_FILES; + lyp->nfsly_fhlen = fhlen; + NFSBCOPY(fhp, lyp->nfsly_fh, fhlen); + TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list); + LIST_INSERT_HEAD(NFSCLLAYOUTHASH(clp, fhp, fhlen), lyp, + nfsly_hash); + lyp->nfsly_timestamp = NFSD_MONOSEC + 120; + nfscl_layoutcnt++; + } else { + if (retonclose != 0) + lyp->nfsly_flags |= NFSLY_RETONCLOSE; + TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list); + TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list); + lyp->nfsly_timestamp = NFSD_MONOSEC + 120; + } + nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp); + if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { + NFSUNLOCKCLSTATE(); + if (tlyp != NULL) + free(tlyp, M_NFSLAYOUT); + return (EPERM); + } + *lypp = lyp; + } else + lyp->nfsly_stateid.seqid = stateidp->seqid; + + /* Merge the new list of File Layouts into the list. */ + flp = LIST_FIRST(fhlp); + if (flp != NULL) { + if (flp->nfsfl_iomode == NFSLAYOUTIOMODE_READ) + nfscl_mergeflayouts(&lyp->nfsly_flayread, fhlp); + else + nfscl_mergeflayouts(&lyp->nfsly_flayrw, fhlp); + } + if (layout_passed_in != 0) + nfsv4_unlock(&lyp->nfsly_lock, 1); + NFSUNLOCKCLSTATE(); + if (tlyp != NULL) + free(tlyp, M_NFSLAYOUT); + return (0); +} + +/* + * Search for a layout by MDS file handle. + * If one is found, it is returned with a refcnt (shared lock) iff + * retflpp returned non-NULL and locked (exclusive locked) iff retflpp is + * returned NULL. 
+ */ +struct nfscllayout * +nfscl_getlayout(struct nfsclclient *clp, uint8_t *fhp, int fhlen, + uint64_t off, struct nfsclflayout **retflpp, int *recalledp) +{ + struct nfscllayout *lyp; + mount_t mp; + int error, igotlock; + + mp = clp->nfsc_nmp->nm_mountp; + *recalledp = 0; + *retflpp = NULL; + NFSLOCKCLSTATE(); + lyp = nfscl_findlayout(clp, fhp, fhlen); + if (lyp != NULL) { + if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) { + TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list); + TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list); + lyp->nfsly_timestamp = NFSD_MONOSEC + 120; + error = nfscl_findlayoutforio(lyp, off, + NFSV4OPEN_ACCESSREAD, retflpp); + if (error == 0) + nfsv4_getref(&lyp->nfsly_lock, NULL, + NFSCLSTATEMUTEXPTR, mp); + else { + do { + igotlock = nfsv4_lock(&lyp->nfsly_lock, + 1, NULL, NFSCLSTATEMUTEXPTR, mp); + } while (igotlock == 0 && + (mp->mnt_kern_flag & MNTK_UNMOUNTF) == 0); + *retflpp = NULL; + } + if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { + lyp = NULL; + *recalledp = 1; + } + } else { + lyp = NULL; + *recalledp = 1; + } + } + NFSUNLOCKCLSTATE(); + return (lyp); +} + +/* + * Search for a layout by MDS file handle. If one is found that is marked + * "return on close", delete it, since it should now be forgotten. + */ +static void +nfscl_retoncloselayout(struct nfsclclient *clp, uint8_t *fhp, int fhlen) +{ + struct nfscllayout *lyp; + +tryagain: + lyp = nfscl_findlayout(clp, fhp, fhlen); + if (lyp != NULL && (lyp->nfsly_flags & NFSLY_RETONCLOSE) != 0) { + /* + * Wait for outstanding I/O ops to be done. + */ + if (lyp->nfsly_lock.nfslock_usecnt != 0 || + lyp->nfsly_lock.nfslock_lock != 0) { + lyp->nfsly_lock.nfslock_lock |= NFSV4LOCK_WANTED; + (void)mtx_sleep(&lyp->nfsly_lock, + NFSCLSTATEMUTEXPTR, PZERO, "nfslyc", 0); + goto tryagain; + } + nfscl_freelayout(lyp); + } +} + +/* + * Dereference a layout. 
+ */ +void +nfscl_rellayout(struct nfscllayout *lyp, int exclocked) +{ + + NFSLOCKCLSTATE(); + if (exclocked != 0) + nfsv4_unlock(&lyp->nfsly_lock, 0); + else + nfsv4_relref(&lyp->nfsly_lock); + NFSUNLOCKCLSTATE(); +} + +/* + * Search for a devinfo by deviceid. If one is found, return it after + * acquiring a reference count on it. + */ +struct nfscldevinfo * +nfscl_getdevinfo(struct nfsclclient *clp, uint8_t *deviceid, + struct nfscldevinfo *dip) +{ + + NFSLOCKCLSTATE(); + if (dip == NULL) + dip = nfscl_finddevinfo(clp, deviceid); + if (dip != NULL) + dip->nfsdi_refcnt++; + NFSUNLOCKCLSTATE(); + return (dip); +} + +/* + * Dereference a devinfo structure. + */ +static void +nfscl_reldevinfo_locked(struct nfscldevinfo *dip) +{ + + dip->nfsdi_refcnt--; + if (dip->nfsdi_refcnt == 0) + wakeup(&dip->nfsdi_refcnt); +} + +/* + * Dereference a devinfo structure. + */ +void +nfscl_reldevinfo(struct nfscldevinfo *dip) +{ + + NFSLOCKCLSTATE(); + nfscl_reldevinfo_locked(dip); + NFSUNLOCKCLSTATE(); +} + +/* + * Find a layout for this file handle. Return NULL upon failure. + */ +static struct nfscllayout * +nfscl_findlayout(struct nfsclclient *clp, u_int8_t *fhp, int fhlen) +{ + struct nfscllayout *lyp; + + LIST_FOREACH(lyp, NFSCLLAYOUTHASH(clp, fhp, fhlen), nfsly_hash) + if (lyp->nfsly_fhlen == fhlen && + !NFSBCMP(lyp->nfsly_fh, fhp, fhlen)) + break; + return (lyp); +} + +/* + * Find a devinfo for this deviceid. Return NULL upon failure. + */ +static struct nfscldevinfo * +nfscl_finddevinfo(struct nfsclclient *clp, uint8_t *deviceid) +{ + struct nfscldevinfo *dip; + + LIST_FOREACH(dip, &clp->nfsc_devinfo, nfsdi_list) + if (NFSBCMP(dip->nfsdi_deviceid, deviceid, NFSX_V4DEVICEID) + == 0) + break; + return (dip); +} + +/* + * Merge the new file layout list into the main one, maintaining it in + * increasing offset order. 
+ */ +static void +nfscl_mergeflayouts(struct nfsclflayouthead *fhlp, + struct nfsclflayouthead *newfhlp) +{ + struct nfsclflayout *flp, *nflp, *prevflp, *tflp; + + flp = LIST_FIRST(fhlp); + prevflp = NULL; + LIST_FOREACH_SAFE(nflp, newfhlp, nfsfl_list, tflp) { + while (flp != NULL && flp->nfsfl_off < nflp->nfsfl_off) { + prevflp = flp; + flp = LIST_NEXT(flp, nfsfl_list); + } + if (prevflp == NULL) + LIST_INSERT_HEAD(fhlp, nflp, nfsfl_list); + else + LIST_INSERT_AFTER(prevflp, nflp, nfsfl_list); + prevflp = nflp; + } +} + +/* + * Add this nfscldevinfo to the client, if it doesn't already exist. + * This function consumes the structure pointed at by dip, if not NULL. + */ +APPLESTATIC int +nfscl_adddevinfo(struct nfsmount *nmp, struct nfscldevinfo *dip, + struct nfsclflayout *flp) +{ + struct nfsclclient *clp; + struct nfscldevinfo *tdip; + + NFSLOCKCLSTATE(); + clp = nmp->nm_clp; + if (clp == NULL) { + NFSUNLOCKCLSTATE(); + if (dip != NULL) + free(dip, M_NFSDEVINFO); + return (ENODEV); + } + tdip = nfscl_finddevinfo(clp, flp->nfsfl_dev); + if (tdip != NULL) { + tdip->nfsdi_layoutrefs++; + flp->nfsfl_devp = tdip; + nfscl_reldevinfo_locked(tdip); + NFSUNLOCKCLSTATE(); + if (dip != NULL) + free(dip, M_NFSDEVINFO); + return (0); + } + if (dip != NULL) { + LIST_INSERT_HEAD(&clp->nfsc_devinfo, dip, nfsdi_list); + dip->nfsdi_layoutrefs = 1; + flp->nfsfl_devp = dip; + } + NFSUNLOCKCLSTATE(); + if (dip == NULL) + return (ENODEV); + return (0); +} + +/* + * Free up a layout structure and associated file layout structure(s). 
+ */ +APPLESTATIC void +nfscl_freelayout(struct nfscllayout *layp) +{ + struct nfsclflayout *flp, *nflp; + struct nfsclrecalllayout *rp, *nrp; + + LIST_FOREACH_SAFE(flp, &layp->nfsly_flayread, nfsfl_list, nflp) { + LIST_REMOVE(flp, nfsfl_list); + nfscl_freeflayout(flp); + } + LIST_FOREACH_SAFE(flp, &layp->nfsly_flayrw, nfsfl_list, nflp) { + LIST_REMOVE(flp, nfsfl_list); + nfscl_freeflayout(flp); + } + LIST_FOREACH_SAFE(rp, &layp->nfsly_recall, nfsrecly_list, nrp) { + LIST_REMOVE(rp, nfsrecly_list); + free(rp, M_NFSLAYRECALL); + } + nfscl_layoutcnt--; + free(layp, M_NFSLAYOUT); +} + +/* + * Free up a file layout structure. + */ +APPLESTATIC void +nfscl_freeflayout(struct nfsclflayout *flp) +{ + int i; + + for (i = 0; i < flp->nfsfl_fhcnt; i++) + free(flp->nfsfl_fh[i], M_NFSFH); + if (flp->nfsfl_devp != NULL) + flp->nfsfl_devp->nfsdi_layoutrefs--; + free(flp, M_NFSFLAYOUT); +} + +/* + * Free up a file layout devinfo structure. + */ +APPLESTATIC void +nfscl_freedevinfo(struct nfscldevinfo *dip) +{ + + free(dip, M_NFSDEVINFO); +} + +/* + * Mark any layouts that match as recalled. + */ +static int +nfscl_layoutrecall(int recalltype, struct nfscllayout *lyp, uint32_t iomode, + uint64_t off, uint64_t len, uint32_t stateseqid, + struct nfsclrecalllayout *recallp) +{ + struct nfsclrecalllayout *rp, *orp; + + recallp->nfsrecly_recalltype = recalltype; + recallp->nfsrecly_iomode = iomode; + recallp->nfsrecly_stateseqid = stateseqid; + recallp->nfsrecly_off = off; + recallp->nfsrecly_len = len; + /* + * Order the list as file returns first, followed by fsid and any + * returns, both in increasing stateseqid order. + * Note that the seqids wrap around, so 1 is after 0xffffffff. + * (I'm not sure this is correct because I find RFC5661 confusing + * on this, but hopefully it will work ok.) 
+ */ + orp = NULL; + LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) { + orp = rp; + if ((recalltype == NFSLAYOUTRETURN_FILE && + (rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE || + nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) || + (recalltype != NFSLAYOUTRETURN_FILE && + rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE && + nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) { + LIST_INSERT_BEFORE(rp, recallp, nfsrecly_list); + break; + } + } + if (rp == NULL) { + if (orp == NULL) + LIST_INSERT_HEAD(&lyp->nfsly_recall, recallp, + nfsrecly_list); + else + LIST_INSERT_AFTER(orp, recallp, nfsrecly_list); + } + lyp->nfsly_flags |= NFSLY_RECALL; + return (0); +} + +/* + * Compare the two seqids for ordering. The trick is that the seqids can + * wrap around from 0xffffffff->0, so check for the cases where one + * has wrapped around. + * Return 1 if seqid1 comes before seqid2, 0 otherwise. + */ +static int +nfscl_seq(uint32_t seqid1, uint32_t seqid2) +{ + + if (seqid2 > seqid1 && (seqid2 - seqid1) >= 0x7fffffff) + /* seqid2 has wrapped around. */ + return (0); + if (seqid1 > seqid2 && (seqid1 - seqid2) >= 0x7fffffff) + /* seqid1 has wrapped around. */ + return (1); + if (seqid1 <= seqid2) + return (1); + return (0); +} + +/* + * Do a layout return for each of the recalls. + */ +static void +nfscl_layoutreturn(struct nfsmount *nmp, struct nfscllayout *lyp, + struct ucred *cred, NFSPROC_T *p) +{ + struct nfsclrecalllayout *rp; + nfsv4stateid_t stateid; + + NFSBCOPY(lyp->nfsly_stateid.other, stateid.other, NFSX_STATEIDOTHER); + LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) { + stateid.seqid = rp->nfsrecly_stateseqid; + (void)nfsrpc_layoutreturn(nmp, lyp->nfsly_fh, + lyp->nfsly_fhlen, 0, NFSLAYOUT_NFSV4_1_FILES, + rp->nfsrecly_iomode, rp->nfsrecly_recalltype, + rp->nfsrecly_off, rp->nfsrecly_len, + &stateid, 0, NULL, cred, p, NULL); + } +} + +/* + * Do the layout commit for a file layout. 
+ */ +static void +nfscl_dolayoutcommit(struct nfsmount *nmp, struct nfscllayout *lyp, + struct ucred *cred, NFSPROC_T *p) +{ + int error; + + error = nfsrpc_layoutcommit(nmp, lyp->nfsly_fh, lyp->nfsly_fhlen, + 0, 0, 0, lyp->nfsly_lastbyte, &lyp->nfsly_stateid, + NFSLAYOUT_NFSV4_1_FILES, 0, NULL, cred, p, NULL); + if (error == NFSERR_NOTSUPP) { + /* If the server doesn't want it, don't bother doing it. */ + NFSLOCKMNT(nmp); + nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT; + NFSUNLOCKMNT(nmp); + } +} + +/* + * Commit all layouts for a file (vnode). + */ +int +nfscl_layoutcommit(vnode_t vp, NFSPROC_T *p) +{ + struct nfsclclient *clp; + struct nfscllayout *lyp; + struct nfsnode *np = VTONFS(vp); + mount_t mp; + struct nfsmount *nmp; + + mp = vnode_mount(vp); + nmp = VFSTONFS(mp); + if (NFSHASNOLAYOUTCOMMIT(nmp)) + return (0); + NFSLOCKCLSTATE(); + clp = nmp->nm_clp; + if (clp == NULL) { + NFSUNLOCKCLSTATE(); + return (EPERM); + } + lyp = nfscl_findlayout(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); + if (lyp == NULL) { + NFSUNLOCKCLSTATE(); + return (EPERM); + } + nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp); + if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { + NFSUNLOCKCLSTATE(); + return (EPERM); + } +tryagain: + if ((lyp->nfsly_flags & NFSLY_WRITTEN) != 0) { + lyp->nfsly_flags &= ~NFSLY_WRITTEN; + NFSUNLOCKCLSTATE(); + NFSCL_DEBUG(4, "do layoutcommit2\n"); + nfscl_dolayoutcommit(clp->nfsc_nmp, lyp, NFSPROCCRED(p), p); + NFSLOCKCLSTATE(); + goto tryagain; + } + nfsv4_relref(&lyp->nfsly_lock); + NFSUNLOCKCLSTATE(); + return (0); +} + diff --git a/sys/fs/nfsclient/nfs_clvfsops.c b/sys/fs/nfsclient/nfs_clvfsops.c index 41a6b78aa50a..00dbf906174d 100644 --- a/sys/fs/nfsclient/nfs_clvfsops.c +++ b/sys/fs/nfsclient/nfs_clvfsops.c @@ -80,6 +80,8 @@ extern int nfscl_ticks; extern struct timeval nfsboottime; extern struct nfsstats newnfsstats; extern int nfsrv_useacl; +extern int nfscl_debuglevel; +NFSCLSTATEMUTEX; MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New 
NFS request header"); MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct"); @@ -104,7 +106,7 @@ static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, static int mountnfs(struct nfs_args *, struct mount *, struct sockaddr *, char *, u_char *, int, u_char *, int, u_char *, int, struct vnode **, struct ucred *, - struct thread *, int, int); + struct thread *, int, int, int); static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *, struct sockaddr_storage *, int *, off_t *, struct timeval *); @@ -296,9 +298,11 @@ nfs_statfs(struct mount *mp, struct statfs *sbp) if (!error) error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva, &attrflag, NULL); + if (error != 0) + NFSCL_DEBUG(2, "statfs=%d\n", error); if (attrflag == 0) { ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1, - td->td_ucred, td, &nfsva, NULL); + td->td_ucred, td, &nfsva, NULL, NULL); if (ret) { /* * Just set default values to get things going. @@ -521,7 +525,7 @@ nfs_mountdiskless(char *path, nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK); if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen, NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO, - NFS_DEFAULT_NEGNAMETIMEO)) != 0) { + NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) { printf("nfs_mountroot: mount %s on /: %d\n", path, error); return (error); } @@ -715,8 +719,8 @@ static const char *nfs_opts[] = { "from", "nfs_args", "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize", "retrans", "acregmin", "acregmax", "acdirmin", "acdirmax", "resvport", "readahead", "hostname", "timeout", "addr", "fh", "nfsv3", "sec", - "principal", "nfsv4", "gssname", "allgssname", "dirpath", - "nametimeo", "negnametimeo", "nocto", "wcommitsize", + "principal", "nfsv4", "gssname", "allgssname", "dirpath", "minorversion", + "nametimeo", "negnametimeo", "nocto", "pnfs", "wcommitsize", NULL }; /* @@ -763,6 +767,7 @@ nfs_mount(struct mount *mp) char *opt, *name, *secname; int nametimeo = 
NFS_DEFAULT_NAMETIMEO; int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO; + int minvers = 0; int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen; size_t hstlen; @@ -836,6 +841,8 @@ nfs_mount(struct mount *mp) args.flags |= NFSMNT_ALLGSSNAME; if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0) args.flags |= NFSMNT_NOCTO; + if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0) + args.flags |= NFSMNT_PNFS; if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal readdirsize"); @@ -988,6 +995,22 @@ nfs_mount(struct mount *mp) goto out; } } + if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) == + 0) { + /* As for readdirsize, reject an option given without a value. */ + if (opt == NULL) { + vfs_mount_error(mp, "illegal minorversion"); + error = EINVAL; + goto out; + } + ret = sscanf(opt, "%d", &minvers); + if (ret != 1 || minvers < 0 || minvers > 1 || + (args.flags & NFSMNT_NFSV4) == 0) { + vfs_mount_error(mp, "illegal minorversion: %s", opt); + error = EINVAL; + goto out; + } + } if (vfs_getopt(mp->mnt_optnew, "sec", (void **) &secname, NULL) == 0) nfs_sec_name(secname, &args.flags); @@ -1132,7 +1149,7 @@ nfs_mount(struct mount *mp) args.fh = nfh; error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath, dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td, - nametimeo, negnametimeo); + nametimeo, negnametimeo, minvers); out: if (!error) { MNT_ILOCK(mp); @@ -1176,14 +1193,20 @@ static int mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen, u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp, - struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo) + struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo, + int minvers) { struct nfsmount *nmp; struct nfsnode *np; int error, trycnt, ret; struct nfsvattr nfsva; + struct nfsclclient *clp; + struct nfsclds *dsp, *tdsp; + uint32_t lease; static u_int64_t clval = 0; + NFSCL_DEBUG(3, "in mnt\n"); + clp = NULL; if (mp->mnt_flag & MNT_UPDATE) { nmp = 
VFSTONFS(mp); printf("%s: MNT_UPDATE is no longer handled here\n", __func__); @@ -1259,6 +1282,10 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000); else nmp->nm_wcommitsize = hibufspace / 10; + if ((argp->flags & NFSMNT_NFSV4) != 0) + nmp->nm_minorvers = minvers; + else + nmp->nm_minorvers = 0; nfs_decode_args(mp, nmp, argp, hst, cred, td); @@ -1306,6 +1333,35 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0))) goto bad; + /* For NFSv4.1, get the clientid now. */ + if (nmp->nm_minorvers > 0) { + NFSCL_DEBUG(3, "at getcl\n"); + error = nfscl_getcl(mp, cred, td, 0, &clp); + NFSCL_DEBUG(3, "aft getcl=%d\n", error); + if (error != 0) + goto bad; + } + + if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) && + nmp->nm_dirpathlen > 0) { + NFSCL_DEBUG(3, "in dirp\n"); + /* + * If the fhsize on the mount point == 0 for V4, the mount + * path needs to be looked up. + */ + trycnt = 3; + do { + error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), + cred, td); + NFSCL_DEBUG(3, "aft dirp=%d\n", error); + if (error) + (void) nfs_catnap(PZERO, error, "nfsgetdirp"); + } while (error && --trycnt > 0); + if (error) { + error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); + goto bad; + } + } /* * A reference count is needed on the nfsnode representing the @@ -1315,24 +1371,6 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, * this problem, because one can identify root inodes by their * number == ROOTINO (2). */ - if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) && - nmp->nm_dirpathlen > 0) { - /* - * If the fhsize on the mount point == 0 for V4, the mount - * path needs to be looked up. 
- */ - trycnt = 3; - do { - error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), - cred, td); - if (error) - (void) nfs_catnap(PZERO, error, "nfsgetdirp"); - } while (error && --trycnt > 0); - if (error) { - error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); - goto bad; - } - } if (nmp->nm_fhsize > 0) { /* * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set @@ -1352,7 +1390,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, * (*vpp)->v_type with the correct value. */ ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1, - cred, td, &nfsva, NULL); + cred, td, &nfsva, NULL, &lease); if (ret) { /* * Just set default values to get things going. @@ -1367,8 +1405,25 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, nfsva.na_vattr.va_gen = 1; nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE; nfsva.na_vattr.va_size = 512 * 1024; + lease = 60; } (void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1); + if (nmp->nm_minorvers > 0) { + NFSCL_DEBUG(3, "lease=%d\n", (int)lease); + NFSLOCKCLSTATE(); + clp->nfsc_renew = NFSCL_RENEW(lease); + clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew; + clp->nfsc_clientidrev++; + if (clp->nfsc_clientidrev == 0) + clp->nfsc_clientidrev++; + NFSUNLOCKCLSTATE(); + /* + * Mount will succeed, so the renew thread can be + * started now. 
+ */ + nfscl_start_renewthread(clp); + nfscl_clientrelease(clp); + } if (argp->flags & NFSMNT_NFSV3) ncl_fsinfo(nmp, *vpp, cred, td); @@ -1390,10 +1445,20 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, error = EIO; bad: + if (clp != NULL) + nfscl_clientrelease(clp); newnfs_disconnect(&nmp->nm_sockreq); crfree(nmp->nm_sockreq.nr_cred); mtx_destroy(&nmp->nm_sockreq.nr_mtx); mtx_destroy(&nmp->nm_mtx); + if (nmp->nm_clp != NULL) { + NFSLOCKCLSTATE(); + LIST_REMOVE(nmp->nm_clp, nfsc_list); + NFSUNLOCKCLSTATE(); + free(nmp->nm_clp, M_NFSCLCLIENT); + } + TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) + nfscl_freenfsclds(dsp); FREE(nmp, M_NEWNFSMNT); FREE(nam, M_SONAME); return (error); @@ -1408,6 +1473,7 @@ nfs_unmount(struct mount *mp, int mntflags) struct thread *td; struct nfsmount *nmp; int error, flags = 0, trycnt = 0; + struct nfsclds *dsp, *tdsp; td = curthread; @@ -1448,6 +1514,8 @@ nfs_unmount(struct mount *mp, int mntflags) mtx_destroy(&nmp->nm_sockreq.nr_mtx); mtx_destroy(&nmp->nm_mtx); + TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) + nfscl_freenfsclds(dsp); FREE(nmp, M_NEWNFSMNT); out: return (error); diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index 12e018c7ad36..3a898f26a272 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -103,6 +103,7 @@ uint32_t nfscl_accesscache_load_done_id; extern struct nfsstats newnfsstats; extern int nfsrv_useacl; +extern int nfscl_debuglevel; MALLOC_DECLARE(M_NEWNFSREQ); /* @@ -606,6 +607,10 @@ nfs_open(struct vop_open_args *ap) np->n_directio_opens++; } + /* If opened for writing via NFSv4.1 or later, mark that for pNFS. */ + if (NFSHASPNFS(VFSTONFS(vp->v_mount)) && (fmode & FWRITE) != 0) + np->n_flag |= NWRITEOPENED; + /* * If this is an open for writing, capture a reference to the * credentials, so they can be used by ncl_putpages(). 
Using @@ -619,6 +624,7 @@ nfs_open(struct vop_open_args *ap) } else cred = NULL; mtx_unlock(&np->n_mtx); + if (cred != NULL) crfree(cred); vnode_create_vobject(vp, vattr.va_size, ap->a_td); @@ -1362,9 +1368,18 @@ ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) { int error, ret, attrflag; struct nfsvattr nfsva; + struct nfsmount *nmp; - error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva, &attrflag, - NULL); + nmp = VFSTONFS(vnode_mount(vp)); + error = EIO; + attrflag = 0; + if (NFSHASPNFS(nmp)) + error = nfscl_doiods(vp, uiop, NULL, NULL, + NFSV4OPEN_ACCESSREAD, cred, uiop->uio_td); + NFSCL_DEBUG(4, "readrpc: aft doiods=%d\n", error); + if (error != 0) + error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva, + &attrflag, NULL); if (attrflag) { ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (ret && !error) @@ -1383,10 +1398,20 @@ ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, int *iomode, int *must_commit, int called_from_strategy) { struct nfsvattr nfsva; - int error = 0, attrflag, ret; + int error, attrflag, ret; + struct nfsmount *nmp; - error = nfsrpc_write(vp, uiop, iomode, must_commit, cred, - uiop->uio_td, &nfsva, &attrflag, NULL, called_from_strategy); + nmp = VFSTONFS(vnode_mount(vp)); + error = EIO; + attrflag = 0; + if (NFSHASPNFS(nmp)) + error = nfscl_doiods(vp, uiop, iomode, must_commit, + NFSV4OPEN_ACCESSWRITE, cred, uiop->uio_td); + NFSCL_DEBUG(4, "writerpc: aft doiods=%d\n", error); + if (error != 0) + error = nfsrpc_write(vp, uiop, iomode, must_commit, cred, + uiop->uio_td, &nfsva, &attrflag, NULL, + called_from_strategy); if (attrflag) { if (VTONFS(vp)->n_flag & ND_NFSV4) ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 1, @@ -2534,7 +2559,6 @@ ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, struct nfsvattr nfsva; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error, attrflag; - u_char verf[NFSX_VERF]; mtx_lock(&nmp->nm_mtx); if ((nmp->nm_state & 
NFSSTA_HASWRITEVERF) == 0) { @@ -2542,21 +2566,13 @@ ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, return (0); } mtx_unlock(&nmp->nm_mtx); - error = nfsrpc_commit(vp, offset, cnt, cred, td, verf, &nfsva, + error = nfsrpc_commit(vp, offset, cnt, cred, td, &nfsva, &attrflag, NULL); - if (!error) { - mtx_lock(&nmp->nm_mtx); - if (NFSBCMP((caddr_t)nmp->nm_verf, verf, NFSX_VERF)) { - NFSBCOPY(verf, (caddr_t)nmp->nm_verf, NFSX_VERF); - error = NFSERR_STALEWRITEVERF; - } - mtx_unlock(&nmp->nm_mtx); - if (!error && attrflag) - (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, - 0, 1); - } else if (NFS_ISV4(vp)) { + if (attrflag != 0) + (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, + 0, 1); + if (error != 0 && NFS_ISV4(vp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); - } return (error); } @@ -2928,6 +2944,8 @@ ncl_flush(struct vnode *vp, int waitfor, struct ucred *cred, struct thread *td, mtx_unlock(&np->n_mtx); } else BO_UNLOCK(bo); + if (NFSHASPNFS(nmp)) + nfscl_layoutcommit(vp, td); mtx_lock(&np->n_mtx); if (np->n_flag & NWRITEERR) { error = np->n_error; diff --git a/sys/fs/nfsclient/nfsmount.h b/sys/fs/nfsclient/nfsmount.h index 8068c28ace2b..f5b56bfef62c 100644 --- a/sys/fs/nfsclient/nfsmount.h +++ b/sys/fs/nfsclient/nfsmount.h @@ -70,10 +70,12 @@ struct nfsmount { int nm_negnametimeo; /* timeout for -ve entries (sec) */ /* Newnfs additions */ + TAILQ_HEAD(, nfsclds) nm_sess; /* Session(s) for NFSv4.1. */ struct nfsclclient *nm_clp; uid_t nm_uid; /* Uid for SetClientID etc. 
*/ u_int64_t nm_clval; /* identifies which clientid */ u_int64_t nm_fsid[2]; /* NFSv4 fsid */ + int nm_minorvers; /* Minor version # for NFSv4 */ u_int16_t nm_krbnamelen; /* Krb5 host principal, if any */ u_int16_t nm_dirpathlen; /* and mount dirpath, for V4 */ u_int16_t nm_srvkrbnamelen; /* and the server's target name */ @@ -107,6 +109,12 @@ struct nfsmount { */ #define VFSTONFS(mp) ((struct nfsmount *)((mp)->mnt_data)) +/* + * Get a pointer to the MDS session, which is always the first element + * in the list. + */ +#define NFSMNT_MDSSESSION(m) (&(TAILQ_FIRST(&((m)->nm_sess))->nfsclds_sess)) + #ifndef NFS_DEFAULT_NAMETIMEO #define NFS_DEFAULT_NAMETIMEO 60 #endif diff --git a/sys/fs/nfsclient/nfsnode.h b/sys/fs/nfsclient/nfsnode.h index 209945a2d6a6..bbb67d713046 100644 --- a/sys/fs/nfsclient/nfsnode.h +++ b/sys/fs/nfsclient/nfsnode.h @@ -155,6 +155,8 @@ struct nfsnode { #define NREMOVEWANT 0x00004000 /* Want notification that remove is done */ #define NLOCK 0x00008000 /* Sleep lock the node */ #define NLOCKWANT 0x00010000 /* Want the sleep lock */ +#define NNOLAYOUT 0x00020000 /* Can't get a layout for this file */ +#define NWRITEOPENED 0x00040000 /* Has been opened for writing */ /* * Convert between nfsnode pointers and vnode pointers diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c index 515f8111d423..8b2f8b87782a 100644 --- a/sys/fs/nfsserver/nfs_nfsdstate.c +++ b/sys/fs/nfsserver/nfs_nfsdstate.c @@ -3780,7 +3780,8 @@ nfsrv_docallback(struct nfsclient *clp, int procnum, newnfs_sndunlock(&clp->lc_req.nr_lock); if (!error) { error = newnfs_request(nd, NULL, clp, &clp->lc_req, NULL, - NULL, cred, clp->lc_program, NFSV4_CBVERS, NULL, 1, NULL); + NULL, cred, clp->lc_program, NFSV4_CBVERS, NULL, 1, NULL, + NULL); } NFSFREECRED(cred); diff --git a/sys/nfsclient/nfsargs.h b/sys/nfsclient/nfsargs.h index f71a5550ed5c..9cd89459ed42 100644 --- a/sys/nfsclient/nfsargs.h +++ b/sys/nfsclient/nfsargs.h @@ -98,5 +98,6 @@ struct nfs_args { 
#define NFSMNT_ALLGSSNAME 0x08000000 /* Use principal for all accesses */ #define NFSMNT_STRICT3530 0x10000000 /* Adhere strictly to RFC3530 */ #define NFSMNT_NOCTO 0x20000000 /* Don't flush attrcache on open */ +#define NFSMNT_PNFS 0x40000000 /* Enable pNFS support */ #endif