From c057a378180e630854dae599324be358046ce244 Mon Sep 17 00:00:00 2001 From: Rick Macklem Date: Thu, 12 Dec 2019 23:22:55 +0000 Subject: [PATCH] Add support for NFSv4.2 to the NFS client and server. This patch adds support for NFSv4.2 (RFC-7862) and Extended Attributes (RFC-8276) to the NFS client and server. NFSv4.2 is comprised of several optional features that can be supported in addition to NFSv4.1. This patch adds the following optional features: - posix_fadvise(POSIX_FADV_WILLNEED/POSIX_FADV_DONTNEED) - posix_fallocate() - intra server file range copying via the copy_file_range(2) syscall --> Avoiding data transfer over the wire to/from the NFS client. - lseek(SEEK_DATA/SEEK_HOLE) - Extended attribute syscalls for "user" namespace attributes as defined by RFC-8276. Although this patch is fairly large, it should not affect support for the other versions of NFS. However it does add two new sysctls that allow a sysadmin to limit which minor versions of NFSv4 a server supports, allowing a sysadmin to disable NFSv4.2. Unfortunately, when the NFS stats structure was last revised, it was assumed that there would be no additional operations added beyond what was specified in RFC-7862. However RFC-8276 did add additional operations, forcing the NFS stats structure to be revised again. It now has extra unused entries in all arrays, so that future extensions to NFSv4.2 can be accommodated without revising this structure again. A future commit will update nfsstat(1) to report counts for the new NFSv4.2 specific operations/procedures. This patch affects the internal interface between the nfscommon, nfscl and nfsd modules and, as such, they all must be upgraded simultaneously. I will do a version bump (although arguably not needed), due to this. This code has survived a "make universe" but has not been built with a recent GCC. If you encounter build problems, please email me. 
Relnotes: yes --- sys/fs/nfs/nfs.h | 2 + sys/fs/nfs/nfs_commonport.c | 143 +++- sys/fs/nfs/nfs_commonsubs.c | 81 ++- sys/fs/nfs/nfs_var.h | 60 +- sys/fs/nfs/nfsclstate.h | 5 +- sys/fs/nfs/nfsport.h | 81 ++- sys/fs/nfs/nfsproto.h | 2 +- sys/fs/nfsclient/nfs_clrpcops.c | 934 ++++++++++++++++++++++++-- sys/fs/nfsclient/nfs_clstate.c | 12 +- sys/fs/nfsclient/nfs_clvfsops.c | 2 +- sys/fs/nfsclient/nfs_clvnops.c | 667 +++++++++++++++++++ sys/fs/nfsclient/nfsmount.h | 8 + sys/fs/nfsserver/nfs_nfsdkrpc.c | 2 +- sys/fs/nfsserver/nfs_nfsdport.c | 727 ++++++++++++++++++--- sys/fs/nfsserver/nfs_nfsdserv.c | 1010 ++++++++++++++++++++++++++++- sys/fs/nfsserver/nfs_nfsdsocket.c | 79 ++- sys/fs/nfsserver/nfs_nfsdstate.c | 36 +- sys/fs/nfsserver/nfs_nfsdsubs.c | 4 +- 18 files changed, 3655 insertions(+), 200 deletions(-) diff --git a/sys/fs/nfs/nfs.h b/sys/fs/nfs/nfs.h index 2c811b7150fc..87f5716ed5bd 100644 --- a/sys/fs/nfs/nfs.h +++ b/sys/fs/nfs/nfs.h @@ -668,6 +668,8 @@ struct nfsrv_descript { uint32_t *nd_sequence; /* Sequence Op. ptr */ nfsv4stateid_t nd_curstateid; /* Current StateID */ nfsv4stateid_t nd_savedcurstateid; /* Saved Current StateID */ + uint32_t nd_maxreq; /* Max. request (session). */ + uint32_t nd_maxresp; /* Max. reply (session). 
*/ }; #define nd_princlen nd_gssnamelen diff --git a/sys/fs/nfs/nfs_commonport.c b/sys/fs/nfs/nfs_commonport.c index 1a1e64cd823b..5f94ab31e02d 100644 --- a/sys/fs/nfs/nfs_commonport.c +++ b/sys/fs/nfs/nfs_commonport.c @@ -80,6 +80,7 @@ int nfs_pnfsio(task_fn_t *, void *); static int nfs_realign_test; static int nfs_realign_count; static struct ext_nfsstats oldnfsstats; +static struct nfsstatsov1 nfsstatsov1; SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem"); SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, @@ -580,11 +581,143 @@ nfssvc_call(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) } else { error = copyin(uap->argp, &nfsstatver, sizeof(nfsstatver)); - if (error == 0 && nfsstatver.vers != NFSSTATS_V1) - error = EPERM; - if (error == 0) - error = copyout(&nfsstatsv1, uap->argp, - sizeof (nfsstatsv1)); + if (error == 0) { + if (nfsstatver.vers == NFSSTATS_OV1) { + /* Copy nfsstatsv1 to nfsstatsov1. */ + nfsstatsov1.attrcache_hits = + nfsstatsv1.attrcache_hits; + nfsstatsov1.attrcache_misses = + nfsstatsv1.attrcache_misses; + nfsstatsov1.lookupcache_hits = + nfsstatsv1.lookupcache_hits; + nfsstatsov1.lookupcache_misses = + nfsstatsv1.lookupcache_misses; + nfsstatsov1.direofcache_hits = + nfsstatsv1.direofcache_hits; + nfsstatsov1.direofcache_misses = + nfsstatsv1.direofcache_misses; + nfsstatsov1.accesscache_hits = + nfsstatsv1.accesscache_hits; + nfsstatsov1.accesscache_misses = + nfsstatsv1.accesscache_misses; + nfsstatsov1.biocache_reads = + nfsstatsv1.biocache_reads; + nfsstatsov1.read_bios = + nfsstatsv1.read_bios; + nfsstatsov1.read_physios = + nfsstatsv1.read_physios; + nfsstatsov1.biocache_writes = + nfsstatsv1.biocache_writes; + nfsstatsov1.write_bios = + nfsstatsv1.write_bios; + nfsstatsov1.write_physios = + nfsstatsv1.write_physios; + nfsstatsov1.biocache_readlinks = + nfsstatsv1.biocache_readlinks; + nfsstatsov1.readlink_bios = + nfsstatsv1.readlink_bios; + nfsstatsov1.biocache_readdirs = + 
nfsstatsv1.biocache_readdirs; + nfsstatsov1.readdir_bios = + nfsstatsv1.readdir_bios; + for (i = 0; i < NFSV42_NPROCS; i++) + nfsstatsov1.rpccnt[i] = + nfsstatsv1.rpccnt[i]; + nfsstatsov1.rpcretries = + nfsstatsv1.rpcretries; + for (i = 0; i < NFSV42_PURENOPS; i++) + nfsstatsov1.srvrpccnt[i] = + nfsstatsv1.srvrpccnt[i]; + for (i = NFSV42_NOPS, + j = NFSV42_PURENOPS; + i < NFSV42_NOPS + NFSV4OP_FAKENOPS; + i++, j++) + nfsstatsov1.srvrpccnt[j] = + nfsstatsv1.srvrpccnt[i]; + nfsstatsov1.srvrpc_errs = + nfsstatsv1.srvrpc_errs; + nfsstatsov1.srv_errs = + nfsstatsv1.srv_errs; + nfsstatsov1.rpcrequests = + nfsstatsv1.rpcrequests; + nfsstatsov1.rpctimeouts = + nfsstatsv1.rpctimeouts; + nfsstatsov1.rpcunexpected = + nfsstatsv1.rpcunexpected; + nfsstatsov1.rpcinvalid = + nfsstatsv1.rpcinvalid; + nfsstatsov1.srvcache_inproghits = + nfsstatsv1.srvcache_inproghits; + nfsstatsov1.srvcache_idemdonehits = + nfsstatsv1.srvcache_idemdonehits; + nfsstatsov1.srvcache_nonidemdonehits = + nfsstatsv1.srvcache_nonidemdonehits; + nfsstatsov1.srvcache_misses = + nfsstatsv1.srvcache_misses; + nfsstatsov1.srvcache_tcppeak = + nfsstatsv1.srvcache_tcppeak; + nfsstatsov1.srvcache_size = + nfsstatsv1.srvcache_size; + nfsstatsov1.srvclients = + nfsstatsv1.srvclients; + nfsstatsov1.srvopenowners = + nfsstatsv1.srvopenowners; + nfsstatsov1.srvopens = + nfsstatsv1.srvopens; + nfsstatsov1.srvlockowners = + nfsstatsv1.srvlockowners; + nfsstatsov1.srvlocks = + nfsstatsv1.srvlocks; + nfsstatsov1.srvdelegates = + nfsstatsv1.srvdelegates; + for (i = 0; i < NFSV42_CBNOPS; i++) + nfsstatsov1.cbrpccnt[i] = + nfsstatsv1.cbrpccnt[i]; + nfsstatsov1.clopenowners = + nfsstatsv1.clopenowners; + nfsstatsov1.clopens = + nfsstatsv1.clopens; + nfsstatsov1.cllockowners = + nfsstatsv1.cllockowners; + nfsstatsov1.cllocks = + nfsstatsv1.cllocks; + nfsstatsov1.cldelegates = + nfsstatsv1.cldelegates; + nfsstatsov1.cllocalopenowners = + nfsstatsv1.cllocalopenowners; + nfsstatsov1.cllocalopens = + nfsstatsv1.cllocalopens; + 
nfsstatsov1.cllocallockowners = + nfsstatsv1.cllocallockowners; + nfsstatsov1.cllocallocks = + nfsstatsv1.cllocallocks; + nfsstatsov1.srvstartcnt = + nfsstatsv1.srvstartcnt; + nfsstatsov1.srvdonecnt = + nfsstatsv1.srvdonecnt; + for (i = NFSV42_NOPS, + j = NFSV42_PURENOPS; + i < NFSV42_NOPS + NFSV4OP_FAKENOPS; + i++, j++) { + nfsstatsov1.srvbytes[j] = + nfsstatsv1.srvbytes[i]; + nfsstatsov1.srvops[j] = + nfsstatsv1.srvops[i]; + nfsstatsov1.srvduration[j] = + nfsstatsv1.srvduration[i]; + } + nfsstatsov1.busyfrom = + nfsstatsv1.busyfrom; + nfsstatsov1.busytime = + nfsstatsv1.busytime; + error = copyout(&nfsstatsov1, uap->argp, + sizeof(nfsstatsov1)); + } else if (nfsstatver.vers != NFSSTATS_V1) + error = EPERM; + else + error = copyout(&nfsstatsv1, uap->argp, + sizeof(nfsstatsv1)); + } } if (error == 0) { if ((uap->flag & NFSSVC_ZEROCLTSTATS) != 0) { diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c index 0f43cebc5049..9d6511dfcbfd 100644 --- a/sys/fs/nfs/nfs_commonsubs.c +++ b/sys/fs/nfs/nfs_commonsubs.c @@ -47,6 +47,8 @@ __FBSDID("$FreeBSD$"); #include +#include + #include /* @@ -91,6 +93,10 @@ int nfsrv_maxpnfsmirror = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, pnfsmirror, CTLFLAG_RD, &nfsrv_maxpnfsmirror, 0, "Mirror level for pNFS service"); +int nfs_maxcopyrange = 10 * 1024 * 1024; +SYSCTL_INT(_vfs_nfs, OID_AUTO, maxcopyrange, CTLFLAG_RW, + &nfs_maxcopyrange, 0, "Max size of a Copy so RPC times reasonable"); + /* * This array of structures indicates, for V4: * retfh - which of 3 types of calling args are used @@ -108,7 +114,7 @@ SYSCTL_INT(_vfs_nfs, OID_AUTO, pnfsmirror, CTLFLAG_RD, * non-idempotent Ops. * Define it here, since it is used by both the client and server. 
*/ -struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS] = { +struct nfsv4_opflag nfsv4_opflag[NFSV42_NOPS] = { { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* undef */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* undef */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* undef */ @@ -168,6 +174,23 @@ struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS] = { { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Want Delegation */ { 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 }, /* Destroy ClientID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Reclaim Complete */ + { 0, 1, 1, 1, LK_EXCLUSIVE, 1, 0 }, /* Allocate */ + { 2, 1, 1, 0, LK_SHARED, 1, 0 }, /* Copy */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Copy Notify */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Deallocate */ + { 0, 1, 0, 0, LK_SHARED, 1, 0 }, /* IO Advise */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Layout Error */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Layout Stats */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Offload Cancel */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Offload Status */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Read Plus */ + { 0, 1, 0, 0, LK_SHARED, 1, 0 }, /* Seek */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Write Same */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Clone */ + { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* Getxattr */ + { 0, 1, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Setxattr */ + { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* Listxattrs */ + { 0, 1, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Removexattr */ }; #endif /* !APPLEKEXT */ @@ -192,9 +215,10 @@ static struct nfsrv_lughash *nfsgroupnamehash; * marked 0 in this array, the code will still work, just not quite as * efficiently.) 
*/ -static int nfs_bigreply[NFSV41_NPROCS] = { 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, +static int nfs_bigreply[NFSV42_NPROCS] = { 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 }; + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 1 }; /* local functions */ static int nfsrv_skipace(struct nfsrv_descript *nd, int *acesizep); @@ -211,7 +235,7 @@ static struct { int opcnt; const u_char *tag; int taglen; -} nfsv4_opmap[NFSV41_NPROCS] = { +} nfsv4_opmap[NFSV42_NPROCS] = { { 0, 1, "Null", 4 }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_SETATTR, 2, "Setattr", 7, }, @@ -268,15 +292,24 @@ static struct { { NFSV4OP_COMMIT, 1, "CommitDS", 8, }, { NFSV4OP_OPEN, 3, "OpenLayoutGet", 13, }, { NFSV4OP_OPEN, 8, "CreateLayGet", 12, }, + { NFSV4OP_IOADVISE, 1, "Advise", 6, }, + { NFSV4OP_ALLOCATE, 2, "Allocate", 8, }, + { NFSV4OP_SAVEFH, 5, "Copy", 4, }, + { NFSV4OP_SEEK, 2, "Seek", 4, }, + { NFSV4OP_SEEK, 1, "SeekDS", 6, }, + { NFSV4OP_GETXATTR, 2, "Getxattr", 8, }, + { NFSV4OP_SETXATTR, 2, "Setxattr", 8, }, + { NFSV4OP_REMOVEXATTR, 2, "Rmxattr", 7, }, + { NFSV4OP_LISTXATTRS, 2, "Listxattr", 9, }, }; /* * NFS RPCS that have large request message size. 
*/ -static int nfs_bigrequest[NFSV41_NPROCS] = { +static int nfs_bigrequest[NFSV42_NPROCS] = { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 + 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }; /* @@ -301,13 +334,17 @@ nfscl_reqstart(struct nfsrv_descript *nd, int procnum, struct nfsmount *nmp, nd->nd_flag = ND_NFSV4 | ND_NFSCL; if (minorvers == NFSV41_MINORVERSION) nd->nd_flag |= ND_NFSV41; + else if (minorvers == NFSV42_MINORVERSION) + nd->nd_flag |= (ND_NFSV41 | ND_NFSV42); } else if (vers == NFS_VER3) nd->nd_flag = ND_NFSV3 | ND_NFSCL; else { if (NFSHASNFSV4(nmp)) { nd->nd_flag = ND_NFSV4 | ND_NFSCL; - if (NFSHASNFSV4N(nmp)) + if (nmp->nm_minorvers == 1) nd->nd_flag |= ND_NFSV41; + else if (nmp->nm_minorvers == 2) + nd->nd_flag |= (ND_NFSV41 | ND_NFSV42); } else if (NFSHASNFSV3(nmp)) nd->nd_flag = ND_NFSV3 | ND_NFSCL; else @@ -356,7 +393,9 @@ nfscl_reqstart(struct nfsrv_descript *nd, int procnum, struct nfsmount *nmp, (void) nfsm_strtom(nd, nfsv4_opmap[procnum].tag, nfsv4_opmap[procnum].taglen); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); - if ((nd->nd_flag & ND_NFSV41) != 0) + if ((nd->nd_flag & ND_NFSV42) != 0) + *tl++ = txdr_unsigned(NFSV42_MINORVERSION); + else if ((nd->nd_flag & ND_NFSV41) != 0) *tl++ = txdr_unsigned(NFSV41_MINORVERSION); else *tl++ = txdr_unsigned(NFSV4_MINORVERSION); @@ -409,7 +448,7 @@ nfscl_reqstart(struct nfsrv_descript *nd, int procnum, struct nfsmount *nmp, } else { (void) nfsm_fhtom(nd, nfhp, fhlen, 0); } - if (procnum < NFSV41_NPROCS) + if (procnum < NFSV42_NPROCS) NFSINCRGLOBAL(nfsstatsv1.rpccnt[procnum]); } @@ -2449,6 +2488,8 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, struct nfsfsinfo fsinf; struct timespec temptime; NFSACL_T *aclp, *naclp = NULL; + size_t atsiz; + bool xattrsupp; #ifdef QUOTA struct dqblk dqb; uid_t savuid; @@ -2523,6 +2564,18 @@ 
nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, } } + /* Check to see if Extended Attributes are supported. */ + xattrsupp = false; + if (NFSISSET_ATTRBIT(retbitp, NFSATTRBIT_XATTRSUPPORT)) { + if (NFSVOPLOCK(vp, LK_SHARED) == 0) { + error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, + "xxx", NULL, &atsiz, cred, p); + NFSVOPUNLOCK(vp, 0); + if (error != EOPNOTSUPP) + xattrsupp = true; + } + } + /* * Put out the attribute bitmap for the ones being filled in * and get the field for the number of attributes returned. @@ -2972,6 +3025,14 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, *tl = txdr_unsigned(NFS_SRVMAXIO); retnum += NFSX_UNSIGNED; break; + case NFSATTRBIT_XATTRSUPPORT: + NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); + if (xattrsupp) + *tl = newnfs_true; + else + *tl = newnfs_false; + retnum += NFSX_UNSIGNED; + break; default: printf("EEK! Bad V4 attribute bitpos=%d\n", bitpos); } @@ -4629,6 +4690,8 @@ nfsv4_setsequence(struct nfsmount *nmp, struct nfsrv_descript *nd, error = nfsv4_sequencelookup(nmp, sep, &slotpos, &maxslot, &slotseq, sessionid); + nd->nd_maxreq = sep->nfsess_maxreq; + nd->nd_maxresp = sep->nfsess_maxresp; /* Build the Sequence arguments. 
*/ NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED); diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h index ea2af575cfb8..829448d88233 100644 --- a/sys/fs/nfs/nfs_var.h +++ b/sys/fs/nfs/nfs_var.h @@ -128,7 +128,8 @@ void nfsrv_setupstable(NFSPROC_T *); void nfsrv_updatestable(NFSPROC_T *); void nfsrv_writestable(u_char *, int, int, NFSPROC_T *); void nfsrv_throwawayopens(NFSPROC_T *); -int nfsrv_checkremove(vnode_t, int, NFSPROC_T *); +int nfsrv_checkremove(vnode_t, int, struct nfsrv_descript *, nfsquad_t, + NFSPROC_T *); void nfsd_recalldelegation(vnode_t, NFSPROC_T *); void nfsd_disabledelegation(vnode_t, NFSPROC_T *); int nfsrv_checksetattr(vnode_t, struct nfsrv_descript *, @@ -161,6 +162,7 @@ void nfsrv_freealllayoutsanddevids(void); void nfsrv_freefilelayouts(fhandle_t *); int nfsrv_deldsserver(int, char *, NFSPROC_T *); struct nfsdevice *nfsrv_deldsnmp(int, struct nfsmount *, NFSPROC_T *); +int nfsrv_delds(char *, NFSPROC_T *); int nfsrv_createdevids(struct nfsd_nfsd_args *, NFSPROC_T *); int nfsrv_checkdsattr(vnode_t, NFSPROC_T *); int nfsrv_copymr(vnode_t, vnode_t, vnode_t, struct nfsdevice *, @@ -268,8 +270,28 @@ int nfsrvd_layoutcommit(struct nfsrv_descript *, int, vnode_t, struct nfsexstuff *); int nfsrvd_layoutreturn(struct nfsrv_descript *, int, vnode_t, struct nfsexstuff *); +int nfsrvd_ioadvise(struct nfsrv_descript *, int, + vnode_t, struct nfsexstuff *); +int nfsrvd_layouterror(struct nfsrv_descript *, int, + vnode_t, struct nfsexstuff *); +int nfsrvd_layoutstats(struct nfsrv_descript *, int, + vnode_t, struct nfsexstuff *); int nfsrvd_teststateid(struct nfsrv_descript *, int, vnode_t, struct nfsexstuff *); +int nfsrvd_allocate(struct nfsrv_descript *, int, + vnode_t, struct nfsexstuff *); +int nfsrvd_copy_file_range(struct nfsrv_descript *, int, + vnode_t, vnode_t, struct nfsexstuff *, struct nfsexstuff *); +int nfsrvd_seek(struct nfsrv_descript *, int, + vnode_t, struct nfsexstuff *); +int nfsrvd_getxattr(struct 
nfsrv_descript *, int, + vnode_t, struct nfsexstuff *); +int nfsrvd_setxattr(struct nfsrv_descript *, int, + vnode_t, struct nfsexstuff *); +int nfsrvd_rmxattr(struct nfsrv_descript *, int, + vnode_t, struct nfsexstuff *); +int nfsrvd_listxattr(struct nfsrv_descript *, int, + vnode_t, struct nfsexstuff *); int nfsrvd_notsupp(struct nfsrv_descript *, int, vnode_t, struct nfsexstuff *); @@ -501,10 +523,11 @@ int nfsrpc_delegreturn(struct nfscldeleg *, struct ucred *, int nfsrpc_getacl(vnode_t, struct ucred *, NFSPROC_T *, NFSACL_T *, void *); int nfsrpc_setacl(vnode_t, struct ucred *, NFSPROC_T *, NFSACL_T *, void *); int nfsrpc_exchangeid(struct nfsmount *, struct nfsclclient *, - struct nfssockreq *, uint32_t, struct nfsclds **, struct ucred *, + struct nfssockreq *, int, uint32_t, struct nfsclds **, struct ucred *, NFSPROC_T *); int nfsrpc_createsession(struct nfsmount *, struct nfsclsession *, - struct nfssockreq *, uint32_t, int, struct ucred *, NFSPROC_T *); + struct nfssockreq *, struct nfsclds *, uint32_t, int, struct ucred *, + NFSPROC_T *); int nfsrpc_destroysession(struct nfsmount *, struct nfsclclient *, struct ucred *, NFSPROC_T *); int nfsrpc_destroyclient(struct nfsmount *, struct nfsclclient *, @@ -518,11 +541,27 @@ int nfsrpc_layoutreturn(struct nfsmount *, uint8_t *, int, int, int, uint32_t, int, uint64_t, uint64_t, nfsv4stateid_t *, struct ucred *, NFSPROC_T *, uint32_t, uint32_t, char *); int nfsrpc_reclaimcomplete(struct nfsmount *, struct ucred *, NFSPROC_T *); +int nfsrpc_advise(vnode_t, off_t, uint64_t, int, struct ucred *, NFSPROC_T *); int nfscl_doiods(vnode_t, struct uio *, int *, int *, uint32_t, int, struct ucred *, NFSPROC_T *); int nfscl_findlayoutforio(struct nfscllayout *, uint64_t, uint32_t, struct nfsclflayout **); void nfscl_freenfsclds(struct nfsclds *); +int nfsrpc_allocate(vnode_t, off_t, off_t, struct nfsvattr *, int *, + struct ucred *, NFSPROC_T *, void *); +int nfsrpc_copy_file_range(vnode_t, off_t *, vnode_t, off_t *, 
size_t *, + unsigned int, int *, struct nfsvattr *, int *, struct nfsvattr *, + struct ucred *, bool, bool *); +int nfsrpc_seek(vnode_t, off_t *, bool *, int, struct ucred *, + struct nfsvattr *, int *); +int nfsrpc_getextattr(vnode_t, const char *, struct uio *, ssize_t *, + struct nfsvattr *, int *, struct ucred *, NFSPROC_T *); +int nfsrpc_setextattr(vnode_t, const char *, struct uio *, struct nfsvattr *, + int *, struct ucred *, NFSPROC_T *); +int nfsrpc_listextattr(vnode_t, uint64_t *, struct uio *, size_t *, bool *, + struct nfsvattr *, int *, struct ucred *, NFSPROC_T *); +int nfsrpc_rmextattr(vnode_t, const char *, struct nfsvattr *, int *, + struct ucred *, NFSPROC_T *); /* nfs_clstate.c */ int nfscl_open(vnode_t, u_int8_t *, int, u_int32_t, int, @@ -644,8 +683,8 @@ int nfsvno_readlink(vnode_t, struct ucred *, NFSPROC_T *, mbuf_t *, mbuf_t *, int *); int nfsvno_read(vnode_t, off_t, int, struct ucred *, NFSPROC_T *, mbuf_t *, mbuf_t *); -int nfsvno_write(vnode_t, off_t, int, int, int *, mbuf_t, - char *, struct ucred *, NFSPROC_T *); +int nfsvno_write(vnode_t, off_t, int, int *, mbuf_t, char *, struct ucred *, + NFSPROC_T *); int nfsvno_createsub(struct nfsrv_descript *, struct nameidata *, vnode_t *, struct nfsvattr *, int *, int32_t *, NFSDEV_T, struct nfsexstuff *); @@ -704,6 +743,17 @@ int nfsrv_dscreate(struct vnode *, struct vattr *, struct vattr *, int nfsrv_updatemdsattr(struct vnode *, struct nfsvattr *, NFSPROC_T *); void nfsrv_killrpcs(struct nfsmount *); int nfsrv_setacl(struct vnode *, NFSACL_T *, struct ucred *, NFSPROC_T *); +int nfsvno_seek(struct nfsrv_descript *, struct vnode *, u_long, off_t *, int, + bool *, struct ucred *, NFSPROC_T *); +int nfsvno_allocate(struct vnode *, off_t, off_t, struct ucred *, NFSPROC_T *); +int nfsvno_getxattr(struct vnode *, char *, uint32_t, struct ucred *, + struct thread *, struct mbuf **, struct mbuf **, int *); +int nfsvno_setxattr(struct vnode *, char *, int, struct mbuf *, char *, + struct ucred *, 
struct thread *); +int nfsvno_rmxattr(struct nfsrv_descript *, struct vnode *, char *, + struct ucred *, struct thread *); +int nfsvno_listxattr(struct vnode *, uint64_t, struct ucred *, struct thread *, + u_char **, uint32_t *, bool *); /* nfs_commonkrpc.c */ int newnfs_nmcancelreqs(struct nfsmount *); diff --git a/sys/fs/nfs/nfsclstate.h b/sys/fs/nfs/nfsclstate.h index 2ada4bfc5540..e17be74c5581 100644 --- a/sys/fs/nfs/nfsclstate.h +++ b/sys/fs/nfs/nfsclstate.h @@ -64,6 +64,8 @@ struct nfsclsession { uint64_t nfsess_slots; uint32_t nfsess_sequenceid; uint32_t nfsess_maxcache; /* Max size for cached reply. */ + uint32_t nfsess_maxreq; /* Max request size. */ + uint32_t nfsess_maxresp; /* Max reply size. */ uint16_t nfsess_foreslots; uint16_t nfsess_backslots; uint8_t nfsess_sessionid[NFSX_V4SESSIONID]; @@ -72,7 +74,7 @@ struct nfsclsession { /* * This structure holds the session, clientid and related information - * needed for an NFSv4.1 Meta Data Server (MDS) or Data Server (DS). + * needed for an NFSv4.1 or NFSv4.2 Meta Data Server (MDS) or Data Server (DS). * It is malloc'd to the correct length. */ struct nfsclds { @@ -95,6 +97,7 @@ struct nfsclds { #define NFSCLDS_DS 0x0004 #define NFSCLDS_CLOSED 0x0008 #define NFSCLDS_SAMECONN 0x0010 +#define NFSCLDS_MINORV2 0x0020 struct nfsclclient { LIST_ENTRY(nfsclclient) nfsc_list; diff --git a/sys/fs/nfs/nfsport.h b/sys/fs/nfs/nfsport.h index 3b1309e57220..5cd4ca023911 100644 --- a/sys/fs/nfs/nfsport.h +++ b/sys/fs/nfs/nfsport.h @@ -289,7 +289,7 @@ /* * Must be one more than the last NFSv4.2 op#. */ -#define NFSV42_NOPS 72 +#define NFSV42_NOPS 76 /* Quirky case if the illegal op code */ #define NFSV4OP_OPILLEGAL 10044 @@ -423,10 +423,10 @@ #endif /* NFS_V3NPROCS */ /* - * New stats structure. + * Newest stats structure. * The vers field will be set to NFSSTATS_V1 by the caller. */ -#define NFSSTATS_V1 1 +#define NFSSTATS_V1 2 struct nfsstatsv1 { int vers; /* Set to version requested by caller. 
*/ uint64_t attrcache_hits; @@ -447,9 +447,74 @@ struct nfsstatsv1 { uint64_t readlink_bios; uint64_t biocache_readdirs; uint64_t readdir_bios; - uint64_t rpccnt[NFSV41_NPROCS + 13]; + uint64_t rpccnt[NFSV42_NPROCS + 15]; uint64_t rpcretries; - uint64_t srvrpccnt[NFSV42_NOPS + NFSV4OP_FAKENOPS]; + uint64_t srvrpccnt[NFSV42_NOPS + NFSV4OP_FAKENOPS + 15]; + uint64_t srvrpc_errs; + uint64_t srv_errs; + uint64_t rpcrequests; + uint64_t rpctimeouts; + uint64_t rpcunexpected; + uint64_t rpcinvalid; + uint64_t srvcache_inproghits; + uint64_t srvcache_idemdonehits; + uint64_t srvcache_nonidemdonehits; + uint64_t srvcache_misses; + uint64_t srvcache_tcppeak; + int srvcache_size; /* Updated by atomic_xx_int(). */ + uint64_t srvclients; + uint64_t srvopenowners; + uint64_t srvopens; + uint64_t srvlockowners; + uint64_t srvlocks; + uint64_t srvdelegates; + uint64_t cbrpccnt[NFSV42_CBNOPS + 10]; + uint64_t clopenowners; + uint64_t clopens; + uint64_t cllockowners; + uint64_t cllocks; + uint64_t cldelegates; + uint64_t cllocalopenowners; + uint64_t cllocalopens; + uint64_t cllocallockowners; + uint64_t cllocallocks; + uint64_t srvstartcnt; + uint64_t srvdonecnt; + uint64_t srvbytes[NFSV42_NOPS + NFSV4OP_FAKENOPS + 15]; + uint64_t srvops[NFSV42_NOPS + NFSV4OP_FAKENOPS + 15]; + struct bintime srvduration[NFSV42_NOPS + NFSV4OP_FAKENOPS + 15]; + struct bintime busyfrom; + struct bintime busytime; +}; + +/* + * Newer stats structure. + * The vers field will be set to NFSSTATS_OV1 by the caller. + */ +#define NFSSTATS_OV1 1 +struct nfsstatsov1 { + int vers; /* Set to version requested by caller. 
*/ + uint64_t attrcache_hits; + uint64_t attrcache_misses; + uint64_t lookupcache_hits; + uint64_t lookupcache_misses; + uint64_t direofcache_hits; + uint64_t direofcache_misses; + uint64_t accesscache_hits; + uint64_t accesscache_misses; + uint64_t biocache_reads; + uint64_t read_bios; + uint64_t read_physios; + uint64_t biocache_writes; + uint64_t write_bios; + uint64_t write_physios; + uint64_t biocache_readlinks; + uint64_t readlink_bios; + uint64_t biocache_readdirs; + uint64_t readdir_bios; + uint64_t rpccnt[NFSV42_NPROCS + 4]; + uint64_t rpcretries; + uint64_t srvrpccnt[NFSV42_PURENOPS + NFSV4OP_FAKENOPS]; uint64_t srvrpc_errs; uint64_t srv_errs; uint64_t rpcrequests; @@ -480,9 +545,9 @@ struct nfsstatsv1 { uint64_t cllocallocks; uint64_t srvstartcnt; uint64_t srvdonecnt; - uint64_t srvbytes[NFSV42_NOPS + NFSV4OP_FAKENOPS]; - uint64_t srvops[NFSV42_NOPS + NFSV4OP_FAKENOPS]; - struct bintime srvduration[NFSV42_NOPS + NFSV4OP_FAKENOPS]; + uint64_t srvbytes[NFSV42_PURENOPS + NFSV4OP_FAKENOPS]; + uint64_t srvops[NFSV42_PURENOPS + NFSV4OP_FAKENOPS]; + struct bintime srvduration[NFSV42_PURENOPS + NFSV4OP_FAKENOPS]; struct bintime busyfrom; struct bintime busytime; }; diff --git a/sys/fs/nfs/nfsproto.h b/sys/fs/nfs/nfsproto.h index 2b26b394f9c9..1578eece7207 100644 --- a/sys/fs/nfs/nfsproto.h +++ b/sys/fs/nfs/nfsproto.h @@ -1070,7 +1070,7 @@ struct nfsv3_sattr { /* Not sure what attribute bit#81/0x00020000 is? */ #define NFSATTRBM_XATTRSUPPORT 0x00040000 -#define NFSATTRBIT_MAX 77 +#define NFSATTRBIT_MAX 83 /* * Sets of attributes that are supported, by words in the bitmap. 
diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c index 60a55a4d14aa..f0237649d042 100644 --- a/sys/fs/nfsclient/nfs_clrpcops.c +++ b/sys/fs/nfsclient/nfs_clrpcops.c @@ -48,6 +48,8 @@ __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include +#include +#include #include #include @@ -72,6 +74,8 @@ extern int nfsrv_useacl; extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN]; extern int nfscl_debuglevel; extern int nfs_pnfsiothreads; +extern u_long sb_max_adj; +extern int nfs_maxcopyrange; NFSCLSTATEMUTEX; int nfstest_outofseq = 0; int nfscl_assumeposixlocks = 1; @@ -110,6 +114,9 @@ struct nfsclwritedsdorpc { struct nfsclds *dsp; uint64_t off; int len; +#ifdef notyet + int advise; +#endif struct nfsfh *fhp; struct mbuf *m; int vers; @@ -142,7 +149,8 @@ static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int, uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **, struct ucred *, NFSPROC_T *); static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_in *, - struct sockaddr_in6 *, sa_family_t, int, struct nfsclds **, NFSPROC_T *); + struct sockaddr_in6 *, sa_family_t, int, int, struct nfsclds **, + NFSPROC_T *); static void nfscl_initsessionslots(struct nfsclsession *); static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *, nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *, @@ -172,12 +180,21 @@ static int nfsio_commitds(vnode_t, uint64_t, int, struct nfsclds *, NFSPROC_T *); static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *, struct nfsfh *, int, int, struct ucred *, NFSPROC_T *); +#ifdef notyet +static int nfsio_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *, + struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *, + NFSPROC_T *); +static int nfsrpc_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *, + struct nfsfh *, int, int, struct ucred *, NFSPROC_T *); +#endif +static int nfsrpc_allocaterpc(vnode_t, off_t, off_t, nfsv4stateid_t *, + 
struct nfsvattr *, int *, struct ucred *, NFSPROC_T *, void *); static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t, uint64_t, uint64_t, nfsv4stateid_t *, int, int, int); static int nfsrv_parseug(struct nfsrv_descript *, int, uid_t *, gid_t *, NFSPROC_T *); -static int nfsrv_parselayoutget(struct nfsrv_descript *, nfsv4stateid_t *, - int *, struct nfsclflayouthead *); +static int nfsrv_parselayoutget(struct nfsmount *, struct nfsrv_descript *, + nfsv4stateid_t *, int *, struct nfsclflayouthead *); static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *, int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int, struct nfscldeleg **, struct ucred *, NFSPROC_T *); @@ -200,6 +217,11 @@ static int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t, static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *, int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **, struct nfsclflayouthead *, int, int, int *, struct ucred *, NFSPROC_T *); +static int nfsrpc_copyrpc(vnode_t, off_t, vnode_t, off_t, size_t *, + nfsv4stateid_t *, nfsv4stateid_t *, struct nfsvattr *, int *, + struct nfsvattr *, int *, bool, int *, struct ucred *, NFSPROC_T *); +static int nfsrpc_seekrpc(vnode_t, off_t *, nfsv4stateid_t *, bool *, + int, struct nfsvattr *, int *, struct ucred *); int nfs_pnfsio(task_fn_t *, void *); @@ -935,12 +957,12 @@ nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim, * previous session has failed, so... * do an ExchangeID followed by the CreateSession. 
*/ - error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, + error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, 0, NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp, cred, p); NFSCL_DEBUG(1, "aft exch=%d\n", error); if (error == 0) error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess, - &nmp->nm_sockreq, + &nmp->nm_sockreq, NULL, dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p); if (error == 0) { NFSLOCKMNT(nmp); @@ -4647,8 +4669,8 @@ nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p, */ int nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp, - struct nfssockreq *nrp, uint32_t exchflags, struct nfsclds **dspp, - struct ucred *cred, NFSPROC_T *p) + struct nfssockreq *nrp, int minorvers, uint32_t exchflags, + struct nfsclds **dspp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl, v41flags; struct nfsrv_descript nfsd; @@ -4658,7 +4680,10 @@ nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp, int error, len; *dspp = NULL; - nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL, 0, 0); + if (minorvers == 0) + minorvers = nmp->nm_minorvers; + nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL, + NFS_VER4, minorvers); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* Client owner */ *tl = txdr_unsigned(clp->nfsc_rev); @@ -4709,6 +4734,8 @@ nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp, } if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0) dsp->nfsclds_flags |= NFSCLDS_DS; + if (minorvers == NFSV42_MINORVERSION) + dsp->nfsclds_flags |= NFSCLDS_MINORV2; if (len > 0) nd->nd_repstat = nfsrv_mtostr(nd, dsp->nfsclds_serverown, len); @@ -4732,21 +4759,27 @@ nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp, */ int nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep, - struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred, - NFSPROC_T *p) + struct nfssockreq *nrp, struct nfsclds *dsp, uint32_t sequenceid, int mds, + 
struct ucred *cred, NFSPROC_T *p) { uint32_t crflags, maxval, *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; - int error, irdcnt; + int error, irdcnt, minorvers; /* Make sure nm_rsize, nm_wsize is set. */ if (nmp->nm_rsize > NFS_MAXBSIZE || nmp->nm_rsize == 0) nmp->nm_rsize = NFS_MAXBSIZE; if (nmp->nm_wsize > NFS_MAXBSIZE || nmp->nm_wsize == 0) nmp->nm_wsize = NFS_MAXBSIZE; - nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL, 0, - 0); + if (dsp == NULL) + minorvers = nmp->nm_minorvers; + else if ((dsp->nfsclds_flags & NFSCLDS_MINORV2) != 0) + minorvers = NFSV42_MINORVERSION; + else + minorvers = NFSV41_MINORVERSION; + nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL, + NFS_VER4, minorvers); NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); *tl++ = sep->nfsess_clientid.lval[0]; *tl++ = sep->nfsess_clientid.lval[1]; @@ -4759,8 +4792,18 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep, /* Fill in fore channel attributes. */ NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED); *tl++ = 0; /* Header pad size */ - *tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);/* Max request size */ - *tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);/* Max reply size */ + if ((nd->nd_flag & ND_NFSV42) != 0 && mds != 0 && sb_max_adj >= + nmp->nm_wsize && sb_max_adj >= nmp->nm_rsize) { + /* + * NFSv4.2 Extended Attribute operations may want to do + * requests/replies that are larger than nm_rsize/nm_wsize. 
+ */ + *tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR); + *tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR); + } else { + *tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR); + *tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR); + } *tl++ = txdr_unsigned(4096); /* Max response size cached */ *tl++ = txdr_unsigned(20); /* Max operations */ *tl++ = txdr_unsigned(64); /* Max slots */ @@ -4817,6 +4860,7 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep, else break; } + sep->nfsess_maxreq = maxval; /* Make sure nm_rsize is small enough. */ maxval = fxdr_unsigned(uint32_t, *tl++); @@ -4826,6 +4870,7 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep, else break; } + sep->nfsess_maxresp = maxval; sep->nfsess_maxcache = fxdr_unsigned(int, *tl++); tl++; @@ -4928,7 +4973,8 @@ nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode, if (error != 0) return (error); if (nd->nd_repstat == 0) - error = nfsrv_parselayoutget(nd, stateidp, retonclosep, flhp); + error = nfsrv_parselayoutget(nmp, nd, stateidp, retonclosep, + flhp); if (error == 0 && nd->nd_repstat != 0) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); @@ -4950,7 +4996,8 @@ nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype, struct sockaddr_in6 sin6, ssin6; struct nfsclds *dsp = NULL, **dspp, **gotdspp; struct nfscldevinfo *ndi; - int addrcnt = 0, bitcnt, error, gotvers, i, isudp, j, stripecnt; + int addrcnt = 0, bitcnt, error, gotminor, gotvers, i, isudp, j; + int stripecnt; uint8_t stripeindex; sa_family_t af, safilled; @@ -5082,7 +5129,8 @@ nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype, } } - gotvers = NFS_VER4; /* Always NFSv4 for File Layout. */ + gotvers = NFS_VER4; /* Default NFSv4.1 for File Layout. */ + gotminor = NFSV41_MINORVERSION; /* For Flex File, we will take one of the versions to use. 
*/ if (layouttype == NFSLAYOUT_FLEXFILE) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); @@ -5093,14 +5141,19 @@ nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype, goto nfsmout; } gotvers = 0; + gotminor = 0; for (i = 0; i < j; i++) { NFSM_DISSECT(tl, uint32_t *, 5 * NFSX_UNSIGNED); vers = fxdr_unsigned(uint32_t, *tl++); minorvers = fxdr_unsigned(uint32_t, *tl++); - if ((vers == NFS_VER4 && minorvers == - NFSV41_MINORVERSION) || (vers == NFS_VER3 && - gotvers == 0)) { + if (vers == NFS_VER3) + minorvers = 0; + if ((vers == NFS_VER4 && ((minorvers == + NFSV41_MINORVERSION && gotminor == 0) || + minorvers == NFSV42_MINORVERSION)) || + (vers == NFS_VER3 && gotvers == 0)) { gotvers = vers; + gotminor = minorvers; /* We'll take this one. */ ndi->nfsdi_versindex = i; ndi->nfsdi_vers = vers; @@ -5118,7 +5171,7 @@ nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype, } } if (gotvers == 0) { - printf("pNFS: no NFSv3 or NFSv4.1\n"); + printf("pNFS: no NFSv3, NFSv4.1 or NFSv4.2\n"); error = NFSERR_BADXDR; goto nfsmout; } @@ -5144,7 +5197,7 @@ nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype, * NFS version and IP address. 
*/ error = nfsrpc_fillsa(nmp, &ssin, &ssin6, safilled, - gotvers, &dsp, p); + gotvers, gotminor, &dsp, p); } if (error == 0) { KASSERT(gotdspp != NULL, ("gotdspp is NULL")); @@ -5373,15 +5426,15 @@ nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp, */ static int nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin, - struct sockaddr_in6 *sin6, sa_family_t af, int vers, struct nfsclds **dspp, - NFSPROC_T *p) + struct sockaddr_in6 *sin6, sa_family_t af, int vers, int minorvers, + struct nfsclds **dspp, NFSPROC_T *p) { struct sockaddr_in *msad, *sad; struct sockaddr_in6 *msad6, *sad6; struct nfsclclient *clp; struct nfssockreq *nrp; struct nfsclds *dsp, *tdsp; - int error; + int error, firsttry; enum nfsclds_state retv; uint32_t sequenceid; @@ -5492,9 +5545,16 @@ nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin, /* Now, do the exchangeid and create session. */ if (error == 0) { if (vers == NFS_VER4) { - error = nfsrpc_exchangeid(nmp, clp, nrp, - NFSV4EXCH_USEPNFSDS, &dsp, nrp->nr_cred, p); - NFSCL_DEBUG(3, "DS exchangeid=%d\n", error); + firsttry = 0; + do { + error = nfsrpc_exchangeid(nmp, clp, nrp, + minorvers, NFSV4EXCH_USEPNFSDS, &dsp, + nrp->nr_cred, p); + NFSCL_DEBUG(3, "DS exchangeid=%d\n", error); + if (error == NFSERR_MINORVERMISMATCH) + minorvers = NFSV42_MINORVERSION; + } while (error == NFSERR_MINORVERMISMATCH && + firsttry++ == 0); if (error != 0) newnfs_disconnect(nrp); } else { @@ -5534,7 +5594,7 @@ nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin, dsp->nfsclds_sess.nfsess_sequenceid; NFSUNLOCKMNT(nmp); error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess, - nrp, sequenceid, 0, nrp->nr_cred, p); + nrp, dsp, sequenceid, 0, nrp->nr_cred, p); NFSCL_DEBUG(3, "DS createsess=%d\n", error); } } else { @@ -5896,7 +5956,7 @@ nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess, } /* - * Do I/O using an NFSv4.1 file layout. + * Do I/O using an NFSv4.1 or NFSv4.2 file layout. 
*/ static int nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, @@ -5905,7 +5965,7 @@ nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p) { uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer; - int commit_thru_mds, error, stripe_index, stripe_pos; + int commit_thru_mds, error, stripe_index, stripe_pos, minorvers; struct nfsnode *np; struct nfsfh *fhp; struct nfsclds **dspp; @@ -5922,6 +5982,10 @@ nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, while (len > 0 && error == 0) { stripe_index = nfsfldi_stripeindex(dp, stripe_pos); dspp = nfsfldi_addr(dp, stripe_index); + if (((*dspp)->nfsclds_flags & NFSCLDS_MINORV2) != 0) + minorvers = NFSV42_MINORVERSION; + else + minorvers = NFSV41_MINORVERSION; if (len > transfer && docommit == 0) xfer = transfer; else @@ -5959,7 +6023,7 @@ nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, if (docommit != 0) { if (error == 0) error = nfsrpc_commitds(vp, io_off, xfer, - *dspp, fhp, 0, 0, cred, p); + *dspp, fhp, NFS_VER4, minorvers, cred, p); if (error == 0) { /* * Set both eof and uio_resid = 0 to end any @@ -5974,11 +6038,11 @@ nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, } } else if (rwflag == NFSV4OPEN_ACCESSREAD) error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp, - io_off, xfer, fhp, 0, 0, 0, cred, p); + io_off, xfer, fhp, 0, NFS_VER4, minorvers, cred, p); else { error = nfsrpc_writeds(vp, uiop, iomode, must_commit, stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds, - 0, 0, 0, cred, p); + 0, NFS_VER4, minorvers, cred, p); if (error == 0) { NFSLOCKCLSTATE(); lyp->nfsly_flags |= NFSLY_WRITTEN; @@ -6686,6 +6750,259 @@ nfsio_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp, return (error); } +/* + * NFS Advise rpc + */ +APPLESTATIC int +nfsrpc_advise(vnode_t vp, off_t offset, uint64_t cnt, int 
advise, + struct ucred *cred, NFSPROC_T *p) +{ + u_int32_t *tl; + struct nfsrv_descript nfsd, *nd = &nfsd; + nfsattrbit_t hints; + int error; + + NFSZERO_ATTRBIT(&hints); + if (advise == POSIX_FADV_WILLNEED) + NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED); + else if (advise == POSIX_FADV_DONTNEED) + NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED); + else + return (0); + NFSCL_REQSTART(nd, NFSPROC_IOADVISE, vp); + nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO); + NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER); + txdr_hyper(offset, tl); + tl += 2; + txdr_hyper(cnt, tl); + nfsrv_putattrbit(nd, &hints); + error = nfscl_request(nd, vp, p, cred, NULL); + if (error != 0) + return (error); + if (nd->nd_repstat != 0) + error = nd->nd_repstat; + mbuf_freem(nd->nd_mrep); + return (error); +} + +#ifdef notyet +/* + * NFS advise rpc to a NFSv4.2 DS. + */ +static int +nfsrpc_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise, + struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers, + struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + struct nfsrv_descript nfsd, *nd = &nfsd; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + struct nfssockreq *nrp; + nfsattrbit_t hints; + int error; + + /* For NFS DSs prior to NFSv4.2, just return OK. 
*/ + if (vers == NFS_VER3 || minorvers < NFSV42_MINORVERSION) + return (0); + NFSZERO_ATTRBIT(&hints); + if (advise == POSIX_FADV_WILLNEED) + NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED); + else if (advise == POSIX_FADV_DONTNEED) + NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED); + else + return (0); + nd->nd_mrep = NULL; + nfscl_reqstart(nd, NFSPROC_IOADVISEDS, nmp, fhp->nfh_fh, + fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); + vers = NFS_VER4; + NFSCL_DEBUG(4, "nfsrpc_adviseds: vers=%d minvers=%d\n", vers, + minorvers); + nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO); + NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER); + txdr_hyper(offset, tl); + tl += 2; + txdr_hyper((uint64_t)cnt, tl); /* Count is a 64bit length4. */ + nfsrv_putattrbit(nd, &hints); + nrp = dsp->nfsclds_sockp; + if (nrp == NULL) + /* If NULL, use the MDS socket. */ + nrp = &nmp->nm_sockreq; + error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, + NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess); + NFSCL_DEBUG(4, "nfsrpc_adviseds: err=%d stat=%d\n", error, + nd->nd_repstat); + if (error != 0) + return (error); + if (nd->nd_repstat != 0) + error = nd->nd_repstat; + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * Start up the thread that will execute nfsrpc_adviseds(). + */ +static void +start_adviseds(void *arg, int pending) +{ + struct nfsclwritedsdorpc *drpc; + + drpc = (struct nfsclwritedsdorpc *)arg; + drpc->err = nfsrpc_adviseds(drpc->vp, drpc->off, drpc->len, + drpc->advise, drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers, + drpc->cred, drpc->p); + drpc->done = 1; + NFSCL_DEBUG(4, "start_adviseds: err=%d\n", drpc->err); +} + +/* + * Set up the advise DS call for the pNFS I/O thread. 
+ */ +static int +nfsio_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise, + struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers, + struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p) +{ + int error, ret; + + error = 0; + drpc->done = 0; + drpc->vp = vp; + drpc->off = offset; + drpc->len = cnt; + drpc->advise = advise; + drpc->dsp = dsp; + drpc->fhp = fhp; + drpc->vers = vers; + drpc->minorvers = minorvers; + drpc->cred = cred; + drpc->p = p; + drpc->inprog = 0; + ret = EIO; + if (nfs_pnfsiothreads != 0) { + ret = nfs_pnfsio(start_adviseds, drpc); + NFSCL_DEBUG(4, "nfsio_adviseds: nfs_pnfsio=%d\n", ret); + } + if (ret != 0) + error = nfsrpc_adviseds(vp, offset, cnt, advise, dsp, fhp, vers, + minorvers, cred, p); + NFSCL_DEBUG(4, "nfsio_adviseds: error=%d\n", error); + return (error); +} +#endif /* notyet */ + +/* + * Do the Allocate operation, retrying for recovery. + */ +APPLESTATIC int +nfsrpc_allocate(vnode_t vp, off_t off, off_t len, struct nfsvattr *nap, + int *attrflagp, struct ucred *cred, NFSPROC_T *p, void *stuff) +{ + int error, expireret = 0, retrycnt, nostateid; + uint32_t clidrev = 0; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + struct nfsfh *nfhp = NULL; + nfsv4stateid_t stateid; + off_t tmp_off; + void *lckp; + + if (len < 0) + return (EINVAL); + if (len == 0) + return (0); + tmp_off = off + len; + NFSLOCKMNT(nmp); + if (tmp_off > nmp->nm_maxfilesize || tmp_off < off) { + NFSUNLOCKMNT(nmp); + return (EFBIG); + } + if (nmp->nm_clp != NULL) + clidrev = nmp->nm_clp->nfsc_clientidrev; + NFSUNLOCKMNT(nmp); + nfhp = VTONFS(vp)->n_fhp; + retrycnt = 0; + do { + lckp = NULL; + nostateid = 0; + nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, + NFSV4OPEN_ACCESSWRITE, 0, cred, p, &stateid, &lckp); + if (stateid.other[0] == 0 && stateid.other[1] == 0 && + stateid.other[2] == 0) { + nostateid = 1; + NFSCL_DEBUG(1, "stateid0 in allocate\n"); + } + + /* + * Not finding a stateid should probably never happen, + * but just 
return an error for this case. + */ + if (nostateid != 0) + error = EIO; + else + error = nfsrpc_allocaterpc(vp, off, len, &stateid, + nap, attrflagp, cred, p, stuff); + if (error == NFSERR_STALESTATEID) + nfscl_initiate_recovery(nmp->nm_clp); + if (lckp != NULL) + nfscl_lockderef(lckp); + if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || + error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { + (void) nfs_catnap(PZERO, error, "nfs_allocate"); + } else if ((error == NFSERR_EXPIRED || + error == NFSERR_BADSTATEID) && clidrev != 0) { + expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); + } + retrycnt++; + } while (error == NFSERR_GRACE || error == NFSERR_DELAY || + error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || + error == NFSERR_STALEDONTRECOVER || + (error == NFSERR_OLDSTATEID && retrycnt < 20) || + ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && + expireret == 0 && clidrev != 0 && retrycnt < 4)); + if (error != 0 && retrycnt >= 4) + error = EIO; + return (error); +} + +/* + * The allocate RPC. 
+ */ +static int +nfsrpc_allocaterpc(vnode_t vp, off_t off, off_t len, nfsv4stateid_t *stateidp, + struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p, + void *stuff) +{ + uint32_t *tl; + int error; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + nfsattrbit_t attrbits; + + *attrflagp = 0; + NFSCL_REQSTART(nd, NFSPROC_ALLOCATE, vp); + nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); + NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); + txdr_hyper(off, tl); tl += 2; + txdr_hyper(len, tl); tl += 2; + *tl = txdr_unsigned(NFSV4OP_GETATTR); + NFSGETATTR_ATTRBIT(&attrbits); + nfsrv_putattrbit(nd, &attrbits); + error = nfscl_request(nd, vp, p, cred, stuff); + if (error != 0) + return (error); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + error = nfsm_loadattr(nd, nap); + if (error == 0) + *attrflagp = NFS_LATTR_NOSHRINK; + } else + error = nd->nd_repstat; +nfsmout: + mbuf_freem(nd->nd_mrep); + return (error); +} + /* * Set up the XDR arguments for the LayoutGet operation. */ @@ -6727,8 +7044,8 @@ nfsrv_setuplayoutget(struct nfsrv_descript *nd, int iomode, uint64_t offset, * Parse the reply for a successful LayoutGet operation. 
*/ static int -nfsrv_parselayoutget(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, - int *retonclosep, struct nfsclflayouthead *flhp) +nfsrv_parselayoutget(struct nfsmount *nmp, struct nfsrv_descript *nd, + nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp) { uint32_t *tl; struct nfsclflayout *flp, *prevflp, *tflp; @@ -6808,6 +7125,11 @@ nfsrv_parselayoutget(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++); NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util); + mtx_lock(&nmp->nm_mtx); + if (nmp->nm_minorvers > 1 && (flp->nfsfl_util & + NFSFLAYUTIL_IOADVISE_THRU_MDS) != 0) + nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS; + mtx_unlock(&nmp->nm_mtx); flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++); flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2; NFSCL_DEBUG(4, "stripe1=%u poff=%ju\n", @@ -6956,6 +7278,18 @@ nfsrv_parselayoutget(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, } NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); flp->nfsfl_fflags = fxdr_unsigned(uint32_t, *tl++); +#ifdef notnow + /* + * At this time, there is no flag. + * NFSFLEXFLAG_IOADVISE_THRU_MDS might need to be + * added, or it may never exist? 
+ */ + mtx_lock(&nmp->nm_mtx); + if (nmp->nm_minorvers > 1 && (flp->nfsfl_fflags & + NFSFLEXFLAG_IOADVISE_THRU_MDS) != 0) + nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS; + mtx_unlock(&nmp->nm_mtx); +#endif flp->nfsfl_statshint = fxdr_unsigned(uint32_t, *tl); NFSCL_DEBUG(4, "fflags=0x%x statshint=%d\n", flp->nfsfl_fflags, flp->nfsfl_statshint); @@ -7267,7 +7601,7 @@ nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); *laystatp = fxdr_unsigned(int, *++tl); if (*laystatp == 0) { - error = nfsrv_parselayoutget(nd, + error = nfsrv_parselayoutget(nmp, nd, stateidp, retonclosep, flhp); if (error != 0) *laystatp = error; @@ -7516,7 +7850,7 @@ nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap, NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED); *laystatp = fxdr_unsigned(int, *(tl + 3)); if (*laystatp == 0) { - error = nfsrv_parselayoutget(nd, + error = nfsrv_parselayoutget(nmp, nd, stateidp, retonclosep, flhp); if (error != 0) *laystatp = error; @@ -7664,3 +7998,525 @@ nfsrpc_layoutgetres(struct nfsmount *nmp, vnode_t vp, uint8_t *newfhp, return (laystat); } +/* + * nfs copy_file_range operation. + */ +APPLESTATIC int +nfsrpc_copy_file_range(vnode_t invp, off_t *inoffp, vnode_t outvp, + off_t *outoffp, size_t *lenp, unsigned int flags, int *inattrflagp, + struct nfsvattr *innap, int *outattrflagp, struct nfsvattr *outnap, + struct ucred *cred, bool consecutive, bool *must_commitp) +{ + int commit, error, expireret = 0, retrycnt; + u_int32_t clidrev = 0; + struct nfsmount *nmp = VFSTONFS(vnode_mount(invp)); + struct nfsfh *innfhp = NULL, *outnfhp = NULL; + nfsv4stateid_t instateid, outstateid; + void *inlckp, *outlckp; + + if (nmp->nm_clp != NULL) + clidrev = nmp->nm_clp->nfsc_clientidrev; + innfhp = VTONFS(invp)->n_fhp; + outnfhp = VTONFS(outvp)->n_fhp; + retrycnt = 0; + do { + /* Get both stateids. 
*/ + inlckp = NULL; + nfscl_getstateid(invp, innfhp->nfh_fh, innfhp->nfh_len, + NFSV4OPEN_ACCESSREAD, 0, NULL, curthread, &instateid, + &inlckp); + outlckp = NULL; + nfscl_getstateid(outvp, outnfhp->nfh_fh, outnfhp->nfh_len, + NFSV4OPEN_ACCESSWRITE, 0, NULL, curthread, &outstateid, + &outlckp); + + error = nfsrpc_copyrpc(invp, *inoffp, outvp, *outoffp, lenp, + &instateid, &outstateid, innap, inattrflagp, outnap, + outattrflagp, consecutive, &commit, cred, curthread); + if (error == 0) { + if (commit != NFSWRITE_FILESYNC) + *must_commitp = true; + *inoffp += *lenp; + *outoffp += *lenp; + } else if (error == NFSERR_STALESTATEID) + nfscl_initiate_recovery(nmp->nm_clp); + if (inlckp != NULL) + nfscl_lockderef(inlckp); + if (outlckp != NULL) + nfscl_lockderef(outlckp); + if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || + error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { + (void) nfs_catnap(PZERO, error, "nfs_cfr"); + } else if ((error == NFSERR_EXPIRED || + error == NFSERR_BADSTATEID) && clidrev != 0) { + expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, + curthread); + } + retrycnt++; + } while (error == NFSERR_GRACE || error == NFSERR_DELAY || + error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || + error == NFSERR_STALEDONTRECOVER || + (error == NFSERR_OLDSTATEID && retrycnt < 20) || + ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && + expireret == 0 && clidrev != 0 && retrycnt < 4)); + if (error != 0 && (retrycnt >= 4 || + error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || + error == NFSERR_STALEDONTRECOVER)) + error = EIO; + return (error); +} + +/* + * The copy RPC. 
+ */ +static int +nfsrpc_copyrpc(vnode_t invp, off_t inoff, vnode_t outvp, off_t outoff, + size_t *lenp, nfsv4stateid_t *instateidp, nfsv4stateid_t *outstateidp, + struct nfsvattr *innap, int *inattrflagp, struct nfsvattr *outnap, + int *outattrflagp, bool consecutive, int *commitp, struct ucred *cred, + NFSPROC_T *p) +{ + uint32_t *tl; + int error; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + struct nfsmount *nmp; + nfsattrbit_t attrbits; + uint64_t len; + + nmp = VFSTONFS(outvp->v_mount); + *inattrflagp = *outattrflagp = 0; + *commitp = NFSWRITE_UNSTABLE; + len = *lenp; + *lenp = 0; + if (len > nfs_maxcopyrange) + len = nfs_maxcopyrange; + NFSCL_REQSTART(nd, NFSPROC_COPY, invp); + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4OP_GETATTR); + NFSGETATTR_ATTRBIT(&attrbits); + nfsrv_putattrbit(nd, &attrbits); + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4OP_PUTFH); + nfsm_fhtom(nd, VTONFS(outvp)->n_fhp->nfh_fh, + VTONFS(outvp)->n_fhp->nfh_len, 0); + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4OP_COPY); + nfsm_stateidtom(nd, instateidp, NFSSTATEID_PUTSTATEID); + nfsm_stateidtom(nd, outstateidp, NFSSTATEID_PUTSTATEID); + NFSM_BUILD(tl, uint32_t *, 3 * NFSX_HYPER + 4 * NFSX_UNSIGNED); + txdr_hyper(inoff, tl); tl += 2; + txdr_hyper(outoff, tl); tl += 2; + txdr_hyper(len, tl); tl += 2; + if (consecutive) + *tl++ = newnfs_true; + else + *tl++ = newnfs_false; + *tl++ = newnfs_true; + *tl++ = 0; + *tl = txdr_unsigned(NFSV4OP_GETATTR); + NFSWRITEGETATTR_ATTRBIT(&attrbits); + nfsrv_putattrbit(nd, &attrbits); + error = nfscl_request(nd, invp, p, cred, NULL); + if (error != 0) + return (error); + if ((nd->nd_flag & ND_NOMOREDATA) == 0) { + /* Get the input file's attributes. 
*/ + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + if (*(tl + 1) == 0) { + error = nfsm_loadattr(nd, innap); + if (error != 0) + goto nfsmout; + *inattrflagp = 1; + } else + nd->nd_flag |= ND_NOMOREDATA; + } + /* Skip over return stat for PutFH. */ + if ((nd->nd_flag & ND_NOMOREDATA) == 0) { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + if (*++tl != 0) + nd->nd_flag |= ND_NOMOREDATA; + } + /* Skip over return stat for Copy. */ + if ((nd->nd_flag & ND_NOMOREDATA) == 0) + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + if (*tl != 0) { + /* There should be no callback ids. */ + error = NFSERR_BADXDR; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED + + NFSX_VERF); + len = fxdr_hyper(tl); tl += 2; + *commitp = fxdr_unsigned(int, *tl++); + NFSLOCKMNT(nmp); + if (!NFSHASWRITEVERF(nmp)) { + NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); + NFSSETWRITEVERF(nmp); + } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) { + NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); + nd->nd_repstat = NFSERR_STALEWRITEVERF; + } + NFSUNLOCKMNT(nmp); + tl += (NFSX_VERF / NFSX_UNSIGNED); + if (nd->nd_repstat == 0 && *++tl != newnfs_true) + /* Must be a synchronous copy. */ + nd->nd_repstat = NFSERR_NOTSUPP; + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + error = nfsm_loadattr(nd, outnap); + if (error == 0) + *outattrflagp = NFS_LATTR_NOSHRINK; + if (nd->nd_repstat == 0) + *lenp = len; + } else if (nd->nd_repstat == NFSERR_OFFLOADNOREQS) { + /* + * For the case where consecutive is not supported, but + * synchronous is supported, we can try consecutive == false + * by returning this error. Otherwise, return NFSERR_NOTSUPP, + * since Copy cannot be done. 
+ */ + if ((nd->nd_flag & ND_NOMOREDATA) == 0) { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + if (!consecutive || *++tl == newnfs_false) + nd->nd_repstat = NFSERR_NOTSUPP; + } else + nd->nd_repstat = NFSERR_BADXDR; + } + if (error == 0) + error = nd->nd_repstat; +nfsmout: + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * Seek operation. + */ +APPLESTATIC int +nfsrpc_seek(vnode_t vp, off_t *offp, bool *eofp, int content, + struct ucred *cred, struct nfsvattr *nap, int *attrflagp) +{ + int error, expireret = 0, retrycnt; + u_int32_t clidrev = 0; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + struct nfsnode *np = VTONFS(vp); + struct nfsfh *nfhp = NULL; + nfsv4stateid_t stateid; + void *lckp; + + if (nmp->nm_clp != NULL) + clidrev = nmp->nm_clp->nfsc_clientidrev; + nfhp = np->n_fhp; + retrycnt = 0; + do { + lckp = NULL; + nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, + NFSV4OPEN_ACCESSREAD, 0, cred, curthread, &stateid, &lckp); + error = nfsrpc_seekrpc(vp, offp, &stateid, eofp, content, + nap, attrflagp, cred); + if (error == NFSERR_STALESTATEID) + nfscl_initiate_recovery(nmp->nm_clp); + if (lckp != NULL) + nfscl_lockderef(lckp); + if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || + error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { + (void) nfs_catnap(PZERO, error, "nfs_seek"); + } else if ((error == NFSERR_EXPIRED || + error == NFSERR_BADSTATEID) && clidrev != 0) { + expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, + curthread); + } + retrycnt++; + } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || + error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_BADSESSION || + (error == NFSERR_OLDSTATEID && retrycnt < 20) || + ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && + expireret == 0 && clidrev != 0 && retrycnt < 4) || + (error == NFSERR_OPENMODE && retrycnt < 4)); + if (error && retrycnt >= 4) + error = 
EIO; + return (error); +} + +/* + * The seek RPC. + */ +static int +nfsrpc_seekrpc(vnode_t vp, off_t *offp, nfsv4stateid_t *stateidp, bool *eofp, + int content, struct nfsvattr *nap, int *attrflagp, struct ucred *cred) +{ + uint32_t *tl; + int error; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + nfsattrbit_t attrbits; + + *attrflagp = 0; + NFSCL_REQSTART(nd, NFSPROC_SEEK, vp); + nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); + NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); + txdr_hyper(*offp, tl); tl += 2; + *tl++ = txdr_unsigned(content); + *tl = txdr_unsigned(NFSV4OP_GETATTR); + NFSGETATTR_ATTRBIT(&attrbits); + nfsrv_putattrbit(nd, &attrbits); + error = nfscl_request(nd, vp, curthread, cred, NULL); + if (error != 0) + return (error); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED + NFSX_HYPER); + if (*tl++ == newnfs_true) + *eofp = true; + else + *eofp = false; + *offp = fxdr_hyper(tl); + /* Getattr op status was dissected above, so just load the attrs. */ + error = nfsm_loadattr(nd, nap); + if (error == 0) + *attrflagp = 1; + } else + error = nd->nd_repstat; +nfsmout: + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * The getextattr RPC. 
+ */ +APPLESTATIC int +nfsrpc_getextattr(vnode_t vp, const char *name, struct uio *uiop, ssize_t *lenp, + struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + int error; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + nfsattrbit_t attrbits; + uint32_t len, len2; + + *attrflagp = 0; + NFSCL_REQSTART(nd, NFSPROC_GETEXTATTR, vp); + nfsm_strtom(nd, name, strlen(name)); + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4OP_GETATTR); + NFSGETATTR_ATTRBIT(&attrbits); + nfsrv_putattrbit(nd, &attrbits); + error = nfscl_request(nd, vp, p, cred, NULL); + if (error != 0) + return (error); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + len = fxdr_unsigned(uint32_t, *tl); + /* Sanity check lengths. */ + if (uiop != NULL && len > 0 && len <= IOSIZE_MAX && + uiop->uio_resid <= UINT32_MAX) { + len2 = uiop->uio_resid; + if (len2 >= len) + error = nfsm_mbufuio(nd, uiop, len); + else { + error = nfsm_mbufuio(nd, uiop, len2); + if (error == 0) { + /* + * nfsm_mbufuio() advances to a multiple + * of 4, so round up len2 as well. Then + * we need to advance over the rest of + * the data, rounding up the remaining + * length. + */ + len2 = NFSM_RNDUP(len2); + len2 = NFSM_RNDUP(len - len2); + if (len2 > 0) + error = nfsm_advance(nd, len2, + -1); + } + } + } else if (uiop == NULL && len > 0) { + /* Just wants the length and not the data. */ + error = nfsm_advance(nd, NFSM_RNDUP(len), -1); + } else + error = ENOATTR; + if (error != 0) + goto nfsmout; + *lenp = len; + /* Just skip over Getattr op status. */ + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + error = nfsm_loadattr(nd, nap); + if (error == 0) + *attrflagp = 1; + } + if (error == 0) + error = nd->nd_repstat; +nfsmout: + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * The setextattr RPC. 
+ */ +APPLESTATIC int +nfsrpc_setextattr(vnode_t vp, const char *name, struct uio *uiop, + struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + int error; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + nfsattrbit_t attrbits; + + *attrflagp = 0; + NFSCL_REQSTART(nd, NFSPROC_SETEXTATTR, vp); + if (uiop->uio_resid > nd->nd_maxreq) { + /* nd_maxreq is set by NFSCL_REQSTART(). */ + mbuf_freem(nd->nd_mreq); + return (EINVAL); + } + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4SXATTR_EITHER); + nfsm_strtom(nd, name, strlen(name)); + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(uiop->uio_resid); + nfsm_uiombuf(nd, uiop, uiop->uio_resid); + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4OP_GETATTR); + NFSGETATTR_ATTRBIT(&attrbits); + nfsrv_putattrbit(nd, &attrbits); + error = nfscl_request(nd, vp, p, cred, NULL); + if (error != 0) + return (error); + if (nd->nd_repstat == 0) { + /* Just skip over the reply and Getattr op status. */ + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 * + NFSX_UNSIGNED); + error = nfsm_loadattr(nd, nap); + if (error == 0) + *attrflagp = 1; + } + if (error == 0) + error = nd->nd_repstat; +nfsmout: + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * The removeextattr RPC. 
+ */ +APPLESTATIC int +nfsrpc_rmextattr(vnode_t vp, const char *name, struct nfsvattr *nap, + int *attrflagp, struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + int error; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + nfsattrbit_t attrbits; + + *attrflagp = 0; + NFSCL_REQSTART(nd, NFSPROC_RMEXTATTR, vp); + nfsm_strtom(nd, name, strlen(name)); + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4OP_GETATTR); + NFSGETATTR_ATTRBIT(&attrbits); + nfsrv_putattrbit(nd, &attrbits); + error = nfscl_request(nd, vp, p, cred, NULL); + if (error != 0) + return (error); + if (nd->nd_repstat == 0) { + /* Skip over the change_info4 reply and Getattr op status. */ + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 * + NFSX_UNSIGNED); + error = nfsm_loadattr(nd, nap); + if (error == 0) + *attrflagp = 1; + } + if (error == 0) + error = nd->nd_repstat; +nfsmout: + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * The listextattr RPC. + */ +APPLESTATIC int +nfsrpc_listextattr(vnode_t vp, uint64_t *cookiep, struct uio *uiop, + size_t *lenp, bool *eofp, struct nfsvattr *nap, int *attrflagp, + struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + int cnt, error, i, len; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + nfsattrbit_t attrbits; + u_char c; + + *attrflagp = 0; + NFSCL_REQSTART(nd, NFSPROC_LISTEXTATTR, vp); + NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); + txdr_hyper(*cookiep, tl); tl += 2; + *tl++ = txdr_unsigned(*lenp); + *tl = txdr_unsigned(NFSV4OP_GETATTR); + NFSGETATTR_ATTRBIT(&attrbits); + nfsrv_putattrbit(nd, &attrbits); + error = nfscl_request(nd, vp, p, cred, NULL); + if (error != 0) + return (error); + *eofp = true; + *lenp = 0; + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); + *cookiep = fxdr_hyper(tl); tl += 2; + cnt = fxdr_unsigned(int, *tl); + if (cnt < 0) { + error = EBADRPC; + goto nfsmout; + } + for (i = 0; i < cnt; i++) { + NFSM_DISSECT(tl, 
uint32_t *, NFSX_UNSIGNED); + len = fxdr_unsigned(int, *tl); + if (len <= 0 || len > EXTATTR_MAXNAMELEN) { + error = EBADRPC; + goto nfsmout; + } + if (uiop == NULL) + error = nfsm_advance(nd, NFSM_RNDUP(len), -1); + else if (uiop->uio_resid >= len + 1) { + c = len; + error = uiomove(&c, sizeof(c), uiop); + if (error == 0) + error = nfsm_mbufuio(nd, uiop, len); + } else { + error = nfsm_advance(nd, NFSM_RNDUP(len), -1); + *eofp = false; + } + if (error != 0) + goto nfsmout; + *lenp += (len + 1); + } + /* Get the eof and skip over the Getattr op status. */ + NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED); + /* + * *eofp is set false above, because it wasn't able to copy + * all of the reply. + */ + if (*eofp && *tl == 0) + *eofp = false; + error = nfsm_loadattr(nd, nap); + if (error == 0) + *attrflagp = 1; + } + if (error == 0) + error = nd->nd_repstat; +nfsmout: + mbuf_freem(nd->nd_mrep); + return (error); +} + diff --git a/sys/fs/nfsclient/nfs_clstate.c b/sys/fs/nfsclient/nfs_clstate.c index 2fa35225bf05..445cd379b95d 100644 --- a/sys/fs/nfsclient/nfs_clstate.c +++ b/sys/fs/nfsclient/nfs_clstate.c @@ -3292,7 +3292,9 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED); NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); minorvers = fxdr_unsigned(u_int32_t, *tl++); - if (minorvers != NFSV4_MINORVERSION && minorvers != NFSV41_MINORVERSION) + if (minorvers != NFSV4_MINORVERSION && + minorvers != NFSV41_MINORVERSION && + minorvers != NFSV42_MINORVERSION) nd->nd_repstat = NFSERR_MINORVERMISMATCH; cbident = fxdr_unsigned(u_int32_t, *tl++); if (nd->nd_repstat) @@ -3310,14 +3312,16 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) if (op < NFSV4OP_CBGETATTR || (op > NFSV4OP_CBRECALL && minorvers == NFSV4_MINORVERSION) || (op > NFSV4OP_CBNOTIFYDEVID && - minorvers == NFSV41_MINORVERSION)) { + minorvers == NFSV41_MINORVERSION) || + (op > NFSV4OP_CBOFFLOAD && + minorvers == NFSV42_MINORVERSION)) { nd->nd_repstat = 
NFSERR_OPILLEGAL; *repp = nfscl_errmap(nd, minorvers); retops++; break; } nd->nd_procnum = op; - if (op < NFSV41_CBNOPS) + if (op < NFSV42_CBNOPS) nfsstatsv1.cbrpccnt[nd->nd_procnum]++; switch (op) { case NFSV4OP_CBGETATTR: @@ -3619,7 +3623,7 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) } break; default: - if (i == 0 && minorvers == NFSV41_MINORVERSION) + if (i == 0 && minorvers != NFSV4_MINORVERSION) error = NFSERR_OPNOTINSESS; else { NFSCL_DEBUG(1, "unsupp callback %d\n", op); diff --git a/sys/fs/nfsclient/nfs_clvfsops.c b/sys/fs/nfsclient/nfs_clvfsops.c index 133886ab213c..c4aece16ee3b 100644 --- a/sys/fs/nfsclient/nfs_clvfsops.c +++ b/sys/fs/nfsclient/nfs_clvfsops.c @@ -1151,7 +1151,7 @@ nfs_mount(struct mount *mp) if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &minvers); - if (ret != 1 || minvers < 0 || minvers > 1 || + if (ret != 1 || minvers < 0 || minvers > 2 || (args.flags & NFSMNT_NFSV4) == 0) { vfs_mount_error(mp, "illegal minorversion: %s", opt); error = EINVAL; diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index 7d721ae8da3e..d95e4abcc631 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -51,6 +51,8 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include +#include #include #include #include @@ -142,6 +144,14 @@ static vop_advlock_t nfs_advlock; static vop_advlockasync_t nfs_advlockasync; static vop_getacl_t nfs_getacl; static vop_setacl_t nfs_setacl; +static vop_advise_t nfs_advise; +static vop_allocate_t nfs_allocate; +static vop_copy_file_range_t nfs_copy_file_range; +static vop_ioctl_t nfs_ioctl; +static vop_getextattr_t nfs_getextattr; +static vop_setextattr_t nfs_setextattr; +static vop_listextattr_t nfs_listextattr; +static vop_deleteextattr_t nfs_deleteextattr; static vop_lock1_t nfs_lock; /* @@ -181,6 +191,14 @@ static struct vop_vector newnfs_vnodeops_nosig = { .vop_write = ncl_write, .vop_getacl = nfs_getacl, 
.vop_setacl = nfs_setacl, + .vop_advise = nfs_advise, + .vop_allocate = nfs_allocate, + .vop_copy_file_range = nfs_copy_file_range, + .vop_ioctl = nfs_ioctl, + .vop_getextattr = nfs_getextattr, + .vop_setextattr = nfs_setextattr, + .vop_listextattr = nfs_listextattr, + .vop_deleteextattr = nfs_deleteextattr, }; static int @@ -3504,6 +3522,618 @@ nfs_setacl(struct vop_setacl_args *ap) return (error); } +/* + * VOP_ADVISE for NFS. + * Just return 0 for any errors, since it is just a hint. + */ +static int +nfs_advise(struct vop_advise_args *ap) +{ + struct thread *td = curthread; + struct nfsmount *nmp; + uint64_t len; + int error; + + /* + * First do vop_stdadvise() to handle the buffer cache. + */ + error = vop_stdadvise(ap); + if (error != 0) + return (error); + if (ap->a_start < 0 || ap->a_end < 0) + return (0); + if (ap->a_end == OFF_MAX) + len = 0; + else if (ap->a_end < ap->a_start) + return (0); + else + len = ap->a_end - ap->a_start + 1; + nmp = VFSTONFS(ap->a_vp->v_mount); + mtx_lock(&nmp->nm_mtx); + if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || + (NFSHASPNFS(nmp) && (nmp->nm_privflag & NFSMNTP_IOADVISETHRUMDS) == + 0) || (nmp->nm_privflag & NFSMNTP_NOADVISE) != 0) { + mtx_unlock(&nmp->nm_mtx); + return (0); + } + mtx_unlock(&nmp->nm_mtx); + error = nfsrpc_advise(ap->a_vp, ap->a_start, len, ap->a_advice, + td->td_ucred, td); + if (error == NFSERR_NOTSUPP) { + mtx_lock(&nmp->nm_mtx); + nmp->nm_privflag |= NFSMNTP_NOADVISE; + mtx_unlock(&nmp->nm_mtx); + } + return (0); +} + +/* + * nfs allocate call + */ +static int +nfs_allocate(struct vop_allocate_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct thread *td = curthread; + struct nfsvattr nfsva; + struct nfsmount *nmp; + int attrflag, error, ret; + + attrflag = 0; + nmp = VFSTONFS(vp->v_mount); + mtx_lock(&nmp->nm_mtx); + if (NFSHASNFSV4(nmp) && nmp->nm_minorvers >= NFSV42_MINORVERSION && + (nmp->nm_privflag & NFSMNTP_NOALLOCATE) == 0) { + mtx_unlock(&nmp->nm_mtx); + /* + * Flush 
first to ensure that the allocate adds to the + * file's allocation on the server. + */ + error = ncl_flush(vp, MNT_WAIT, td, 1, 0); + if (error == 0) + error = nfsrpc_allocate(vp, *ap->a_offset, *ap->a_len, + &nfsva, &attrflag, td->td_ucred, td, NULL); + if (error == 0) { + *ap->a_offset += *ap->a_len; + *ap->a_len = 0; + } else if (error == NFSERR_NOTSUPP) { + mtx_lock(&nmp->nm_mtx); + nmp->nm_privflag |= NFSMNTP_NOALLOCATE; + mtx_unlock(&nmp->nm_mtx); + } + } else { + mtx_unlock(&nmp->nm_mtx); + error = EIO; + } + /* + * If the NFS server cannot perform the Allocate operation, just call + * vop_stdallocate() to perform it. + */ + if (error != 0) + error = vop_stdallocate(ap); + if (attrflag != 0) { + ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); + if (error == 0 && ret != 0) + error = ret; + } + if (error != 0) + error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); + return (error); +} + +/* + * nfs copy_file_range call + */ +static int +nfs_copy_file_range(struct vop_copy_file_range_args *ap) +{ + struct vnode *invp = ap->a_invp; + struct vnode *outvp = ap->a_outvp; + struct mount *mp; + struct nfsvattr innfsva, outnfsva; + struct vattr *vap; + struct uio io; + struct nfsmount *nmp; + size_t len, len2, copiedlen; + int error, inattrflag, outattrflag, ret, ret2; + off_t inoff, outoff; + bool consecutive, must_commit, tryoutcred; + + nmp = VFSTONFS(invp->v_mount); + mtx_lock(&nmp->nm_mtx); + /* NFSv4.2 Copy is not permitted for infile == outfile. */ + if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || + (nmp->nm_privflag & NFSMNTP_NOCOPY) != 0 || invp == outvp) { + mtx_unlock(&nmp->nm_mtx); + error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, + ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, + ap->a_incred, ap->a_outcred, ap->a_fsizetd); + return (error); + } + mtx_unlock(&nmp->nm_mtx); + + /* Lock both vnodes, avoiding risk of deadlock. 
*/ + do { + mp = NULL; + error = vn_start_write(outvp, &mp, V_WAIT); + if (error == 0) { + error = vn_lock(outvp, LK_EXCLUSIVE); + if (error == 0) { + error = vn_lock(invp, LK_SHARED | LK_NOWAIT); + if (error == 0) + break; + VOP_UNLOCK(outvp, 0); + if (mp != NULL) + vn_finished_write(mp); + mp = NULL; + error = vn_lock(invp, LK_SHARED); + if (error == 0) + VOP_UNLOCK(invp, 0); + } + } + if (mp != NULL) + vn_finished_write(mp); + } while (error == 0); + if (error != 0) + return (error); + + /* + * Do the vn_rlimit_fsize() check. Should this be above the VOP layer? + */ + io.uio_offset = *ap->a_outoffp; + io.uio_resid = *ap->a_lenp; + error = vn_rlimit_fsize(outvp, &io, ap->a_fsizetd); + + /* + * Flush the input file so that the data is up to date before + * the copy. Flush writes for the output file so that they + * do not overwrite the data copied to the output file by the Copy. + * Set the commit argument for both flushes so that the data is on + * stable storage before the Copy RPC. This is done in case the + * server reboots during the Copy and needs to be redone. + */ + if (error == 0) + error = ncl_flush(invp, MNT_WAIT, curthread, 1, 0); + if (error == 0) + error = ncl_flush(outvp, MNT_WAIT, curthread, 1, 0); + + /* Do the actual NFSv4.2 RPC. */ + len = *ap->a_lenp; + mtx_lock(&nmp->nm_mtx); + if ((nmp->nm_privflag & NFSMNTP_NOCONSECUTIVE) == 0) + consecutive = true; + else + consecutive = false; + mtx_unlock(&nmp->nm_mtx); + inoff = *ap->a_inoffp; + outoff = *ap->a_outoffp; + tryoutcred = true; + must_commit = false; + if (error == 0) { + vap = &VTONFS(invp)->n_vattr.na_vattr; + error = VOP_GETATTR(invp, vap, ap->a_incred); + if (error == 0) { + /* + * Clip "len" at va_size so that RFC compliant servers + * will not reply NFSERR_INVAL. + * Setting "len == 0" for the RPC would be preferred, + * but some Linux servers do not support that. 
+ */ + if (inoff >= vap->va_size) + *ap->a_lenp = len = 0; + else if (inoff + len > vap->va_size) + *ap->a_lenp = len = vap->va_size - inoff; + } else + error = 0; + } + copiedlen = 0; + while (len > 0 && error == 0) { + inattrflag = outattrflag = 0; + len2 = len; + if (tryoutcred) + error = nfsrpc_copy_file_range(invp, ap->a_inoffp, + outvp, ap->a_outoffp, &len2, ap->a_flags, + &inattrflag, &innfsva, &outattrflag, &outnfsva, + ap->a_outcred, consecutive, &must_commit); + else + error = nfsrpc_copy_file_range(invp, ap->a_inoffp, + outvp, ap->a_outoffp, &len2, ap->a_flags, + &inattrflag, &innfsva, &outattrflag, &outnfsva, + ap->a_incred, consecutive, &must_commit); + if (inattrflag != 0) + ret = nfscl_loadattrcache(&invp, &innfsva, NULL, NULL, + 0, 1); + if (outattrflag != 0) + ret2 = nfscl_loadattrcache(&outvp, &outnfsva, NULL, + NULL, 1, 1); + if (error == 0) { + if (consecutive == false) { + if (len2 == len) { + mtx_lock(&nmp->nm_mtx); + nmp->nm_privflag |= + NFSMNTP_NOCONSECUTIVE; + mtx_unlock(&nmp->nm_mtx); + } else + error = NFSERR_OFFLOADNOREQS; + } + /* + * If the Copy returns a length == 0, it hit the + * EOF on the input file. + */ + if (len2 == 0) { + *ap->a_lenp = copiedlen; + len = 0; + } else { + len -= len2; + copiedlen += len2; + } + if (len == 0 && must_commit && error == 0) + error = ncl_commit(outvp, outoff, *ap->a_lenp, + ap->a_outcred, curthread); + if (error == 0 && ret != 0) + error = ret; + if (error == 0 && ret2 != 0) + error = ret2; + } else if (error == NFSERR_OFFLOADNOREQS && consecutive) { + /* + * Try consecutive == false, which is ok only if all + * bytes are copied. + */ + consecutive = false; + error = 0; + } else if (error == NFSERR_ACCES && tryoutcred) { + /* Try again with incred. */ + tryoutcred = false; + error = 0; + } + if (error == NFSERR_STALEWRITEVERF) { + /* + * Server rebooted, so do it all again. 
+ */ + *ap->a_inoffp = inoff; + *ap->a_outoffp = outoff; + len = *ap->a_lenp; + must_commit = false; + error = 0; + } + } + VOP_UNLOCK(invp, 0); + VOP_UNLOCK(outvp, 0); + if (mp != NULL) + vn_finished_write(mp); + if (error == NFSERR_NOTSUPP || error == NFSERR_OFFLOADNOREQS || + error == NFSERR_ACCES) { + /* + * Unlike the NFSv4.2 Copy, vn_generic_copy_file_range() can + * use a_incred for the read and a_outcred for the write, so + * try this for NFSERR_ACCES failures for the Copy. + * For NFSERR_NOTSUPP and NFSERR_OFFLOADNOREQS, the Copy can + * never succeed, so disable it. + */ + if (error != NFSERR_ACCES) { + /* Can never do Copy on this mount. */ + mtx_lock(&nmp->nm_mtx); + nmp->nm_privflag |= NFSMNTP_NOCOPY; + mtx_unlock(&nmp->nm_mtx); + } + *ap->a_inoffp = inoff; + *ap->a_outoffp = outoff; + error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, + ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, + ap->a_incred, ap->a_outcred, ap->a_fsizetd); + } else if (error != 0) + *ap->a_lenp = 0; + + if (error != 0) + error = nfscl_maperr(curthread, error, (uid_t)0, (gid_t)0); + return (error); +} + +/* + * nfs ioctl call + */ +static int +nfs_ioctl(struct vop_ioctl_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct nfsvattr nfsva; + struct nfsmount *nmp; + int attrflag, content, error, ret; + bool eof = false; /* shut up compiler. */ + + if (vp->v_type != VREG) + return (ENOTTY); + nmp = VFSTONFS(vp->v_mount); + if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION) { + error = vop_stdioctl(ap); + return (error); + } + + /* Do the actual NFSv4.2 RPC. 
*/ + switch (ap->a_command) { + case FIOSEEKDATA: + content = NFSV4CONTENT_DATA; + break; + case FIOSEEKHOLE: + content = NFSV4CONTENT_HOLE; + break; + default: + return (ENOTTY); + } + + error = vn_lock(vp, LK_SHARED); + if (error != 0) + return (EBADF); + attrflag = 0; + if (*((off_t *)ap->a_data) >= VTONFS(vp)->n_size) + error = ENXIO; + else { + /* + * Flush all writes, so that the server is up to date. + * Although a Commit is not required, the commit argument + * is set so that, for a pNFS File/Flexible File Layout + * server, the LayoutCommit will be done to ensure the file + * size is up to date on the Metadata Server. + */ + error = ncl_flush(vp, MNT_WAIT, ap->a_td, 1, 0); + if (error == 0) + error = nfsrpc_seek(vp, (off_t *)ap->a_data, &eof, + content, ap->a_cred, &nfsva, &attrflag); + /* If at eof for FIOSEEKDATA, return ENXIO. */ + if (eof && error == 0 && content == NFSV4CONTENT_DATA) + error = ENXIO; + } + if (attrflag != 0) { + ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); + if (error == 0 && ret != 0) + error = ret; + } + NFSVOPUNLOCK(vp, 0); + + if (error != 0) + error = ENXIO; + return (error); +} + +/* + * nfs getextattr call + */ +static int +nfs_getextattr(struct vop_getextattr_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct nfsmount *nmp; + struct ucred *cred; + struct thread *td = ap->a_td; + struct nfsvattr nfsva; + ssize_t len; + int attrflag, error, ret; + + nmp = VFSTONFS(vp->v_mount); + mtx_lock(&nmp->nm_mtx); + if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || + (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || + ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { + mtx_unlock(&nmp->nm_mtx); + return (EOPNOTSUPP); + } + mtx_unlock(&nmp->nm_mtx); + + cred = ap->a_cred; + if (cred == NULL) + cred = td->td_ucred; + /* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. 
*/ + attrflag = 0; + error = nfsrpc_getextattr(vp, ap->a_name, ap->a_uio, &len, &nfsva, + &attrflag, cred, td); + if (attrflag != 0) { + ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); + if (error == 0 && ret != 0) + error = ret; + } + if (error == 0 && ap->a_size != NULL) + *ap->a_size = len; + + switch (error) { + case NFSERR_NOTSUPP: + case NFSERR_OPILLEGAL: + mtx_lock(&nmp->nm_mtx); + nmp->nm_privflag |= NFSMNTP_NOXATTR; + mtx_unlock(&nmp->nm_mtx); + error = EOPNOTSUPP; + break; + case NFSERR_NOXATTR: + case NFSERR_XATTR2BIG: + error = ENOATTR; + break; + default: + error = nfscl_maperr(td, error, 0, 0); + break; + } + return (error); +} + +/* + * nfs setextattr call + */ +static int +nfs_setextattr(struct vop_setextattr_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct nfsmount *nmp; + struct ucred *cred; + struct thread *td = ap->a_td; + struct nfsvattr nfsva; + int attrflag, error, ret; + + nmp = VFSTONFS(vp->v_mount); + mtx_lock(&nmp->nm_mtx); + if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || + (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || + ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { + mtx_unlock(&nmp->nm_mtx); + return (EOPNOTSUPP); + } + mtx_unlock(&nmp->nm_mtx); + + if (ap->a_uio->uio_resid <= 0) + return (EINVAL); + cred = ap->a_cred; + if (cred == NULL) + cred = td->td_ucred; + /* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. 
*/ + attrflag = 0; + error = nfsrpc_setextattr(vp, ap->a_name, ap->a_uio, &nfsva, + &attrflag, cred, td); + if (attrflag != 0) { + ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); + if (error == 0 && ret != 0) + error = ret; + } + + switch (error) { + case NFSERR_NOTSUPP: + case NFSERR_OPILLEGAL: + mtx_lock(&nmp->nm_mtx); + nmp->nm_privflag |= NFSMNTP_NOXATTR; + mtx_unlock(&nmp->nm_mtx); + error = EOPNOTSUPP; + break; + case NFSERR_NOXATTR: + case NFSERR_XATTR2BIG: + error = ENOATTR; + break; + default: + error = nfscl_maperr(td, error, 0, 0); + break; + } + return (error); +} + +/* + * nfs listextattr call + */ +static int +nfs_listextattr(struct vop_listextattr_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct nfsmount *nmp; + struct ucred *cred; + struct thread *td = ap->a_td; + struct nfsvattr nfsva; + size_t len, len2; + uint64_t cookie; + int attrflag, error, ret; + bool eof; + + nmp = VFSTONFS(vp->v_mount); + mtx_lock(&nmp->nm_mtx); + if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || + (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || + ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { + mtx_unlock(&nmp->nm_mtx); + return (EOPNOTSUPP); + } + mtx_unlock(&nmp->nm_mtx); + + cred = ap->a_cred; + if (cred == NULL) + cred = td->td_ucred; + + /* Loop around doing List Extended Attribute RPCs. 
*/ + eof = false; + cookie = 0; + len2 = 0; + error = 0; + while (!eof && error == 0) { + len = nmp->nm_rsize; + attrflag = 0; + error = nfsrpc_listextattr(vp, &cookie, ap->a_uio, &len, &eof, + &nfsva, &attrflag, cred, td); + if (attrflag != 0) { + ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, + 1); + if (error == 0 && ret != 0) + error = ret; + } + if (error == 0) { + len2 += len; + if (len2 > SSIZE_MAX) + error = ENOATTR; + } + } + if (error == 0 && ap->a_size != NULL) + *ap->a_size = len2; + + switch (error) { + case NFSERR_NOTSUPP: + case NFSERR_OPILLEGAL: + mtx_lock(&nmp->nm_mtx); + nmp->nm_privflag |= NFSMNTP_NOXATTR; + mtx_unlock(&nmp->nm_mtx); + error = EOPNOTSUPP; + break; + case NFSERR_NOXATTR: + case NFSERR_XATTR2BIG: + error = ENOATTR; + break; + default: + error = nfscl_maperr(td, error, 0, 0); + break; + } + return (error); +} + +/* + * nfs deleteextattr call + */ +static int +nfs_deleteextattr(struct vop_deleteextattr_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct nfsmount *nmp; + struct nfsvattr nfsva; + int attrflag, error, ret; + + nmp = VFSTONFS(vp->v_mount); + mtx_lock(&nmp->nm_mtx); + if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || + (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || + ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { + mtx_unlock(&nmp->nm_mtx); + return (EOPNOTSUPP); + } + mtx_unlock(&nmp->nm_mtx); + + /* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC.
*/ + attrflag = 0; + error = nfsrpc_rmextattr(vp, ap->a_name, &nfsva, &attrflag, ap->a_cred, + ap->a_td); + if (attrflag != 0) { + ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); + if (error == 0 && ret != 0) + error = ret; + } + + switch (error) { + case NFSERR_NOTSUPP: + case NFSERR_OPILLEGAL: + mtx_lock(&nmp->nm_mtx); + nmp->nm_privflag |= NFSMNTP_NOXATTR; + mtx_unlock(&nmp->nm_mtx); + error = EOPNOTSUPP; + break; + case NFSERR_NOXATTR: + case NFSERR_XATTR2BIG: + error = ENOATTR; + break; + default: + error = nfscl_maperr(ap->a_td, error, 0, 0); + break; + } + return (error); +} + /* * Return POSIX pathconf information applicable to nfs filesystems. */ @@ -3513,7 +4143,10 @@ nfs_pathconf(struct vop_pathconf_args *ap) struct nfsv3_pathconf pc; struct nfsvattr nfsva; struct vnode *vp = ap->a_vp; + struct nfsmount *nmp; struct thread *td = curthread; + off_t off; + bool eof; int attrflag, error; if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX || @@ -3612,6 +4245,40 @@ nfs_pathconf(struct vop_pathconf_args *ap) case _PC_SYMLINK_MAX: *ap->a_retval = NFS_MAXPATHLEN; break; + case _PC_MIN_HOLE_SIZE: + /* Only some NFSv4.2 servers support Seek for Holes. */ + *ap->a_retval = 0; + nmp = VFSTONFS(vp->v_mount); + if (NFS_ISV4(vp) && nmp->nm_minorvers == NFSV42_MINORVERSION) { + /* + * NFSv4.2 doesn't have an attribute for hole size, + * so all we can do is see if the Seek operation is + * supported and then use f_iosize as a "best guess". 
+ */ + mtx_lock(&nmp->nm_mtx); + if ((nmp->nm_privflag & NFSMNTP_SEEKTESTED) == 0) { + mtx_unlock(&nmp->nm_mtx); + off = 0; + attrflag = 0; + error = nfsrpc_seek(vp, &off, &eof, + NFSV4CONTENT_HOLE, td->td_ucred, &nfsva, + &attrflag); + if (attrflag != 0) + nfscl_loadattrcache(&vp, &nfsva, + NULL, NULL, 0, 1); + mtx_lock(&nmp->nm_mtx); + if (error == NFSERR_NOTSUPP) + nmp->nm_privflag |= NFSMNTP_SEEKTESTED; + else + nmp->nm_privflag |= NFSMNTP_SEEKTESTED | + NFSMNTP_SEEK; + error = 0; + } + if ((nmp->nm_privflag & NFSMNTP_SEEK) != 0) + *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; + mtx_unlock(&nmp->nm_mtx); + } + break; default: error = vop_stdpathconf(ap); diff --git a/sys/fs/nfsclient/nfsmount.h b/sys/fs/nfsclient/nfsmount.h index 649e59eff289..3b6312fbc87f 100644 --- a/sys/fs/nfsclient/nfsmount.h +++ b/sys/fs/nfsclient/nfsmount.h @@ -105,6 +105,14 @@ struct nfsmount { /* Private flags. */ #define NFSMNTP_FORCEDISM 0x00000001 #define NFSMNTP_CANCELRPCS 0x00000002 +#define NFSMNTP_IOADVISETHRUMDS 0x00000004 +#define NFSMNTP_NOCOPY 0x00000008 +#define NFSMNTP_NOCONSECUTIVE 0x00000010 +#define NFSMNTP_SEEK 0x00000020 +#define NFSMNTP_SEEKTESTED 0x00000040 +#define NFSMNTP_NOXATTR 0x00000080 +#define NFSMNTP_NOADVISE 0x00000100 +#define NFSMNTP_NOALLOCATE 0x00000200 #define NFSMNT_DIRPATH(m) (&((m)->nm_name[(m)->nm_krbnamelen + 1])) #define NFSMNT_SRVKRBNAME(m) \ diff --git a/sys/fs/nfsserver/nfs_nfsdkrpc.c b/sys/fs/nfsserver/nfs_nfsdkrpc.c index 1435685f3c33..252c501b29e0 100644 --- a/sys/fs/nfsserver/nfs_nfsdkrpc.c +++ b/sys/fs/nfsserver/nfs_nfsdkrpc.c @@ -109,7 +109,7 @@ extern struct proc *nfsd_master_proc; extern time_t nfsdev_time; extern int nfsrv_writerpc[NFS_NPROCS]; extern volatile int nfsrv_devidcnt; -extern struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS]; +extern struct nfsv4_opflag nfsv4_opflag[NFSV42_NOPS]; /* * NFS server system calls diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c index 48513080f224..6811c2650de3 100644 
--- a/sys/fs/nfsserver/nfs_nfsdport.c +++ b/sys/fs/nfsserver/nfs_nfsdport.c @@ -46,6 +46,8 @@ __FBSDID("$FreeBSD$"); */ #include +#include +#include #include #include #include @@ -104,6 +106,10 @@ extern int nfsrv_dolocallocks; extern int nfsd_enable_stringtouid; extern struct nfsdevicehead nfsrv_devidhead; +static int nfsrv_createiovec(int, struct mbuf **, struct mbuf **, + struct iovec **); +static int nfsrv_createiovecw(int, struct mbuf *, char *, struct iovec **, + int *); static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *, NFSPROC_T *); static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **, @@ -112,19 +118,23 @@ static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *, NFSPROC_T *); static int nfsrv_proxyds(struct vnode *, off_t, int, struct ucred *, struct thread *, int, struct mbuf **, char *, struct mbuf **, - struct nfsvattr *, struct acl *); + struct nfsvattr *, struct acl *, off_t *, int, bool *); static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *); static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *, NFSPROC_T *, struct nfsmount *, struct mbuf **, struct mbuf **); static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct mbuf **, char *, int *); +static int nfsrv_allocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *, + NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *); static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct acl *, int *); static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct nfsvattr *, int *); static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount *, struct nfsvattr *); +static int nfsrv_seekdsrpc(fhandle_t *, off_t *, int, bool *, struct ucred *, + NFSPROC_T *, struct 
nfsmount *); static int nfsrv_putfhname(fhandle_t *, char *); static int nfsrv_pnfslookupds(struct vnode *, struct vnode *, struct pnfsdsfile *, struct vnode **, NFSPROC_T *); @@ -296,7 +306,8 @@ nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEUSED))) { error = nfsrv_proxyds(vp, 0, 0, nd->nd_cred, p, - NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL); + NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL, NULL, 0, + NULL); if (error == 0) gotattr = 1; } @@ -480,7 +491,7 @@ nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) { /* For a pNFS server, set the attributes on the DS file. */ error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETATTR, - NULL, NULL, NULL, nvap, NULL); + NULL, NULL, NULL, nvap, NULL, NULL, 0, NULL); if (error == ENOENT) error = 0; } @@ -722,43 +733,21 @@ int nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp) { - struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN]; - struct iovec *ivp = iv; + struct iovec *iv; struct uio io, *uiop = &io; - struct mbuf *mp, *mp2 = NULL, *mp3 = NULL; - int i, len, tlen, error = 0; + struct mbuf *mp, *mp3; + int len, tlen, error = 0; - len = 0; - i = 0; - while (len < NFS_MAXPATHLEN) { - NFSMGET(mp); - MCLGET(mp, M_WAITOK); - mp->m_len = M_SIZE(mp); - if (len == 0) { - mp3 = mp2 = mp; - } else { - mp2->m_next = mp; - mp2 = mp; - } - if ((len + mp->m_len) > NFS_MAXPATHLEN) { - mp->m_len = NFS_MAXPATHLEN - len; - len = NFS_MAXPATHLEN; - } else { - len += mp->m_len; - } - ivp->iov_base = mtod(mp, caddr_t); - ivp->iov_len = mp->m_len; - i++; - ivp++; - } + len = NFS_MAXPATHLEN; + uiop->uio_iovcnt = nfsrv_createiovec(len, &mp3, &mp, &iv); uiop->uio_iov = iv; - uiop->uio_iovcnt = i; uiop->uio_offset = 0; uiop->uio_resid = len; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td 
= NULL; error = VOP_READLINK(vp, uiop, cred); + free(iv, M_TEMP); if (error) { m_freem(mp3); *lenp = 0; @@ -779,31 +768,20 @@ nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p, } /* - * Read vnode op call into mbuf list. + * Create an mbuf chain and an associated iovec that can be used to Read + * or Getextattr of data. + * Upon success, return pointers to the first and last mbufs in the chain + * plus the malloc'd iovec; the return value is its iovcnt. */ -int -nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, - struct thread *p, struct mbuf **mpp, struct mbuf **mpendp) +static int +nfsrv_createiovec(int len, struct mbuf **mpp, struct mbuf **mpendp, + struct iovec **ivp) { - struct mbuf *m; - int i; + struct mbuf *m, *m2 = NULL, *m3; struct iovec *iv; - struct iovec *iv2; - int error = 0, len, left, siz, tlen, ioflag = 0; - struct mbuf *m2 = NULL, *m3; - struct uio io, *uiop = &io; - struct nfsheur *nh; + int i, left, siz; - /* - * Attempt to read from a DS file. A return of ENOENT implies - * there is no DS file to read. - */ - error = nfsrv_proxyds(vp, off, cnt, cred, p, NFSPROC_READDS, mpp, - NULL, mpendp, NULL, NULL); - if (error != ENOENT) - return (error); - - len = left = NFSM_RNDUP(cnt); + left = len; m3 = NULL; /* * Generate the mbuf list with the uio_iov ref. to it. @@ -822,9 +800,7 @@ nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, m3 = m; m2 = m; } - iv = malloc(i * sizeof (struct iovec), - M_TEMP, M_WAITOK); - uiop->uio_iov = iv2 = iv; + *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK); m = m3; left = len; i = 0; @@ -842,7 +818,37 @@ nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, } m = m->m_next; } - uiop->uio_iovcnt = i; + *mpp = m3; + *mpendp = m2; + return (i); +} + +/* + * Read vnode op call into mbuf list.
+ */ +int +nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, + struct thread *p, struct mbuf **mpp, struct mbuf **mpendp) +{ + struct mbuf *m; + struct iovec *iv; + int error = 0, len, tlen, ioflag = 0; + struct mbuf *m3; + struct uio io, *uiop = &io; + struct nfsheur *nh; + + /* + * Attempt to read from a DS file. A return of ENOENT implies + * there is no DS file to read. + */ + error = nfsrv_proxyds(vp, off, cnt, cred, p, NFSPROC_READDS, mpp, + NULL, mpendp, NULL, NULL, NULL, 0, NULL); + if (error != ENOENT) + return (error); + + len = NFSM_RNDUP(cnt); + uiop->uio_iovcnt = nfsrv_createiovec(len, &m3, &m, &iv); + uiop->uio_iov = iv; uiop->uio_offset = off; uiop->uio_resid = len; uiop->uio_rw = UIO_READ; @@ -853,7 +859,7 @@ nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, /* XXX KDM make this more systematic? */ nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid; error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); - free(iv2, M_TEMP); + free(iv, M_TEMP); if (error) { m_freem(m3); *mpp = NULL; @@ -869,7 +875,7 @@ nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, } else if (len != tlen || tlen != cnt) nfsrv_adj(m3, len - tlen, tlen - cnt); *mpp = m3; - *mpendp = m2; + *mpendp = m; out: NFSEXITCODE(error); @@ -877,34 +883,44 @@ nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, } /* - * Write vnode op from an mbuf list. + * Create the iovec for the mbuf chain passed in as an argument. + * The "cp" argument is where the data starts within the first mbuf in + * the chain. It returns the iovec and the iovcnt. 
*/ -int -nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int *stable, - struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p) +static int +nfsrv_createiovecw(int retlen, struct mbuf *m, char *cp, struct iovec **ivpp, + int *iovcntp) { + struct mbuf *mp; struct iovec *ivp; - int i, len; - struct iovec *iv; - int ioflags, error; - struct uio io, *uiop = &io; - struct nfsheur *nh; + int cnt, i, len; /* - * Attempt to write to a DS file. A return of ENOENT implies - * there is no DS file to write. + * Loop through the mbuf chain, counting how many mbufs are a + * part of this write operation, so the iovec size is known. */ - error = nfsrv_proxyds(vp, off, retlen, cred, p, NFSPROC_WRITEDS, - &mp, cp, NULL, NULL, NULL); - if (error != ENOENT) { - *stable = NFSWRITE_FILESYNC; - return (error); + cnt = 0; + len = retlen; + mp = m; + i = mtod(mp, caddr_t) + mbuf_len(mp) - cp; + while (len > 0) { + if (i > 0) { + len -= i; + cnt++; + } + mp = mbuf_next(mp); + if (!mp) { + if (len > 0) + return (EBADRPC); + } else + i = mbuf_len(mp); } - ivp = malloc(cnt * sizeof (struct iovec), M_TEMP, + /* Now, create the iovec. */ + mp = m; + *ivpp = ivp = malloc(cnt * sizeof (struct iovec), M_TEMP, M_WAITOK); - uiop->uio_iov = iv = ivp; - uiop->uio_iovcnt = cnt; + *iovcntp = cnt; i = mtod(mp, caddr_t) + mp->m_len - cp; len = retlen; while (len > 0) { @@ -923,11 +939,42 @@ nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int *stable, cp = mtod(mp, caddr_t); } } + return (0); +} + +/* + * Write vnode op from an mbuf list. + */ +int +nfsvno_write(struct vnode *vp, off_t off, int retlen, int *stable, + struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p) +{ + struct iovec *iv; + int cnt, ioflags, error; + struct uio io, *uiop = &io; + struct nfsheur *nh; + + /* + * Attempt to write to a DS file. A return of ENOENT implies + * there is no DS file to write. 
+ */ + error = nfsrv_proxyds(vp, off, retlen, cred, p, NFSPROC_WRITEDS, + &mp, cp, NULL, NULL, NULL, NULL, 0, NULL); + if (error != ENOENT) { + *stable = NFSWRITE_FILESYNC; + return (error); + } + if (*stable == NFSWRITE_UNSTABLE) ioflags = IO_NODELOCKED; else ioflags = (IO_SYNC | IO_NODELOCKED); + error = nfsrv_createiovecw(retlen, mp, cp, &iv, &cnt); + if (error != 0) + return (error); + uiop->uio_iov = iv; + uiop->uio_iovcnt = cnt; uiop->uio_resid = retlen; uiop->uio_rw = UIO_WRITE; uiop->uio_segflg = UIO_SYSSPACE; @@ -1249,7 +1296,8 @@ nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred, if (vp->v_type == VDIR) error = NFSERR_ISDIR; else if (is_v4) - error = nfsrv_checkremove(vp, 1, p); + error = nfsrv_checkremove(vp, 1, NULL, (nfsquad_t)((u_quad_t)0), + p); if (error == 0) nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh); if (!error) @@ -1379,12 +1427,14 @@ nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp, } if (ndflag & ND_NFSV4) { if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) { - error = nfsrv_checkremove(fvp, 0, p); + error = nfsrv_checkremove(fvp, 0, NULL, + (nfsquad_t)((u_quad_t)0), p); NFSVOPUNLOCK(fvp, 0); } else error = EPERM; if (tvp && !error) - error = nfsrv_checkremove(tvp, 1, p); + error = nfsrv_checkremove(tvp, 1, NULL, + (nfsquad_t)((u_quad_t)0), p); } else { /* * For NFSv2 and NFSv3, try to get rid of the delegation, so @@ -4380,7 +4430,7 @@ nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) /* Do this as root so that it won't fail with EACCES. 
*/ tcred = newnfs_getcred(); error = nfsrv_proxyds(vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN, - NULL, NULL, NULL, nap, NULL); + NULL, NULL, NULL, nap, NULL, NULL, 0, NULL); NFSFREECRED(tcred); return (error); } @@ -4395,14 +4445,15 @@ nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred, int error; error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETACL, - NULL, NULL, NULL, NULL, aclp); + NULL, NULL, NULL, NULL, aclp, NULL, 0, NULL); return (error); } static int nfsrv_proxyds(struct vnode *vp, off_t off, int cnt, struct ucred *cred, struct thread *p, int ioproc, struct mbuf **mpp, char *cp, - struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp) + struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp, + off_t *offp, int content, bool *eofp) { struct nfsmount *nmp[NFSDEV_MAXMIRRORS], *failnmp; fhandle_t fh[NFSDEV_MAXMIRRORS]; @@ -4506,7 +4557,7 @@ nfsrv_proxyds(struct vnode *vp, off_t off, int cnt, struct ucred *cred, origmircnt = mirrorcnt; /* * If failpos is set to a mirror#, then that mirror has - * failed and will be disabled. For Read and Getattr, the + * failed and will be disabled. For Read, Getattr and Seek, the * function only tries one mirror, so if that mirror has * failed, it will need to be retried. As such, increment * tryitagain for these cases. @@ -4539,6 +4590,22 @@ nfsrv_proxyds(struct vnode *vp, off_t off, int cnt, struct ucred *cred, else if (ioproc == NFSPROC_SETACL) error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0], mirrorcnt, aclp, &failpos); + else if (ioproc == NFSPROC_SEEKDS) { + error = nfsrv_seekdsrpc(fh, offp, content, eofp, cred, + p, nmp[0]); + if (nfsds_failerr(error) && mirrorcnt > 1) { + /* + * Setting failpos will cause the mirror + * to be disabled and then a retry of this + * seek is required. 
+ */ + failpos = 0; + error = 0; + trycnt++; + } + } else if (ioproc == NFSPROC_ALLOCATE) + error = nfsrv_allocatedsrpc(fh, off, *offp, cred, p, vp, + &nmp[0], mirrorcnt, &failpos); else { error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p, vp, nmp[mirrorcnt - 1], nap); @@ -5163,6 +5230,165 @@ nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, return (error); } +/* + * Do an allocate RPC on a DS data file, using this structure for the arguments, + * so that this function can be executed by a separate kernel process. + */ +struct nfsrvallocatedsdorpc { + int done; + int inprog; + struct task tsk; + fhandle_t fh; + off_t off; + off_t len; + struct nfsmount *nmp; + struct ucred *cred; + NFSPROC_T *p; + int err; +}; + +static int +nfsrv_allocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, + off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + struct nfsrv_descript *nd; + nfsattrbit_t attrbits; + nfsv4stateid_t st; + int error; + + nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); + nfscl_reqstart(nd, NFSPROC_ALLOCATE, nmp, (u_int8_t *)fhp, + sizeof(fhandle_t), NULL, NULL, 0, 0); + + /* + * Use a stateid where other is an alternating 01010 pattern and + * seqid is 0xffffffff. This value is not defined as special by + * the RFC and is used by the FreeBSD NFS server to indicate an + * MDS->DS proxy operation. 
+ */ + st.other[0] = 0x55555555; + st.other[1] = 0x55555555; + st.other[2] = 0x55555555; + st.seqid = 0xffffffff; + nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); + NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); + txdr_hyper(off, tl); tl += 2; + txdr_hyper(len, tl); tl += 2; + NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: len=%jd\n", (intmax_t)len); + + *tl = txdr_unsigned(NFSV4OP_GETATTR); + NFSGETATTR_ATTRBIT(&attrbits); + nfsrv_putattrbit(nd, &attrbits); + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, + cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) { + free(nd, M_TEMP); + return (error); + } + NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft allocaterpc=%d\n", + nd->nd_repstat); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, + NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); + } else + error = nd->nd_repstat; + NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft loadattr=%d\n", error); +nfsmout: + m_freem(nd->nd_mrep); + free(nd, M_TEMP); + NFSD_DEBUG(4, "nfsrv_allocatedsdorpc error=%d\n", error); + return (error); +} + +/* + * Start up the thread that will execute nfsrv_allocatedsdorpc(). 
+ */ +static void +start_allocatedsdorpc(void *arg, int pending) +{ + struct nfsrvallocatedsdorpc *drpc; + + drpc = (struct nfsrvallocatedsdorpc *)arg; + drpc->err = nfsrv_allocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off, + drpc->len, NULL, drpc->cred, drpc->p); + drpc->done = 1; + NFSD_DEBUG(4, "start_allocatedsdorpc: err=%d\n", drpc->err); +} + +static int +nfsrv_allocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred, + NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, + int *failposp) +{ + struct nfsrvallocatedsdorpc *drpc, *tdrpc; + struct nfsvattr na; + int error, i, ret, timo; + + NFSD_DEBUG(4, "in nfsrv_allocatedsrpc\n"); + drpc = NULL; + if (mirrorcnt > 1) + tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, + M_WAITOK); + + /* + * Do the allocate RPC for every DS, using a separate kernel process + * for every DS except the last one. + */ + error = 0; + for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { + tdrpc->done = 0; + NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); + tdrpc->off = off; + tdrpc->len = len; + tdrpc->nmp = *nmpp; + tdrpc->cred = cred; + tdrpc->p = p; + tdrpc->inprog = 0; + tdrpc->err = 0; + ret = EIO; + if (nfs_pnfsiothreads != 0) { + ret = nfs_pnfsio(start_allocatedsdorpc, tdrpc); + NFSD_DEBUG(4, "nfsrv_allocatedsrpc: nfs_pnfsio=%d\n", + ret); + } + if (ret != 0) { + ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, NULL, + cred, p); + if (nfsds_failerr(ret) && *failposp == -1) + *failposp = i; + else if (error == 0 && ret != 0) + error = ret; + } + nmpp++; + fhp++; + } + ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p); + if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) + *failposp = mirrorcnt - 1; + else if (error == 0 && ret != 0) + error = ret; + if (error == 0) + error = nfsrv_setextattr(vp, &na, p); + NFSD_DEBUG(4, "nfsrv_allocatedsrpc: aft setextat=%d\n", error); + tdrpc = drpc; + timo = hz / 50; /* Wait for 20msec. 
*/ + if (timo < 1) + timo = 1; + for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { + /* Wait for RPCs on separate threads to complete. */ + while (tdrpc->inprog != 0 && tdrpc->done == 0) + tsleep(&tdrpc->tsk, PVFS, "srvalds", timo); + if (nfsds_failerr(tdrpc->err) && *failposp == -1) + *failposp = i; + else if (error == 0 && tdrpc->err != 0) + error = tdrpc->err; + } + free(drpc, M_TEMP); + return (error); +} + static int nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap, @@ -5550,6 +5776,59 @@ nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, return (error); } +/* + * Seek call to a DS. + */ +static int +nfsrv_seekdsrpc(fhandle_t *fhp, off_t *offp, int content, bool *eofp, + struct ucred *cred, NFSPROC_T *p, struct nfsmount *nmp) +{ + uint32_t *tl; + struct nfsrv_descript *nd; + nfsv4stateid_t st; + int error; + + NFSD_DEBUG(4, "in nfsrv_seekdsrpc\n"); + /* + * Use a stateid where other is an alternating 01010 pattern and + * seqid is 0xffffffff. This value is not defined as special by + * the RFC and is used by the FreeBSD NFS server to indicate an + * MDS->DS proxy operation. 
+ */ + st.other[0] = 0x55555555; + st.other[1] = 0x55555555; + st.other[2] = 0x55555555; + st.seqid = 0xffffffff; + nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); + nfscl_reqstart(nd, NFSPROC_SEEKDS, nmp, (u_int8_t *)fhp, + sizeof(fhandle_t), NULL, NULL, 0, 0); + nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); + NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); + txdr_hyper(*offp, tl); tl += 2; + *tl = txdr_unsigned(content); + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) { + free(nd, M_TEMP); + return (error); + } + NFSD_DEBUG(4, "nfsrv_seekdsrpc: aft seekrpc=%d\n", nd->nd_repstat); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); + if (*tl++ == newnfs_true) + *eofp = true; + else + *eofp = false; + *offp = fxdr_hyper(tl); + } else + error = nd->nd_repstat; +nfsmout: + m_freem(nd->nd_mrep); + free(nd, M_TEMP); + NFSD_DEBUG(4, "nfsrv_seekdsrpc error=%d\n", error); + return (error); +} + /* * Get the device id and file handle for a DS file. */ @@ -5777,6 +6056,286 @@ nfsrv_setacl(struct vnode *vp, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p) return (error); } +/* + * Seek vnode op call (actually it is a VOP_IOCTL()). + * This function is called with the vnode locked, but unlocks and vrele()s + * the vp before returning. + */ +int +nfsvno_seek(struct nfsrv_descript *nd, struct vnode *vp, u_long cmd, + off_t *offp, int content, bool *eofp, struct ucred *cred, NFSPROC_T *p) +{ + struct nfsvattr at; + int error, ret; + + ASSERT_VOP_LOCKED(vp, "nfsvno_seek vp"); + /* + * Attempt to seek on a DS file. A return of ENOENT implies + * there is no DS file to seek on. + */ + error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SEEKDS, NULL, + NULL, NULL, NULL, NULL, offp, content, eofp); + if (error != ENOENT) { + vput(vp); + return (error); + } + + /* + * Do the VOP_IOCTL() call. 
For the case where *offp == file_size, + * VOP_IOCTL() will return ENXIO. However, the correct reply for + * NFSv4.2 is *eofp == true and error == 0 for this case. + */ + NFSVOPUNLOCK(vp, 0); + error = VOP_IOCTL(vp, cmd, offp, 0, cred, p); + *eofp = false; + if (error == ENXIO || (error == 0 && cmd == FIOSEEKHOLE)) { + /* Handle the cases where we might be at EOF. */ + ret = nfsvno_getattr(vp, &at, nd, p, 0, NULL); + if (ret == 0 && *offp == at.na_size) { + *eofp = true; + error = 0; + } + if (ret != 0 && error == 0) + error = ret; + } + vrele(vp); + NFSEXITCODE(error); + return (error); +} + +/* + * Allocate vnode op call. + */ +int +nfsvno_allocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred, + NFSPROC_T *p) +{ + int error, trycnt; + + ASSERT_VOP_ELOCKED(vp, "nfsvno_allocate vp"); + /* + * Attempt to allocate on a DS file. A return of ENOENT implies + * there is no DS file to allocate on. + */ + error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_ALLOCATE, NULL, + NULL, NULL, NULL, NULL, &len, 0, NULL); + if (error != ENOENT) + return (error); + error = 0; + + /* + * Do the actual VOP_ALLOCATE(), looping a reasonable number of + * times to achieve completion. + */ + trycnt = 0; + while (error == 0 && len > 0 && trycnt++ < 20) + error = VOP_ALLOCATE(vp, &off, &len); + if (error == 0 && len > 0) + error = NFSERR_IO; + NFSEXITCODE(error); + return (error); +} + +/* + * Get Extended Attribute vnode op into an mbuf list. + */ +int +nfsvno_getxattr(struct vnode *vp, char *name, uint32_t maxresp, + struct ucred *cred, struct thread *p, struct mbuf **mpp, + struct mbuf **mpendp, int *lenp) +{ + struct iovec *iv; + struct uio io, *uiop = &io; + struct mbuf *m, *m2; + int alen, error, len, tlen; + size_t siz; + + /* First, find out the size of the extended attribute. 
*/ + error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, + &siz, cred, p); + if (error != 0) + return (NFSERR_NOXATTR); + if (siz > maxresp - NFS_MAXXDR) + return (NFSERR_XATTR2BIG); + len = siz; + tlen = NFSM_RNDUP(len); + uiop->uio_iovcnt = nfsrv_createiovec(tlen, &m, &m2, &iv); + uiop->uio_iov = iv; + uiop->uio_offset = 0; + uiop->uio_resid = tlen; + uiop->uio_rw = UIO_READ; + uiop->uio_segflg = UIO_SYSSPACE; + uiop->uio_td = p; +#ifdef MAC + error = mac_vnode_check_getextattr(cred, vp, EXTATTR_NAMESPACE_USER, + name); + if (error != 0) + goto out; +#endif + + error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop, NULL, + cred, p); + if (error != 0) + goto out; + if (uiop->uio_resid > 0) { + alen = tlen; + len = tlen - uiop->uio_resid; + tlen = NFSM_RNDUP(len); + if (alen != tlen) + printf("nfsvno_getxattr: weird size read\n"); + nfsrv_adj(m, alen - tlen, tlen - len); + } + *lenp = len; + *mpp = m; + *mpendp = m2; + +out: + if (error != 0) { + m_freem(m); + *lenp = 0; + } + free(iv, M_TEMP); + NFSEXITCODE(error); + return (error); +} + +/* + * Set Extended attribute vnode op from an mbuf list. + */ +int +nfsvno_setxattr(struct vnode *vp, char *name, int len, struct mbuf *m, + char *cp, struct ucred *cred, struct thread *p) +{ + struct iovec *iv; + struct uio uio, *uiop = &uio; + int cnt, error; + +#ifdef MAC + error = mac_vnode_check_setextattr(cred, vp, EXTATTR_NAMESPACE_USER, + name); + if (error != 0) + goto out; +#endif + + uiop->uio_rw = UIO_WRITE; + uiop->uio_segflg = UIO_SYSSPACE; + uiop->uio_td = p; + uiop->uio_offset = 0; + uiop->uio_resid = len; + error = nfsrv_createiovecw(len, m, cp, &iv, &cnt); + if (error != 0) + goto out; + uiop->uio_iov = iv; + uiop->uio_iovcnt = cnt; + error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop, + cred, p); + free(iv, M_TEMP); + +out: + NFSEXITCODE(error); + return (error); +} + +/* + * Remove Extended attribute vnode op. 
+ */ +int +nfsvno_rmxattr(struct nfsrv_descript *nd, struct vnode *vp, char *name, + struct ucred *cred, struct thread *p) +{ + int error; + + /* + * Get rid of any delegations. I am not sure why this is required, + * but RFC-8276 says so. + */ + error = nfsrv_checkremove(vp, 0, nd, nd->nd_clientid, p); + if (error != 0) + goto out; +#ifdef MAC + error = mac_vnode_check_deleteextattr(cred, vp, EXTATTR_NAMESPACE_USER, + name); + if (error != 0) + goto out; +#endif + + error = VOP_DELETEEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, cred, p); + if (error == EOPNOTSUPP) + error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, + cred, p); + + /* Also the nfsrv_checkremove() failure target, so never MAC-only. */ +out: + NFSEXITCODE(error); + return (error); +} + +/* + * List Extended Attribute vnode op into a malloc()'d buffer. + */ +int +nfsvno_listxattr(struct vnode *vp, uint64_t cookie, struct ucred *cred, + struct thread *p, u_char **bufp, uint32_t *lenp, bool *eofp) +{ + struct iovec iv; + struct uio io; + int error; + size_t siz; + + *bufp = NULL; + /* First, find out the size of the extended attribute. */ + error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, NULL, &siz, cred, + p); + if (error != 0) + return (NFSERR_NOXATTR); + if (siz <= cookie) { + *lenp = 0; + *eofp = true; + goto out; + } + if (siz > cookie + *lenp) { + siz = cookie + *lenp; + *eofp = false; + } else + *eofp = true; + /* Just choose a sanity limit of 10Mbytes for malloc(M_TEMP). 
*/ + if (siz > 10 * 1024 * 1024) { + error = NFSERR_XATTR2BIG; + goto out; + } + *bufp = malloc(siz, M_TEMP, M_WAITOK); + iv.iov_base = *bufp; + iv.iov_len = siz; + io.uio_iovcnt = 1; + io.uio_iov = &iv; + io.uio_offset = 0; + io.uio_resid = siz; + io.uio_rw = UIO_READ; + io.uio_segflg = UIO_SYSSPACE; + io.uio_td = p; +#ifdef MAC + error = mac_vnode_check_listextattr(cred, vp, EXTATTR_NAMESPACE_USER); + if (error != 0) + goto out; +#endif + + error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, &io, NULL, cred, + p); + if (error != 0) + goto out; + if (io.uio_resid > 0) + siz -= io.uio_resid; + *lenp = siz; + +out: + if (error != 0) { + free(*bufp, M_TEMP); + *bufp = NULL; + } + NFSEXITCODE(error); + return (error); +} + extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *); /* diff --git a/sys/fs/nfsserver/nfs_nfsdserv.c b/sys/fs/nfsserver/nfs_nfsdserv.c index 830fd23d6f1c..76d4b93cfd37 100644 --- a/sys/fs/nfsserver/nfs_nfsdserv.c +++ b/sys/fs/nfsserver/nfs_nfsdserv.c @@ -50,6 +50,8 @@ __FBSDID("$FreeBSD$"); #ifndef APPLEKEXT #include +#include +#include /* Global vars */ extern u_int32_t newnfs_false, newnfs_true; @@ -65,6 +67,7 @@ extern int nfsd_debuglevel; extern u_long sb_max_adj; extern int nfsrv_pnfsatime; extern int nfsrv_maxpnfsmirror; +extern int nfs_maxcopyrange; #endif /* !APPLEKEXT */ static int nfs_async = 0; @@ -74,6 +77,10 @@ SYSCTL_INT(_vfs_nfsd, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, extern int nfsrv_doflexfile; SYSCTL_INT(_vfs_nfsd, OID_AUTO, default_flexfile, CTLFLAG_RW, &nfsrv_doflexfile, 0, "Make Flex File Layout the default for pNFS"); +static int nfsrv_linux42server = 1; +SYSCTL_INT(_vfs_nfsd, OID_AUTO, linux42server, CTLFLAG_RW, + &nfsrv_linux42server, 0, + "Enable Linux style NFSv4.2 server (non-RFC compliant)"); /* * This list defines the GSS mechanisms supported. 
@@ -121,7 +128,8 @@ nfsrvd_access(struct nfsrv_descript *nd, __unused int isdgram, if ((nd->nd_flag & ND_NFSV4) && (nfsmode & ~(NFSACCESS_READ | NFSACCESS_LOOKUP | NFSACCESS_MODIFY | NFSACCESS_EXTEND | NFSACCESS_DELETE | - NFSACCESS_EXECUTE))) { + NFSACCESS_EXECUTE | NFSACCESS_XAREAD | NFSACCESS_XAWRITE | + NFSACCESS_XALIST))) { nd->nd_repstat = NFSERR_INVAL; vput(vp); goto out; @@ -144,6 +152,24 @@ nfsrvd_access(struct nfsrv_descript *nd, __unused int isdgram, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_EXTEND; } + if (nfsmode & NFSACCESS_XAREAD) { + supported |= NFSACCESS_XAREAD; + if (nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, + NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) + nfsmode &= ~NFSACCESS_XAREAD; + } + if (nfsmode & NFSACCESS_XAWRITE) { + supported |= NFSACCESS_XAWRITE; + if (nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, + NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) + nfsmode &= ~NFSACCESS_XAWRITE; + } + if (nfsmode & NFSACCESS_XALIST) { + supported |= NFSACCESS_XALIST; + if (nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, + NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) + nfsmode &= ~NFSACCESS_XALIST; + } if (nfsmode & NFSACCESS_DELETE) { supported |= NFSACCESS_DELETE; if (vp->v_type == VDIR) @@ -864,9 +890,7 @@ APPLESTATIC int nfsrvd_write(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { - int i, cnt; u_int32_t *tl; - mbuf_t mp; struct nfsvattr nva, forat; int aftat_ret = 1, retlen, len, error = 0, forat_ret = 1; int gotproxystateid, stable = NFSWRITE_FILESYNC; @@ -948,28 +972,6 @@ nfsrvd_write(struct nfsrv_descript *nd, __unused int isdgram, lop->lo_end = NFS64BITSSET; } - /* - * Loop through the mbuf chain, counting how many mbufs are a - * part of this write operation, so the iovec size is known. 
- */ - cnt = 0; - mp = nd->nd_md; - i = NFSMTOD(mp, caddr_t) + mbuf_len(mp) - nd->nd_dpos; - while (len > 0) { - if (i > 0) { - len -= i; - cnt++; - } - mp = mbuf_next(mp); - if (!mp) { - if (len > 0) { - error = EBADRPC; - goto nfsmout; - } - } else - i = mbuf_len(mp); - } - if (retlen > NFS_SRVMAXIO || retlen < 0) nd->nd_repstat = EIO; if (vnode_vtype(vp) != VREG && !nd->nd_repstat) { @@ -1011,7 +1013,7 @@ nfsrvd_write(struct nfsrv_descript *nd, __unused int isdgram, * which is to return ok so long as there are no permission problems. */ if (retlen > 0) { - nd->nd_repstat = nfsvno_write(vp, off, retlen, cnt, &stable, + nd->nd_repstat = nfsvno_write(vp, off, retlen, &stable, nd->nd_md, nd->nd_dpos, nd->nd_cred, p); error = nfsm_advance(nd, NFSM_RNDUP(retlen), -1); if (error) @@ -4075,6 +4077,8 @@ nfsrvd_exchangeid(struct nfsrv_descript *nd, __unused int isdgram, clp->lc_flags |= LCL_GSSPRIVACY; } else clp->lc_flags = LCL_NFSV41; + if ((nd->nd_flag & ND_NFSV42) != 0) + clp->lc_flags |= LCL_NFSV42; if ((nd->nd_flag & ND_GSS) != 0 && nd->nd_princlen > 0) { clp->lc_flags |= LCL_NAME; clp->lc_namelen = nd->nd_princlen; @@ -4751,6 +4755,229 @@ nfsrvd_layoutreturn(struct nfsrv_descript *nd, __unused int isdgram, return (error); } +/* + * nfsv4 layout error service + */ +APPLESTATIC int +nfsrvd_layouterror(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, struct nfsexstuff *exp) +{ + uint32_t *tl; + nfsv4stateid_t stateid; + int cnt, error = 0, i, stat; + int opnum __unused; + char devid[NFSX_V4DEVICEID]; + uint64_t offset, len; + + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID + + NFSX_UNSIGNED); + offset = fxdr_hyper(tl); tl += 2; + len = fxdr_hyper(tl); tl += 2; + stateid.seqid = fxdr_unsigned(uint32_t, *tl++); + NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); + tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); + cnt = fxdr_unsigned(int, 
*tl); + NFSD_DEBUG(4, "layouterror off=%ju len=%ju cnt=%d\n", (uintmax_t)offset, + (uintmax_t)len, cnt); + /* + * For the special stateid of other all 0s and seqid == 1, set + * the stateid to the current stateid, if it is set. + */ + if (stateid.seqid == 1 && stateid.other[0] == 0 && + stateid.other[1] == 0 && stateid.other[2] == 0) { + if ((nd->nd_flag & ND_CURSTATEID) != 0) { + stateid = nd->nd_curstateid; + stateid.seqid = 0; + } else { + nd->nd_repstat = NFSERR_BADSTATEID; + goto nfsmout; + } + } + + /* + * Ignore offset, len and stateid for now. + */ + for (i = 0; i < cnt; i++) { + NFSM_DISSECT(tl, uint32_t *, NFSX_V4DEVICEID + 2 * + NFSX_UNSIGNED); + NFSBCOPY(tl, devid, NFSX_V4DEVICEID); + tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); + stat = fxdr_unsigned(int, *tl++); + opnum = fxdr_unsigned(int, *tl); + NFSD_DEBUG(4, "nfsrvd_layouterr op=%d stat=%d\n", opnum, stat); + /* + * Except for NFSERR_ACCES and NFSERR_STALE errors, + * disable the mirror. + */ + if (stat != NFSERR_ACCES && stat != NFSERR_STALE) + nfsrv_delds(devid, curthread); + } +nfsmout: + vput(vp); + NFSEXITCODE2(error, nd); + return (error); +} + +/* + * nfsv4 layout stats service + */ +APPLESTATIC int +nfsrvd_layoutstats(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, struct nfsexstuff *exp) +{ + uint32_t *tl; + nfsv4stateid_t stateid; + int cnt, error = 0; + int layouttype __unused; + char devid[NFSX_V4DEVICEID] __unused; + uint64_t offset, len, readcount, readbytes, writecount, writebytes + __unused; + + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_HYPER + NFSX_STATEID + + NFSX_V4DEVICEID + 2 * NFSX_UNSIGNED); + offset = fxdr_hyper(tl); tl += 2; + len = fxdr_hyper(tl); tl += 2; + stateid.seqid = fxdr_unsigned(uint32_t, *tl++); + NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); + tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); + readcount = fxdr_hyper(tl); tl += 2; + readbytes = 
fxdr_hyper(tl); tl += 2; + writecount = fxdr_hyper(tl); tl += 2; + writebytes = fxdr_hyper(tl); tl += 2; + NFSBCOPY(tl, devid, NFSX_V4DEVICEID); + tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); + layouttype = fxdr_unsigned(int, *tl++); + cnt = fxdr_unsigned(int, *tl); + error = nfsm_advance(nd, NFSM_RNDUP(cnt), -1); + if (error != 0) + goto nfsmout; + NFSD_DEBUG(4, "layoutstats cnt=%d\n", cnt); + /* + * For the special stateid of other all 0s and seqid == 1, set + * the stateid to the current stateid, if it is set. + */ + if (stateid.seqid == 1 && stateid.other[0] == 0 && + stateid.other[1] == 0 && stateid.other[2] == 0) { + if ((nd->nd_flag & ND_CURSTATEID) != 0) { + stateid = nd->nd_curstateid; + stateid.seqid = 0; + } else { + nd->nd_repstat = NFSERR_BADSTATEID; + goto nfsmout; + } + } + + /* + * No use for the stats for now. + */ +nfsmout: + vput(vp); + NFSEXITCODE2(error, nd); + return (error); +} + +/* + * nfsv4 io_advise service + */ +APPLESTATIC int +nfsrvd_ioadvise(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, struct nfsexstuff *exp) +{ + uint32_t *tl; + nfsv4stateid_t stateid; + nfsattrbit_t hints; + int error = 0, ret; + off_t offset, len; + + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID + 2 * NFSX_HYPER); + stateid.seqid = fxdr_unsigned(uint32_t, *tl++); + NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); + tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); + offset = fxdr_hyper(tl); tl += 2; + len = fxdr_hyper(tl); + error = nfsrv_getattrbits(nd, &hints, NULL, NULL); + if (error != 0) + goto nfsmout; + /* + * For the special stateid of other all 0s and seqid == 1, set + * the stateid to the current stateid, if it is set. 
+ */ + if (stateid.seqid == 1 && stateid.other[0] == 0 && + stateid.other[1] == 0 && stateid.other[2] == 0) { + if ((nd->nd_flag & ND_CURSTATEID) != 0) { + stateid = nd->nd_curstateid; + stateid.seqid = 0; + } else { + nd->nd_repstat = NFSERR_BADSTATEID; + goto nfsmout; + } + } + + if (offset < 0) { + nd->nd_repstat = NFSERR_INVAL; + goto nfsmout; + } + if (len < 0) + len = 0; + if (vp->v_type != VREG) { + if (vp->v_type == VDIR) + nd->nd_repstat = NFSERR_ISDIR; + else + nd->nd_repstat = NFSERR_WRONGTYPE; + goto nfsmout; + } + + /* + * For now, we can only handle WILLNEED and DONTNEED and don't use + * the stateid. + */ + if ((NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED) && + !NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED)) || + (NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED) && + !NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED))) { + NFSVOPUNLOCK(vp, 0); + if (NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED)) { + ret = VOP_ADVISE(vp, offset, len, POSIX_FADV_WILLNEED); + NFSZERO_ATTRBIT(&hints); + if (ret == 0) + NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED); + else + NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_NORMAL); + } else { + ret = VOP_ADVISE(vp, offset, len, POSIX_FADV_DONTNEED); + NFSZERO_ATTRBIT(&hints); + if (ret == 0) + NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED); + else + NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_NORMAL); + } + vrele(vp); + } else { + NFSZERO_ATTRBIT(&hints); + NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_NORMAL); + vput(vp); + } + nfsrv_putattrbit(nd, &hints); + NFSEXITCODE2(error, nd); + return (error); +nfsmout: + vput(vp); + NFSEXITCODE2(error, nd); + return (error); +} + /* * nfsv4 getdeviceinfo service */ @@ -4868,6 +5095,737 @@ nfsrvd_teststateid(struct nfsrv_descript *nd, __unused int isdgram, return (error); } +/* + * nfs allocate service + */ +APPLESTATIC int +nfsrvd_allocate(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, struct nfsexstuff *exp) +{ + uint32_t *tl; + struct nfsvattr forat; + int error = 0, forat_ret = 1, 
gotproxystateid; + off_t off, len; + struct nfsstate st, *stp = &st; + struct nfslock lo, *lop = &lo; + nfsv4stateid_t stateid; + nfsquad_t clientid; + nfsattrbit_t attrbits; + + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + gotproxystateid = 0; + NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID + 2 * NFSX_HYPER); + stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS); + lop->lo_flags = NFSLCK_WRITE; + stp->ls_ownerlen = 0; + stp->ls_op = NULL; + stp->ls_uid = nd->nd_cred->cr_uid; + stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); + clientid.lval[0] = stp->ls_stateid.other[0] = *tl++; + clientid.lval[1] = stp->ls_stateid.other[1] = *tl++; + if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { + if ((nd->nd_flag & ND_NFSV41) != 0) + clientid.qval = nd->nd_clientid.qval; + else if (nd->nd_clientid.qval != clientid.qval) + printf("EEK2 multiple clids\n"); + } else { + if ((nd->nd_flag & ND_NFSV41) != 0) + printf("EEK! no clientid from session\n"); + nd->nd_flag |= ND_IMPLIEDCLID; + nd->nd_clientid.qval = clientid.qval; + } + stp->ls_stateid.other[2] = *tl++; + /* + * Don't allow this to be done for a DS. + */ + if ((nd->nd_flag & ND_DSSERVER) != 0) + nd->nd_repstat = NFSERR_NOTSUPP; + /* However, allow the proxy stateid. */ + if (stp->ls_stateid.seqid == 0xffffffff && + stp->ls_stateid.other[0] == 0x55555555 && + stp->ls_stateid.other[1] == 0x55555555 && + stp->ls_stateid.other[2] == 0x55555555) + gotproxystateid = 1; + off = fxdr_hyper(tl); tl += 2; + lop->lo_first = off; + len = fxdr_hyper(tl); + lop->lo_end = off + len; + /* + * Paranoia, just in case it wraps around, which shouldn't + * ever happen anyhow. 
+ */ + if (nd->nd_repstat == 0 && (lop->lo_end < lop->lo_first || len <= 0)) + nd->nd_repstat = NFSERR_INVAL; + + if (nd->nd_repstat == 0 && vnode_vtype(vp) != VREG) + nd->nd_repstat = NFSERR_WRONGTYPE; + NFSZERO_ATTRBIT(&attrbits); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); + forat_ret = nfsvno_getattr(vp, &forat, nd, curthread, 1, &attrbits); + if (nd->nd_repstat == 0) + nd->nd_repstat = forat_ret; + if (nd->nd_repstat == 0 && (forat.na_uid != nd->nd_cred->cr_uid || + NFSVNO_EXSTRICTACCESS(exp))) + nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, + curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, + NULL); + if (nd->nd_repstat == 0 && gotproxystateid == 0) + nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, + &stateid, exp, nd, curthread); + + if (nd->nd_repstat == 0) + nd->nd_repstat = nfsvno_allocate(vp, off, len, nd->nd_cred, + curthread); + vput(vp); + NFSEXITCODE2(0, nd); + return (0); +nfsmout: + vput(vp); + NFSEXITCODE2(error, nd); + return (error); +} + +/* + * nfs copy service + * Copy a byte range from vp to tovp within this server. Only consecutive, + * synchronous copies are done and a pNFS server replies NFSERR_NOTSUPP. + * Every return path releases vp via vput() or vrele() and tovp via + * vrele(). + */ +APPLESTATIC int +nfsrvd_copy_file_range(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, vnode_t tovp, struct nfsexstuff *exp, struct nfsexstuff *toexp) +{ + uint32_t *tl; + struct nfsvattr at; + int cnt, error = 0, ret; + off_t inoff, outoff; + uint64_t len; + size_t xfer; + struct nfsstate inst, outst, *instp = &inst, *outstp = &outst; + struct nfslock inlo, outlo, *inlop = &inlo, *outlop = &outlo; + nfsquad_t clientid; + nfsv4stateid_t stateid; + nfsattrbit_t attrbits; + void *rl_rcookie, *rl_wcookie; + + rl_rcookie = rl_wcookie = NULL; + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + if (nfsrv_devidcnt > 0) { + /* + * For a pNFS server, reply NFSERR_NOTSUPP so that the client + * will do the copy via I/O on the DS(s).
+ */ + nd->nd_repstat = NFSERR_NOTSUPP; + goto nfsmout; + } + if (vp == tovp) { + /* Copying a byte range within the same file is not allowed. */ + nd->nd_repstat = NFSERR_INVAL; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_STATEID + 3 * NFSX_HYPER + + 3 * NFSX_UNSIGNED); + instp->ls_flags = (NFSLCK_CHECK | NFSLCK_READACCESS); + inlop->lo_flags = NFSLCK_READ; + instp->ls_ownerlen = 0; + instp->ls_op = NULL; + instp->ls_uid = nd->nd_cred->cr_uid; + instp->ls_stateid.seqid = fxdr_unsigned(uint32_t, *tl++); + clientid.lval[0] = instp->ls_stateid.other[0] = *tl++; + clientid.lval[1] = instp->ls_stateid.other[1] = *tl++; + if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) + clientid.qval = nd->nd_clientid.qval; + instp->ls_stateid.other[2] = *tl++; + outstp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS); + outlop->lo_flags = NFSLCK_WRITE; + outstp->ls_ownerlen = 0; + outstp->ls_op = NULL; + outstp->ls_uid = nd->nd_cred->cr_uid; + outstp->ls_stateid.seqid = fxdr_unsigned(uint32_t, *tl++); + outstp->ls_stateid.other[0] = *tl++; + outstp->ls_stateid.other[1] = *tl++; + outstp->ls_stateid.other[2] = *tl++; + inoff = fxdr_hyper(tl); tl += 2; + inlop->lo_first = inoff; + outoff = fxdr_hyper(tl); tl += 2; + outlop->lo_first = outoff; + len = fxdr_hyper(tl); tl += 2; + if (len == 0) { + /* len == 0 means to EOF. */ + inlop->lo_end = OFF_MAX; + outlop->lo_end = OFF_MAX; + } else { + inlop->lo_end = inlop->lo_first + len; + outlop->lo_end = outlop->lo_first + len; + } + + /* + * At this time only consecutive, synchronous copy is supported, + * so ca_consecutive and ca_synchronous can be ignored.
+ */ + tl += 2; + + cnt = fxdr_unsigned(int, *tl); + if ((nd->nd_flag & ND_DSSERVER) != 0 || cnt != 0) + nd->nd_repstat = NFSERR_NOTSUPP; + if (nd->nd_repstat == 0 && (inoff > OFF_MAX || outoff > OFF_MAX || + inlop->lo_end > OFF_MAX || outlop->lo_end > OFF_MAX || + inlop->lo_end < inlop->lo_first || outlop->lo_end < + outlop->lo_first)) + nd->nd_repstat = NFSERR_INVAL; + + if (nd->nd_repstat == 0 && vnode_vtype(vp) != VREG) + nd->nd_repstat = NFSERR_WRONGTYPE; + + /* Check permissions for the input file. */ + NFSZERO_ATTRBIT(&attrbits); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); + ret = nfsvno_getattr(vp, &at, nd, curthread, 1, &attrbits); + if (nd->nd_repstat == 0) + nd->nd_repstat = ret; + if (nd->nd_repstat == 0 && (at.na_uid != nd->nd_cred->cr_uid || + NFSVNO_EXSTRICTACCESS(exp))) + nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, + curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, + NULL); + if (nd->nd_repstat == 0) + nd->nd_repstat = nfsrv_lockctrl(vp, &instp, &inlop, NULL, + clientid, &stateid, exp, nd, curthread); + NFSVOPUNLOCK(vp, 0); + if (nd->nd_repstat != 0) + goto out; + + error = NFSVOPLOCK(tovp, LK_SHARED); + if (error != 0) + goto out; + if (vnode_vtype(tovp) != VREG) + nd->nd_repstat = NFSERR_WRONGTYPE; + + /* For the output file, we only need the Owner attribute. */ + ret = nfsvno_getattr(tovp, &at, nd, curthread, 1, &attrbits); + if (nd->nd_repstat == 0) + nd->nd_repstat = ret; + /* + * Check write access. The strict access test uses the export of + * the output file (toexp), as do the nfsvno_accchk() and + * nfsrv_lockctrl() calls for tovp. + */ + if (nd->nd_repstat == 0 && (at.na_uid != nd->nd_cred->cr_uid || + NFSVNO_EXSTRICTACCESS(toexp))) + nd->nd_repstat = nfsvno_accchk(tovp, VWRITE, nd->nd_cred, toexp, + curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, + NULL); + if (nd->nd_repstat == 0) + nd->nd_repstat = nfsrv_lockctrl(tovp, &outstp, &outlop, NULL, + clientid, &stateid, toexp, nd, curthread); + NFSVOPUNLOCK(tovp, 0); + + /* Range lock the byte ranges for both vp and tovp.
*/ + if (nd->nd_repstat == 0) { + for (;;) { + if (len == 0) { + rl_wcookie = vn_rangelock_wlock(tovp, outoff, + OFF_MAX); + rl_rcookie = vn_rangelock_tryrlock(vp, inoff, + OFF_MAX); + } else { + rl_wcookie = vn_rangelock_wlock(tovp, outoff, + outoff + len); + rl_rcookie = vn_rangelock_tryrlock(vp, inoff, + inoff + len); + } + if (rl_rcookie != NULL) + break; + vn_rangelock_unlock(tovp, rl_wcookie); + if (len == 0) + rl_rcookie = vn_rangelock_rlock(vp, inoff, + OFF_MAX); + else + rl_rcookie = vn_rangelock_rlock(vp, inoff, + inoff + len); + vn_rangelock_unlock(vp, rl_rcookie); + } + + error = NFSVOPLOCK(vp, LK_SHARED); + if (error == 0) { + ret = nfsvno_getattr(vp, &at, nd, curthread, 1, NULL); + if (ret == 0) { + /* + * Since vp is range locked, na_size should + * not change. + */ + if (len == 0 && at.na_size > inoff) { + /* + * If len == 0, set it based on vp's + * size. If offset is past EOF, just + * leave len == 0. + */ + len = at.na_size - inoff; + } else if (nfsrv_linux42server == 0 && + inoff + len > at.na_size) { + /* + * RFC-7862 says that NFSERR_INVAL must + * be returned when inoff + len exceeds + * the file size, however the NFSv4.2 + * Linux client likes to do this, so + * only check if nfsrv_linux42server + * is not set. + */ + nd->nd_repstat = NFSERR_INVAL; + } + } + NFSVOPUNLOCK(vp, 0); + if (ret != 0 && nd->nd_repstat == 0) + nd->nd_repstat = ret; + } else if (nd->nd_repstat == 0) + nd->nd_repstat = error; + } + + /* + * Do the actual copy to an upper limit of vfs.nfs.maxcopyrange. + * This limit is applied to ensure that the RPC replies in a + * reasonable time. + */ + if (len > nfs_maxcopyrange) + xfer = nfs_maxcopyrange; + else + xfer = len; + if (nd->nd_repstat == 0) { + nd->nd_repstat = vn_copy_file_range(vp, &inoff, tovp, &outoff, + &xfer, 0, nd->nd_cred, nd->nd_cred, NULL); + if (nd->nd_repstat == 0) + len = xfer; + } + + /* Unlock the ranges.
*/ + if (rl_rcookie != NULL) + vn_rangelock_unlock(vp, rl_rcookie); + if (rl_wcookie != NULL) + vn_rangelock_unlock(tovp, rl_wcookie); + + /* + * Reply: no callback ids, the byte count copied, NFSWRITE_UNSTABLE, + * a verifier built from nfsboottime and two true booleans + * (presumably cr_consecutive and cr_synchronous -- confirm against + * RFC-7862). + */ + if (nd->nd_repstat == 0) { + NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + NFSX_HYPER + + NFSX_VERF); + *tl++ = txdr_unsigned(0); /* No callback ids. */ + txdr_hyper(len, tl); tl += 2; + *tl++ = txdr_unsigned(NFSWRITE_UNSTABLE); + *tl++ = txdr_unsigned(nfsboottime.tv_sec); + *tl++ = txdr_unsigned(nfsboottime.tv_usec); + *tl++ = newnfs_true; + *tl = newnfs_true; + } +out: + vrele(vp); + vrele(tovp); + NFSEXITCODE2(error, nd); + return (error); +nfsmout: + vput(vp); + vrele(tovp); + NFSEXITCODE2(error, nd); + return (error); +} + +/* + * nfs seek service + * Seek to the next data or hole (per the content argument) from the given + * offset and reply with an eof flag followed by the resulting offset. + */ +APPLESTATIC int +nfsrvd_seek(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, struct nfsexstuff *exp) +{ + uint32_t *tl; + struct nfsvattr at; + int content, error = 0; + off_t off; + u_long cmd; + nfsattrbit_t attrbits; + bool eof; + + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID + NFSX_HYPER + NFSX_UNSIGNED); + /* Ignore the stateid for now. */ + tl += (NFSX_STATEID / NFSX_UNSIGNED); + off = fxdr_hyper(tl); tl += 2; + content = fxdr_unsigned(int, *tl); + if (content == NFSV4CONTENT_DATA) + cmd = FIOSEEKDATA; + else if (content == NFSV4CONTENT_HOLE) + cmd = FIOSEEKHOLE; + else + nd->nd_repstat = NFSERR_BADXDR; + if (nd->nd_repstat == 0 && vnode_vtype(vp) == VDIR) + nd->nd_repstat = NFSERR_ISDIR; + if (nd->nd_repstat == 0 && vnode_vtype(vp) != VREG) + nd->nd_repstat = NFSERR_WRONGTYPE; + if (nd->nd_repstat == 0 && off < 0) + nd->nd_repstat = NFSERR_NXIO; + if (nd->nd_repstat == 0) { + /* Check permissions for the input file.
*/ + NFSZERO_ATTRBIT(&attrbits); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); + nd->nd_repstat = nfsvno_getattr(vp, &at, nd, curthread, 1, + &attrbits); + } + if (nd->nd_repstat == 0 && (at.na_uid != nd->nd_cred->cr_uid || + NFSVNO_EXSTRICTACCESS(exp))) + nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, + curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, + NULL); + if (nd->nd_repstat != 0) + goto nfsmout; + + /* nfsvno_seek() unlocks and vrele()s the vp. */ + nd->nd_repstat = nfsvno_seek(nd, vp, cmd, &off, content, &eof, + nd->nd_cred, curthread); + if (nd->nd_repstat == 0 && eof && content == NFSV4CONTENT_DATA && + nfsrv_linux42server != 0) + nd->nd_repstat = NFSERR_NXIO; + if (nd->nd_repstat == 0) { + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); + if (eof) + *tl++ = newnfs_true; + else + *tl++ = newnfs_false; + txdr_hyper(off, tl); + } + NFSEXITCODE2(error, nd); + return (error); +nfsmout: + vput(vp); + NFSEXITCODE2(error, nd); + return (error); +} + +/* + * nfs get extended attribute service + * Parse the attribute name, fetch its value via nfsvno_getxattr() and + * reply with the value length followed by the value's mbuf chain. + */ +APPLESTATIC int +nfsrvd_getxattr(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, __unused struct nfsexstuff *exp) +{ + uint32_t *tl; + mbuf_t mp = NULL, mpend = NULL; + int error, len; + char *name; + struct thread *p = curthread; + + error = 0; + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + len = fxdr_unsigned(int, *tl); + if (len <= 0) { + nd->nd_repstat = NFSERR_BADXDR; + goto nfsmout; + } + if (len > EXTATTR_MAXNAMELEN) { + nd->nd_repstat = NFSERR_NOXATTR; + goto nfsmout; + } + name = malloc(len + 1, M_TEMP, M_WAITOK); + nd->nd_repstat = nfsrv_mtostr(nd, name, len); + if (nd->nd_repstat == 0) + nd->nd_repstat = nfsvno_getxattr(vp, name, nd->nd_maxresp, + nd->nd_cred, p, &mp, &mpend, &len); + if (nd->nd_repstat == ENOATTR) + nd->nd_repstat = NFSERR_NOXATTR; + else if (nd->nd_repstat == EOPNOTSUPP) +
nd->nd_repstat = NFSERR_NOTSUPP; + if (nd->nd_repstat == 0) { + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(len); + mbuf_setnext(nd->nd_mb, mp); + nd->nd_mb = mpend; + nd->nd_bpos = NFSMTOD(mpend, caddr_t) + mbuf_len(mpend); + } + free(name, M_TEMP); + +nfsmout: + if (nd->nd_repstat == 0) + nd->nd_repstat = error; + vput(vp); + NFSEXITCODE2(0, nd); + return (0); +} + +/* + * nfs set extended attribute service + * Parse the set option, name and value length, apply the create/replace + * checks and reply with the Change attribute from before and after the + * set. + */ +APPLESTATIC int +nfsrvd_setxattr(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, __unused struct nfsexstuff *exp) +{ + uint32_t *tl; + struct nfsvattr ova, nva; + nfsattrbit_t attrbits; + int error, len, opt; + char *name; + size_t siz; + struct thread *p = curthread; + + error = 0; + name = NULL; + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + opt = fxdr_unsigned(int, *tl++); + len = fxdr_unsigned(int, *tl); + if (len <= 0) { + nd->nd_repstat = NFSERR_BADXDR; + goto nfsmout; + } + if (len > EXTATTR_MAXNAMELEN) { + nd->nd_repstat = NFSERR_NOXATTR; + goto nfsmout; + } + name = malloc(len + 1, M_TEMP, M_WAITOK); + error = nfsrv_mtostr(nd, name, len); + if (error != 0) + goto nfsmout; + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + len = fxdr_unsigned(int, *tl); + if (len <= 0 || len > IOSIZE_MAX) { + nd->nd_repstat = NFSERR_XATTR2BIG; + goto nfsmout; + } + switch (opt) { + case NFSV4SXATTR_CREATE: + error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, + &siz, nd->nd_cred, p); + if (error != ENOATTR) + nd->nd_repstat = NFSERR_EXIST; + error = 0; + break; + case NFSV4SXATTR_REPLACE: + error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, + &siz, nd->nd_cred, p); + if (error != 0) + nd->nd_repstat = NFSERR_NOXATTR; + break; + case NFSV4SXATTR_EITHER: + break; + default: + nd->nd_repstat = NFSERR_BADXDR; + } + if (nd->nd_repstat != 0) + goto nfsmout; + + /* Now, do the Set Extended
attribute, with Change before and after. */ + NFSZERO_ATTRBIT(&attrbits); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); + nd->nd_repstat = nfsvno_getattr(vp, &ova, nd, p, 1, &attrbits); + if (nd->nd_repstat == 0) { + nd->nd_repstat = nfsvno_setxattr(vp, name, len, nd->nd_md, + nd->nd_dpos, nd->nd_cred, p); + if (nd->nd_repstat == ENXIO) + nd->nd_repstat = NFSERR_XATTR2BIG; + } + if (nd->nd_repstat == 0) + nd->nd_repstat = nfsm_advance(nd, NFSM_RNDUP(len), -1); + if (nd->nd_repstat == 0) + nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, &attrbits); + if (nd->nd_repstat == 0) { + NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); + *tl++ = newnfs_true; + txdr_hyper(ova.na_filerev, tl); tl += 2; + txdr_hyper(nva.na_filerev, tl); + } + +nfsmout: + free(name, M_TEMP); + if (nd->nd_repstat == 0) + nd->nd_repstat = error; + vput(vp); + NFSEXITCODE2(0, nd); + return (0); +} + +/* + * nfs remove extended attribute service + * Remove the named attribute and reply with the Change attribute from + * before and after the removal. + */ +APPLESTATIC int +nfsrvd_rmxattr(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, __unused struct nfsexstuff *exp) +{ + uint32_t *tl; + struct nfsvattr ova, nva; + nfsattrbit_t attrbits; + int error, len; + char *name; + struct thread *p = curthread; + + error = 0; + name = NULL; + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + len = fxdr_unsigned(int, *tl); + if (len <= 0) { + nd->nd_repstat = NFSERR_BADXDR; + goto nfsmout; + } + if (len > EXTATTR_MAXNAMELEN) { + nd->nd_repstat = NFSERR_NOXATTR; + goto nfsmout; + } + name = malloc(len + 1, M_TEMP, M_WAITOK); + error = nfsrv_mtostr(nd, name, len); + if (error != 0) + goto nfsmout; + + if ((nd->nd_flag & ND_IMPLIEDCLID) == 0) { + printf("EEK! nfsrvd_rmxattr: no implied clientid\n"); + error = NFSERR_NOXATTR; + goto nfsmout; + } + /* + * Now, do the Remove Extended attribute, with Change before and + * after.
+ */ + NFSZERO_ATTRBIT(&attrbits); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); + nd->nd_repstat = nfsvno_getattr(vp, &ova, nd, p, 1, &attrbits); + if (nd->nd_repstat == 0) { + nd->nd_repstat = nfsvno_rmxattr(nd, vp, name, nd->nd_cred, p); + if (nd->nd_repstat == ENOATTR) + nd->nd_repstat = NFSERR_NOXATTR; + } + if (nd->nd_repstat == 0) + nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, &attrbits); + /* + * NOTE(review): the Set Extended attribute reply sends a boolean + * ahead of the two change values and this reply does not -- confirm + * the reply layout against RFC-8276 and the client's parser. + */ + if (nd->nd_repstat == 0) { + NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER); + txdr_hyper(ova.na_filerev, tl); tl += 2; + txdr_hyper(nva.na_filerev, tl); + } + +nfsmout: + free(name, M_TEMP); + if (nd->nd_repstat == 0) + nd->nd_repstat = error; + vput(vp); + NFSEXITCODE2(0, nd); + return (0); +} + +/* + * nfs list extended attribute service + * Reply with as many attribute names as fit in the requested length, + * starting at the byte position in the name list given by the cookie. + */ +APPLESTATIC int +nfsrvd_listxattr(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, __unused struct nfsexstuff *exp) +{ + uint32_t cnt, *tl, len, len2, i, pos, retlen; + int error; + uint64_t cookie, cookie2; + u_char *buf; + bool eof; + struct thread *p = curthread; + + error = 0; + buf = NULL; + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); + /* + * The cookie doesn't need to be in net byte order, but FreeBSD + * does so to make it more readable in packet traces. + */ + cookie = fxdr_hyper(tl); tl += 2; + len = fxdr_unsigned(uint32_t, *tl); + if (len == 0 || cookie >= IOSIZE_MAX) { + nd->nd_repstat = NFSERR_BADXDR; + goto nfsmout; + } + if (len > nd->nd_maxresp - NFS_MAXXDR) + len = nd->nd_maxresp - NFS_MAXXDR; + len2 = len; + nd->nd_repstat = nfsvno_listxattr(vp, cookie, nd->nd_cred, p, &buf, + &len, &eof); + if (nd->nd_repstat == EOPNOTSUPP) + nd->nd_repstat = NFSERR_NOTSUPP; + if (nd->nd_repstat == 0) { + cookie2 = cookie + len; + if (cookie2 < cookie) + nd->nd_repstat = NFSERR_BADXDR; + } + if (nd->nd_repstat == 0) { + /* Now copy the entries out.
*/ + retlen = NFSX_HYPER + 2 * NFSX_UNSIGNED; + if (len == 0 && retlen <= len2) { + /* The cookie was at eof. */ + NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * + NFSX_UNSIGNED); + txdr_hyper(cookie2, tl); tl += 2; + *tl++ = txdr_unsigned(0); + *tl = newnfs_true; + goto nfsmout; + } + + /* Sanity check the cookie. */ + for (pos = 0; pos < len; pos += (i + 1)) { + if (pos == cookie) + break; + i = buf[pos]; + } + if (pos != cookie) { + nd->nd_repstat = NFSERR_INVAL; + goto nfsmout; + } + + /* Loop around copying the entries out. */ + cnt = 0; + len -= cookie; + /* Only read a length byte while bytes remain in the list. */ + i = (len > 0) ? buf[pos] : 0; + while (i < len && len2 >= retlen + NFSM_RNDUP(i) + + NFSX_UNSIGNED) { + if (cnt == 0) { + NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + + NFSX_UNSIGNED); + txdr_hyper(cookie2, tl); tl += 2; + } + retlen += nfsm_strtom(nd, &buf[pos + 1], i); + len -= (i + 1); + pos += (i + 1); + i = (len > 0) ? buf[pos] : 0; + cnt++; + } + /* + * eof is set true/false by nfsvno_listxattr(), but if we + * can't copy all entries returned by nfsvno_listxattr(), + * we are not at eof. + */ + if (len > 0) + eof = false; + if (cnt > 0) { + /* *tl is set above.
*/ + *tl = txdr_unsigned(cnt); + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + if (eof) + *tl = newnfs_true; + else + *tl = newnfs_false; + } else + nd->nd_repstat = NFSERR_TOOSMALL; + } + +nfsmout: + free(buf, M_TEMP); + if (nd->nd_repstat == 0) + nd->nd_repstat = error; + vput(vp); + NFSEXITCODE2(0, nd); + return (0); +} + /* * nfsv4 service not supported */ diff --git a/sys/fs/nfsserver/nfs_nfsdsocket.c b/sys/fs/nfsserver/nfs_nfsdsocket.c index f0e63abebc40..8215b96ba7e1 100644 --- a/sys/fs/nfsserver/nfs_nfsdsocket.c +++ b/sys/fs/nfsserver/nfs_nfsdsocket.c @@ -135,7 +135,7 @@ int (*nfsrv3_procs2[NFS_V3NPROCS])(struct nfsrv_descript *, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, }; -int (*nfsrv4_ops0[NFSV41_NOPS])(struct nfsrv_descript *, +int (*nfsrv4_ops0[NFSV42_NOPS])(struct nfsrv_descript *, int, vnode_t , struct nfsexstuff *) = { (int (*)(struct nfsrv_descript *, int, vnode_t , struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , struct nfsexstuff *))0, @@ -196,9 +196,26 @@ int (*nfsrv4_ops0[NFSV41_NOPS])(struct nfsrv_descript *, nfsrvd_notsupp, nfsrvd_destroyclientid, nfsrvd_reclaimcomplete, + nfsrvd_allocate, + (int (*)(struct nfsrv_descript *, int, vnode_t , struct nfsexstuff *))0, + nfsrvd_notsupp, + nfsrvd_notsupp, + nfsrvd_ioadvise, + nfsrvd_layouterror, + nfsrvd_layoutstats, + nfsrvd_notsupp, + nfsrvd_notsupp, + nfsrvd_notsupp, + nfsrvd_seek, + nfsrvd_notsupp, + nfsrvd_notsupp, + nfsrvd_getxattr, + nfsrvd_setxattr, + nfsrvd_listxattr, + nfsrvd_rmxattr, }; -int (*nfsrv4_ops1[NFSV41_NOPS])(struct nfsrv_descript *, +int (*nfsrv4_ops1[NFSV42_NOPS])(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *) = { (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, @@ -259,9 +276,26 @@ int
(*nfsrv4_ops1[NFSV41_NOPS])(struct nfsrv_descript *, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t 
*, struct nfsexstuff *))0, }; -int (*nfsrv4_ops2[NFSV41_NOPS])(struct nfsrv_descript *, +int (*nfsrv4_ops2[NFSV42_NOPS])(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *) = { (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, @@ -322,6 +356,23 @@ int (*nfsrv4_ops2[NFSV41_NOPS])(struct nfsrv_descript *, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + nfsrvd_copy_file_range, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct 
nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, }; #endif /* !APPLEKEXT */ @@ -361,6 +412,17 @@ int nfsrv_writerpc[NFS_NPROCS] = { 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; +SYSCTL_DECL(_vfs_nfsd); +static int nfs_minminorv4 = NFSV4_MINORVERSION; +SYSCTL_INT(_vfs_nfsd, OID_AUTO, server_min_minorversion4, CTLFLAG_RWTUN, + &nfs_minminorv4, 0, + "The lowest minor version of NFSv4 handled by the server"); + +static int nfs_maxminorv4 = NFSV42_MINORVERSION; +SYSCTL_INT(_vfs_nfsd, OID_AUTO, server_max_minorversion4, CTLFLAG_RWTUN, + &nfs_maxminorv4, 0, + "The highest minor version of NFSv4 handled by the server"); + /* local functions */ static void nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, int taglen, u_int32_t minorvers); @@ -373,7 +435,7 @@ static void nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, static int nfs_retfh[NFS_V3NPROCS] = { 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0 }; -extern struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS]; +extern struct nfsv4_opflag nfsv4_opflag[NFSV42_NOPS]; static int nfsv3to4op[NFS_V3NPROCS] = { NFSPROC_NULL, @@ -745,7 +807,10 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, (void) nfsm_strtom(nd, tag, taglen); NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); - if (minorvers != NFSV4_MINORVERSION && minorvers != NFSV41_MINORVERSION) + if 
((minorvers != NFSV4_MINORVERSION && + minorvers != NFSV41_MINORVERSION && + minorvers != NFSV42_MINORVERSION) || + minorvers < nfs_minminorv4 || minorvers > nfs_maxminorv4) nd->nd_repstat = NFSERR_MINORVERMISMATCH; if (nd->nd_repstat) numops = 0; @@ -765,9 +830,9 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, *repp = *tl; op = fxdr_unsigned(int, *tl); NFSD_DEBUG(4, "op=%d\n", op); - if (op < NFSV4OP_ACCESS || + if (op < NFSV4OP_ACCESS || op >= NFSV42_NOPS || (op >= NFSV4OP_NOPS && (nd->nd_flag & ND_NFSV41) == 0) || - (op >= NFSV41_NOPS && (nd->nd_flag & ND_NFSV41) != 0)) { + (op >= NFSV41_NOPS && (nd->nd_flag & ND_NFSV42) == 0)) { nd->nd_repstat = NFSERR_OPILLEGAL; *repp++ = txdr_unsigned(NFSV4OP_OPILLEGAL); *repp = nfsd_errmap(nd); diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c index ce24aa62d636..217e74f72d6b 100644 --- a/sys/fs/nfsserver/nfs_nfsdstate.c +++ b/sys/fs/nfsserver/nfs_nfsdstate.c @@ -215,7 +215,6 @@ static void nfsrv_freealllayouts(void); static void nfsrv_freedevid(struct nfsdevice *ds); static int nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p, struct nfsdevice **dsp); -static int nfsrv_delds(char *devid, NFSPROC_T *p); static void nfsrv_deleteds(struct nfsdevice *fndds); static void nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost); static void nfsrv_freealldevids(void); @@ -4455,6 +4454,8 @@ nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp, nd->nd_flag |= ND_KERBV; if ((clp->lc_flags & LCL_NFSV41) != 0) nd->nd_flag |= ND_NFSV41; + if ((clp->lc_flags & LCL_NFSV42) != 0) + nd->nd_flag |= ND_NFSV42; nd->nd_repstat = 0; cred->cr_uid = clp->lc_uid; cred->cr_gid = clp->lc_gid; @@ -4653,7 +4654,10 @@ nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp, (void)nfsm_strtom(nd, optag, len); NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); if ((nd->nd_flag & ND_NFSV41) != 0) { - *tl++ = txdr_unsigned(NFSV41_MINORVERSION); + if 
((nd->nd_flag & ND_NFSV42) != 0) + *tl++ = txdr_unsigned(NFSV42_MINORVERSION); + else + *tl++ = txdr_unsigned(NFSV41_MINORVERSION); *tl++ = txdr_unsigned(callback); *tl++ = txdr_unsigned(2); *tl = txdr_unsigned(NFSV4OP_CBSEQUENCE); @@ -5386,13 +5390,16 @@ nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p, * delegations. */ APPLESTATIC int -nfsrv_checkremove(vnode_t vp, int remove, NFSPROC_T *p) +nfsrv_checkremove(vnode_t vp, int remove, struct nfsrv_descript *nd, + nfsquad_t clientid, NFSPROC_T *p) { + struct nfsclient *clp; struct nfsstate *stp; struct nfslockfile *lfp; int error, haslock = 0; fhandle_t nfh; + clp = NULL; /* * First, get the lock file structure. * (A return of -1 means no associated state, so remove ok.) @@ -5400,6 +5407,9 @@ nfsrv_checkremove(vnode_t vp, int remove, NFSPROC_T *p) error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p); tryagain: NFSLOCKSTATE(); + if (error == 0 && clientid.qval != 0) + error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, + (nfsquad_t)((u_quad_t)0), 0, nd, p); if (!error) error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0); if (error) { @@ -5417,7 +5427,7 @@ nfsrv_checkremove(vnode_t vp, int remove, NFSPROC_T *p) /* * Now, we must Recall any delegations. */ - error = nfsrv_cleandeleg(vp, lfp, NULL, &haslock, p); + error = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p); if (error) { /* * nfsrv_cleandeleg() unlocks state for non-zero @@ -5554,7 +5564,8 @@ nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p) starttime = NFSD_MONOSEC; do { if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) { - error = nfsrv_checkremove(vp, 0, p); + error = nfsrv_checkremove(vp, 0, NULL, + (nfsquad_t)((u_quad_t)0), p); NFSVOPUNLOCK(vp, 0); } else error = EPERM; @@ -6200,6 +6211,10 @@ nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid, nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval; nd->nd_flag |= ND_IMPLIEDCLID; + /* Save maximum request and reply sizes. 
*/ + nd->nd_maxreq = sep->sess_maxreq; + nd->nd_maxresp = sep->sess_maxresp; + /* * If this session handles the backchannel, save the nd_xprt for this * RPC, since this is the one being used. @@ -7747,7 +7762,7 @@ nfsrv_deldsnmp(int op, struct nfsmount *nmp, NFSPROC_T *p) * point. * Also, returns an error instead of the nfsdevice found. */ -static int +APPLESTATIC int nfsrv_delds(char *devid, NFSPROC_T *p) { struct nfsdevice *ds, *fndds; @@ -7879,7 +7894,7 @@ nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost) * as defined for Flexible File Layout) in XDR. */ addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) + - 9 * NFSX_UNSIGNED; + 14 * NFSX_UNSIGNED; ds->nfsdev_flexaddrlen = addrlen; tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO); ds->nfsdev_flexaddr = (char *)tl; @@ -7891,7 +7906,12 @@ nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost) *tl++ = txdr_unsigned(strlen(addr)); NFSBCOPY(addr, tl, strlen(addr)); tl += (NFSM_RNDUP(strlen(addr)) / NFSX_UNSIGNED); - *tl++ = txdr_unsigned(1); /* One NFS Version. */ + *tl++ = txdr_unsigned(2); /* Two NFS Versions. */ + *tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */ + *tl++ = txdr_unsigned(NFSV42_MINORVERSION); /* Minor version 2. */ + *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max rsize. */ + *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max wsize. */ + *tl++ = newnfs_true; /* Tightly coupled. */ *tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */ *tl++ = txdr_unsigned(NFSV41_MINORVERSION); /* Minor version 1. */ *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max rsize. 
*/ diff --git a/sys/fs/nfsserver/nfs_nfsdsubs.c b/sys/fs/nfsserver/nfs_nfsdsubs.c index f7df2793ceea..64ed01ced919 100644 --- a/sys/fs/nfsserver/nfs_nfsdsubs.c +++ b/sys/fs/nfsserver/nfs_nfsdsubs.c @@ -1544,7 +1544,7 @@ nfsrv_isannfserr(u_int32_t errval) if (errval == NFSERR_OK) return (errval); - if (errval >= NFSERR_BADHANDLE && errval <= NFSERR_DELEGREVOKED) + if (errval >= NFSERR_BADHANDLE && errval <= NFSERR_MAXERRVAL) return (errval); if (errval > 0 && errval <= NFSERR_REMOTE) return (nfsrv_v2errmap[errval - 1]); @@ -2121,6 +2121,8 @@ nfsd_getminorvers(struct nfsrv_descript *nd, u_char *tag, u_char **tagstrp, *tagstrp = tagstr; if (*minversp == NFSV41_MINORVERSION) nd->nd_flag |= ND_NFSV41; + else if (*minversp == NFSV42_MINORVERSION) + nd->nd_flag |= (ND_NFSV41 | ND_NFSV42); nfsmout: if (error != 0) { if (tagstr != NULL && taglen > NFSV4_SMALLSTR)