diff --git a/sys/fs/nfs/nfs.h b/sys/fs/nfs/nfs.h index 2c811b7150fc..87f5716ed5bd 100644 --- a/sys/fs/nfs/nfs.h +++ b/sys/fs/nfs/nfs.h @@ -668,6 +668,8 @@ struct nfsrv_descript { uint32_t *nd_sequence; /* Sequence Op. ptr */ nfsv4stateid_t nd_curstateid; /* Current StateID */ nfsv4stateid_t nd_savedcurstateid; /* Saved Current StateID */ + uint32_t nd_maxreq; /* Max. request (session). */ + uint32_t nd_maxresp; /* Max. reply (session). */ }; #define nd_princlen nd_gssnamelen diff --git a/sys/fs/nfs/nfs_commonport.c b/sys/fs/nfs/nfs_commonport.c index 1a1e64cd823b..5f94ab31e02d 100644 --- a/sys/fs/nfs/nfs_commonport.c +++ b/sys/fs/nfs/nfs_commonport.c @@ -80,6 +80,7 @@ int nfs_pnfsio(task_fn_t *, void *); static int nfs_realign_test; static int nfs_realign_count; static struct ext_nfsstats oldnfsstats; +static struct nfsstatsov1 nfsstatsov1; SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem"); SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, @@ -580,11 +581,143 @@ nfssvc_call(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) } else { error = copyin(uap->argp, &nfsstatver, sizeof(nfsstatver)); - if (error == 0 && nfsstatver.vers != NFSSTATS_V1) - error = EPERM; - if (error == 0) - error = copyout(&nfsstatsv1, uap->argp, - sizeof (nfsstatsv1)); + if (error == 0) { + if (nfsstatver.vers == NFSSTATS_OV1) { + /* Copy nfsstatsv1 to nfsstatsov1. 
*/ + nfsstatsov1.attrcache_hits = + nfsstatsv1.attrcache_hits; + nfsstatsov1.attrcache_misses = + nfsstatsv1.attrcache_misses; + nfsstatsov1.lookupcache_hits = + nfsstatsv1.lookupcache_hits; + nfsstatsov1.lookupcache_misses = + nfsstatsv1.lookupcache_misses; + nfsstatsov1.direofcache_hits = + nfsstatsv1.direofcache_hits; + nfsstatsov1.direofcache_misses = + nfsstatsv1.direofcache_misses; + nfsstatsov1.accesscache_hits = + nfsstatsv1.accesscache_hits; + nfsstatsov1.accesscache_misses = + nfsstatsv1.accesscache_misses; + nfsstatsov1.biocache_reads = + nfsstatsv1.biocache_reads; + nfsstatsov1.read_bios = + nfsstatsv1.read_bios; + nfsstatsov1.read_physios = + nfsstatsv1.read_physios; + nfsstatsov1.biocache_writes = + nfsstatsv1.biocache_writes; + nfsstatsov1.write_bios = + nfsstatsv1.write_bios; + nfsstatsov1.write_physios = + nfsstatsv1.write_physios; + nfsstatsov1.biocache_readlinks = + nfsstatsv1.biocache_readlinks; + nfsstatsov1.readlink_bios = + nfsstatsv1.readlink_bios; + nfsstatsov1.biocache_readdirs = + nfsstatsv1.biocache_readdirs; + nfsstatsov1.readdir_bios = + nfsstatsv1.readdir_bios; + for (i = 0; i < NFSV42_NPROCS; i++) + nfsstatsov1.rpccnt[i] = + nfsstatsv1.rpccnt[i]; + nfsstatsov1.rpcretries = + nfsstatsv1.rpcretries; + for (i = 0; i < NFSV42_PURENOPS; i++) + nfsstatsov1.srvrpccnt[i] = + nfsstatsv1.srvrpccnt[i]; + for (i = NFSV42_NOPS, + j = NFSV42_PURENOPS; + i < NFSV42_NOPS + NFSV4OP_FAKENOPS; + i++, j++) + nfsstatsov1.srvrpccnt[j] = + nfsstatsv1.srvrpccnt[i]; + nfsstatsov1.srvrpc_errs = + nfsstatsv1.srvrpc_errs; + nfsstatsov1.srv_errs = + nfsstatsv1.srv_errs; + nfsstatsov1.rpcrequests = + nfsstatsv1.rpcrequests; + nfsstatsov1.rpctimeouts = + nfsstatsv1.rpctimeouts; + nfsstatsov1.rpcunexpected = + nfsstatsv1.rpcunexpected; + nfsstatsov1.rpcinvalid = + nfsstatsv1.rpcinvalid; + nfsstatsov1.srvcache_inproghits = + nfsstatsv1.srvcache_inproghits; + nfsstatsov1.srvcache_idemdonehits = + nfsstatsv1.srvcache_idemdonehits; + 
nfsstatsov1.srvcache_nonidemdonehits = + nfsstatsv1.srvcache_nonidemdonehits; + nfsstatsov1.srvcache_misses = + nfsstatsv1.srvcache_misses; + nfsstatsov1.srvcache_tcppeak = + nfsstatsv1.srvcache_tcppeak; + nfsstatsov1.srvcache_size = + nfsstatsv1.srvcache_size; + nfsstatsov1.srvclients = + nfsstatsv1.srvclients; + nfsstatsov1.srvopenowners = + nfsstatsv1.srvopenowners; + nfsstatsov1.srvopens = + nfsstatsv1.srvopens; + nfsstatsov1.srvlockowners = + nfsstatsv1.srvlockowners; + nfsstatsov1.srvlocks = + nfsstatsv1.srvlocks; + nfsstatsov1.srvdelegates = + nfsstatsv1.srvdelegates; + for (i = 0; i < NFSV42_CBNOPS; i++) + nfsstatsov1.cbrpccnt[i] = + nfsstatsv1.cbrpccnt[i]; + nfsstatsov1.clopenowners = + nfsstatsv1.clopenowners; + nfsstatsov1.clopens = + nfsstatsv1.clopens; + nfsstatsov1.cllockowners = + nfsstatsv1.cllockowners; + nfsstatsov1.cllocks = + nfsstatsv1.cllocks; + nfsstatsov1.cldelegates = + nfsstatsv1.cldelegates; + nfsstatsov1.cllocalopenowners = + nfsstatsv1.cllocalopenowners; + nfsstatsov1.cllocalopens = + nfsstatsv1.cllocalopens; + nfsstatsov1.cllocallockowners = + nfsstatsv1.cllocallockowners; + nfsstatsov1.cllocallocks = + nfsstatsv1.cllocallocks; + nfsstatsov1.srvstartcnt = + nfsstatsv1.srvstartcnt; + nfsstatsov1.srvdonecnt = + nfsstatsv1.srvdonecnt; + for (i = NFSV42_NOPS, + j = NFSV42_PURENOPS; + i < NFSV42_NOPS + NFSV4OP_FAKENOPS; + i++, j++) { + nfsstatsov1.srvbytes[j] = + nfsstatsv1.srvbytes[i]; + nfsstatsov1.srvops[j] = + nfsstatsv1.srvops[i]; + nfsstatsov1.srvduration[j] = + nfsstatsv1.srvduration[i]; + } + nfsstatsov1.busyfrom = + nfsstatsv1.busyfrom; + nfsstatsov1.busytime = + nfsstatsv1.busytime; + error = copyout(&nfsstatsov1, uap->argp, + sizeof(nfsstatsov1)); + } else if (nfsstatver.vers != NFSSTATS_V1) + error = EPERM; + else + error = copyout(&nfsstatsv1, uap->argp, + sizeof(nfsstatsv1)); + } } if (error == 0) { if ((uap->flag & NFSSVC_ZEROCLTSTATS) != 0) { diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c index 
0f43cebc5049..9d6511dfcbfd 100644 --- a/sys/fs/nfs/nfs_commonsubs.c +++ b/sys/fs/nfs/nfs_commonsubs.c @@ -47,6 +47,8 @@ __FBSDID("$FreeBSD$"); #include +#include + #include /* @@ -91,6 +93,10 @@ int nfsrv_maxpnfsmirror = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, pnfsmirror, CTLFLAG_RD, &nfsrv_maxpnfsmirror, 0, "Mirror level for pNFS service"); +int nfs_maxcopyrange = 10 * 1024 * 1024; +SYSCTL_INT(_vfs_nfs, OID_AUTO, maxcopyrange, CTLFLAG_RW, + &nfs_maxcopyrange, 0, "Max size of a Copy so RPC times reasonable"); + /* * This array of structures indicates, for V4: * retfh - which of 3 types of calling args are used @@ -108,7 +114,7 @@ SYSCTL_INT(_vfs_nfs, OID_AUTO, pnfsmirror, CTLFLAG_RD, * non-idempotent Ops. * Define it here, since it is used by both the client and server. */ -struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS] = { +struct nfsv4_opflag nfsv4_opflag[NFSV42_NOPS] = { { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* undef */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* undef */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* undef */ @@ -168,6 +174,23 @@ struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS] = { { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Want Delegation */ { 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 }, /* Destroy ClientID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Reclaim Complete */ + { 0, 1, 1, 1, LK_EXCLUSIVE, 1, 0 }, /* Allocate */ + { 2, 1, 1, 0, LK_SHARED, 1, 0 }, /* Copy */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Copy Notify */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Deallocate */ + { 0, 1, 0, 0, LK_SHARED, 1, 0 }, /* IO Advise */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Layout Error */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Layout Stats */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Offload Cancel */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Offload Status */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Read Plus */ + { 0, 1, 0, 0, LK_SHARED, 1, 0 }, /* Seek */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Write Same */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Clone */ + { 0, 1, 0, 0, LK_SHARED, 1, 
1 }, /* Getxattr */ + { 0, 1, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Setxattr */ + { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* Listxattrs */ + { 0, 1, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Removexattr */ }; #endif /* !APPLEKEXT */ @@ -192,9 +215,10 @@ static struct nfsrv_lughash *nfsgroupnamehash; * marked 0 in this array, the code will still work, just not quite as * efficiently.) */ -static int nfs_bigreply[NFSV41_NPROCS] = { 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, +static int nfs_bigreply[NFSV42_NPROCS] = { 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 }; + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 1 }; /* local functions */ static int nfsrv_skipace(struct nfsrv_descript *nd, int *acesizep); @@ -211,7 +235,7 @@ static struct { int opcnt; const u_char *tag; int taglen; -} nfsv4_opmap[NFSV41_NPROCS] = { +} nfsv4_opmap[NFSV42_NPROCS] = { { 0, 1, "Null", 4 }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_SETATTR, 2, "Setattr", 7, }, @@ -268,15 +292,24 @@ static struct { { NFSV4OP_COMMIT, 1, "CommitDS", 8, }, { NFSV4OP_OPEN, 3, "OpenLayoutGet", 13, }, { NFSV4OP_OPEN, 8, "CreateLayGet", 12, }, + { NFSV4OP_IOADVISE, 1, "Advise", 6, }, + { NFSV4OP_ALLOCATE, 2, "Allocate", 8, }, + { NFSV4OP_SAVEFH, 5, "Copy", 4, }, + { NFSV4OP_SEEK, 2, "Seek", 4, }, + { NFSV4OP_SEEK, 1, "SeekDS", 6, }, + { NFSV4OP_GETXATTR, 2, "Getxattr", 8, }, + { NFSV4OP_SETXATTR, 2, "Setxattr", 8, }, + { NFSV4OP_REMOVEXATTR, 2, "Rmxattr", 7, }, + { NFSV4OP_LISTXATTRS, 2, "Listxattr", 9, }, }; /* * NFS RPCS that have large request message size. 
*/ -static int nfs_bigrequest[NFSV41_NPROCS] = { +static int nfs_bigrequest[NFSV42_NPROCS] = { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 + 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }; /* @@ -301,13 +334,17 @@ nfscl_reqstart(struct nfsrv_descript *nd, int procnum, struct nfsmount *nmp, nd->nd_flag = ND_NFSV4 | ND_NFSCL; if (minorvers == NFSV41_MINORVERSION) nd->nd_flag |= ND_NFSV41; + else if (minorvers == NFSV42_MINORVERSION) + nd->nd_flag |= (ND_NFSV41 | ND_NFSV42); } else if (vers == NFS_VER3) nd->nd_flag = ND_NFSV3 | ND_NFSCL; else { if (NFSHASNFSV4(nmp)) { nd->nd_flag = ND_NFSV4 | ND_NFSCL; - if (NFSHASNFSV4N(nmp)) + if (nmp->nm_minorvers == 1) nd->nd_flag |= ND_NFSV41; + else if (nmp->nm_minorvers == 2) + nd->nd_flag |= (ND_NFSV41 | ND_NFSV42); } else if (NFSHASNFSV3(nmp)) nd->nd_flag = ND_NFSV3 | ND_NFSCL; else @@ -356,7 +393,9 @@ nfscl_reqstart(struct nfsrv_descript *nd, int procnum, struct nfsmount *nmp, (void) nfsm_strtom(nd, nfsv4_opmap[procnum].tag, nfsv4_opmap[procnum].taglen); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); - if ((nd->nd_flag & ND_NFSV41) != 0) + if ((nd->nd_flag & ND_NFSV42) != 0) + *tl++ = txdr_unsigned(NFSV42_MINORVERSION); + else if ((nd->nd_flag & ND_NFSV41) != 0) *tl++ = txdr_unsigned(NFSV41_MINORVERSION); else *tl++ = txdr_unsigned(NFSV4_MINORVERSION); @@ -409,7 +448,7 @@ nfscl_reqstart(struct nfsrv_descript *nd, int procnum, struct nfsmount *nmp, } else { (void) nfsm_fhtom(nd, nfhp, fhlen, 0); } - if (procnum < NFSV41_NPROCS) + if (procnum < NFSV42_NPROCS) NFSINCRGLOBAL(nfsstatsv1.rpccnt[procnum]); } @@ -2449,6 +2488,8 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, struct nfsfsinfo fsinf; struct timespec temptime; NFSACL_T *aclp, *naclp = NULL; + size_t atsiz; + bool xattrsupp; #ifdef QUOTA struct dqblk dqb; uid_t savuid; @@ -2523,6 +2564,18 @@ 
nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, } } + /* Check to see if Extended Attributes are supported. */ + xattrsupp = false; + if (NFSISSET_ATTRBIT(retbitp, NFSATTRBIT_XATTRSUPPORT)) { + if (NFSVOPLOCK(vp, LK_SHARED) == 0) { + error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, + "xxx", NULL, &atsiz, cred, p); + NFSVOPUNLOCK(vp, 0); + if (error != EOPNOTSUPP) + xattrsupp = true; + } + } + /* * Put out the attribute bitmap for the ones being filled in * and get the field for the number of attributes returned. @@ -2972,6 +3025,14 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, *tl = txdr_unsigned(NFS_SRVMAXIO); retnum += NFSX_UNSIGNED; break; + case NFSATTRBIT_XATTRSUPPORT: + NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); + if (xattrsupp) + *tl = newnfs_true; + else + *tl = newnfs_false; + retnum += NFSX_UNSIGNED; + break; default: printf("EEK! Bad V4 attribute bitpos=%d\n", bitpos); } @@ -4629,6 +4690,8 @@ nfsv4_setsequence(struct nfsmount *nmp, struct nfsrv_descript *nd, error = nfsv4_sequencelookup(nmp, sep, &slotpos, &maxslot, &slotseq, sessionid); + nd->nd_maxreq = sep->nfsess_maxreq; + nd->nd_maxresp = sep->nfsess_maxresp; /* Build the Sequence arguments. 
*/ NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED); diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h index ea2af575cfb8..829448d88233 100644 --- a/sys/fs/nfs/nfs_var.h +++ b/sys/fs/nfs/nfs_var.h @@ -128,7 +128,8 @@ void nfsrv_setupstable(NFSPROC_T *); void nfsrv_updatestable(NFSPROC_T *); void nfsrv_writestable(u_char *, int, int, NFSPROC_T *); void nfsrv_throwawayopens(NFSPROC_T *); -int nfsrv_checkremove(vnode_t, int, NFSPROC_T *); +int nfsrv_checkremove(vnode_t, int, struct nfsrv_descript *, nfsquad_t, + NFSPROC_T *); void nfsd_recalldelegation(vnode_t, NFSPROC_T *); void nfsd_disabledelegation(vnode_t, NFSPROC_T *); int nfsrv_checksetattr(vnode_t, struct nfsrv_descript *, @@ -161,6 +162,7 @@ void nfsrv_freealllayoutsanddevids(void); void nfsrv_freefilelayouts(fhandle_t *); int nfsrv_deldsserver(int, char *, NFSPROC_T *); struct nfsdevice *nfsrv_deldsnmp(int, struct nfsmount *, NFSPROC_T *); +int nfsrv_delds(char *, NFSPROC_T *); int nfsrv_createdevids(struct nfsd_nfsd_args *, NFSPROC_T *); int nfsrv_checkdsattr(vnode_t, NFSPROC_T *); int nfsrv_copymr(vnode_t, vnode_t, vnode_t, struct nfsdevice *, @@ -268,8 +270,28 @@ int nfsrvd_layoutcommit(struct nfsrv_descript *, int, vnode_t, struct nfsexstuff *); int nfsrvd_layoutreturn(struct nfsrv_descript *, int, vnode_t, struct nfsexstuff *); +int nfsrvd_ioadvise(struct nfsrv_descript *, int, + vnode_t, struct nfsexstuff *); +int nfsrvd_layouterror(struct nfsrv_descript *, int, + vnode_t, struct nfsexstuff *); +int nfsrvd_layoutstats(struct nfsrv_descript *, int, + vnode_t, struct nfsexstuff *); int nfsrvd_teststateid(struct nfsrv_descript *, int, vnode_t, struct nfsexstuff *); +int nfsrvd_allocate(struct nfsrv_descript *, int, + vnode_t, struct nfsexstuff *); +int nfsrvd_copy_file_range(struct nfsrv_descript *, int, + vnode_t, vnode_t, struct nfsexstuff *, struct nfsexstuff *); +int nfsrvd_seek(struct nfsrv_descript *, int, + vnode_t, struct nfsexstuff *); +int nfsrvd_getxattr(struct 
nfsrv_descript *, int, + vnode_t, struct nfsexstuff *); +int nfsrvd_setxattr(struct nfsrv_descript *, int, + vnode_t, struct nfsexstuff *); +int nfsrvd_rmxattr(struct nfsrv_descript *, int, + vnode_t, struct nfsexstuff *); +int nfsrvd_listxattr(struct nfsrv_descript *, int, + vnode_t, struct nfsexstuff *); int nfsrvd_notsupp(struct nfsrv_descript *, int, vnode_t, struct nfsexstuff *); @@ -501,10 +523,11 @@ int nfsrpc_delegreturn(struct nfscldeleg *, struct ucred *, int nfsrpc_getacl(vnode_t, struct ucred *, NFSPROC_T *, NFSACL_T *, void *); int nfsrpc_setacl(vnode_t, struct ucred *, NFSPROC_T *, NFSACL_T *, void *); int nfsrpc_exchangeid(struct nfsmount *, struct nfsclclient *, - struct nfssockreq *, uint32_t, struct nfsclds **, struct ucred *, + struct nfssockreq *, int, uint32_t, struct nfsclds **, struct ucred *, NFSPROC_T *); int nfsrpc_createsession(struct nfsmount *, struct nfsclsession *, - struct nfssockreq *, uint32_t, int, struct ucred *, NFSPROC_T *); + struct nfssockreq *, struct nfsclds *, uint32_t, int, struct ucred *, + NFSPROC_T *); int nfsrpc_destroysession(struct nfsmount *, struct nfsclclient *, struct ucred *, NFSPROC_T *); int nfsrpc_destroyclient(struct nfsmount *, struct nfsclclient *, @@ -518,11 +541,27 @@ int nfsrpc_layoutreturn(struct nfsmount *, uint8_t *, int, int, int, uint32_t, int, uint64_t, uint64_t, nfsv4stateid_t *, struct ucred *, NFSPROC_T *, uint32_t, uint32_t, char *); int nfsrpc_reclaimcomplete(struct nfsmount *, struct ucred *, NFSPROC_T *); +int nfsrpc_advise(vnode_t, off_t, uint64_t, int, struct ucred *, NFSPROC_T *); int nfscl_doiods(vnode_t, struct uio *, int *, int *, uint32_t, int, struct ucred *, NFSPROC_T *); int nfscl_findlayoutforio(struct nfscllayout *, uint64_t, uint32_t, struct nfsclflayout **); void nfscl_freenfsclds(struct nfsclds *); +int nfsrpc_allocate(vnode_t, off_t, off_t, struct nfsvattr *, int *, + struct ucred *, NFSPROC_T *, void *); +int nfsrpc_copy_file_range(vnode_t, off_t *, vnode_t, off_t *, 
size_t *, + unsigned int, int *, struct nfsvattr *, int *, struct nfsvattr *, + struct ucred *, bool, bool *); +int nfsrpc_seek(vnode_t, off_t *, bool *, int, struct ucred *, + struct nfsvattr *, int *); +int nfsrpc_getextattr(vnode_t, const char *, struct uio *, ssize_t *, + struct nfsvattr *, int *, struct ucred *, NFSPROC_T *); +int nfsrpc_setextattr(vnode_t, const char *, struct uio *, struct nfsvattr *, + int *, struct ucred *, NFSPROC_T *); +int nfsrpc_listextattr(vnode_t, uint64_t *, struct uio *, size_t *, bool *, + struct nfsvattr *, int *, struct ucred *, NFSPROC_T *); +int nfsrpc_rmextattr(vnode_t, const char *, struct nfsvattr *, int *, + struct ucred *, NFSPROC_T *); /* nfs_clstate.c */ int nfscl_open(vnode_t, u_int8_t *, int, u_int32_t, int, @@ -644,8 +683,8 @@ int nfsvno_readlink(vnode_t, struct ucred *, NFSPROC_T *, mbuf_t *, mbuf_t *, int *); int nfsvno_read(vnode_t, off_t, int, struct ucred *, NFSPROC_T *, mbuf_t *, mbuf_t *); -int nfsvno_write(vnode_t, off_t, int, int, int *, mbuf_t, - char *, struct ucred *, NFSPROC_T *); +int nfsvno_write(vnode_t, off_t, int, int *, mbuf_t, char *, struct ucred *, + NFSPROC_T *); int nfsvno_createsub(struct nfsrv_descript *, struct nameidata *, vnode_t *, struct nfsvattr *, int *, int32_t *, NFSDEV_T, struct nfsexstuff *); @@ -704,6 +743,17 @@ int nfsrv_dscreate(struct vnode *, struct vattr *, struct vattr *, int nfsrv_updatemdsattr(struct vnode *, struct nfsvattr *, NFSPROC_T *); void nfsrv_killrpcs(struct nfsmount *); int nfsrv_setacl(struct vnode *, NFSACL_T *, struct ucred *, NFSPROC_T *); +int nfsvno_seek(struct nfsrv_descript *, struct vnode *, u_long, off_t *, int, + bool *, struct ucred *, NFSPROC_T *); +int nfsvno_allocate(struct vnode *, off_t, off_t, struct ucred *, NFSPROC_T *); +int nfsvno_getxattr(struct vnode *, char *, uint32_t, struct ucred *, + struct thread *, struct mbuf **, struct mbuf **, int *); +int nfsvno_setxattr(struct vnode *, char *, int, struct mbuf *, char *, + struct ucred *, 
struct thread *); +int nfsvno_rmxattr(struct nfsrv_descript *, struct vnode *, char *, + struct ucred *, struct thread *); +int nfsvno_listxattr(struct vnode *, uint64_t, struct ucred *, struct thread *, + u_char **, uint32_t *, bool *); /* nfs_commonkrpc.c */ int newnfs_nmcancelreqs(struct nfsmount *); diff --git a/sys/fs/nfs/nfsclstate.h b/sys/fs/nfs/nfsclstate.h index 2ada4bfc5540..e17be74c5581 100644 --- a/sys/fs/nfs/nfsclstate.h +++ b/sys/fs/nfs/nfsclstate.h @@ -64,6 +64,8 @@ struct nfsclsession { uint64_t nfsess_slots; uint32_t nfsess_sequenceid; uint32_t nfsess_maxcache; /* Max size for cached reply. */ + uint32_t nfsess_maxreq; /* Max request size. */ + uint32_t nfsess_maxresp; /* Max reply size. */ uint16_t nfsess_foreslots; uint16_t nfsess_backslots; uint8_t nfsess_sessionid[NFSX_V4SESSIONID]; @@ -72,7 +74,7 @@ struct nfsclsession { /* * This structure holds the session, clientid and related information - * needed for an NFSv4.1 Meta Data Server (MDS) or Data Server (DS). + * needed for an NFSv4.1 or NFSv4.2 Meta Data Server (MDS) or Data Server (DS). * It is malloc'd to the correct length. */ struct nfsclds { @@ -95,6 +97,7 @@ struct nfsclds { #define NFSCLDS_DS 0x0004 #define NFSCLDS_CLOSED 0x0008 #define NFSCLDS_SAMECONN 0x0010 +#define NFSCLDS_MINORV2 0x0020 struct nfsclclient { LIST_ENTRY(nfsclclient) nfsc_list; diff --git a/sys/fs/nfs/nfsport.h b/sys/fs/nfs/nfsport.h index 3b1309e57220..5cd4ca023911 100644 --- a/sys/fs/nfs/nfsport.h +++ b/sys/fs/nfs/nfsport.h @@ -289,7 +289,7 @@ /* * Must be one more than the last NFSv4.2 op#. */ -#define NFSV42_NOPS 72 +#define NFSV42_NOPS 76 /* Quirky case if the illegal op code */ #define NFSV4OP_OPILLEGAL 10044 @@ -423,10 +423,10 @@ #endif /* NFS_V3NPROCS */ /* - * New stats structure. + * Newest stats structure. * The vers field will be set to NFSSTATS_V1 by the caller. */ -#define NFSSTATS_V1 1 +#define NFSSTATS_V1 2 struct nfsstatsv1 { int vers; /* Set to version requested by caller. 
*/ uint64_t attrcache_hits; @@ -447,9 +447,74 @@ struct nfsstatsv1 { uint64_t readlink_bios; uint64_t biocache_readdirs; uint64_t readdir_bios; - uint64_t rpccnt[NFSV41_NPROCS + 13]; + uint64_t rpccnt[NFSV42_NPROCS + 15]; uint64_t rpcretries; - uint64_t srvrpccnt[NFSV42_NOPS + NFSV4OP_FAKENOPS]; + uint64_t srvrpccnt[NFSV42_NOPS + NFSV4OP_FAKENOPS + 15]; + uint64_t srvrpc_errs; + uint64_t srv_errs; + uint64_t rpcrequests; + uint64_t rpctimeouts; + uint64_t rpcunexpected; + uint64_t rpcinvalid; + uint64_t srvcache_inproghits; + uint64_t srvcache_idemdonehits; + uint64_t srvcache_nonidemdonehits; + uint64_t srvcache_misses; + uint64_t srvcache_tcppeak; + int srvcache_size; /* Updated by atomic_xx_int(). */ + uint64_t srvclients; + uint64_t srvopenowners; + uint64_t srvopens; + uint64_t srvlockowners; + uint64_t srvlocks; + uint64_t srvdelegates; + uint64_t cbrpccnt[NFSV42_CBNOPS + 10]; + uint64_t clopenowners; + uint64_t clopens; + uint64_t cllockowners; + uint64_t cllocks; + uint64_t cldelegates; + uint64_t cllocalopenowners; + uint64_t cllocalopens; + uint64_t cllocallockowners; + uint64_t cllocallocks; + uint64_t srvstartcnt; + uint64_t srvdonecnt; + uint64_t srvbytes[NFSV42_NOPS + NFSV4OP_FAKENOPS + 15]; + uint64_t srvops[NFSV42_NOPS + NFSV4OP_FAKENOPS + 15]; + struct bintime srvduration[NFSV42_NOPS + NFSV4OP_FAKENOPS + 15]; + struct bintime busyfrom; + struct bintime busytime; +}; + +/* + * Newer stats structure. + * The vers field will be set to NFSSTATS_OV1 by the caller. + */ +#define NFSSTATS_OV1 1 +struct nfsstatsov1 { + int vers; /* Set to version requested by caller. 
*/ + uint64_t attrcache_hits; + uint64_t attrcache_misses; + uint64_t lookupcache_hits; + uint64_t lookupcache_misses; + uint64_t direofcache_hits; + uint64_t direofcache_misses; + uint64_t accesscache_hits; + uint64_t accesscache_misses; + uint64_t biocache_reads; + uint64_t read_bios; + uint64_t read_physios; + uint64_t biocache_writes; + uint64_t write_bios; + uint64_t write_physios; + uint64_t biocache_readlinks; + uint64_t readlink_bios; + uint64_t biocache_readdirs; + uint64_t readdir_bios; + uint64_t rpccnt[NFSV42_NPROCS + 4]; + uint64_t rpcretries; + uint64_t srvrpccnt[NFSV42_PURENOPS + NFSV4OP_FAKENOPS]; uint64_t srvrpc_errs; uint64_t srv_errs; uint64_t rpcrequests; @@ -480,9 +545,9 @@ struct nfsstatsv1 { uint64_t cllocallocks; uint64_t srvstartcnt; uint64_t srvdonecnt; - uint64_t srvbytes[NFSV42_NOPS + NFSV4OP_FAKENOPS]; - uint64_t srvops[NFSV42_NOPS + NFSV4OP_FAKENOPS]; - struct bintime srvduration[NFSV42_NOPS + NFSV4OP_FAKENOPS]; + uint64_t srvbytes[NFSV42_PURENOPS + NFSV4OP_FAKENOPS]; + uint64_t srvops[NFSV42_PURENOPS + NFSV4OP_FAKENOPS]; + struct bintime srvduration[NFSV42_PURENOPS + NFSV4OP_FAKENOPS]; struct bintime busyfrom; struct bintime busytime; }; diff --git a/sys/fs/nfs/nfsproto.h b/sys/fs/nfs/nfsproto.h index 2b26b394f9c9..1578eece7207 100644 --- a/sys/fs/nfs/nfsproto.h +++ b/sys/fs/nfs/nfsproto.h @@ -1070,7 +1070,7 @@ struct nfsv3_sattr { /* Not sure what attribute bit#81/0x00020000 is? */ #define NFSATTRBM_XATTRSUPPORT 0x00040000 -#define NFSATTRBIT_MAX 77 +#define NFSATTRBIT_MAX 83 /* * Sets of attributes that are supported, by words in the bitmap. 
diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c index 60a55a4d14aa..f0237649d042 100644 --- a/sys/fs/nfsclient/nfs_clrpcops.c +++ b/sys/fs/nfsclient/nfs_clrpcops.c @@ -48,6 +48,8 @@ __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include +#include +#include #include #include @@ -72,6 +74,8 @@ extern int nfsrv_useacl; extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN]; extern int nfscl_debuglevel; extern int nfs_pnfsiothreads; +extern u_long sb_max_adj; +extern int nfs_maxcopyrange; NFSCLSTATEMUTEX; int nfstest_outofseq = 0; int nfscl_assumeposixlocks = 1; @@ -110,6 +114,9 @@ struct nfsclwritedsdorpc { struct nfsclds *dsp; uint64_t off; int len; +#ifdef notyet + int advise; +#endif struct nfsfh *fhp; struct mbuf *m; int vers; @@ -142,7 +149,8 @@ static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int, uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **, struct ucred *, NFSPROC_T *); static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_in *, - struct sockaddr_in6 *, sa_family_t, int, struct nfsclds **, NFSPROC_T *); + struct sockaddr_in6 *, sa_family_t, int, int, struct nfsclds **, + NFSPROC_T *); static void nfscl_initsessionslots(struct nfsclsession *); static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *, nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *, @@ -172,12 +180,21 @@ static int nfsio_commitds(vnode_t, uint64_t, int, struct nfsclds *, NFSPROC_T *); static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *, struct nfsfh *, int, int, struct ucred *, NFSPROC_T *); +#ifdef notyet +static int nfsio_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *, + struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *, + NFSPROC_T *); +static int nfsrpc_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *, + struct nfsfh *, int, int, struct ucred *, NFSPROC_T *); +#endif +static int nfsrpc_allocaterpc(vnode_t, off_t, off_t, nfsv4stateid_t *, + 
struct nfsvattr *, int *, struct ucred *, NFSPROC_T *, void *); static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t, uint64_t, uint64_t, nfsv4stateid_t *, int, int, int); static int nfsrv_parseug(struct nfsrv_descript *, int, uid_t *, gid_t *, NFSPROC_T *); -static int nfsrv_parselayoutget(struct nfsrv_descript *, nfsv4stateid_t *, - int *, struct nfsclflayouthead *); +static int nfsrv_parselayoutget(struct nfsmount *, struct nfsrv_descript *, + nfsv4stateid_t *, int *, struct nfsclflayouthead *); static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *, int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int, struct nfscldeleg **, struct ucred *, NFSPROC_T *); @@ -200,6 +217,11 @@ static int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t, static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *, int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **, struct nfsclflayouthead *, int, int, int *, struct ucred *, NFSPROC_T *); +static int nfsrpc_copyrpc(vnode_t, off_t, vnode_t, off_t, size_t *, + nfsv4stateid_t *, nfsv4stateid_t *, struct nfsvattr *, int *, + struct nfsvattr *, int *, bool, int *, struct ucred *, NFSPROC_T *); +static int nfsrpc_seekrpc(vnode_t, off_t *, nfsv4stateid_t *, bool *, + int, struct nfsvattr *, int *, struct ucred *); int nfs_pnfsio(task_fn_t *, void *); @@ -935,12 +957,12 @@ nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim, * previous session has failed, so... * do an ExchangeID followed by the CreateSession. 
*/ - error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, + error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, 0, NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp, cred, p); NFSCL_DEBUG(1, "aft exch=%d\n", error); if (error == 0) error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess, - &nmp->nm_sockreq, + &nmp->nm_sockreq, NULL, dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p); if (error == 0) { NFSLOCKMNT(nmp); @@ -4647,8 +4669,8 @@ nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p, */ int nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp, - struct nfssockreq *nrp, uint32_t exchflags, struct nfsclds **dspp, - struct ucred *cred, NFSPROC_T *p) + struct nfssockreq *nrp, int minorvers, uint32_t exchflags, + struct nfsclds **dspp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl, v41flags; struct nfsrv_descript nfsd; @@ -4658,7 +4680,10 @@ nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp, int error, len; *dspp = NULL; - nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL, 0, 0); + if (minorvers == 0) + minorvers = nmp->nm_minorvers; + nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL, + NFS_VER4, minorvers); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* Client owner */ *tl = txdr_unsigned(clp->nfsc_rev); @@ -4709,6 +4734,8 @@ nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp, } if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0) dsp->nfsclds_flags |= NFSCLDS_DS; + if (minorvers == NFSV42_MINORVERSION) + dsp->nfsclds_flags |= NFSCLDS_MINORV2; if (len > 0) nd->nd_repstat = nfsrv_mtostr(nd, dsp->nfsclds_serverown, len); @@ -4732,21 +4759,27 @@ nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp, */ int nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep, - struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred, - NFSPROC_T *p) + struct nfssockreq *nrp, struct nfsclds *dsp, uint32_t sequenceid, int mds, + 
struct ucred *cred, NFSPROC_T *p) { uint32_t crflags, maxval, *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; - int error, irdcnt; + int error, irdcnt, minorvers; /* Make sure nm_rsize, nm_wsize is set. */ if (nmp->nm_rsize > NFS_MAXBSIZE || nmp->nm_rsize == 0) nmp->nm_rsize = NFS_MAXBSIZE; if (nmp->nm_wsize > NFS_MAXBSIZE || nmp->nm_wsize == 0) nmp->nm_wsize = NFS_MAXBSIZE; - nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL, 0, - 0); + if (dsp == NULL) + minorvers = nmp->nm_minorvers; + else if ((dsp->nfsclds_flags & NFSCLDS_MINORV2) != 0) + minorvers = NFSV42_MINORVERSION; + else + minorvers = NFSV41_MINORVERSION; + nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL, + NFS_VER4, minorvers); NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); *tl++ = sep->nfsess_clientid.lval[0]; *tl++ = sep->nfsess_clientid.lval[1]; @@ -4759,8 +4792,18 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep, /* Fill in fore channel attributes. */ NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED); *tl++ = 0; /* Header pad size */ - *tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);/* Max request size */ - *tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);/* Max reply size */ + if ((nd->nd_flag & ND_NFSV42) != 0 && mds != 0 && sb_max_adj >= + nmp->nm_wsize && sb_max_adj >= nmp->nm_rsize) { + /* + * NFSv4.2 Extended Attribute operations may want to do + * requests/replies that are larger than nm_rsize/nm_wsize. 
+ */ + *tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR); + *tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR); + } else { + *tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR); + *tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR); + } *tl++ = txdr_unsigned(4096); /* Max response size cached */ *tl++ = txdr_unsigned(20); /* Max operations */ *tl++ = txdr_unsigned(64); /* Max slots */ @@ -4817,6 +4860,7 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep, else break; } + sep->nfsess_maxreq = maxval; /* Make sure nm_rsize is small enough. */ maxval = fxdr_unsigned(uint32_t, *tl++); @@ -4826,6 +4870,7 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep, else break; } + sep->nfsess_maxresp = maxval; sep->nfsess_maxcache = fxdr_unsigned(int, *tl++); tl++; @@ -4928,7 +4973,8 @@ nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode, if (error != 0) return (error); if (nd->nd_repstat == 0) - error = nfsrv_parselayoutget(nd, stateidp, retonclosep, flhp); + error = nfsrv_parselayoutget(nmp, nd, stateidp, retonclosep, + flhp); if (error == 0 && nd->nd_repstat != 0) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); @@ -4950,7 +4996,8 @@ nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype, struct sockaddr_in6 sin6, ssin6; struct nfsclds *dsp = NULL, **dspp, **gotdspp; struct nfscldevinfo *ndi; - int addrcnt = 0, bitcnt, error, gotvers, i, isudp, j, stripecnt; + int addrcnt = 0, bitcnt, error, gotminor, gotvers, i, isudp, j; + int stripecnt; uint8_t stripeindex; sa_family_t af, safilled; @@ -5082,7 +5129,8 @@ nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype, } } - gotvers = NFS_VER4; /* Always NFSv4 for File Layout. */ + gotvers = NFS_VER4; /* Default NFSv4.1 for File Layout. */ + gotminor = NFSV41_MINORVERSION; /* For Flex File, we will take one of the versions to use. 
*/ if (layouttype == NFSLAYOUT_FLEXFILE) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); @@ -5093,14 +5141,19 @@ nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype, goto nfsmout; } gotvers = 0; + gotminor = 0; for (i = 0; i < j; i++) { NFSM_DISSECT(tl, uint32_t *, 5 * NFSX_UNSIGNED); vers = fxdr_unsigned(uint32_t, *tl++); minorvers = fxdr_unsigned(uint32_t, *tl++); - if ((vers == NFS_VER4 && minorvers == - NFSV41_MINORVERSION) || (vers == NFS_VER3 && - gotvers == 0)) { + if (vers == NFS_VER3) + minorvers = 0; + if ((vers == NFS_VER4 && ((minorvers == + NFSV41_MINORVERSION && gotminor == 0) || + minorvers == NFSV42_MINORVERSION)) || + (vers == NFS_VER3 && gotvers == 0)) { gotvers = vers; + gotminor = minorvers; /* We'll take this one. */ ndi->nfsdi_versindex = i; ndi->nfsdi_vers = vers; @@ -5118,7 +5171,7 @@ nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype, } } if (gotvers == 0) { - printf("pNFS: no NFSv3 or NFSv4.1\n"); + printf("pNFS: no NFSv3, NFSv4.1 or NFSv4.2\n"); error = NFSERR_BADXDR; goto nfsmout; } @@ -5144,7 +5197,7 @@ nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype, * NFS version and IP address. 
*/ error = nfsrpc_fillsa(nmp, &ssin, &ssin6, safilled, - gotvers, &dsp, p); + gotvers, gotminor, &dsp, p); } if (error == 0) { KASSERT(gotdspp != NULL, ("gotdspp is NULL")); @@ -5373,15 +5426,15 @@ nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp, */ static int nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin, - struct sockaddr_in6 *sin6, sa_family_t af, int vers, struct nfsclds **dspp, - NFSPROC_T *p) + struct sockaddr_in6 *sin6, sa_family_t af, int vers, int minorvers, + struct nfsclds **dspp, NFSPROC_T *p) { struct sockaddr_in *msad, *sad; struct sockaddr_in6 *msad6, *sad6; struct nfsclclient *clp; struct nfssockreq *nrp; struct nfsclds *dsp, *tdsp; - int error; + int error, firsttry; enum nfsclds_state retv; uint32_t sequenceid; @@ -5492,9 +5545,16 @@ nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin, /* Now, do the exchangeid and create session. */ if (error == 0) { if (vers == NFS_VER4) { - error = nfsrpc_exchangeid(nmp, clp, nrp, - NFSV4EXCH_USEPNFSDS, &dsp, nrp->nr_cred, p); - NFSCL_DEBUG(3, "DS exchangeid=%d\n", error); + firsttry = 0; + do { + error = nfsrpc_exchangeid(nmp, clp, nrp, + minorvers, NFSV4EXCH_USEPNFSDS, &dsp, + nrp->nr_cred, p); + NFSCL_DEBUG(3, "DS exchangeid=%d\n", error); + if (error == NFSERR_MINORVERMISMATCH) + minorvers = NFSV42_MINORVERSION; + } while (error == NFSERR_MINORVERMISMATCH && + firsttry++ == 0); if (error != 0) newnfs_disconnect(nrp); } else { @@ -5534,7 +5594,7 @@ nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin, dsp->nfsclds_sess.nfsess_sequenceid; NFSUNLOCKMNT(nmp); error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess, - nrp, sequenceid, 0, nrp->nr_cred, p); + nrp, dsp, sequenceid, 0, nrp->nr_cred, p); NFSCL_DEBUG(3, "DS createsess=%d\n", error); } } else { @@ -5896,7 +5956,7 @@ nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess, } /* - * Do I/O using an NFSv4.1 file layout. + * Do I/O using an NFSv4.1 or NFSv4.2 file layout. 
*/ static int nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, @@ -5905,7 +5965,7 @@ nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p) { uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer; - int commit_thru_mds, error, stripe_index, stripe_pos; + int commit_thru_mds, error, stripe_index, stripe_pos, minorvers; struct nfsnode *np; struct nfsfh *fhp; struct nfsclds **dspp; @@ -5922,6 +5982,10 @@ nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, while (len > 0 && error == 0) { stripe_index = nfsfldi_stripeindex(dp, stripe_pos); dspp = nfsfldi_addr(dp, stripe_index); + if (((*dspp)->nfsclds_flags & NFSCLDS_MINORV2) != 0) + minorvers = NFSV42_MINORVERSION; + else + minorvers = NFSV41_MINORVERSION; if (len > transfer && docommit == 0) xfer = transfer; else @@ -5959,7 +6023,7 @@ nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, if (docommit != 0) { if (error == 0) error = nfsrpc_commitds(vp, io_off, xfer, - *dspp, fhp, 0, 0, cred, p); + *dspp, fhp, NFS_VER4, minorvers, cred, p); if (error == 0) { /* * Set both eof and uio_resid = 0 to end any @@ -5974,11 +6038,11 @@ nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, } } else if (rwflag == NFSV4OPEN_ACCESSREAD) error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp, - io_off, xfer, fhp, 0, 0, 0, cred, p); + io_off, xfer, fhp, 0, NFS_VER4, minorvers, cred, p); else { error = nfsrpc_writeds(vp, uiop, iomode, must_commit, stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds, - 0, 0, 0, cred, p); + 0, NFS_VER4, minorvers, cred, p); if (error == 0) { NFSLOCKCLSTATE(); lyp->nfsly_flags |= NFSLY_WRITTEN; @@ -6686,6 +6750,259 @@ nfsio_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp, return (error); } +/* + * NFS Advise rpc + */ +APPLESTATIC int +nfsrpc_advise(vnode_t vp, off_t offset, uint64_t cnt, int 
advise, + struct ucred *cred, NFSPROC_T *p) +{ + u_int32_t *tl; + struct nfsrv_descript nfsd, *nd = &nfsd; + nfsattrbit_t hints; + int error; + + NFSZERO_ATTRBIT(&hints); + if (advise == POSIX_FADV_WILLNEED) + NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED); + else if (advise == POSIX_FADV_DONTNEED) + NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED); + else + return (0); + NFSCL_REQSTART(nd, NFSPROC_IOADVISE, vp); + nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO); + NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER); + txdr_hyper(offset, tl); + tl += 2; + txdr_hyper(cnt, tl); + nfsrv_putattrbit(nd, &hints); + error = nfscl_request(nd, vp, p, cred, NULL); + if (error != 0) + return (error); + if (nd->nd_repstat != 0) + error = nd->nd_repstat; + mbuf_freem(nd->nd_mrep); + return (error); +} + +#ifdef notyet +/* + * NFS advise rpc to a NFSv4.2 DS. + */ +static int +nfsrpc_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise, + struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers, + struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + struct nfsrv_descript nfsd, *nd = &nfsd; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + struct nfssockreq *nrp; + nfsattrbit_t hints; + int error; + + /* For NFS DSs prior to NFSv4.2, just return OK. 
*/ + if (vers == NFS_VER3 || minorvers < NFSV42_MINORVERSION) + return (0); + NFSZERO_ATTRBIT(&hints); + if (advise == POSIX_FADV_WILLNEED) + NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED); + else if (advise == POSIX_FADV_DONTNEED) + NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED); + else + return (0); + nd->nd_mrep = NULL; + nfscl_reqstart(nd, NFSPROC_IOADVISEDS, nmp, fhp->nfh_fh, + fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); + vers = NFS_VER4; + NFSCL_DEBUG(4, "nfsrpc_adviseds: vers=%d minvers=%d\n", vers, + minorvers); + nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO); + NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); + txdr_hyper(offset, tl); + tl += 2; + *tl = txdr_unsigned(cnt); + nfsrv_putattrbit(nd, &hints); + nrp = dsp->nfsclds_sockp; + if (nrp == NULL) + /* If NULL, use the MDS socket. */ + nrp = &nmp->nm_sockreq; + error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, + NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess); + NFSCL_DEBUG(4, "nfsrpc_adviseds: err=%d stat=%d\n", error, + nd->nd_repstat); + if (error != 0) + return (error); + if (nd->nd_repstat != 0) + error = nd->nd_repstat; + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * Task function handed to nfs_pnfsio(): runs nfsrpc_adviseds() with the + * arguments saved in the nfsclwritedsdorpc structure, then records the + * result in drpc->err and sets drpc->done. + */ +static void +start_adviseds(void *arg, int pending) +{ + struct nfsclwritedsdorpc *drpc; + + drpc = (struct nfsclwritedsdorpc *)arg; + drpc->err = nfsrpc_adviseds(drpc->vp, drpc->off, drpc->len, + drpc->advise, drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers, + drpc->cred, drpc->p); + drpc->done = 1; + NFSCL_DEBUG(4, "start_adviseds: err=%d\n", drpc->err); +} + +/* + * Set up the advise DS call for the pNFS I/O thread.
+ */ +static int +nfsio_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise, + struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers, + struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p) +{ + int error, ret; + + error = 0; + drpc->done = 0; + drpc->vp = vp; + drpc->off = offset; + drpc->len = cnt; + drpc->advise = advise; + drpc->dsp = dsp; + drpc->fhp = fhp; + drpc->vers = vers; + drpc->minorvers = minorvers; + drpc->cred = cred; + drpc->p = p; + drpc->inprog = 0; + ret = EIO; + if (nfs_pnfsiothreads != 0) { + ret = nfs_pnfsio(start_adviseds, drpc); + NFSCL_DEBUG(4, "nfsio_adviseds: nfs_pnfsio=%d\n", ret); + } + if (ret != 0) + error = nfsrpc_adviseds(vp, offset, cnt, advise, dsp, fhp, vers, + minorvers, cred, p); + NFSCL_DEBUG(4, "nfsio_adviseds: error=%d\n", error); + return (error); +} +#endif /* notyet */ + +/* + * Do the Allocate operation, retrying for recovery. + */ +APPLESTATIC int +nfsrpc_allocate(vnode_t vp, off_t off, off_t len, struct nfsvattr *nap, + int *attrflagp, struct ucred *cred, NFSPROC_T *p, void *stuff) +{ + int error, expireret = 0, retrycnt, nostateid; + uint32_t clidrev = 0; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + struct nfsfh *nfhp = NULL; + nfsv4stateid_t stateid; + off_t tmp_off; + void *lckp; + + if (len < 0) + return (EINVAL); + if (len == 0) + return (0); + tmp_off = off + len; + NFSLOCKMNT(nmp); + if (tmp_off > nmp->nm_maxfilesize || tmp_off < off) { + NFSUNLOCKMNT(nmp); + return (EFBIG); + } + if (nmp->nm_clp != NULL) + clidrev = nmp->nm_clp->nfsc_clientidrev; + NFSUNLOCKMNT(nmp); + nfhp = VTONFS(vp)->n_fhp; + retrycnt = 0; + do { + lckp = NULL; + nostateid = 0; + nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, + NFSV4OPEN_ACCESSWRITE, 0, cred, p, &stateid, &lckp); + if (stateid.other[0] == 0 && stateid.other[1] == 0 && + stateid.other[2] == 0) { + nostateid = 1; + NFSCL_DEBUG(1, "stateid0 in allocate\n"); + } + + /* + * Not finding a stateid should probably never happen, + * but just 
return an error for this case. + */ + if (nostateid != 0) + error = EIO; + else + error = nfsrpc_allocaterpc(vp, off, len, &stateid, + nap, attrflagp, cred, p, stuff); + if (error == NFSERR_STALESTATEID) + nfscl_initiate_recovery(nmp->nm_clp); + if (lckp != NULL) + nfscl_lockderef(lckp); + if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || + error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { + (void) nfs_catnap(PZERO, error, "nfs_allocate"); + } else if ((error == NFSERR_EXPIRED || + error == NFSERR_BADSTATEID) && clidrev != 0) { + expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); + } + retrycnt++; + } while (error == NFSERR_GRACE || error == NFSERR_DELAY || + error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || + error == NFSERR_STALEDONTRECOVER || + (error == NFSERR_OLDSTATEID && retrycnt < 20) || + ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && + expireret == 0 && clidrev != 0 && retrycnt < 4)); + if (error != 0 && retrycnt >= 4) + error = EIO; + return (error); +} + +/* + * The allocate RPC. 
+ */ +static int +nfsrpc_allocaterpc(vnode_t vp, off_t off, off_t len, nfsv4stateid_t *stateidp, + struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p, + void *stuff) +{ + uint32_t *tl; + int error; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + nfsattrbit_t attrbits; + + *attrflagp = 0; + NFSCL_REQSTART(nd, NFSPROC_ALLOCATE, vp); + nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); + NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); + txdr_hyper(off, tl); tl += 2; + txdr_hyper(len, tl); tl += 2; + *tl = txdr_unsigned(NFSV4OP_GETATTR); + NFSGETATTR_ATTRBIT(&attrbits); + nfsrv_putattrbit(nd, &attrbits); + error = nfscl_request(nd, vp, p, cred, stuff); + if (error != 0) + return (error); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + error = nfsm_loadattr(nd, nap); + if (error == 0) + *attrflagp = NFS_LATTR_NOSHRINK; + } else + error = nd->nd_repstat; +nfsmout: + mbuf_freem(nd->nd_mrep); + return (error); +} + /* * Set up the XDR arguments for the LayoutGet operation. */ @@ -6727,8 +7044,8 @@ nfsrv_setuplayoutget(struct nfsrv_descript *nd, int iomode, uint64_t offset, * Parse the reply for a successful LayoutGet operation. 
*/ static int -nfsrv_parselayoutget(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, - int *retonclosep, struct nfsclflayouthead *flhp) +nfsrv_parselayoutget(struct nfsmount *nmp, struct nfsrv_descript *nd, + nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp) { uint32_t *tl; struct nfsclflayout *flp, *prevflp, *tflp; @@ -6808,6 +7125,11 @@ nfsrv_parselayoutget(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++); NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util); + mtx_lock(&nmp->nm_mtx); + if (nmp->nm_minorvers > 1 && (flp->nfsfl_util & + NFSFLAYUTIL_IOADVISE_THRU_MDS) != 0) + nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS; + mtx_unlock(&nmp->nm_mtx); flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++); flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2; NFSCL_DEBUG(4, "stripe1=%u poff=%ju\n", @@ -6956,6 +7278,18 @@ nfsrv_parselayoutget(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, } NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); flp->nfsfl_fflags = fxdr_unsigned(uint32_t, *tl++); +#ifdef notnow + /* + * At this time, there is no flag. + * NFSFLEXFLAG_IOADVISE_THRU_MDS might need to be + * added, or it may never exist? 
+ */ + mtx_lock(&nmp->nm_mtx); + if (nmp->nm_minorvers > 1 && (flp->nfsfl_fflags & + NFSFLEXFLAG_IOADVISE_THRU_MDS) != 0) + nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS; + mtx_unlock(&nmp->nm_mtx); +#endif flp->nfsfl_statshint = fxdr_unsigned(uint32_t, *tl); NFSCL_DEBUG(4, "fflags=0x%x statshint=%d\n", flp->nfsfl_fflags, flp->nfsfl_statshint); @@ -7267,7 +7601,7 @@ nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); *laystatp = fxdr_unsigned(int, *++tl); if (*laystatp == 0) { - error = nfsrv_parselayoutget(nd, + error = nfsrv_parselayoutget(nmp, nd, stateidp, retonclosep, flhp); if (error != 0) *laystatp = error; @@ -7516,7 +7850,7 @@ nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap, NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED); *laystatp = fxdr_unsigned(int, *(tl + 3)); if (*laystatp == 0) { - error = nfsrv_parselayoutget(nd, + error = nfsrv_parselayoutget(nmp, nd, stateidp, retonclosep, flhp); if (error != 0) *laystatp = error; @@ -7664,3 +7998,525 @@ nfsrpc_layoutgetres(struct nfsmount *nmp, vnode_t vp, uint8_t *newfhp, return (laystat); } +/* + * nfs copy_file_range operation. + */ +APPLESTATIC int +nfsrpc_copy_file_range(vnode_t invp, off_t *inoffp, vnode_t outvp, + off_t *outoffp, size_t *lenp, unsigned int flags, int *inattrflagp, + struct nfsvattr *innap, int *outattrflagp, struct nfsvattr *outnap, + struct ucred *cred, bool consecutive, bool *must_commitp) +{ + int commit, error, expireret = 0, retrycnt; + u_int32_t clidrev = 0; + struct nfsmount *nmp = VFSTONFS(vnode_mount(invp)); + struct nfsfh *innfhp = NULL, *outnfhp = NULL; + nfsv4stateid_t instateid, outstateid; + void *inlckp, *outlckp; + + if (nmp->nm_clp != NULL) + clidrev = nmp->nm_clp->nfsc_clientidrev; + innfhp = VTONFS(invp)->n_fhp; + outnfhp = VTONFS(outvp)->n_fhp; + retrycnt = 0; + do { + /* Get both stateids. 
*/ + inlckp = NULL; + nfscl_getstateid(invp, innfhp->nfh_fh, innfhp->nfh_len, + NFSV4OPEN_ACCESSREAD, 0, NULL, curthread, &instateid, + &inlckp); + outlckp = NULL; + nfscl_getstateid(outvp, outnfhp->nfh_fh, outnfhp->nfh_len, + NFSV4OPEN_ACCESSWRITE, 0, NULL, curthread, &outstateid, + &outlckp); + + error = nfsrpc_copyrpc(invp, *inoffp, outvp, *outoffp, lenp, + &instateid, &outstateid, innap, inattrflagp, outnap, + outattrflagp, consecutive, &commit, cred, curthread); + if (error == 0) { + if (commit != NFSWRITE_FILESYNC) + *must_commitp = true; + *inoffp += *lenp; + *outoffp += *lenp; + } else if (error == NFSERR_STALESTATEID) + nfscl_initiate_recovery(nmp->nm_clp); + if (inlckp != NULL) + nfscl_lockderef(inlckp); + if (outlckp != NULL) + nfscl_lockderef(outlckp); + if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || + error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { + (void) nfs_catnap(PZERO, error, "nfs_cfr"); + } else if ((error == NFSERR_EXPIRED || + error == NFSERR_BADSTATEID) && clidrev != 0) { + expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, + curthread); + } + retrycnt++; + } while (error == NFSERR_GRACE || error == NFSERR_DELAY || + error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || + error == NFSERR_STALEDONTRECOVER || + (error == NFSERR_OLDSTATEID && retrycnt < 20) || + ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && + expireret == 0 && clidrev != 0 && retrycnt < 4)); + if (error != 0 && (retrycnt >= 4 || + error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || + error == NFSERR_STALEDONTRECOVER)) + error = EIO; + return (error); +} + +/* + * The copy RPC. 
+ */ +static int +nfsrpc_copyrpc(vnode_t invp, off_t inoff, vnode_t outvp, off_t outoff, + size_t *lenp, nfsv4stateid_t *instateidp, nfsv4stateid_t *outstateidp, + struct nfsvattr *innap, int *inattrflagp, struct nfsvattr *outnap, + int *outattrflagp, bool consecutive, int *commitp, struct ucred *cred, + NFSPROC_T *p) +{ + uint32_t *tl; + int error; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + struct nfsmount *nmp; + nfsattrbit_t attrbits; + uint64_t len; + + nmp = VFSTONFS(outvp->v_mount); + *inattrflagp = *outattrflagp = 0; + *commitp = NFSWRITE_UNSTABLE; + len = *lenp; + *lenp = 0; + if (len > nfs_maxcopyrange) + len = nfs_maxcopyrange; + NFSCL_REQSTART(nd, NFSPROC_COPY, invp); + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4OP_GETATTR); + NFSGETATTR_ATTRBIT(&attrbits); + nfsrv_putattrbit(nd, &attrbits); + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4OP_PUTFH); + nfsm_fhtom(nd, VTONFS(outvp)->n_fhp->nfh_fh, + VTONFS(outvp)->n_fhp->nfh_len, 0); + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4OP_COPY); + nfsm_stateidtom(nd, instateidp, NFSSTATEID_PUTSTATEID); + nfsm_stateidtom(nd, outstateidp, NFSSTATEID_PUTSTATEID); + NFSM_BUILD(tl, uint32_t *, 3 * NFSX_HYPER + 4 * NFSX_UNSIGNED); + txdr_hyper(inoff, tl); tl += 2; + txdr_hyper(outoff, tl); tl += 2; + txdr_hyper(len, tl); tl += 2; + if (consecutive) + *tl++ = newnfs_true; + else + *tl++ = newnfs_false; + *tl++ = newnfs_true; + *tl++ = 0; + *tl = txdr_unsigned(NFSV4OP_GETATTR); + NFSWRITEGETATTR_ATTRBIT(&attrbits); + nfsrv_putattrbit(nd, &attrbits); + error = nfscl_request(nd, invp, p, cred, NULL); + if (error != 0) + return (error); + if ((nd->nd_flag & ND_NOMOREDATA) == 0) { + /* Get the input file's attributes. 
*/ + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + if (*(tl + 1) == 0) { + error = nfsm_loadattr(nd, innap); + if (error != 0) + goto nfsmout; + *inattrflagp = 1; + } else + nd->nd_flag |= ND_NOMOREDATA; + } + /* Skip over return stat for PutFH. */ + if ((nd->nd_flag & ND_NOMOREDATA) == 0) { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + if (*++tl != 0) + nd->nd_flag |= ND_NOMOREDATA; + } + /* Skip over return stat for Copy. */ + if ((nd->nd_flag & ND_NOMOREDATA) == 0) + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + if (*tl != 0) { + /* There should be no callback ids. */ + error = NFSERR_BADXDR; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED + + NFSX_VERF); + len = fxdr_hyper(tl); tl += 2; + *commitp = fxdr_unsigned(int, *tl++); + NFSLOCKMNT(nmp); + if (!NFSHASWRITEVERF(nmp)) { + NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); + NFSSETWRITEVERF(nmp); + } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) { + NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); + nd->nd_repstat = NFSERR_STALEWRITEVERF; + } + NFSUNLOCKMNT(nmp); + tl += (NFSX_VERF / NFSX_UNSIGNED); + if (nd->nd_repstat == 0 && *++tl != newnfs_true) + /* Must be a synchronous copy. */ + nd->nd_repstat = NFSERR_NOTSUPP; + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + error = nfsm_loadattr(nd, outnap); + if (error == 0) + *outattrflagp = NFS_LATTR_NOSHRINK; + if (nd->nd_repstat == 0) + *lenp = len; + } else if (nd->nd_repstat == NFSERR_OFFLOADNOREQS) { + /* + * For the case where consecutive is not supported, but + * synchronous is supported, we can try consecutive == false + * by returning this error. Otherwise, return NFSERR_NOTSUPP, + * since Copy cannot be done. 
+ */ + if ((nd->nd_flag & ND_NOMOREDATA) == 0) { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + if (!consecutive || *++tl == newnfs_false) + nd->nd_repstat = NFSERR_NOTSUPP; + } else + nd->nd_repstat = NFSERR_BADXDR; + } + if (error == 0) + error = nd->nd_repstat; +nfsmout: + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * Seek operation. + */ +APPLESTATIC int +nfsrpc_seek(vnode_t vp, off_t *offp, bool *eofp, int content, + struct ucred *cred, struct nfsvattr *nap, int *attrflagp) +{ + int error, expireret = 0, retrycnt; + u_int32_t clidrev = 0; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + struct nfsnode *np = VTONFS(vp); + struct nfsfh *nfhp = NULL; + nfsv4stateid_t stateid; + void *lckp; + + if (nmp->nm_clp != NULL) + clidrev = nmp->nm_clp->nfsc_clientidrev; + nfhp = np->n_fhp; + retrycnt = 0; + do { + lckp = NULL; + nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, + NFSV4OPEN_ACCESSREAD, 0, cred, curthread, &stateid, &lckp); + error = nfsrpc_seekrpc(vp, offp, &stateid, eofp, content, + nap, attrflagp, cred); + if (error == NFSERR_STALESTATEID) + nfscl_initiate_recovery(nmp->nm_clp); + if (lckp != NULL) + nfscl_lockderef(lckp); + if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || + error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { + (void) nfs_catnap(PZERO, error, "nfs_seek"); + } else if ((error == NFSERR_EXPIRED || + error == NFSERR_BADSTATEID) && clidrev != 0) { + expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, + curthread); + } + retrycnt++; + } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || + error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_BADSESSION || + (error == NFSERR_OLDSTATEID && retrycnt < 20) || + ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && + expireret == 0 && clidrev != 0 && retrycnt < 4) || + (error == NFSERR_OPENMODE && retrycnt < 4)); + if (error && retrycnt >= 4) + error = 
EIO; + return (error); +} + +/* + * The seek RPC. + * On success, *offp and *eofp are updated from the reply and *attrflagp is + * set to 1 if the attributes following the Seek were loaded into *nap. + */ +static int +nfsrpc_seekrpc(vnode_t vp, off_t *offp, nfsv4stateid_t *stateidp, bool *eofp, + int content, struct nfsvattr *nap, int *attrflagp, struct ucred *cred) +{ + uint32_t *tl; + int error; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + nfsattrbit_t attrbits; + + *attrflagp = 0; + NFSCL_REQSTART(nd, NFSPROC_SEEK, vp); + nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); + NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); + txdr_hyper(*offp, tl); tl += 2; + *tl++ = txdr_unsigned(content); + *tl = txdr_unsigned(NFSV4OP_GETATTR); + NFSGETATTR_ATTRBIT(&attrbits); + nfsrv_putattrbit(nd, &attrbits); + error = nfscl_request(nd, vp, curthread, cred, NULL); + if (error != 0) + return (error); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED + NFSX_HYPER); + if (*tl++ == newnfs_true) + *eofp = true; + else + *eofp = false; + *offp = fxdr_hyper(tl); + /* Just skip over Getattr op status. */ + error = nfsm_loadattr(nd, nap); + if (error == 0) + *attrflagp = 1; + } + /* Keep a decode error from nfsm_loadattr(); otherwise use the op status. */ + if (error == 0) + error = nd->nd_repstat; +nfsmout: + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * The getextattr RPC.
+ */ +APPLESTATIC int +nfsrpc_getextattr(vnode_t vp, const char *name, struct uio *uiop, ssize_t *lenp, + struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + int error; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + nfsattrbit_t attrbits; + uint32_t len, len2; + + *attrflagp = 0; + NFSCL_REQSTART(nd, NFSPROC_GETEXTATTR, vp); + nfsm_strtom(nd, name, strlen(name)); + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4OP_GETATTR); + NFSGETATTR_ATTRBIT(&attrbits); + nfsrv_putattrbit(nd, &attrbits); + error = nfscl_request(nd, vp, p, cred, NULL); + if (error != 0) + return (error); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + len = fxdr_unsigned(uint32_t, *tl); + /* Sanity check lengths. */ + if (uiop != NULL && len > 0 && len <= IOSIZE_MAX && + uiop->uio_resid <= UINT32_MAX) { + len2 = uiop->uio_resid; + if (len2 >= len) + error = nfsm_mbufuio(nd, uiop, len); + else { + error = nfsm_mbufuio(nd, uiop, len2); + if (error == 0) { + /* + * nfsm_mbufuio() advances to a multiple + * of 4, so round up len2 as well. Then + * we need to advance over the rest of + * the data, rounding up the remaining + * length. + */ + len2 = NFSM_RNDUP(len2); + len2 = NFSM_RNDUP(len - len2); + if (len2 > 0) + error = nfsm_advance(nd, len2, + -1); + } + } + } else if (uiop == NULL && len > 0) { + /* Just wants the length and not the data. */ + error = nfsm_advance(nd, NFSM_RNDUP(len), -1); + } else + error = ENOATTR; + if (error != 0) + goto nfsmout; + *lenp = len; + /* Just skip over Getattr op status. */ + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + error = nfsm_loadattr(nd, nap); + if (error == 0) + *attrflagp = 1; + } + if (error == 0) + error = nd->nd_repstat; +nfsmout: + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * The setextattr RPC. 
+ */ +APPLESTATIC int +nfsrpc_setextattr(vnode_t vp, const char *name, struct uio *uiop, + struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + int error; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + nfsattrbit_t attrbits; + + *attrflagp = 0; + NFSCL_REQSTART(nd, NFSPROC_SETEXTATTR, vp); + if (uiop->uio_resid > nd->nd_maxreq) { + /* nd_maxreq is set by NFSCL_REQSTART(). */ + mbuf_freem(nd->nd_mreq); + return (EINVAL); + } + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4SXATTR_EITHER); + nfsm_strtom(nd, name, strlen(name)); + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(uiop->uio_resid); + nfsm_uiombuf(nd, uiop, uiop->uio_resid); + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4OP_GETATTR); + NFSGETATTR_ATTRBIT(&attrbits); + nfsrv_putattrbit(nd, &attrbits); + error = nfscl_request(nd, vp, p, cred, NULL); + if (error != 0) + return (error); + if (nd->nd_repstat == 0) { + /* Just skip over the reply and Getattr op status. */ + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 * + NFSX_UNSIGNED); + error = nfsm_loadattr(nd, nap); + if (error == 0) + *attrflagp = 1; + } + if (error == 0) + error = nd->nd_repstat; +nfsmout: + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * The removeextattr RPC. 
+ */
+APPLESTATIC int
+nfsrpc_rmextattr(vnode_t vp, const char *name, struct nfsvattr *nap,
+    int *attrflagp, struct ucred *cred, NFSPROC_T *p)
+{
+	uint32_t *tl;
+	int error;
+	struct nfsrv_descript nfsd;
+	struct nfsrv_descript *nd = &nfsd;
+	nfsattrbit_t attrbits;
+
+	*attrflagp = 0;		/* Set to 1 only after nap has been loaded. */
+	NFSCL_REQSTART(nd, NFSPROC_RMEXTATTR, vp);
+	nfsm_strtom(nd, name, strlen(name));	/* Attribute name. */
+	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+	*tl = txdr_unsigned(NFSV4OP_GETATTR);	/* Append a Getattr op. */
+	NFSGETATTR_ATTRBIT(&attrbits);
+	nfsrv_putattrbit(nd, &attrbits);
+	error = nfscl_request(nd, vp, p, cred, NULL);
+	if (error != 0)
+		return (error);
+	if (nd->nd_repstat == 0) {
+		/* Just skip over the reply and Getattr op status. */
+		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 2 *
+		    NFSX_UNSIGNED);
+		error = nfsm_loadattr(nd, nap);
+		if (error == 0)
+			*attrflagp = 1;
+	}
+	if (error == 0)
+		error = nd->nd_repstat;	/* Report the server's status. */
+nfsmout:
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
+/*
+ * The listextattr RPC.  Each name goes to uiop as a length byte plus name.
+ */
+APPLESTATIC int
+nfsrpc_listextattr(vnode_t vp, uint64_t *cookiep, struct uio *uiop,
+    size_t *lenp, bool *eofp, struct nfsvattr *nap, int *attrflagp,
+    struct ucred *cred, NFSPROC_T *p)
+{
+	uint32_t *tl;
+	int cnt, error, i, len;
+	struct nfsrv_descript nfsd;
+	struct nfsrv_descript *nd = &nfsd;
+	nfsattrbit_t attrbits;
+	u_char c;
+
+	*attrflagp = 0;
+	NFSCL_REQSTART(nd, NFSPROC_LISTEXTATTR, vp);
+	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
+	txdr_hyper(*cookiep, tl); tl += 2;
+	*tl++ = txdr_unsigned(*lenp);
+	*tl = txdr_unsigned(NFSV4OP_GETATTR);
+	NFSGETATTR_ATTRBIT(&attrbits);
+	nfsrv_putattrbit(nd, &attrbits);
+	error = nfscl_request(nd, vp, p, cred, NULL);
+	if (error != 0)
+		return (error);
+	*eofp = true;
+	*lenp = 0;		/* Now counts the bytes of this reply's names. */
+	if (nd->nd_repstat == 0) {
+		NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
+		*cookiep = fxdr_hyper(tl); tl += 2;
+		cnt = fxdr_unsigned(int, *tl);
+		if (cnt < 0) {
+			error = EBADRPC;
+			goto nfsmout;
+		}
+		for (i = 0; i < cnt; i++) {
+			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+			len = fxdr_unsigned(int, *tl);
+			if (len <= 0 || len > EXTATTR_MAXNAMELEN) {
+				error = EBADRPC;
+				goto nfsmout;
+			}
+			if (uiop == NULL)	/* No buffer: just count. */
+				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
+			else if (uiop->uio_resid >= len + 1) {
+				c = len;	/* One-byte length prefix. */
+				error = uiomove(&c, sizeof(c), uiop);
+				if (error == 0)
+					error = nfsm_mbufuio(nd, uiop, len);
+			} else {
+				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
+				*eofp = false;	/* Did not fit in uiop. */
+			}
+			if (error != 0)
+				goto nfsmout;
+			*lenp += (len + 1);	/* Name plus length byte. */
+		}
+		/* Get the eof and skip over the Getattr op status. */
+		NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED);
+		/*
+		 * *eofp is set false above, because it wasn't able to copy
+		 * all of the reply.
+		 */
+		if (*eofp && *tl == 0)
+			*eofp = false;
+		error = nfsm_loadattr(nd, nap);
+		if (error == 0)
+			*attrflagp = 1;
+	}
+	if (error == 0)
+		error = nd->nd_repstat;
+nfsmout:
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
diff --git a/sys/fs/nfsclient/nfs_clstate.c b/sys/fs/nfsclient/nfs_clstate.c index 2fa35225bf05..445cd379b95d 100644 --- a/sys/fs/nfsclient/nfs_clstate.c +++ b/sys/fs/nfsclient/nfs_clstate.c @@ -3292,7 +3292,9 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED); NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); minorvers = fxdr_unsigned(u_int32_t, *tl++); - if (minorvers != NFSV4_MINORVERSION && minorvers != NFSV41_MINORVERSION) + if (minorvers != NFSV4_MINORVERSION && + minorvers != NFSV41_MINORVERSION && + minorvers != NFSV42_MINORVERSION) nd->nd_repstat = NFSERR_MINORVERMISMATCH; cbident = fxdr_unsigned(u_int32_t, *tl++); if (nd->nd_repstat) @@ -3310,14 +3312,16 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) if (op < NFSV4OP_CBGETATTR || (op > NFSV4OP_CBRECALL && minorvers == NFSV4_MINORVERSION) || (op > NFSV4OP_CBNOTIFYDEVID && - minorvers == NFSV41_MINORVERSION)) { + minorvers == NFSV41_MINORVERSION) || + (op > NFSV4OP_CBOFFLOAD && + minorvers == NFSV42_MINORVERSION)) { nd->nd_repstat =
NFSERR_OPILLEGAL; *repp = nfscl_errmap(nd, minorvers); retops++; break; } nd->nd_procnum = op; - if (op < NFSV41_CBNOPS) + if (op < NFSV42_CBNOPS) nfsstatsv1.cbrpccnt[nd->nd_procnum]++; switch (op) { case NFSV4OP_CBGETATTR: @@ -3619,7 +3623,7 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) } break; default: - if (i == 0 && minorvers == NFSV41_MINORVERSION) + if (i == 0 && minorvers != NFSV4_MINORVERSION) error = NFSERR_OPNOTINSESS; else { NFSCL_DEBUG(1, "unsupp callback %d\n", op); diff --git a/sys/fs/nfsclient/nfs_clvfsops.c b/sys/fs/nfsclient/nfs_clvfsops.c index 133886ab213c..c4aece16ee3b 100644 --- a/sys/fs/nfsclient/nfs_clvfsops.c +++ b/sys/fs/nfsclient/nfs_clvfsops.c @@ -1151,7 +1151,7 @@ nfs_mount(struct mount *mp) if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &minvers); - if (ret != 1 || minvers < 0 || minvers > 1 || + if (ret != 1 || minvers < 0 || minvers > 2 || (args.flags & NFSMNT_NFSV4) == 0) { vfs_mount_error(mp, "illegal minorversion: %s", opt); error = EINVAL; diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index 7d721ae8da3e..d95e4abcc631 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -51,6 +51,8 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include +#include #include #include #include @@ -142,6 +144,14 @@ static vop_advlock_t nfs_advlock; static vop_advlockasync_t nfs_advlockasync; static vop_getacl_t nfs_getacl; static vop_setacl_t nfs_setacl; +static vop_advise_t nfs_advise; +static vop_allocate_t nfs_allocate; +static vop_copy_file_range_t nfs_copy_file_range; +static vop_ioctl_t nfs_ioctl; +static vop_getextattr_t nfs_getextattr; +static vop_setextattr_t nfs_setextattr; +static vop_listextattr_t nfs_listextattr; +static vop_deleteextattr_t nfs_deleteextattr; static vop_lock1_t nfs_lock; /* @@ -181,6 +191,14 @@ static struct vop_vector newnfs_vnodeops_nosig = { .vop_write = ncl_write, .vop_getacl = nfs_getacl, 
.vop_setacl = nfs_setacl, + .vop_advise = nfs_advise, + .vop_allocate = nfs_allocate, + .vop_copy_file_range = nfs_copy_file_range, + .vop_ioctl = nfs_ioctl, + .vop_getextattr = nfs_getextattr, + .vop_setextattr = nfs_setextattr, + .vop_listextattr = nfs_listextattr, + .vop_deleteextattr = nfs_deleteextattr, }; static int @@ -3504,6 +3522,618 @@ nfs_setacl(struct vop_setacl_args *ap) return (error); }
+/*
+ * VOP_ADVISE for NFS.
+ * Errors from the Advise RPC are ignored, since it is just a hint.
+ */
+static int
+nfs_advise(struct vop_advise_args *ap)
+{
+	struct thread *td = curthread;
+	struct nfsmount *nmp;
+	uint64_t len;
+	int error;
+
+	/*
+	 * First do vop_stdadvise() to handle the buffer cache.
+	 */
+	error = vop_stdadvise(ap);
+	if (error != 0)
+		return (error);	/* Only this error is passed back. */
+	if (ap->a_start < 0 || ap->a_end < 0)
+		return (0);
+	if (ap->a_end == OFF_MAX)
+		len = 0;	/* NOTE(review): presumably "to EOF" - confirm. */
+	else if (ap->a_end < ap->a_start)
+		return (0);
+	else
+		len = ap->a_end - ap->a_start + 1;	/* a_end is inclusive. */
+	nmp = VFSTONFS(ap->a_vp->v_mount);
+	mtx_lock(&nmp->nm_mtx);
+	if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION ||
+	    (NFSHASPNFS(nmp) && (nmp->nm_privflag & NFSMNTP_IOADVISETHRUMDS) ==
+	    0) || (nmp->nm_privflag & NFSMNTP_NOADVISE) != 0) {
+		mtx_unlock(&nmp->nm_mtx);
+		return (0);
+	}
+	mtx_unlock(&nmp->nm_mtx);
+	error = nfsrpc_advise(ap->a_vp, ap->a_start, len, ap->a_advice,
+	    td->td_ucred, td);
+	if (error == NFSERR_NOTSUPP) {
+		mtx_lock(&nmp->nm_mtx);
+		nmp->nm_privflag |= NFSMNTP_NOADVISE;	/* Skip the RPC later. */
+		mtx_unlock(&nmp->nm_mtx);
+	}
+	return (0);
+}
+
+/* + * nfs allocate call + */ +static int +nfs_allocate(struct vop_allocate_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct thread *td = curthread; + struct nfsvattr nfsva; + struct nfsmount *nmp; + int attrflag, error, ret; + + attrflag = 0; + nmp = VFSTONFS(vp->v_mount); + mtx_lock(&nmp->nm_mtx); + if (NFSHASNFSV4(nmp) && nmp->nm_minorvers >= NFSV42_MINORVERSION && + (nmp->nm_privflag & NFSMNTP_NOALLOCATE) == 0) { + mtx_unlock(&nmp->nm_mtx); + /* + * Flush
first to ensure that the allocate adds to the + * file's allocation on the server. + */ + error = ncl_flush(vp, MNT_WAIT, td, 1, 0); + if (error == 0) + error = nfsrpc_allocate(vp, *ap->a_offset, *ap->a_len, + &nfsva, &attrflag, td->td_ucred, td, NULL); + if (error == 0) { + *ap->a_offset += *ap->a_len; + *ap->a_len = 0; + } else if (error == NFSERR_NOTSUPP) { + mtx_lock(&nmp->nm_mtx); + nmp->nm_privflag |= NFSMNTP_NOALLOCATE; + mtx_unlock(&nmp->nm_mtx); + } + } else { + mtx_unlock(&nmp->nm_mtx); + error = EIO; + } + /* + * If the NFS server cannot perform the Allocate operation, just call + * vop_stdallocate() to perform it. + */ + if (error != 0) + error = vop_stdallocate(ap); + if (attrflag != 0) { + ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); + if (error == 0 && ret != 0) + error = ret; + } + if (error != 0) + error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); + return (error); +} + +/* + * nfs copy_file_range call + */ +static int +nfs_copy_file_range(struct vop_copy_file_range_args *ap) +{ + struct vnode *invp = ap->a_invp; + struct vnode *outvp = ap->a_outvp; + struct mount *mp; + struct nfsvattr innfsva, outnfsva; + struct vattr *vap; + struct uio io; + struct nfsmount *nmp; + size_t len, len2, copiedlen; + int error, inattrflag, outattrflag, ret, ret2; + off_t inoff, outoff; + bool consecutive, must_commit, tryoutcred; + + nmp = VFSTONFS(invp->v_mount); + mtx_lock(&nmp->nm_mtx); + /* NFSv4.2 Copy is not permitted for infile == outfile. */ + if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || + (nmp->nm_privflag & NFSMNTP_NOCOPY) != 0 || invp == outvp) { + mtx_unlock(&nmp->nm_mtx); + error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, + ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, + ap->a_incred, ap->a_outcred, ap->a_fsizetd); + return (error); + } + mtx_unlock(&nmp->nm_mtx); + + /* Lock both vnodes, avoiding risk of deadlock. 
*/ + do { + mp = NULL; + error = vn_start_write(outvp, &mp, V_WAIT); + if (error == 0) { + error = vn_lock(outvp, LK_EXCLUSIVE); + if (error == 0) { + error = vn_lock(invp, LK_SHARED | LK_NOWAIT); + if (error == 0) + break; + VOP_UNLOCK(outvp, 0); + if (mp != NULL) + vn_finished_write(mp); + mp = NULL; + error = vn_lock(invp, LK_SHARED); + if (error == 0) + VOP_UNLOCK(invp, 0); + } + } + if (mp != NULL) + vn_finished_write(mp); + } while (error == 0); + if (error != 0) + return (error); + + /* + * Do the vn_rlimit_fsize() check. Should this be above the VOP layer? + */ + io.uio_offset = *ap->a_outoffp; + io.uio_resid = *ap->a_lenp; + error = vn_rlimit_fsize(outvp, &io, ap->a_fsizetd); + + /* + * Flush the input file so that the data is up to date before + * the copy. Flush writes for the output file so that they + * do not overwrite the data copied to the output file by the Copy. + * Set the commit argument for both flushes so that the data is on + * stable storage before the Copy RPC. This is done in case the + * server reboots during the Copy and needs to be redone. + */ + if (error == 0) + error = ncl_flush(invp, MNT_WAIT, curthread, 1, 0); + if (error == 0) + error = ncl_flush(outvp, MNT_WAIT, curthread, 1, 0); + + /* Do the actual NFSv4.2 RPC. */ + len = *ap->a_lenp; + mtx_lock(&nmp->nm_mtx); + if ((nmp->nm_privflag & NFSMNTP_NOCONSECUTIVE) == 0) + consecutive = true; + else + consecutive = false; + mtx_unlock(&nmp->nm_mtx); + inoff = *ap->a_inoffp; + outoff = *ap->a_outoffp; + tryoutcred = true; + must_commit = false; + if (error == 0) { + vap = &VTONFS(invp)->n_vattr.na_vattr; + error = VOP_GETATTR(invp, vap, ap->a_incred); + if (error == 0) { + /* + * Clip "len" at va_size so that RFC compliant servers + * will not reply NFSERR_INVAL. + * Setting "len == 0" for the RPC would be preferred, + * but some Linux servers do not support that. 
+ */ + if (inoff >= vap->va_size) + *ap->a_lenp = len = 0; + else if (inoff + len > vap->va_size) + *ap->a_lenp = len = vap->va_size - inoff; + } else + error = 0; + } + copiedlen = 0; + while (len > 0 && error == 0) { + inattrflag = outattrflag = 0; + len2 = len; + if (tryoutcred) + error = nfsrpc_copy_file_range(invp, ap->a_inoffp, + outvp, ap->a_outoffp, &len2, ap->a_flags, + &inattrflag, &innfsva, &outattrflag, &outnfsva, + ap->a_outcred, consecutive, &must_commit); + else + error = nfsrpc_copy_file_range(invp, ap->a_inoffp, + outvp, ap->a_outoffp, &len2, ap->a_flags, + &inattrflag, &innfsva, &outattrflag, &outnfsva, + ap->a_incred, consecutive, &must_commit); + if (inattrflag != 0) + ret = nfscl_loadattrcache(&invp, &innfsva, NULL, NULL, + 0, 1); + if (outattrflag != 0) + ret2 = nfscl_loadattrcache(&outvp, &outnfsva, NULL, + NULL, 1, 1); + if (error == 0) { + if (consecutive == false) { + if (len2 == len) { + mtx_lock(&nmp->nm_mtx); + nmp->nm_privflag |= + NFSMNTP_NOCONSECUTIVE; + mtx_unlock(&nmp->nm_mtx); + } else + error = NFSERR_OFFLOADNOREQS; + } + /* + * If the Copy returns a length == 0, it hit the + * EOF on the input file. + */ + if (len2 == 0) { + *ap->a_lenp = copiedlen; + len = 0; + } else { + len -= len2; + copiedlen += len2; + } + if (len == 0 && must_commit && error == 0) + error = ncl_commit(outvp, outoff, *ap->a_lenp, + ap->a_outcred, curthread); + if (error == 0 && ret != 0) + error = ret; + if (error == 0 && ret2 != 0) + error = ret2; + } else if (error == NFSERR_OFFLOADNOREQS && consecutive) { + /* + * Try consecutive == false, which is ok only if all + * bytes are copied. + */ + consecutive = false; + error = 0; + } else if (error == NFSERR_ACCES && tryoutcred) { + /* Try again with incred. */ + tryoutcred = false; + error = 0; + } + if (error == NFSERR_STALEWRITEVERF) { + /* + * Server rebooted, so do it all again. 
+ */ + *ap->a_inoffp = inoff; + *ap->a_outoffp = outoff; + len = *ap->a_lenp; + must_commit = false; + error = 0; + } + } + VOP_UNLOCK(invp, 0); + VOP_UNLOCK(outvp, 0); + if (mp != NULL) + vn_finished_write(mp); + if (error == NFSERR_NOTSUPP || error == NFSERR_OFFLOADNOREQS || + error == NFSERR_ACCES) { + /* + * Unlike the NFSv4.2 Copy, vn_generic_copy_file_range() can + * use a_incred for the read and a_outcred for the write, so + * try this for NFSERR_ACCES failures for the Copy. + * For NFSERR_NOTSUPP and NFSERR_OFFLOADNOREQS, the Copy can + * never succeed, so disable it. + */ + if (error != NFSERR_ACCES) { + /* Can never do Copy on this mount. */ + mtx_lock(&nmp->nm_mtx); + nmp->nm_privflag |= NFSMNTP_NOCOPY; + mtx_unlock(&nmp->nm_mtx); + } + *ap->a_inoffp = inoff; + *ap->a_outoffp = outoff; + error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, + ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, + ap->a_incred, ap->a_outcred, ap->a_fsizetd); + } else if (error != 0) + *ap->a_lenp = 0; + + if (error != 0) + error = nfscl_maperr(curthread, error, (uid_t)0, (gid_t)0); + return (error); +} + +/* + * nfs ioctl call + */ +static int +nfs_ioctl(struct vop_ioctl_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct nfsvattr nfsva; + struct nfsmount *nmp; + int attrflag, content, error, ret; + bool eof = false; /* shut up compiler. */ + + if (vp->v_type != VREG) + return (ENOTTY); + nmp = VFSTONFS(vp->v_mount); + if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION) { + error = vop_stdioctl(ap); + return (error); + } + + /* Do the actual NFSv4.2 RPC. 
*/ + switch (ap->a_command) { + case FIOSEEKDATA: + content = NFSV4CONTENT_DATA; + break; + case FIOSEEKHOLE: + content = NFSV4CONTENT_HOLE; + break; + default: + return (ENOTTY); + } + + error = vn_lock(vp, LK_SHARED); + if (error != 0) + return (EBADF); + attrflag = 0; + if (*((off_t *)ap->a_data) >= VTONFS(vp)->n_size) + error = ENXIO; + else { + /* + * Flush all writes, so that the server is up to date. + * Although a Commit is not required, the commit argument + * is set so that, for a pNFS File/Flexible File Layout + * server, the LayoutCommit will be done to ensure the file + * size is up to date on the Metadata Server. + */ + error = ncl_flush(vp, MNT_WAIT, ap->a_td, 1, 0); + if (error == 0) + error = nfsrpc_seek(vp, (off_t *)ap->a_data, &eof, + content, ap->a_cred, &nfsva, &attrflag); + /* If at eof for FIOSEEKDATA, return ENXIO. */ + if (eof && error == 0 && content == NFSV4CONTENT_DATA) + error = ENXIO; + } + if (attrflag != 0) { + ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); + if (error == 0 && ret != 0) + error = ret; + } + NFSVOPUNLOCK(vp, 0); + + if (error != 0) + error = ENXIO; + return (error); +} +
+/*
+ * nfs getextattr call (user namespace only, NFSv4.2 or later mounts)
+ */
+static int
+nfs_getextattr(struct vop_getextattr_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nfsmount *nmp;
+	struct ucred *cred;
+	struct thread *td = ap->a_td;
+	struct nfsvattr nfsva;
+	ssize_t len;
+	int attrflag, error, ret;
+
+	nmp = VFSTONFS(vp->v_mount);
+	mtx_lock(&nmp->nm_mtx);
+	if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION ||
+	    (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 ||
+	    ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) {
+		mtx_unlock(&nmp->nm_mtx);
+		return (EOPNOTSUPP);
+	}
+	mtx_unlock(&nmp->nm_mtx);
+
+	cred = ap->a_cred;
+	if (cred == NULL)
+		cred = td->td_ucred;	/* Fall back to the thread's cred. */
+	/* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. */
+	attrflag = 0;
+	error = nfsrpc_getextattr(vp, ap->a_name, ap->a_uio, &len, &nfsva,
+	    &attrflag, cred, td);
+	if (attrflag != 0) {
+		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
+		if (error == 0 && ret != 0)
+			error = ret;
+	}
+	if (error == 0 && ap->a_size != NULL)
+		*ap->a_size = len;	/* len is only read on success. */
+
+	switch (error) {
+	case NFSERR_NOTSUPP:
+	case NFSERR_OPILLEGAL:	/* Remember that xattrs are unsupported. */
+		mtx_lock(&nmp->nm_mtx);
+		nmp->nm_privflag |= NFSMNTP_NOXATTR;
+		mtx_unlock(&nmp->nm_mtx);
+		error = EOPNOTSUPP;
+		break;
+	case NFSERR_NOXATTR:
+	case NFSERR_XATTR2BIG:
+		error = ENOATTR;
+		break;
+	default:
+		error = nfscl_maperr(td, error, 0, 0);
+		break;
+	}
+	return (error);
+}
+
+/*
+ * nfs setextattr call (user namespace only, NFSv4.2 or later mounts)
+ */
+static int
+nfs_setextattr(struct vop_setextattr_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nfsmount *nmp;
+	struct ucred *cred;
+	struct thread *td = ap->a_td;
+	struct nfsvattr nfsva;
+	int attrflag, error, ret;
+
+	nmp = VFSTONFS(vp->v_mount);
+	mtx_lock(&nmp->nm_mtx);
+	if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION ||
+	    (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 ||
+	    ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) {
+		mtx_unlock(&nmp->nm_mtx);
+		return (EOPNOTSUPP);
+	}
+	mtx_unlock(&nmp->nm_mtx);
+
+	if (ap->a_uio->uio_resid <= 0)	/* An empty value is rejected. */
+		return (EINVAL);
+	cred = ap->a_cred;
+	if (cred == NULL)
+		cred = td->td_ucred;	/* Fall back to the thread's cred. */
+	/* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. */
+	attrflag = 0;
+	error = nfsrpc_setextattr(vp, ap->a_name, ap->a_uio, &nfsva,
+	    &attrflag, cred, td);
+	if (attrflag != 0) {
+		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
+		if (error == 0 && ret != 0)
+			error = ret;
+	}
+
+	switch (error) {
+	case NFSERR_NOTSUPP:
+	case NFSERR_OPILLEGAL:	/* Remember that xattrs are unsupported. */
+		mtx_lock(&nmp->nm_mtx);
+		nmp->nm_privflag |= NFSMNTP_NOXATTR;
+		mtx_unlock(&nmp->nm_mtx);
+		error = EOPNOTSUPP;
+		break;
+	case NFSERR_NOXATTR:
+	case NFSERR_XATTR2BIG:
+		error = ENOATTR;
+		break;
+	default:
+		error = nfscl_maperr(td, error, 0, 0);
+		break;
+	}
+	return (error);
+}
+
+/* + * nfs listextattr call + */ +static int +nfs_listextattr(struct vop_listextattr_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct nfsmount *nmp; + struct ucred *cred; + struct thread *td = ap->a_td; + struct nfsvattr nfsva; + size_t len, len2; + uint64_t cookie; + int attrflag, error, ret; + bool eof; + + nmp = VFSTONFS(vp->v_mount); + mtx_lock(&nmp->nm_mtx); + if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || + (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || + ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { + mtx_unlock(&nmp->nm_mtx); + return (EOPNOTSUPP); + } + mtx_unlock(&nmp->nm_mtx); + + cred = ap->a_cred; + if (cred == NULL) + cred = td->td_ucred; + + /* Loop around doing List Extended Attribute RPCs.
*/ + eof = false; + cookie = 0; + len2 = 0; + error = 0; + while (!eof && error == 0) { + len = nmp->nm_rsize; + attrflag = 0; + error = nfsrpc_listextattr(vp, &cookie, ap->a_uio, &len, &eof, + &nfsva, &attrflag, cred, td); + if (attrflag != 0) { + ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, + 1); + if (error == 0 && ret != 0) + error = ret; + } + if (error == 0) { + len2 += len; + if (len2 > SSIZE_MAX) + error = ENOATTR; + } + } + if (error == 0 && ap->a_size != NULL) + *ap->a_size = len2; + + switch (error) { + case NFSERR_NOTSUPP: + case NFSERR_OPILLEGAL: + mtx_lock(&nmp->nm_mtx); + nmp->nm_privflag |= NFSMNTP_NOXATTR; + mtx_unlock(&nmp->nm_mtx); + error = EOPNOTSUPP; + break; + case NFSERR_NOXATTR: + case NFSERR_XATTR2BIG: + error = ENOATTR; + break; + default: + error = nfscl_maperr(td, error, 0, 0); + break; + } + return (error); +} + +/* + * nfs setextattr call + */ +static int +nfs_deleteextattr(struct vop_deleteextattr_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct nfsmount *nmp; + struct nfsvattr nfsva; + int attrflag, error, ret; + + nmp = VFSTONFS(vp->v_mount); + mtx_lock(&nmp->nm_mtx); + if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || + (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || + ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { + mtx_unlock(&nmp->nm_mtx); + return (EOPNOTSUPP); + } + mtx_unlock(&nmp->nm_mtx); + + /* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. 
*/ + attrflag = 0; + error = nfsrpc_rmextattr(vp, ap->a_name, &nfsva, &attrflag, ap->a_cred, + ap->a_td); + if (attrflag != 0) { + ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); + if (error == 0 && ret != 0) + error = ret; + } + + switch (error) { + case NFSERR_NOTSUPP: + case NFSERR_OPILLEGAL: + mtx_lock(&nmp->nm_mtx); + nmp->nm_privflag |= NFSMNTP_NOXATTR; + mtx_unlock(&nmp->nm_mtx); + error = EOPNOTSUPP; + break; + case NFSERR_NOXATTR: + case NFSERR_XATTR2BIG: + error = ENOATTR; + break; + default: + error = nfscl_maperr(ap->a_td, error, 0, 0); + break; + } + return (error); +} + /* * Return POSIX pathconf information applicable to nfs filesystems. */ @@ -3513,7 +4143,10 @@ nfs_pathconf(struct vop_pathconf_args *ap) struct nfsv3_pathconf pc; struct nfsvattr nfsva; struct vnode *vp = ap->a_vp; + struct nfsmount *nmp; struct thread *td = curthread; + off_t off; + bool eof; int attrflag, error; if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX || @@ -3612,6 +4245,40 @@ nfs_pathconf(struct vop_pathconf_args *ap) case _PC_SYMLINK_MAX: *ap->a_retval = NFS_MAXPATHLEN; break; + case _PC_MIN_HOLE_SIZE: + /* Only some NFSv4.2 servers support Seek for Holes. */ + *ap->a_retval = 0; + nmp = VFSTONFS(vp->v_mount); + if (NFS_ISV4(vp) && nmp->nm_minorvers == NFSV42_MINORVERSION) { + /* + * NFSv4.2 doesn't have an attribute for hole size, + * so all we can do is see if the Seek operation is + * supported and then use f_iosize as a "best guess". 
+ */ + mtx_lock(&nmp->nm_mtx); + if ((nmp->nm_privflag & NFSMNTP_SEEKTESTED) == 0) { + mtx_unlock(&nmp->nm_mtx); + off = 0; + attrflag = 0; + error = nfsrpc_seek(vp, &off, &eof, + NFSV4CONTENT_HOLE, td->td_ucred, &nfsva, + &attrflag); + if (attrflag != 0) + nfscl_loadattrcache(&vp, &nfsva, + NULL, NULL, 0, 1); + mtx_lock(&nmp->nm_mtx); + if (error == NFSERR_NOTSUPP) + nmp->nm_privflag |= NFSMNTP_SEEKTESTED; + else + nmp->nm_privflag |= NFSMNTP_SEEKTESTED | + NFSMNTP_SEEK; + error = 0; + } + if ((nmp->nm_privflag & NFSMNTP_SEEK) != 0) + *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; + mtx_unlock(&nmp->nm_mtx); + } + break; default: error = vop_stdpathconf(ap); diff --git a/sys/fs/nfsclient/nfsmount.h b/sys/fs/nfsclient/nfsmount.h index 649e59eff289..3b6312fbc87f 100644 --- a/sys/fs/nfsclient/nfsmount.h +++ b/sys/fs/nfsclient/nfsmount.h @@ -105,6 +105,14 @@ struct nfsmount { /* Private flags. */ #define NFSMNTP_FORCEDISM 0x00000001 #define NFSMNTP_CANCELRPCS 0x00000002 +#define NFSMNTP_IOADVISETHRUMDS 0x00000004 +#define NFSMNTP_NOCOPY 0x00000008 +#define NFSMNTP_NOCONSECUTIVE 0x00000010 +#define NFSMNTP_SEEK 0x00000020 +#define NFSMNTP_SEEKTESTED 0x00000040 +#define NFSMNTP_NOXATTR 0x00000080 +#define NFSMNTP_NOADVISE 0x00000100 +#define NFSMNTP_NOALLOCATE 0x00000200 #define NFSMNT_DIRPATH(m) (&((m)->nm_name[(m)->nm_krbnamelen + 1])) #define NFSMNT_SRVKRBNAME(m) \ diff --git a/sys/fs/nfsserver/nfs_nfsdkrpc.c b/sys/fs/nfsserver/nfs_nfsdkrpc.c index 1435685f3c33..252c501b29e0 100644 --- a/sys/fs/nfsserver/nfs_nfsdkrpc.c +++ b/sys/fs/nfsserver/nfs_nfsdkrpc.c @@ -109,7 +109,7 @@ extern struct proc *nfsd_master_proc; extern time_t nfsdev_time; extern int nfsrv_writerpc[NFS_NPROCS]; extern volatile int nfsrv_devidcnt; -extern struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS]; +extern struct nfsv4_opflag nfsv4_opflag[NFSV42_NOPS]; /* * NFS server system calls diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c index 48513080f224..6811c2650de3 100644 
--- a/sys/fs/nfsserver/nfs_nfsdport.c +++ b/sys/fs/nfsserver/nfs_nfsdport.c @@ -46,6 +46,8 @@ __FBSDID("$FreeBSD$"); */ #include +#include +#include #include #include #include @@ -104,6 +106,10 @@ extern int nfsrv_dolocallocks; extern int nfsd_enable_stringtouid; extern struct nfsdevicehead nfsrv_devidhead; +static int nfsrv_createiovec(int, struct mbuf **, struct mbuf **, + struct iovec **); +static int nfsrv_createiovecw(int, struct mbuf *, char *, struct iovec **, + int *); static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *, NFSPROC_T *); static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **, @@ -112,19 +118,23 @@ static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *, NFSPROC_T *); static int nfsrv_proxyds(struct vnode *, off_t, int, struct ucred *, struct thread *, int, struct mbuf **, char *, struct mbuf **, - struct nfsvattr *, struct acl *); + struct nfsvattr *, struct acl *, off_t *, int, bool *); static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *); static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *, NFSPROC_T *, struct nfsmount *, struct mbuf **, struct mbuf **); static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct mbuf **, char *, int *); +static int nfsrv_allocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *, + NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *); static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct acl *, int *); static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct nfsvattr *, int *); static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount *, struct nfsvattr *); +static int nfsrv_seekdsrpc(fhandle_t *, off_t *, int, bool *, struct ucred *, + NFSPROC_T *, struct 
nfsmount *); static int nfsrv_putfhname(fhandle_t *, char *); static int nfsrv_pnfslookupds(struct vnode *, struct vnode *, struct pnfsdsfile *, struct vnode **, NFSPROC_T *); @@ -296,7 +306,8 @@ nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEUSED))) { error = nfsrv_proxyds(vp, 0, 0, nd->nd_cred, p, - NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL); + NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL, NULL, 0, + NULL); if (error == 0) gotattr = 1; } @@ -480,7 +491,7 @@ nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) { /* For a pNFS server, set the attributes on the DS file. */ error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETATTR, - NULL, NULL, NULL, nvap, NULL); + NULL, NULL, NULL, nvap, NULL, NULL, 0, NULL); if (error == ENOENT) error = 0; } @@ -722,43 +733,21 @@ int nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp) { - struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN]; - struct iovec *ivp = iv; + struct iovec *iv; struct uio io, *uiop = &io; - struct mbuf *mp, *mp2 = NULL, *mp3 = NULL; - int i, len, tlen, error = 0; + struct mbuf *mp, *mp3; + int len, tlen, error = 0; - len = 0; - i = 0; - while (len < NFS_MAXPATHLEN) { - NFSMGET(mp); - MCLGET(mp, M_WAITOK); - mp->m_len = M_SIZE(mp); - if (len == 0) { - mp3 = mp2 = mp; - } else { - mp2->m_next = mp; - mp2 = mp; - } - if ((len + mp->m_len) > NFS_MAXPATHLEN) { - mp->m_len = NFS_MAXPATHLEN - len; - len = NFS_MAXPATHLEN; - } else { - len += mp->m_len; - } - ivp->iov_base = mtod(mp, caddr_t); - ivp->iov_len = mp->m_len; - i++; - ivp++; - } + len = NFS_MAXPATHLEN; + uiop->uio_iovcnt = nfsrv_createiovec(len, &mp3, &mp, &iv); uiop->uio_iov = iv; - uiop->uio_iovcnt = i; uiop->uio_offset = 0; uiop->uio_resid = len; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td 
= NULL; error = VOP_READLINK(vp, uiop, cred); + free(iv, M_TEMP); if (error) { m_freem(mp3); *lenp = 0; @@ -779,31 +768,20 @@ nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p, } /* - * Read vnode op call into mbuf list. + * Create an mbuf chain and an associated iovec that can be used to Read + * or Getextattr of data. + * Upon success, return pointers to the first and last mbufs in the chain + * plus the malloc'd iovec and its iovlen. */ -int -nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, - struct thread *p, struct mbuf **mpp, struct mbuf **mpendp) +static int +nfsrv_createiovec(int len, struct mbuf **mpp, struct mbuf **mpendp, + struct iovec **ivp) { - struct mbuf *m; - int i; + struct mbuf *m, *m2 = NULL, *m3; struct iovec *iv; - struct iovec *iv2; - int error = 0, len, left, siz, tlen, ioflag = 0; - struct mbuf *m2 = NULL, *m3; - struct uio io, *uiop = &io; - struct nfsheur *nh; + int i, left, siz; - /* - * Attempt to read from a DS file. A return of ENOENT implies - * there is no DS file to read. - */ - error = nfsrv_proxyds(vp, off, cnt, cred, p, NFSPROC_READDS, mpp, - NULL, mpendp, NULL, NULL); - if (error != ENOENT) - return (error); - - len = left = NFSM_RNDUP(cnt); + left = len; m3 = NULL; /* * Generate the mbuf list with the uio_iov ref. to it. @@ -822,9 +800,7 @@ nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, m3 = m; m2 = m; } - iv = malloc(i * sizeof (struct iovec), - M_TEMP, M_WAITOK); - uiop->uio_iov = iv2 = iv; + *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK); m = m3; left = len; i = 0; @@ -842,7 +818,37 @@ nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, } m = m->m_next; } - uiop->uio_iovcnt = i; + *mpp = m3; + *mpendp = m2; + return (i); +} + +/* + * Read vnode op call into mbuf list. 
+ */ +int +nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, + struct thread *p, struct mbuf **mpp, struct mbuf **mpendp) +{ + struct mbuf *m; + struct iovec *iv; + int error = 0, len, tlen, ioflag = 0; + struct mbuf *m3; + struct uio io, *uiop = &io; + struct nfsheur *nh; + + /* + * Attempt to read from a DS file. A return of ENOENT implies + * there is no DS file to read. + */ + error = nfsrv_proxyds(vp, off, cnt, cred, p, NFSPROC_READDS, mpp, + NULL, mpendp, NULL, NULL, NULL, 0, NULL); + if (error != ENOENT) + return (error); + + len = NFSM_RNDUP(cnt); + uiop->uio_iovcnt = nfsrv_createiovec(len, &m3, &m, &iv); + uiop->uio_iov = iv; uiop->uio_offset = off; uiop->uio_resid = len; uiop->uio_rw = UIO_READ; @@ -853,7 +859,7 @@ nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, /* XXX KDM make this more systematic? */ nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid; error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); - free(iv2, M_TEMP); + free(iv, M_TEMP); if (error) { m_freem(m3); *mpp = NULL; @@ -869,7 +875,7 @@ nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, } else if (len != tlen || tlen != cnt) nfsrv_adj(m3, len - tlen, tlen - cnt); *mpp = m3; - *mpendp = m2; + *mpendp = m; out: NFSEXITCODE(error); @@ -877,34 +883,44 @@ nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, } /* - * Write vnode op from an mbuf list. + * Create the iovec for the mbuf chain passed in as an argument. + * The "cp" argument is where the data starts within the first mbuf in + * the chain. It returns the iovec and the iovcnt. 
*/ -int -nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int *stable, - struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p) +static int +nfsrv_createiovecw(int retlen, struct mbuf *m, char *cp, struct iovec **ivpp, + int *iovcntp) { + struct mbuf *mp; struct iovec *ivp; - int i, len; - struct iovec *iv; - int ioflags, error; - struct uio io, *uiop = &io; - struct nfsheur *nh; + int cnt, i, len; /* - * Attempt to write to a DS file. A return of ENOENT implies - * there is no DS file to write. + * Loop through the mbuf chain, counting how many mbufs are a + * part of this write operation, so the iovec size is known. */ - error = nfsrv_proxyds(vp, off, retlen, cred, p, NFSPROC_WRITEDS, - &mp, cp, NULL, NULL, NULL); - if (error != ENOENT) { - *stable = NFSWRITE_FILESYNC; - return (error); + cnt = 0; + len = retlen; + mp = m; + i = mtod(mp, caddr_t) + mbuf_len(mp) - cp; + while (len > 0) { + if (i > 0) { + len -= i; + cnt++; + } + mp = mbuf_next(mp); + if (!mp) { + if (len > 0) + return (EBADRPC); + } else + i = mbuf_len(mp); } - ivp = malloc(cnt * sizeof (struct iovec), M_TEMP, + /* Now, create the iovec. */ + mp = m; + *ivpp = ivp = malloc(cnt * sizeof (struct iovec), M_TEMP, M_WAITOK); - uiop->uio_iov = iv = ivp; - uiop->uio_iovcnt = cnt; + *iovcntp = cnt; i = mtod(mp, caddr_t) + mp->m_len - cp; len = retlen; while (len > 0) { @@ -923,11 +939,42 @@ nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int *stable, cp = mtod(mp, caddr_t); } } + return (0); +} + +/* + * Write vnode op from an mbuf list. + */ +int +nfsvno_write(struct vnode *vp, off_t off, int retlen, int *stable, + struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p) +{ + struct iovec *iv; + int cnt, ioflags, error; + struct uio io, *uiop = &io; + struct nfsheur *nh; + + /* + * Attempt to write to a DS file. A return of ENOENT implies + * there is no DS file to write. 
+ */ + error = nfsrv_proxyds(vp, off, retlen, cred, p, NFSPROC_WRITEDS, + &mp, cp, NULL, NULL, NULL, NULL, 0, NULL); + if (error != ENOENT) { + *stable = NFSWRITE_FILESYNC; + return (error); + } + if (*stable == NFSWRITE_UNSTABLE) ioflags = IO_NODELOCKED; else ioflags = (IO_SYNC | IO_NODELOCKED); + error = nfsrv_createiovecw(retlen, mp, cp, &iv, &cnt); + if (error != 0) + return (error); + uiop->uio_iov = iv; + uiop->uio_iovcnt = cnt; uiop->uio_resid = retlen; uiop->uio_rw = UIO_WRITE; uiop->uio_segflg = UIO_SYSSPACE; @@ -1249,7 +1296,8 @@ nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred, if (vp->v_type == VDIR) error = NFSERR_ISDIR; else if (is_v4) - error = nfsrv_checkremove(vp, 1, p); + error = nfsrv_checkremove(vp, 1, NULL, (nfsquad_t)((u_quad_t)0), + p); if (error == 0) nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh); if (!error) @@ -1379,12 +1427,14 @@ nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp, } if (ndflag & ND_NFSV4) { if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) { - error = nfsrv_checkremove(fvp, 0, p); + error = nfsrv_checkremove(fvp, 0, NULL, + (nfsquad_t)((u_quad_t)0), p); NFSVOPUNLOCK(fvp, 0); } else error = EPERM; if (tvp && !error) - error = nfsrv_checkremove(tvp, 1, p); + error = nfsrv_checkremove(tvp, 1, NULL, + (nfsquad_t)((u_quad_t)0), p); } else { /* * For NFSv2 and NFSv3, try to get rid of the delegation, so @@ -4380,7 +4430,7 @@ nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) /* Do this as root so that it won't fail with EACCES. 
*/ tcred = newnfs_getcred(); error = nfsrv_proxyds(vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN, - NULL, NULL, NULL, nap, NULL); + NULL, NULL, NULL, nap, NULL, NULL, 0, NULL); NFSFREECRED(tcred); return (error); } @@ -4395,14 +4445,15 @@ nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred, int error; error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETACL, - NULL, NULL, NULL, NULL, aclp); + NULL, NULL, NULL, NULL, aclp, NULL, 0, NULL); return (error); } static int nfsrv_proxyds(struct vnode *vp, off_t off, int cnt, struct ucred *cred, struct thread *p, int ioproc, struct mbuf **mpp, char *cp, - struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp) + struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp, + off_t *offp, int content, bool *eofp) { struct nfsmount *nmp[NFSDEV_MAXMIRRORS], *failnmp; fhandle_t fh[NFSDEV_MAXMIRRORS]; @@ -4506,7 +4557,7 @@ nfsrv_proxyds(struct vnode *vp, off_t off, int cnt, struct ucred *cred, origmircnt = mirrorcnt; /* * If failpos is set to a mirror#, then that mirror has - * failed and will be disabled. For Read and Getattr, the + * failed and will be disabled. For Read, Getattr and Seek, the * function only tries one mirror, so if that mirror has * failed, it will need to be retried. As such, increment * tryitagain for these cases. @@ -4539,6 +4590,22 @@ nfsrv_proxyds(struct vnode *vp, off_t off, int cnt, struct ucred *cred, else if (ioproc == NFSPROC_SETACL) error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0], mirrorcnt, aclp, &failpos); + else if (ioproc == NFSPROC_SEEKDS) { + error = nfsrv_seekdsrpc(fh, offp, content, eofp, cred, + p, nmp[0]); + if (nfsds_failerr(error) && mirrorcnt > 1) { + /* + * Setting failpos will cause the mirror + * to be disabled and then a retry of this + * seek is required.
+ */ + failpos = 0; + error = 0; + trycnt++; + } + } else if (ioproc == NFSPROC_ALLOCATE) + error = nfsrv_allocatedsrpc(fh, off, *offp, cred, p, vp, + &nmp[0], mirrorcnt, &failpos); else { error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p, vp, nmp[mirrorcnt - 1], nap); @@ -5163,6 +5230,165 @@ nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, return (error); } +/* + * Do an allocate RPC on a DS data file, using this structure for the arguments, + * so that this function can be executed by a separate kernel process. + */ +struct nfsrvallocatedsdorpc { + int done; + int inprog; + struct task tsk; + fhandle_t fh; + off_t off; + off_t len; + struct nfsmount *nmp; + struct ucred *cred; + NFSPROC_T *p; + int err; +}; + +static int +nfsrv_allocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, + off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + struct nfsrv_descript *nd; + nfsattrbit_t attrbits; + nfsv4stateid_t st; + int error; + + nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); + nfscl_reqstart(nd, NFSPROC_ALLOCATE, nmp, (u_int8_t *)fhp, + sizeof(fhandle_t), NULL, NULL, 0, 0); + + /* + * Use a stateid where other is an alternating 01010 pattern and + * seqid is 0xffffffff. This value is not defined as special by + * the RFC and is used by the FreeBSD NFS server to indicate an + * MDS->DS proxy operation.
+ */ + st.other[0] = 0x55555555; + st.other[1] = 0x55555555; + st.other[2] = 0x55555555; + st.seqid = 0xffffffff; + nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); + NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); + txdr_hyper(off, tl); tl += 2; + txdr_hyper(len, tl); tl += 2; + NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: len=%jd\n", (intmax_t)len); + + *tl = txdr_unsigned(NFSV4OP_GETATTR); + NFSGETATTR_ATTRBIT(&attrbits); + nfsrv_putattrbit(nd, &attrbits); + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, + cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) { + free(nd, M_TEMP); + return (error); + } + NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft allocaterpc=%d\n", + nd->nd_repstat); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, + NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); + } else + error = nd->nd_repstat; + NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft loadattr=%d\n", error); +nfsmout: + m_freem(nd->nd_mrep); + free(nd, M_TEMP); + NFSD_DEBUG(4, "nfsrv_allocatedsdorpc error=%d\n", error); + return (error); +} + +/* + * Start up the thread that will execute nfsrv_allocatedsdorpc(). 
+ */ +static void +start_allocatedsdorpc(void *arg, int pending) +{ + struct nfsrvallocatedsdorpc *drpc; + + drpc = (struct nfsrvallocatedsdorpc *)arg; + drpc->err = nfsrv_allocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off, + drpc->len, NULL, drpc->cred, drpc->p); + drpc->done = 1; + NFSD_DEBUG(4, "start_allocatedsdorpc: err=%d\n", drpc->err); +} + +static int +nfsrv_allocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred, + NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, + int *failposp) +{ + struct nfsrvallocatedsdorpc *drpc, *tdrpc; + struct nfsvattr na; + int error, i, ret, timo; + + NFSD_DEBUG(4, "in nfsrv_allocatedsrpc\n"); + drpc = NULL; + if (mirrorcnt > 1) + tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, + M_WAITOK); + + /* + * Do the allocate RPC for every DS, using a separate kernel process + * for every DS except the last one. + */ + error = 0; + for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { + tdrpc->done = 0; + NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); + tdrpc->off = off; + tdrpc->len = len; + tdrpc->nmp = *nmpp; + tdrpc->cred = cred; + tdrpc->p = p; + tdrpc->inprog = 0; + tdrpc->err = 0; + ret = EIO; + if (nfs_pnfsiothreads != 0) { + ret = nfs_pnfsio(start_allocatedsdorpc, tdrpc); + NFSD_DEBUG(4, "nfsrv_allocatedsrpc: nfs_pnfsio=%d\n", + ret); + } + if (ret != 0) { + ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, NULL, + cred, p); + if (nfsds_failerr(ret) && *failposp == -1) + *failposp = i; + else if (error == 0 && ret != 0) + error = ret; + } + nmpp++; + fhp++; + } + ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p); + if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) + *failposp = mirrorcnt - 1; + else if (error == 0 && ret != 0) + error = ret; + if (error == 0) + error = nfsrv_setextattr(vp, &na, p); + NFSD_DEBUG(4, "nfsrv_allocatedsrpc: aft setextat=%d\n", error); + tdrpc = drpc; + timo = hz / 50; /* Wait for 20msec. 
*/ + if (timo < 1) + timo = 1; + for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { + /* Wait for RPCs on separate threads to complete. */ + while (tdrpc->inprog != 0 && tdrpc->done == 0) + tsleep(&tdrpc->tsk, PVFS, "srvalds", timo); + if (nfsds_failerr(tdrpc->err) && *failposp == -1) + *failposp = i; + else if (error == 0 && tdrpc->err != 0) + error = tdrpc->err; + } + free(drpc, M_TEMP); + return (error); +} + static int nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap, @@ -5550,6 +5776,59 @@ nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, return (error); } +/* + * Seek call to a DS. + */ +static int +nfsrv_seekdsrpc(fhandle_t *fhp, off_t *offp, int content, bool *eofp, + struct ucred *cred, NFSPROC_T *p, struct nfsmount *nmp) +{ + uint32_t *tl; + struct nfsrv_descript *nd; + nfsv4stateid_t st; + int error; + + NFSD_DEBUG(4, "in nfsrv_seekdsrpc\n"); + /* + * Use a stateid where other is an alternating 01010 pattern and + * seqid is 0xffffffff. This value is not defined as special by + * the RFC and is used by the FreeBSD NFS server to indicate an + * MDS->DS proxy operation. 
+ */ + st.other[0] = 0x55555555; + st.other[1] = 0x55555555; + st.other[2] = 0x55555555; + st.seqid = 0xffffffff; + nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); + nfscl_reqstart(nd, NFSPROC_SEEKDS, nmp, (u_int8_t *)fhp, + sizeof(fhandle_t), NULL, NULL, 0, 0); + nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); + NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); + txdr_hyper(*offp, tl); tl += 2; + *tl = txdr_unsigned(content); + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) { + free(nd, M_TEMP); + return (error); + } + NFSD_DEBUG(4, "nfsrv_seekdsrpc: aft seekrpc=%d\n", nd->nd_repstat); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); + if (*tl++ == newnfs_true) + *eofp = true; + else + *eofp = false; + *offp = fxdr_hyper(tl); + } else + error = nd->nd_repstat; +nfsmout: + m_freem(nd->nd_mrep); + free(nd, M_TEMP); + NFSD_DEBUG(4, "nfsrv_seekdsrpc error=%d\n", error); + return (error); +} + /* * Get the device id and file handle for a DS file. */ @@ -5777,6 +6056,286 @@ nfsrv_setacl(struct vnode *vp, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p) return (error); } +/* + * Seek vnode op call (actually it is a VOP_IOCTL()). + * This function is called with the vnode locked, but unlocks and vrele()s + * the vp before returning. + */ +int +nfsvno_seek(struct nfsrv_descript *nd, struct vnode *vp, u_long cmd, + off_t *offp, int content, bool *eofp, struct ucred *cred, NFSPROC_T *p) +{ + struct nfsvattr at; + int error, ret; + + ASSERT_VOP_LOCKED(vp, "nfsvno_seek vp"); + /* + * Attempt to seek on a DS file. A return of ENOENT implies + * there is no DS file to seek on. + */ + error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SEEKDS, NULL, + NULL, NULL, NULL, NULL, offp, content, eofp); + if (error != ENOENT) { + vput(vp); + return (error); + } + + /* + * Do the VOP_IOCTL() call. 
For the case where *offp == file_size, + * VOP_IOCTL() will return ENXIO. However, the correct reply for + * NFSv4.2 is *eofp == true and error == 0 for this case. + */ + NFSVOPUNLOCK(vp, 0); + error = VOP_IOCTL(vp, cmd, offp, 0, cred, p); + *eofp = false; + if (error == ENXIO || (error == 0 && cmd == FIOSEEKHOLE)) { + /* Handle the cases where we might be at EOF. */ + ret = nfsvno_getattr(vp, &at, nd, p, 0, NULL); + if (ret == 0 && *offp == at.na_size) { + *eofp = true; + error = 0; + } + if (ret != 0 && error == 0) + error = ret; + } + vrele(vp); + NFSEXITCODE(error); + return (error); +} + +/* + * Allocate vnode op call. + */ +int +nfsvno_allocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred, + NFSPROC_T *p) +{ + int error, trycnt; + + ASSERT_VOP_ELOCKED(vp, "nfsvno_allocate vp"); + /* + * Attempt to allocate on a DS file. A return of ENOENT implies + * there is no DS file to allocate on. + */ + error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_ALLOCATE, NULL, + NULL, NULL, NULL, NULL, &len, 0, NULL); + if (error != ENOENT) + return (error); + error = 0; + + /* + * Do the actual VOP_ALLOCATE(), looping a reasonable number of + * times to achieve completion. + */ + trycnt = 0; + while (error == 0 && len > 0 && trycnt++ < 20) + error = VOP_ALLOCATE(vp, &off, &len); + if (error == 0 && len > 0) + error = NFSERR_IO; + NFSEXITCODE(error); + return (error); +} + +/* + * Get Extended Attribute vnode op into an mbuf list. + */ +int +nfsvno_getxattr(struct vnode *vp, char *name, uint32_t maxresp, + struct ucred *cred, struct thread *p, struct mbuf **mpp, + struct mbuf **mpendp, int *lenp) +{ + struct iovec *iv; + struct uio io, *uiop = &io; + struct mbuf *m, *m2; + int alen, error, len, tlen; + size_t siz; + + /* First, find out the size of the extended attribute.
*/ + error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, + &siz, cred, p); + if (error != 0) + return (NFSERR_NOXATTR); + if (maxresp < NFS_MAXXDR || siz > maxresp - NFS_MAXXDR) + return (NFSERR_XATTR2BIG); + len = siz; + tlen = NFSM_RNDUP(len); + uiop->uio_iovcnt = nfsrv_createiovec(tlen, &m, &m2, &iv); + uiop->uio_iov = iv; + uiop->uio_offset = 0; + uiop->uio_resid = tlen; + uiop->uio_rw = UIO_READ; + uiop->uio_segflg = UIO_SYSSPACE; + uiop->uio_td = p; +#ifdef MAC + error = mac_vnode_check_getextattr(cred, vp, EXTATTR_NAMESPACE_USER, + name); + if (error != 0) + goto out; +#endif + + error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop, NULL, + cred, p); + if (error != 0) + goto out; + if (uiop->uio_resid > 0) { + alen = tlen; + len = tlen - uiop->uio_resid; + tlen = NFSM_RNDUP(len); + if (alen != tlen) + printf("nfsvno_getxattr: weird size read\n"); + nfsrv_adj(m, alen - tlen, tlen - len); + } + *lenp = len; + *mpp = m; + *mpendp = m2; + +out: + if (error != 0) { + m_freem(m); + *lenp = 0; + } + free(iv, M_TEMP); + NFSEXITCODE(error); + return (error); +} + +/* + * Set Extended attribute vnode op from an mbuf list. + */ +int +nfsvno_setxattr(struct vnode *vp, char *name, int len, struct mbuf *m, + char *cp, struct ucred *cred, struct thread *p) +{ + struct iovec *iv; + struct uio uio, *uiop = &uio; + int cnt, error; + +#ifdef MAC + error = mac_vnode_check_setextattr(cred, vp, EXTATTR_NAMESPACE_USER, + name); + if (error != 0) + goto out; +#endif + + uiop->uio_rw = UIO_WRITE; + uiop->uio_segflg = UIO_SYSSPACE; + uiop->uio_td = p; + uiop->uio_offset = 0; + uiop->uio_resid = len; + error = nfsrv_createiovecw(len, m, cp, &iv, &cnt); + if (error != 0) + goto out; + uiop->uio_iov = iv; + uiop->uio_iovcnt = cnt; + error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop, + cred, p); + free(iv, M_TEMP); + +out: + NFSEXITCODE(error); + return (error); +} + +/* + * Remove Extended attribute vnode op.
+ * If VOP_DELETEEXTATTR() returns EOPNOTSUPP, the removal is retried via + * VOP_SETEXTATTR() with a NULL uio. + */ +int +nfsvno_rmxattr(struct nfsrv_descript *nd, struct vnode *vp, char *name, + struct ucred *cred, struct thread *p) +{ + int error; + + /* + * Get rid of any delegations. I am not sure why this is required, + * but RFC-8276 says so. + */ + error = nfsrv_checkremove(vp, 0, nd, nd->nd_clientid, p); + if (error != 0) + goto out; +#ifdef MAC + error = mac_vnode_check_deleteextattr(cred, vp, EXTATTR_NAMESPACE_USER, + name); + if (error != 0) + goto out; +#endif + + error = VOP_DELETEEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, cred, p); + if (error == EOPNOTSUPP) + error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, + cred, p); +out: + NFSEXITCODE(error); + return (error); +} + +/* + * List Extended Attribute vnode op into a buffer returned via bufp. + */ +int +nfsvno_listxattr(struct vnode *vp, uint64_t cookie, struct ucred *cred, + struct thread *p, u_char **bufp, uint32_t *lenp, bool *eofp) +{ + struct iovec iv; + struct uio io; + int error; + size_t siz; + + *bufp = NULL; + /* First, find out the size of the extended attribute list. */ + error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, NULL, &siz, cred, + p); + if (error != 0) + return (NFSERR_NOXATTR); + if (siz <= cookie) { + *lenp = 0; + *eofp = true; + goto out; + } + if (siz > cookie + *lenp) { + siz = cookie + *lenp; + *eofp = false; + } else + *eofp = true; + /* Just choose a sanity limit of 10Mbytes for malloc(M_TEMP).
*/ + if (siz > 10 * 1024 * 1024) { + error = NFSERR_XATTR2BIG; + goto out; + } + *bufp = malloc(siz, M_TEMP, M_WAITOK); + iv.iov_base = *bufp; + iv.iov_len = siz; + io.uio_iovcnt = 1; + io.uio_iov = &iv; + io.uio_offset = 0; + io.uio_resid = siz; + io.uio_rw = UIO_READ; + io.uio_segflg = UIO_SYSSPACE; + io.uio_td = p; +#ifdef MAC + error = mac_vnode_check_listextattr(cred, vp, EXTATTR_NAMESPACE_USER); + if (error != 0) + goto out; +#endif + + error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, &io, NULL, cred, + p); + if (error != 0) + goto out; + if (io.uio_resid > 0) + siz -= io.uio_resid; + *lenp = siz; + +out: + if (error != 0) { + free(*bufp, M_TEMP); + *bufp = NULL; + } + NFSEXITCODE(error); + return (error); +} + extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *); /* diff --git a/sys/fs/nfsserver/nfs_nfsdserv.c b/sys/fs/nfsserver/nfs_nfsdserv.c index 830fd23d6f1c..76d4b93cfd37 100644 --- a/sys/fs/nfsserver/nfs_nfsdserv.c +++ b/sys/fs/nfsserver/nfs_nfsdserv.c @@ -50,6 +50,8 @@ __FBSDID("$FreeBSD$"); #ifndef APPLEKEXT #include +#include +#include /* Global vars */ extern u_int32_t newnfs_false, newnfs_true; @@ -65,6 +67,7 @@ extern int nfsd_debuglevel; extern u_long sb_max_adj; extern int nfsrv_pnfsatime; extern int nfsrv_maxpnfsmirror; +extern int nfs_maxcopyrange; #endif /* !APPLEKEXT */ static int nfs_async = 0; @@ -74,6 +77,10 @@ SYSCTL_INT(_vfs_nfsd, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, extern int nfsrv_doflexfile; SYSCTL_INT(_vfs_nfsd, OID_AUTO, default_flexfile, CTLFLAG_RW, &nfsrv_doflexfile, 0, "Make Flex File Layout the default for pNFS"); +static int nfsrv_linux42server = 1; +SYSCTL_INT(_vfs_nfsd, OID_AUTO, linux42server, CTLFLAG_RW, + &nfsrv_linux42server, 0, + "Enable Linux style NFSv4.2 server (non-RFC compliant)"); /* * This list defines the GSS mechanisms supported. 
@@ -121,7 +128,8 @@ nfsrvd_access(struct nfsrv_descript *nd, __unused int isdgram, if ((nd->nd_flag & ND_NFSV4) && (nfsmode & ~(NFSACCESS_READ | NFSACCESS_LOOKUP | NFSACCESS_MODIFY | NFSACCESS_EXTEND | NFSACCESS_DELETE | - NFSACCESS_EXECUTE))) { + NFSACCESS_EXECUTE | NFSACCESS_XAREAD | NFSACCESS_XAWRITE | + NFSACCESS_XALIST))) { nd->nd_repstat = NFSERR_INVAL; vput(vp); goto out; @@ -144,6 +152,24 @@ nfsrvd_access(struct nfsrv_descript *nd, __unused int isdgram, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_EXTEND; } + if (nfsmode & NFSACCESS_XAREAD) { + supported |= NFSACCESS_XAREAD; + if (nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, + NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) + nfsmode &= ~NFSACCESS_XAREAD; + } + if (nfsmode & NFSACCESS_XAWRITE) { + supported |= NFSACCESS_XAWRITE; + if (nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, + NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) + nfsmode &= ~NFSACCESS_XAWRITE; + } + if (nfsmode & NFSACCESS_XALIST) { + supported |= NFSACCESS_XALIST; + if (nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, + NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) + nfsmode &= ~NFSACCESS_XALIST; + } if (nfsmode & NFSACCESS_DELETE) { supported |= NFSACCESS_DELETE; if (vp->v_type == VDIR) @@ -864,9 +890,7 @@ APPLESTATIC int nfsrvd_write(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { - int i, cnt; u_int32_t *tl; - mbuf_t mp; struct nfsvattr nva, forat; int aftat_ret = 1, retlen, len, error = 0, forat_ret = 1; int gotproxystateid, stable = NFSWRITE_FILESYNC; @@ -948,28 +972,6 @@ nfsrvd_write(struct nfsrv_descript *nd, __unused int isdgram, lop->lo_end = NFS64BITSSET; } - /* - * Loop through the mbuf chain, counting how many mbufs are a - * part of this write operation, so the iovec size is known. 
- */ - cnt = 0; - mp = nd->nd_md; - i = NFSMTOD(mp, caddr_t) + mbuf_len(mp) - nd->nd_dpos; - while (len > 0) { - if (i > 0) { - len -= i; - cnt++; - } - mp = mbuf_next(mp); - if (!mp) { - if (len > 0) { - error = EBADRPC; - goto nfsmout; - } - } else - i = mbuf_len(mp); - } - if (retlen > NFS_SRVMAXIO || retlen < 0) nd->nd_repstat = EIO; if (vnode_vtype(vp) != VREG && !nd->nd_repstat) { @@ -1011,7 +1013,7 @@ nfsrvd_write(struct nfsrv_descript *nd, __unused int isdgram, * which is to return ok so long as there are no permission problems. */ if (retlen > 0) { - nd->nd_repstat = nfsvno_write(vp, off, retlen, cnt, &stable, + nd->nd_repstat = nfsvno_write(vp, off, retlen, &stable, nd->nd_md, nd->nd_dpos, nd->nd_cred, p); error = nfsm_advance(nd, NFSM_RNDUP(retlen), -1); if (error) @@ -4075,6 +4077,8 @@ nfsrvd_exchangeid(struct nfsrv_descript *nd, __unused int isdgram, clp->lc_flags |= LCL_GSSPRIVACY; } else clp->lc_flags = LCL_NFSV41; + if ((nd->nd_flag & ND_NFSV42) != 0) + clp->lc_flags |= LCL_NFSV42; if ((nd->nd_flag & ND_GSS) != 0 && nd->nd_princlen > 0) { clp->lc_flags |= LCL_NAME; clp->lc_namelen = nd->nd_princlen; @@ -4751,6 +4755,229 @@ nfsrvd_layoutreturn(struct nfsrv_descript *nd, __unused int isdgram, return (error); } +/* + * nfsv4 layout error service + */ +APPLESTATIC int +nfsrvd_layouterror(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, struct nfsexstuff *exp) +{ + uint32_t *tl; + nfsv4stateid_t stateid; + int cnt, error = 0, i, stat; + int opnum __unused; + char devid[NFSX_V4DEVICEID]; + uint64_t offset, len; + + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID + + NFSX_UNSIGNED); + offset = fxdr_hyper(tl); tl += 2; + len = fxdr_hyper(tl); tl += 2; + stateid.seqid = fxdr_unsigned(uint32_t, *tl++); + NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); + tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); + cnt = fxdr_unsigned(int, 
*tl); + NFSD_DEBUG(4, "layouterror off=%ju len=%ju cnt=%d\n", (uintmax_t)offset, + (uintmax_t)len, cnt); + /* + * For the special stateid of other all 0s and seqid == 1, set + * the stateid to the current stateid, if it is set. + */ + if (stateid.seqid == 1 && stateid.other[0] == 0 && + stateid.other[1] == 0 && stateid.other[2] == 0) { + if ((nd->nd_flag & ND_CURSTATEID) != 0) { + stateid = nd->nd_curstateid; + stateid.seqid = 0; + } else { + nd->nd_repstat = NFSERR_BADSTATEID; + goto nfsmout; + } + } + + /* + * Ignore offset, len and stateid for now. + */ + for (i = 0; i < cnt; i++) { + NFSM_DISSECT(tl, uint32_t *, NFSX_V4DEVICEID + 2 * + NFSX_UNSIGNED); + NFSBCOPY(tl, devid, NFSX_V4DEVICEID); + tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); + stat = fxdr_unsigned(int, *tl++); + opnum = fxdr_unsigned(int, *tl); + NFSD_DEBUG(4, "nfsrvd_layouterr op=%d stat=%d\n", opnum, stat); + /* + * Except for NFSERR_ACCES and NFSERR_STALE errors, + * disable the mirror. + */ + if (stat != NFSERR_ACCES && stat != NFSERR_STALE) + nfsrv_delds(devid, curthread); + } +nfsmout: + vput(vp); + NFSEXITCODE2(error, nd); + return (error); +} + +/* + * nfsv4 layout stats service + */ +APPLESTATIC int +nfsrvd_layoutstats(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, struct nfsexstuff *exp) +{ + uint32_t *tl; + nfsv4stateid_t stateid; + int cnt, error = 0; + int layouttype __unused; + char devid[NFSX_V4DEVICEID] __unused; + uint64_t offset, len, readcount, readbytes, writecount, writebytes + __unused; + + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_HYPER + NFSX_STATEID + + NFSX_V4DEVICEID + 2 * NFSX_UNSIGNED); + offset = fxdr_hyper(tl); tl += 2; + len = fxdr_hyper(tl); tl += 2; + stateid.seqid = fxdr_unsigned(uint32_t, *tl++); + NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); + tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); + readcount = fxdr_hyper(tl); tl += 2; + readbytes = 
fxdr_hyper(tl); tl += 2; + writecount = fxdr_hyper(tl); tl += 2; + writebytes = fxdr_hyper(tl); tl += 2; + NFSBCOPY(tl, devid, NFSX_V4DEVICEID); + tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); + layouttype = fxdr_unsigned(int, *tl++); + cnt = fxdr_unsigned(int, *tl); + error = nfsm_advance(nd, NFSM_RNDUP(cnt), -1); + if (error != 0) + goto nfsmout; + NFSD_DEBUG(4, "layoutstats cnt=%d\n", cnt); + /* + * For the special stateid of other all 0s and seqid == 1, set + * the stateid to the current stateid, if it is set. + */ + if (stateid.seqid == 1 && stateid.other[0] == 0 && + stateid.other[1] == 0 && stateid.other[2] == 0) { + if ((nd->nd_flag & ND_CURSTATEID) != 0) { + stateid = nd->nd_curstateid; + stateid.seqid = 0; + } else { + nd->nd_repstat = NFSERR_BADSTATEID; + goto nfsmout; + } + } + + /* + * No use for the stats for now. + */ +nfsmout: + vput(vp); + NFSEXITCODE2(error, nd); + return (error); +} + +/* + * nfsv4 io_advise service + */ +APPLESTATIC int +nfsrvd_ioadvise(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, struct nfsexstuff *exp) +{ + uint32_t *tl; + nfsv4stateid_t stateid; + nfsattrbit_t hints; + int error = 0, ret; + off_t offset, len; + + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID + 2 * NFSX_HYPER); + stateid.seqid = fxdr_unsigned(uint32_t, *tl++); + NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); + tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); + offset = fxdr_hyper(tl); tl += 2; + len = fxdr_hyper(tl); + error = nfsrv_getattrbits(nd, &hints, NULL, NULL); + if (error != 0) + goto nfsmout; + /* + * For the special stateid of other all 0s and seqid == 1, set + * the stateid to the current stateid, if it is set. 
+ */ + if (stateid.seqid == 1 && stateid.other[0] == 0 && + stateid.other[1] == 0 && stateid.other[2] == 0) { + if ((nd->nd_flag & ND_CURSTATEID) != 0) { + stateid = nd->nd_curstateid; + stateid.seqid = 0; + } else { + nd->nd_repstat = NFSERR_BADSTATEID; + goto nfsmout; + } + } + + if (offset < 0) { + nd->nd_repstat = NFSERR_INVAL; + goto nfsmout; + } + if (len < 0) + len = 0; + if (vp->v_type != VREG) { + if (vp->v_type == VDIR) + nd->nd_repstat = NFSERR_ISDIR; + else + nd->nd_repstat = NFSERR_WRONGTYPE; + goto nfsmout; + } + + /* + * For now, we can only handle WILLNEED and DONTNEED and don't use + * the stateid. + */ + if ((NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED) && + !NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED)) || + (NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED) && + !NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED))) { + NFSVOPUNLOCK(vp, 0); + if (NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED)) { + ret = VOP_ADVISE(vp, offset, len, POSIX_FADV_WILLNEED); + NFSZERO_ATTRBIT(&hints); + if (ret == 0) + NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED); + else + NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_NORMAL); + } else { + ret = VOP_ADVISE(vp, offset, len, POSIX_FADV_DONTNEED); + NFSZERO_ATTRBIT(&hints); + if (ret == 0) + NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED); + else + NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_NORMAL); + } + vrele(vp); + } else { + NFSZERO_ATTRBIT(&hints); + NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_NORMAL); + vput(vp); + } + nfsrv_putattrbit(nd, &hints); + NFSEXITCODE2(error, nd); + return (error); +nfsmout: + vput(vp); + NFSEXITCODE2(error, nd); + return (error); +} + /* * nfsv4 getdeviceinfo service */ @@ -4868,6 +5095,737 @@ nfsrvd_teststateid(struct nfsrv_descript *nd, __unused int isdgram, return (error); } +/* + * nfs allocate service + */ +APPLESTATIC int +nfsrvd_allocate(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, struct nfsexstuff *exp) +{ + uint32_t *tl; + struct nfsvattr forat; + int error = 0, forat_ret = 1, 
gotproxystateid; + off_t off, len; + struct nfsstate st, *stp = &st; + struct nfslock lo, *lop = &lo; + nfsv4stateid_t stateid; + nfsquad_t clientid; + nfsattrbit_t attrbits; + + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + gotproxystateid = 0; + NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID + 2 * NFSX_HYPER); + stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS); + lop->lo_flags = NFSLCK_WRITE; + stp->ls_ownerlen = 0; + stp->ls_op = NULL; + stp->ls_uid = nd->nd_cred->cr_uid; + stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); + clientid.lval[0] = stp->ls_stateid.other[0] = *tl++; + clientid.lval[1] = stp->ls_stateid.other[1] = *tl++; + if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { + if ((nd->nd_flag & ND_NFSV41) != 0) + clientid.qval = nd->nd_clientid.qval; + else if (nd->nd_clientid.qval != clientid.qval) + printf("EEK2 multiple clids\n"); + } else { + if ((nd->nd_flag & ND_NFSV41) != 0) + printf("EEK! no clientid from session\n"); + nd->nd_flag |= ND_IMPLIEDCLID; + nd->nd_clientid.qval = clientid.qval; + } + stp->ls_stateid.other[2] = *tl++; + /* + * Don't allow this to be done for a DS. + */ + if ((nd->nd_flag & ND_DSSERVER) != 0) + nd->nd_repstat = NFSERR_NOTSUPP; + /* However, allow the proxy stateid. */ + if (stp->ls_stateid.seqid == 0xffffffff && + stp->ls_stateid.other[0] == 0x55555555 && + stp->ls_stateid.other[1] == 0x55555555 && + stp->ls_stateid.other[2] == 0x55555555) + gotproxystateid = 1; + off = fxdr_hyper(tl); tl += 2; + lop->lo_first = off; + len = fxdr_hyper(tl); + lop->lo_end = off + len; + /* + * Paranoia, just in case it wraps around, which shouldn't + * ever happen anyhow. 
+ */ + if (nd->nd_repstat == 0 && (lop->lo_end < lop->lo_first || len <= 0)) + nd->nd_repstat = NFSERR_INVAL; + + if (nd->nd_repstat == 0 && vnode_vtype(vp) != VREG) + nd->nd_repstat = NFSERR_WRONGTYPE; + NFSZERO_ATTRBIT(&attrbits); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); + forat_ret = nfsvno_getattr(vp, &forat, nd, curthread, 1, &attrbits); + if (nd->nd_repstat == 0) + nd->nd_repstat = forat_ret; + if (nd->nd_repstat == 0 && (forat.na_uid != nd->nd_cred->cr_uid || + NFSVNO_EXSTRICTACCESS(exp))) + nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, + curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, + NULL); + if (nd->nd_repstat == 0 && gotproxystateid == 0) + nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, + &stateid, exp, nd, curthread); + + if (nd->nd_repstat == 0) + nd->nd_repstat = nfsvno_allocate(vp, off, len, nd->nd_cred, + curthread); + vput(vp); + NFSEXITCODE2(0, nd); + return (0); +nfsmout: + vput(vp); + NFSEXITCODE2(error, nd); + return (error); +} + +/* + * nfs copy service + */ +APPLESTATIC int +nfsrvd_copy_file_range(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, vnode_t tovp, struct nfsexstuff *exp, struct nfsexstuff *toexp) +{ + uint32_t *tl; + struct nfsvattr at; + int cnt, error = 0, ret; + off_t inoff, outoff; + uint64_t len; + size_t xfer; + struct nfsstate inst, outst, *instp = &inst, *outstp = &outst; + struct nfslock inlo, outlo, *inlop = &inlo, *outlop = &outlo; + nfsquad_t clientid; + nfsv4stateid_t stateid; + nfsattrbit_t attrbits; + void *rl_rcookie, *rl_wcookie; + + rl_rcookie = rl_wcookie = NULL; + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + if (nfsrv_devidcnt > 0) { + /* + * For a pNFS server, reply NFSERR_NOTSUPP so that the client + * will do the copy via I/O on the DS(s). 
+ */ + nd->nd_repstat = NFSERR_NOTSUPP; + goto nfsmout; + } + if (vp == tovp) { + /* Copying a byte range within the same file is not allowed. */ + nd->nd_repstat = NFSERR_INVAL; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_STATEID + 3 * NFSX_HYPER + + 3 * NFSX_UNSIGNED); + instp->ls_flags = (NFSLCK_CHECK | NFSLCK_READACCESS); + inlop->lo_flags = NFSLCK_READ; + instp->ls_ownerlen = 0; + instp->ls_op = NULL; + instp->ls_uid = nd->nd_cred->cr_uid; + instp->ls_stateid.seqid = fxdr_unsigned(uint32_t, *tl++); + clientid.lval[0] = instp->ls_stateid.other[0] = *tl++; + clientid.lval[1] = instp->ls_stateid.other[1] = *tl++; + if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) + clientid.qval = nd->nd_clientid.qval; + instp->ls_stateid.other[2] = *tl++; + outstp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS); + outlop->lo_flags = NFSLCK_WRITE; + outstp->ls_ownerlen = 0; + outstp->ls_op = NULL; + outstp->ls_uid = nd->nd_cred->cr_uid; + outstp->ls_stateid.seqid = fxdr_unsigned(uint32_t, *tl++); + outstp->ls_stateid.other[0] = *tl++; + outstp->ls_stateid.other[1] = *tl++; + outstp->ls_stateid.other[2] = *tl++; + inoff = fxdr_hyper(tl); tl += 2; + inlop->lo_first = inoff; + outoff = fxdr_hyper(tl); tl += 2; + outlop->lo_first = outoff; + len = fxdr_hyper(tl); tl += 2; + if (len == 0) { + /* len == 0 means to EOF. */ + inlop->lo_end = OFF_MAX; + outlop->lo_end = OFF_MAX; + } else { + inlop->lo_end = inlop->lo_first + len; + outlop->lo_end = outlop->lo_first + len; + } + + /* + * At this time only consecutive, synchronous copy is supported, + * so ca_consecutive and ca_synchronous can be ignored. 
+ */ + tl += 2; + + cnt = fxdr_unsigned(int, *tl); + if ((nd->nd_flag & ND_DSSERVER) != 0 || cnt != 0) + nd->nd_repstat = NFSERR_NOTSUPP; + if (nd->nd_repstat == 0 && (inoff > OFF_MAX || outoff > OFF_MAX || + inlop->lo_end > OFF_MAX || outlop->lo_end > OFF_MAX || + inlop->lo_end < inlop->lo_first || outlop->lo_end < + outlop->lo_first)) + nd->nd_repstat = NFSERR_INVAL; + + if (nd->nd_repstat == 0 && vnode_vtype(vp) != VREG) + nd->nd_repstat = NFSERR_WRONGTYPE; + + /* Check permissions for the input file. */ + NFSZERO_ATTRBIT(&attrbits); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); + ret = nfsvno_getattr(vp, &at, nd, curthread, 1, &attrbits); + if (nd->nd_repstat == 0) + nd->nd_repstat = ret; + if (nd->nd_repstat == 0 && (at.na_uid != nd->nd_cred->cr_uid || + NFSVNO_EXSTRICTACCESS(exp))) + nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, + curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, + NULL); + if (nd->nd_repstat == 0) + nd->nd_repstat = nfsrv_lockctrl(vp, &instp, &inlop, NULL, + clientid, &stateid, exp, nd, curthread); + NFSVOPUNLOCK(vp, 0); + if (nd->nd_repstat != 0) + goto out; + + error = NFSVOPLOCK(tovp, LK_SHARED); + if (error != 0) + goto out; + if (vnode_vtype(tovp) != VREG) + nd->nd_repstat = NFSERR_WRONGTYPE; + + /* For the output file, we only need the Owner attribute. */ + ret = nfsvno_getattr(tovp, &at, nd, curthread, 1, &attrbits); + if (nd->nd_repstat == 0) + nd->nd_repstat = ret; + if (nd->nd_repstat == 0 && (at.na_uid != nd->nd_cred->cr_uid || + NFSVNO_EXSTRICTACCESS(exp))) + nd->nd_repstat = nfsvno_accchk(tovp, VWRITE, nd->nd_cred, toexp, + curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, + NULL); + if (nd->nd_repstat == 0) + nd->nd_repstat = nfsrv_lockctrl(tovp, &outstp, &outlop, NULL, + clientid, &stateid, toexp, nd, curthread); + NFSVOPUNLOCK(tovp, 0); + + /* Range lock the byte ranges for both invp and outvp. 
*/ + if (nd->nd_repstat == 0) { + for (;;) { + if (len == 0) { + rl_wcookie = vn_rangelock_wlock(tovp, outoff, + OFF_MAX); + rl_rcookie = vn_rangelock_tryrlock(vp, inoff, + OFF_MAX); + } else { + rl_wcookie = vn_rangelock_wlock(tovp, outoff, + outoff + len); + rl_rcookie = vn_rangelock_tryrlock(vp, inoff, + inoff + len); + } + if (rl_rcookie != NULL) + break; + vn_rangelock_unlock(tovp, rl_wcookie); + if (len == 0) + rl_rcookie = vn_rangelock_rlock(vp, inoff, + OFF_MAX); + else + rl_rcookie = vn_rangelock_rlock(vp, inoff, + inoff + len); + vn_rangelock_unlock(vp, rl_rcookie); + } + + error = NFSVOPLOCK(vp, LK_SHARED); + if (error == 0) { + ret = nfsvno_getattr(vp, &at, nd, curthread, 1, NULL); + if (ret == 0) { + /* + * Since invp is range locked, na_size should + * not change. + */ + if (len == 0 && at.na_size > inoff) { + /* + * If len == 0, set it based on invp's + * size. If offset is past EOF, just + * leave len == 0. + */ + len = at.na_size - inoff; + } else if (nfsrv_linux42server == 0 && + inoff + len > at.na_size) { + /* + * RFC-7862 says that NFSERR_INVAL must + * be returned when inoff + len exceeds + * the file size, however the NFSv4.2 + * Linux client likes to do this, so + * only check if nfsrv_linux42server + * is not set. + */ + nd->nd_repstat = NFSERR_INVAL; + } + } + NFSVOPUNLOCK(vp, 0); + if (ret != 0 && nd->nd_repstat == 0) + nd->nd_repstat = ret; + } else if (nd->nd_repstat == 0) + nd->nd_repstat = error; + } + + /* + * Do the actual copy to an upper limit of vfs.nfs.maxcopyrange. + * This limit is applied to ensure that the RPC replies in a + * reasonable time. + */ + if (len > nfs_maxcopyrange) + xfer = nfs_maxcopyrange; + else + xfer = len; + if (nd->nd_repstat == 0) { + nd->nd_repstat = vn_copy_file_range(vp, &inoff, tovp, &outoff, + &xfer, 0, nd->nd_cred, nd->nd_cred, NULL); + if (nd->nd_repstat == 0) + len = xfer; + } + + /* Unlock the ranges. 
*/ + if (rl_rcookie != NULL) + vn_rangelock_unlock(vp, rl_rcookie); + if (rl_wcookie != NULL) + vn_rangelock_unlock(tovp, rl_wcookie); + + if (nd->nd_repstat == 0) { + NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + NFSX_HYPER + + NFSX_VERF); + *tl++ = txdr_unsigned(0); /* No callback ids. */ + txdr_hyper(len, tl); tl += 2; + *tl++ = txdr_unsigned(NFSWRITE_UNSTABLE); + *tl++ = txdr_unsigned(nfsboottime.tv_sec); + *tl++ = txdr_unsigned(nfsboottime.tv_usec); + *tl++ = newnfs_true; + *tl = newnfs_true; + } +out: + vrele(vp); + vrele(tovp); + NFSEXITCODE2(error, nd); + return (error); +nfsmout: + vput(vp); + vrele(tovp); + NFSEXITCODE2(error, nd); + return (error); +} + +/* + * nfs seek service + */ +APPLESTATIC int +nfsrvd_seek(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, struct nfsexstuff *exp) +{ + uint32_t *tl; + struct nfsvattr at; + int content, error = 0; + off_t off; + u_long cmd; + nfsattrbit_t attrbits; + bool eof; + + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID + NFSX_HYPER + NFSX_UNSIGNED); + /* Ignore the stateid for now. */ + tl += (NFSX_STATEID / NFSX_UNSIGNED); + off = fxdr_hyper(tl); tl += 2; + content = fxdr_unsigned(int, *tl); + if (content == NFSV4CONTENT_DATA) + cmd = FIOSEEKDATA; + else if (content == NFSV4CONTENT_HOLE) + cmd = FIOSEEKHOLE; + else + nd->nd_repstat = NFSERR_BADXDR; + if (nd->nd_repstat == 0 && vnode_vtype(vp) == VDIR) + nd->nd_repstat = NFSERR_ISDIR; + if (nd->nd_repstat == 0 && vnode_vtype(vp) != VREG) + nd->nd_repstat = NFSERR_WRONGTYPE; + if (nd->nd_repstat == 0 && off < 0) + nd->nd_repstat = NFSERR_NXIO; + if (nd->nd_repstat == 0) { + /* Check permissions for the input file. 
*/ + NFSZERO_ATTRBIT(&attrbits); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); + nd->nd_repstat = nfsvno_getattr(vp, &at, nd, curthread, 1, + &attrbits); + } + if (nd->nd_repstat == 0 && (at.na_uid != nd->nd_cred->cr_uid || + NFSVNO_EXSTRICTACCESS(exp))) + nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, + curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, + NULL); + if (nd->nd_repstat != 0) + goto nfsmout; + + /* nfsvno_seek() unlocks and vrele()s the vp. */ + nd->nd_repstat = nfsvno_seek(nd, vp, cmd, &off, content, &eof, + nd->nd_cred, curthread); + if (nd->nd_repstat == 0 && eof && content == NFSV4CONTENT_DATA && + nfsrv_linux42server != 0) + nd->nd_repstat = NFSERR_NXIO; + if (nd->nd_repstat == 0) { + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); + if (eof) + *tl++ = newnfs_true; + else + *tl++ = newnfs_false; + txdr_hyper(off, tl); + } + NFSEXITCODE2(error, nd); + return (error); +nfsmout: + vput(vp); + NFSEXITCODE2(error, nd); + return (error); +} + +/* + * nfs get extended attribute service + */ +APPLESTATIC int +nfsrvd_getxattr(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, __unused struct nfsexstuff *exp) +{ + uint32_t *tl; + mbuf_t mp = NULL, mpend = NULL; + int error, len; + char *name; + struct thread *p = curthread; + + error = 0; + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + len = fxdr_unsigned(int, *tl); + if (len <= 0) { + nd->nd_repstat = NFSERR_BADXDR; + goto nfsmout; + } + if (len > EXTATTR_MAXNAMELEN) { + nd->nd_repstat = NFSERR_NOXATTR; + goto nfsmout; + } + name = malloc(len + 1, M_TEMP, M_WAITOK); + nd->nd_repstat = nfsrv_mtostr(nd, name, len); + if (nd->nd_repstat == 0) + nd->nd_repstat = nfsvno_getxattr(vp, name, nd->nd_maxresp, + nd->nd_cred, p, &mp, &mpend, &len); + if (nd->nd_repstat == ENOATTR) + nd->nd_repstat = NFSERR_NOXATTR; + else if (nd->nd_repstat == EOPNOTSUPP) + 
nd->nd_repstat = NFSERR_NOTSUPP; + if (nd->nd_repstat == 0) { + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(len); + mbuf_setnext(nd->nd_mb, mp); + nd->nd_mb = mpend; + nd->nd_bpos = NFSMTOD(mpend, caddr_t) + mbuf_len(mpend); + } + free(name, M_TEMP); + +nfsmout: + if (nd->nd_repstat == 0) + nd->nd_repstat = error; + vput(vp); + NFSEXITCODE2(0, nd); + return (0); +} + +/* + * nfs set extended attribute service + */ +APPLESTATIC int +nfsrvd_setxattr(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, __unused struct nfsexstuff *exp) +{ + uint32_t *tl; + struct nfsvattr ova, nva; + nfsattrbit_t attrbits; + int error, len, opt; + char *name; + size_t siz; + struct thread *p = curthread; + + error = 0; + name = NULL; + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + opt = fxdr_unsigned(int, *tl++); + len = fxdr_unsigned(int, *tl); + if (len <= 0) { + nd->nd_repstat = NFSERR_BADXDR; + goto nfsmout; + } + if (len > EXTATTR_MAXNAMELEN) { + nd->nd_repstat = NFSERR_NOXATTR; + goto nfsmout; + } + name = malloc(len + 1, M_TEMP, M_WAITOK); + error = nfsrv_mtostr(nd, name, len); + if (error != 0) + goto nfsmout; + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + len = fxdr_unsigned(int, *tl); + if (len <= 0 || len > IOSIZE_MAX) { + nd->nd_repstat = NFSERR_XATTR2BIG; + goto nfsmout; + } + switch (opt) { + case NFSV4SXATTR_CREATE: + error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, + &siz, nd->nd_cred, p); + if (error != ENOATTR) + nd->nd_repstat = NFSERR_EXIST; + error = 0; + break; + case NFSV4SXATTR_REPLACE: + error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, + &siz, nd->nd_cred, p); + if (error != 0) + nd->nd_repstat = NFSERR_NOXATTR; + break; + case NFSV4SXATTR_EITHER: + break; + default: + nd->nd_repstat = NFSERR_BADXDR; + } + if (nd->nd_repstat != 0) + goto nfsmout; + + /* Now, do the Set Extended 
attribute, with Change before and after. */ + NFSZERO_ATTRBIT(&attrbits); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); + nd->nd_repstat = nfsvno_getattr(vp, &ova, nd, p, 1, &attrbits); + if (nd->nd_repstat == 0) { + nd->nd_repstat = nfsvno_setxattr(vp, name, len, nd->nd_md, + nd->nd_dpos, nd->nd_cred, p); + if (nd->nd_repstat == ENXIO) + nd->nd_repstat = NFSERR_XATTR2BIG; + } + if (nd->nd_repstat == 0) + nd->nd_repstat = nfsm_advance(nd, NFSM_RNDUP(len), -1); + if (nd->nd_repstat == 0) + nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, &attrbits); + if (nd->nd_repstat == 0) { + NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); + *tl++ = newnfs_true; + txdr_hyper(ova.na_filerev, tl); tl += 2; + txdr_hyper(nva.na_filerev, tl); + } + +nfsmout: + free(name, M_TEMP); + if (nd->nd_repstat == 0) + nd->nd_repstat = error; + vput(vp); + NFSEXITCODE2(0, nd); + return (0); +} + +/* + * nfs remove extended attribute service + */ +APPLESTATIC int +nfsrvd_rmxattr(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, __unused struct nfsexstuff *exp) +{ + uint32_t *tl; + struct nfsvattr ova, nva; + nfsattrbit_t attrbits; + int error, len; + char *name; + struct thread *p = curthread; + + error = 0; + name = NULL; + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + len = fxdr_unsigned(int, *tl); + if (len <= 0) { + nd->nd_repstat = NFSERR_BADXDR; + goto nfsmout; + } + if (len > EXTATTR_MAXNAMELEN) { + nd->nd_repstat = NFSERR_NOXATTR; + goto nfsmout; + } + name = malloc(len + 1, M_TEMP, M_WAITOK); + error = nfsrv_mtostr(nd, name, len); + if (error != 0) + goto nfsmout; + + if ((nd->nd_flag & ND_IMPLIEDCLID) == 0) { + printf("EEK! nfsrvd_rmxattr: no implied clientid\n"); + error = NFSERR_NOXATTR; + goto nfsmout; + } + /* + * Now, do the Remove Extended attribute, with Change before and + * after. 
+ */ + NFSZERO_ATTRBIT(&attrbits); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); + nd->nd_repstat = nfsvno_getattr(vp, &ova, nd, p, 1, &attrbits); + if (nd->nd_repstat == 0) { + nd->nd_repstat = nfsvno_rmxattr(nd, vp, name, nd->nd_cred, p); + if (nd->nd_repstat == ENOATTR) + nd->nd_repstat = NFSERR_NOXATTR; + } + if (nd->nd_repstat == 0) + nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, &attrbits); + if (nd->nd_repstat == 0) { + NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER); + txdr_hyper(ova.na_filerev, tl); tl += 2; + txdr_hyper(nva.na_filerev, tl); + } + +nfsmout: + free(name, M_TEMP); + if (nd->nd_repstat == 0) + nd->nd_repstat = error; + vput(vp); + NFSEXITCODE2(0, nd); + return (0); +} + +/* + * nfs list extended attribute service + */ +APPLESTATIC int +nfsrvd_listxattr(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, __unused struct nfsexstuff *exp) +{ + uint32_t cnt, *tl, len, len2, i, pos, retlen; + int error; + uint64_t cookie, cookie2; + u_char *buf; + bool eof; + struct thread *p = curthread; + + error = 0; + buf = NULL; + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); + /* + * The cookie doesn't need to be in net byte order, but FreeBSD + * does so to make it more readable in packet traces. + */ + cookie = fxdr_hyper(tl); tl += 2; + len = fxdr_unsigned(uint32_t, *tl); + if (len == 0 || cookie >= IOSIZE_MAX) { + nd->nd_repstat = NFSERR_BADXDR; + goto nfsmout; + } + if (len > nd->nd_maxresp - NFS_MAXXDR) + len = nd->nd_maxresp - NFS_MAXXDR; + len2 = len; + nd->nd_repstat = nfsvno_listxattr(vp, cookie, nd->nd_cred, p, &buf, + &len, &eof); + if (nd->nd_repstat == EOPNOTSUPP) + nd->nd_repstat = NFSERR_NOTSUPP; + if (nd->nd_repstat == 0) { + cookie2 = cookie + len; + if (cookie2 < cookie) + nd->nd_repstat = NFSERR_BADXDR; + } + if (nd->nd_repstat == 0) { + /* Now copy the entries out. 
*/ + retlen = NFSX_HYPER + 2 * NFSX_UNSIGNED; + if (len == 0 && retlen <= len2) { + /* The cookie was at eof. */ + NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * + NFSX_UNSIGNED); + txdr_hyper(cookie2, tl); tl += 2; + *tl++ = txdr_unsigned(0); + *tl = newnfs_true; + goto nfsmout; + } + + /* Sanity check the cookie. */ + for (pos = 0; pos < len; pos += (i + 1)) { + if (pos == cookie) + break; + i = buf[pos]; + } + if (pos != cookie) { + nd->nd_repstat = NFSERR_INVAL; + goto nfsmout; + } + + /* Loop around copying the entrie(s) out. */ + cnt = 0; + len -= cookie; + i = buf[pos]; + while (i < len && len2 >= retlen + NFSM_RNDUP(i) + + NFSX_UNSIGNED) { + if (cnt == 0) { + NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + + NFSX_UNSIGNED); + txdr_hyper(cookie2, tl); tl += 2; + } + retlen += nfsm_strtom(nd, &buf[pos + 1], i); + len -= (i + 1); + pos += (i + 1); + i = buf[pos]; + cnt++; + } + /* + * eof is set true/false by nfsvno_listxattr(), but if we + * can't copy all entries returned by nfsvno_listxattr(), + * we are not at eof. + */ + if (len > 0) + eof = false; + if (cnt > 0) { + /* *tl is set above. 
*/ + *tl = txdr_unsigned(cnt); + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + if (eof) + *tl = newnfs_true; + else + *tl = newnfs_false; + } else + nd->nd_repstat = NFSERR_TOOSMALL; + } + +nfsmout: + free(buf, M_TEMP); + if (nd->nd_repstat == 0) + nd->nd_repstat = error; + vput(vp); + NFSEXITCODE2(0, nd); + return (0); +} + /* * nfsv4 service not supported */ diff --git a/sys/fs/nfsserver/nfs_nfsdsocket.c b/sys/fs/nfsserver/nfs_nfsdsocket.c index f0e63abebc40..8215b96ba7e1 100644 --- a/sys/fs/nfsserver/nfs_nfsdsocket.c +++ b/sys/fs/nfsserver/nfs_nfsdsocket.c @@ -135,7 +135,7 @@ int (*nfsrv3_procs2[NFS_V3NPROCS])(struct nfsrv_descript *, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, }; -int (*nfsrv4_ops0[NFSV41_NOPS])(struct nfsrv_descript *, +int (*nfsrv4_ops0[NFSV42_NOPS])(struct nfsrv_descript *, int, vnode_t , struct nfsexstuff *) = { (int (*)(struct nfsrv_descript *, int, vnode_t , struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , struct nfsexstuff *))0, @@ -196,9 +196,26 @@ int (*nfsrv4_ops0[NFSV41_NOPS])(struct nfsrv_descript *, nfsrvd_notsupp, nfsrvd_destroyclientid, nfsrvd_reclaimcomplete, + nfsrvd_allocate, + (int (*)(struct nfsrv_descript *, int, vnode_t , struct nfsexstuff *))0, + nfsrvd_notsupp, + nfsrvd_notsupp, + nfsrvd_ioadvise, + nfsrvd_layouterror, + nfsrvd_layoutstats, + nfsrvd_notsupp, + nfsrvd_notsupp, + nfsrvd_notsupp, + nfsrvd_seek, + nfsrvd_notsupp, + nfsrvd_notsupp, + nfsrvd_getxattr, + nfsrvd_setxattr, + nfsrvd_listxattr, + nfsrvd_rmxattr, }; -int (*nfsrv4_ops1[NFSV41_NOPS])(struct nfsrv_descript *, +int (*nfsrv4_ops1[NFSV42_NOPS])(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *) = { (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, @@ -259,9 +276,26 @@ int 
(*nfsrv4_ops1[NFSV41_NOPS])(struct nfsrv_descript *, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t 
*, struct nfsexstuff *))0, }; -int (*nfsrv4_ops2[NFSV41_NOPS])(struct nfsrv_descript *, +int (*nfsrv4_ops2[NFSV42_NOPS])(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *) = { (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, @@ -322,6 +356,23 @@ int (*nfsrv4_ops2[NFSV41_NOPS])(struct nfsrv_descript *, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + nfsrvd_copy_file_range, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct 
nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, + (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , struct nfsexstuff *, struct nfsexstuff *))0, }; #endif /* !APPLEKEXT */ @@ -361,6 +412,17 @@ int nfsrv_writerpc[NFS_NPROCS] = { 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; +SYSCTL_DECL(_vfs_nfsd); +static int nfs_minminorv4 = NFSV4_MINORVERSION; +SYSCTL_INT(_vfs_nfsd, OID_AUTO, server_min_minorversion4, CTLFLAG_RWTUN, + &nfs_minminorv4, 0, + "The lowest minor version of NFSv4 handled by the server"); + +static int nfs_maxminorv4 = NFSV42_MINORVERSION; +SYSCTL_INT(_vfs_nfsd, OID_AUTO, server_max_minorversion4, CTLFLAG_RWTUN, + &nfs_maxminorv4, 0, + "The highest minor version of NFSv4 handled by the server"); + /* local functions */ static void nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, int taglen, u_int32_t minorvers); @@ -373,7 +435,7 @@ static void nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, static int nfs_retfh[NFS_V3NPROCS] = { 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0 }; -extern struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS]; +extern struct nfsv4_opflag nfsv4_opflag[NFSV42_NOPS]; static int nfsv3to4op[NFS_V3NPROCS] = { NFSPROC_NULL, @@ -745,7 +807,10 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, (void) nfsm_strtom(nd, tag, taglen); NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); - if (minorvers != NFSV4_MINORVERSION && minorvers != NFSV41_MINORVERSION) + if 
((minorvers != NFSV4_MINORVERSION && + minorvers != NFSV41_MINORVERSION && + minorvers != NFSV42_MINORVERSION) || + minorvers < nfs_minminorv4 || minorvers > nfs_maxminorv4) nd->nd_repstat = NFSERR_MINORVERMISMATCH; if (nd->nd_repstat) numops = 0; @@ -765,9 +830,9 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, *repp = *tl; op = fxdr_unsigned(int, *tl); NFSD_DEBUG(4, "op=%d\n", op); - if (op < NFSV4OP_ACCESS || + if (op < NFSV4OP_ACCESS || op >= NFSV42_NOPS || (op >= NFSV4OP_NOPS && (nd->nd_flag & ND_NFSV41) == 0) || - (op >= NFSV41_NOPS && (nd->nd_flag & ND_NFSV41) != 0)) { + (op >= NFSV41_NOPS && (nd->nd_flag & ND_NFSV42) == 0)) { nd->nd_repstat = NFSERR_OPILLEGAL; *repp++ = txdr_unsigned(NFSV4OP_OPILLEGAL); *repp = nfsd_errmap(nd); diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c index ce24aa62d636..217e74f72d6b 100644 --- a/sys/fs/nfsserver/nfs_nfsdstate.c +++ b/sys/fs/nfsserver/nfs_nfsdstate.c @@ -215,7 +215,6 @@ static void nfsrv_freealllayouts(void); static void nfsrv_freedevid(struct nfsdevice *ds); static int nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p, struct nfsdevice **dsp); -static int nfsrv_delds(char *devid, NFSPROC_T *p); static void nfsrv_deleteds(struct nfsdevice *fndds); static void nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost); static void nfsrv_freealldevids(void); @@ -4455,6 +4454,8 @@ nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp, nd->nd_flag |= ND_KERBV; if ((clp->lc_flags & LCL_NFSV41) != 0) nd->nd_flag |= ND_NFSV41; + if ((clp->lc_flags & LCL_NFSV42) != 0) + nd->nd_flag |= ND_NFSV42; nd->nd_repstat = 0; cred->cr_uid = clp->lc_uid; cred->cr_gid = clp->lc_gid; @@ -4653,7 +4654,10 @@ nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp, (void)nfsm_strtom(nd, optag, len); NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); if ((nd->nd_flag & ND_NFSV41) != 0) { - *tl++ = txdr_unsigned(NFSV41_MINORVERSION); + if 
((nd->nd_flag & ND_NFSV42) != 0) + *tl++ = txdr_unsigned(NFSV42_MINORVERSION); + else + *tl++ = txdr_unsigned(NFSV41_MINORVERSION); *tl++ = txdr_unsigned(callback); *tl++ = txdr_unsigned(2); *tl = txdr_unsigned(NFSV4OP_CBSEQUENCE); @@ -5386,13 +5390,16 @@ nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p, * delegations. */ APPLESTATIC int -nfsrv_checkremove(vnode_t vp, int remove, NFSPROC_T *p) +nfsrv_checkremove(vnode_t vp, int remove, struct nfsrv_descript *nd, + nfsquad_t clientid, NFSPROC_T *p) { + struct nfsclient *clp; struct nfsstate *stp; struct nfslockfile *lfp; int error, haslock = 0; fhandle_t nfh; + clp = NULL; /* * First, get the lock file structure. * (A return of -1 means no associated state, so remove ok.) @@ -5400,6 +5407,9 @@ nfsrv_checkremove(vnode_t vp, int remove, NFSPROC_T *p) error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p); tryagain: NFSLOCKSTATE(); + if (error == 0 && clientid.qval != 0) + error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, + (nfsquad_t)((u_quad_t)0), 0, nd, p); if (!error) error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0); if (error) { @@ -5417,7 +5427,7 @@ nfsrv_checkremove(vnode_t vp, int remove, NFSPROC_T *p) /* * Now, we must Recall any delegations. */ - error = nfsrv_cleandeleg(vp, lfp, NULL, &haslock, p); + error = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p); if (error) { /* * nfsrv_cleandeleg() unlocks state for non-zero @@ -5554,7 +5564,8 @@ nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p) starttime = NFSD_MONOSEC; do { if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) { - error = nfsrv_checkremove(vp, 0, p); + error = nfsrv_checkremove(vp, 0, NULL, + (nfsquad_t)((u_quad_t)0), p); NFSVOPUNLOCK(vp, 0); } else error = EPERM; @@ -6200,6 +6211,10 @@ nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid, nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval; nd->nd_flag |= ND_IMPLIEDCLID; + /* Save maximum request and reply sizes. 
*/ + nd->nd_maxreq = sep->sess_maxreq; + nd->nd_maxresp = sep->sess_maxresp; + /* * If this session handles the backchannel, save the nd_xprt for this * RPC, since this is the one being used. @@ -7747,7 +7762,7 @@ nfsrv_deldsnmp(int op, struct nfsmount *nmp, NFSPROC_T *p) * point. * Also, returns an error instead of the nfsdevice found. */ -static int +APPLESTATIC int nfsrv_delds(char *devid, NFSPROC_T *p) { struct nfsdevice *ds, *fndds; @@ -7879,7 +7894,7 @@ nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost) * as defined for Flexible File Layout) in XDR. */ addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) + - 9 * NFSX_UNSIGNED; + 14 * NFSX_UNSIGNED; ds->nfsdev_flexaddrlen = addrlen; tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO); ds->nfsdev_flexaddr = (char *)tl; @@ -7891,7 +7906,12 @@ nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost) *tl++ = txdr_unsigned(strlen(addr)); NFSBCOPY(addr, tl, strlen(addr)); tl += (NFSM_RNDUP(strlen(addr)) / NFSX_UNSIGNED); - *tl++ = txdr_unsigned(1); /* One NFS Version. */ + *tl++ = txdr_unsigned(2); /* Two NFS Versions. */ + *tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */ + *tl++ = txdr_unsigned(NFSV42_MINORVERSION); /* Minor version 2. */ + *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max rsize. */ + *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max wsize. */ + *tl++ = newnfs_true; /* Tightly coupled. */ *tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */ *tl++ = txdr_unsigned(NFSV41_MINORVERSION); /* Minor version 1. */ *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max rsize. 
*/ diff --git a/sys/fs/nfsserver/nfs_nfsdsubs.c b/sys/fs/nfsserver/nfs_nfsdsubs.c index f7df2793ceea..64ed01ced919 100644 --- a/sys/fs/nfsserver/nfs_nfsdsubs.c +++ b/sys/fs/nfsserver/nfs_nfsdsubs.c @@ -1544,7 +1544,7 @@ nfsrv_isannfserr(u_int32_t errval) if (errval == NFSERR_OK) return (errval); - if (errval >= NFSERR_BADHANDLE && errval <= NFSERR_DELEGREVOKED) + if (errval >= NFSERR_BADHANDLE && errval <= NFSERR_MAXERRVAL) return (errval); if (errval > 0 && errval <= NFSERR_REMOTE) return (nfsrv_v2errmap[errval - 1]); @@ -2121,6 +2121,8 @@ nfsd_getminorvers(struct nfsrv_descript *nd, u_char *tag, u_char **tagstrp, *tagstrp = tagstr; if (*minversp == NFSV41_MINORVERSION) nd->nd_flag |= ND_NFSV41; + else if (*minversp == NFSV42_MINORVERSION) + nd->nd_flag |= (ND_NFSV41 | ND_NFSV42); nfsmout: if (error != 0) { if (tagstr != NULL && taglen > NFSV4_SMALLSTR)