/*
 * Review summary of the patch below.  This text sits ahead of the first
 * "diff --git" header and is not part of any hunk.
 *
 * The patch routes posix_fallocate(2) through a new fileops hook:
 *
 *  - lib/libc/sys/posix_fallocate.2: changes the .Dd date and rewords
 *    the ENODEV entry from "a regular file" to "a file that supports
 *    .Nm".
 *  - sys/sys/file.h: adds the fo_fallocate_t typedef, a fo_fallocate
 *    member in struct fileops, and a static inline fo_fallocate() that
 *    returns ENODEV when the member is NULL and otherwise calls
 *    through it.
 *  - sys/kern/sys_generic.c: adds sys_posix_fallocate() and
 *    kern_posix_fallocate().  The latter rejects offset < 0 or
 *    len <= 0 with EINVAL and offset > OFF_MAX - len with EFBIG, takes
 *    the file with fget() and cap_pwrite_rights, returns ESPIPE when
 *    DFLAG_SEEKABLE is clear and EBADF when FWRITE is clear, then calls
 *    fo_fallocate() and drops the reference with fdrop().
 *  - sys/kern/vfs_syscalls.c: removes both functions, including the
 *    explicit DTYPE_VNODE check.
 *  - sys/kern/vfs_vnops.c: adds static vn_fallocate() and installs it
 *    in vnops.  It returns ENODEV unless the vnode is VREG and then
 *    runs the VOP_ALLOCATE() loop that the removed code contained.
 *
 * NOTE(review): the added kern_posix_fallocate() calls AUDIT_ARG_FD(fd)
 * twice, once before the range checks and once before fget().  The
 * removed code did the same; confirm whether the repeat is intended.
 *
 * NOTE(review): the hunks below appear on a few very long lines, so the
 * original line breaks look lost; presumably they must be restored
 * before this patch can be applied.
 */
diff --git a/lib/libc/sys/posix_fallocate.2 b/lib/libc/sys/posix_fallocate.2 index 4fba3dd87611..b61f8a48c394 100644 --- a/lib/libc/sys/posix_fallocate.2 +++ b/lib/libc/sys/posix_fallocate.2 @@ -28,7 +28,7 @@ .\" @(#)open.2 8.2 (Berkeley) 11/16/93 .\" $FreeBSD$ .\" -.Dd November 4, 2017 +.Dd January 5, 2020 .Dt POSIX_FALLOCATE 2 .Os .Sh NAME @@ -115,7 +115,8 @@ An I/O error occurred while reading from or writing to a file system. .It Bq Er ENODEV The .Fa fd -argument does not refer to a regular file. +argument does not refer to a file that supports +.Nm . .It Bq Er ENOSPC There is insufficient free space remaining on the file system storage media. diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index cd78cfbfbba2..8e614a0accc6 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -818,6 +818,47 @@ kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data) return (error); } +int +sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) +{ + int error; + + error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len); + return (kern_posix_error(td, error)); +} + +int +kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) +{ + struct file *fp; + int error; + + AUDIT_ARG_FD(fd); + if (offset < 0 || len <= 0) + return (EINVAL); + /* Check for wrap. 
*/ + if (offset > OFF_MAX - len) + return (EFBIG); + AUDIT_ARG_FD(fd); + error = fget(td, fd, &cap_pwrite_rights, &fp); + if (error != 0) + return (error); + AUDIT_ARG_FILE(td->td_proc, fp); + if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { + error = ESPIPE; + goto out; + } + if ((fp->f_flag & FWRITE) == 0) { + error = EBADF; + goto out; + } + + error = fo_fallocate(fp, offset, len, td); + out: + fdrop(fp, td); + return (error); +} + int poll_no_poll(int events) { diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index b954e3a6dd45..a3b66f0bcf47 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -4565,99 +4565,6 @@ kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) return (error); } -int -kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) -{ - struct file *fp; - struct mount *mp; - struct vnode *vp; - off_t olen, ooffset; - int error; -#ifdef AUDIT - int audited_vnode1 = 0; -#endif - - AUDIT_ARG_FD(fd); - if (offset < 0 || len <= 0) - return (EINVAL); - /* Check for wrap. */ - if (offset > OFF_MAX - len) - return (EFBIG); - AUDIT_ARG_FD(fd); - error = fget(td, fd, &cap_pwrite_rights, &fp); - if (error != 0) - return (error); - AUDIT_ARG_FILE(td->td_proc, fp); - if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { - error = ESPIPE; - goto out; - } - if ((fp->f_flag & FWRITE) == 0) { - error = EBADF; - goto out; - } - if (fp->f_type != DTYPE_VNODE) { - error = ENODEV; - goto out; - } - vp = fp->f_vnode; - if (vp->v_type != VREG) { - error = ENODEV; - goto out; - } - - /* Allocating blocks may take a long time, so iterate. 
*/ - for (;;) { - olen = len; - ooffset = offset; - - bwillwrite(); - mp = NULL; - error = vn_start_write(vp, &mp, V_WAIT | PCATCH); - if (error != 0) - break; - error = vn_lock(vp, LK_EXCLUSIVE); - if (error != 0) { - vn_finished_write(mp); - break; - } -#ifdef AUDIT - if (!audited_vnode1) { - AUDIT_ARG_VNODE1(vp); - audited_vnode1 = 1; - } -#endif -#ifdef MAC - error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); - if (error == 0) -#endif - error = VOP_ALLOCATE(vp, &offset, &len); - VOP_UNLOCK(vp); - vn_finished_write(mp); - - if (olen + ooffset != offset + len) { - panic("offset + len changed from %jx/%jx to %jx/%jx", - ooffset, olen, offset, len); - } - if (error != 0 || len == 0) - break; - KASSERT(olen > len, ("Iteration did not make progress?")); - maybe_yield(); - } - out: - fdrop(fp, td); - return (error); -} - -int -sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) -{ - int error; - - error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len); - return (kern_posix_error(td, error)); -} - /* * Unlike madvise(2), we do not make a best effort to remember every * possible caching hint. 
Instead, we remember the last setting with diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index f15ea0969b7e..61edb3165e01 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -103,6 +103,7 @@ static fo_kqfilter_t vn_kqfilter; static fo_stat_t vn_statfile; static fo_close_t vn_closefile; static fo_mmap_t vn_mmap; +static fo_fallocate_t vn_fallocate; struct fileops vnops = { .fo_read = vn_io_fault, @@ -119,6 +120,7 @@ struct fileops vnops = { .fo_seek = vn_seek, .fo_fill_kinfo = vn_fill_kinfo, .fo_mmap = vn_mmap, + .fo_fallocate = vn_fallocate, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; @@ -3150,3 +3152,60 @@ vn_generic_copy_file_range(struct vnode *invp, off_t *inoffp, free(dat, M_TEMP); return (error); } + +static int +vn_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td) +{ + struct mount *mp; + struct vnode *vp; + off_t olen, ooffset; + int error; +#ifdef AUDIT + int audited_vnode1 = 0; +#endif + + vp = fp->f_vnode; + if (vp->v_type != VREG) + return (ENODEV); + + /* Allocating blocks may take a long time, so iterate. 
*/ + for (;;) { + olen = len; + ooffset = offset; + + bwillwrite(); + mp = NULL; + error = vn_start_write(vp, &mp, V_WAIT | PCATCH); + if (error != 0) + break; + error = vn_lock(vp, LK_EXCLUSIVE); + if (error != 0) { + vn_finished_write(mp); + break; + } +#ifdef AUDIT + if (!audited_vnode1) { + AUDIT_ARG_VNODE1(vp); + audited_vnode1 = 1; + } +#endif +#ifdef MAC + error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); + if (error == 0) +#endif + error = VOP_ALLOCATE(vp, &offset, &len); + VOP_UNLOCK(vp); + vn_finished_write(mp); + + if (olen + ooffset != offset + len) { + panic("offset + len changed from %jx/%jx to %jx/%jx", + ooffset, olen, offset, len); + } + if (error != 0 || len == 0) + break; + KASSERT(olen > len, ("Iteration did not make progress?")); + maybe_yield(); + } + + return (error); +} diff --git a/sys/sys/file.h b/sys/sys/file.h index 0efc80a882a6..6d6d2754e097 100644 --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -125,6 +125,8 @@ typedef int fo_mmap_t(struct file *fp, vm_map_t map, vm_offset_t *addr, typedef int fo_aio_queue_t(struct file *fp, struct kaiocb *job); typedef int fo_add_seals_t(struct file *fp, int flags); typedef int fo_get_seals_t(struct file *fp, int *flags); +typedef int fo_fallocate_t(struct file *fp, off_t offset, off_t len, + struct thread *td); typedef int fo_flags_t; struct fileops { @@ -145,6 +147,7 @@ struct fileops { fo_aio_queue_t *fo_aio_queue; fo_add_seals_t *fo_add_seals; fo_get_seals_t *fo_get_seals; + fo_fallocate_t *fo_fallocate; fo_flags_t fo_flags; /* DFLAG_* below */ }; @@ -446,6 +449,15 @@ fo_get_seals(struct file *fp, int *seals) return ((*fp->f_ops->fo_get_seals)(fp, seals)); } +static __inline int +fo_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td) +{ + + if (fp->f_ops->fo_fallocate == NULL) + return (ENODEV); + return ((*fp->f_ops->fo_fallocate)(fp, offset, len, td)); +} + #endif /* _KERNEL */ #endif /* !SYS_FILE_H */