diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c
index 4739f85c4775..f639b09c970b 100644
--- a/sys/fs/tmpfs/tmpfs_subr.c
+++ b/sys/fs/tmpfs/tmpfs_subr.c
@@ -166,6 +166,7 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type,
     char *target, dev_t rdev, struct tmpfs_node **node)
 {
 	struct tmpfs_node *nnode;
+	vm_object_t obj;
 
 	/* If the root directory of the 'tmp' file system is not yet
 	 * allocated, this must be the request to do it. */
@@ -227,9 +228,14 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type,
 		break;
 
 	case VREG:
-		nnode->tn_reg.tn_aobj =
+		obj = nnode->tn_reg.tn_aobj =
 		    vm_pager_allocate(OBJT_SWAP, NULL, 0, VM_PROT_DEFAULT, 0,
 			NULL /* XXXKIB - tmpfs needs swap reservation */);
+		VM_OBJECT_WLOCK(obj);
+		/* OBJ_TMPFS is set together with the setting of vp->v_object */
+		vm_object_set_flag(obj, OBJ_NOSPLIT);
+		vm_object_clear_flag(obj, OBJ_ONEMAPPING);
+		VM_OBJECT_WUNLOCK(obj);
 		break;
 
 	default:
@@ -434,9 +440,11 @@ int
 tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
     struct vnode **vpp)
 {
-	int error = 0;
 	struct vnode *vp;
+	vm_object_t object;
+	int error;
 
+	error = 0;
 loop:
 	TMPFS_NODE_LOCK(node);
 	if ((vp = node->tn_vnode) != NULL) {
@@ -506,13 +514,22 @@ tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
 		/* FALLTHROUGH */
 	case VLNK:
 		/* FALLTHROUGH */
-	case VREG:
-		/* FALLTHROUGH */
 	case VSOCK:
 		break;
 	case VFIFO:
 		vp->v_op = &tmpfs_fifoop_entries;
 		break;
+	case VREG:
+		object = node->tn_reg.tn_aobj;
+		VM_OBJECT_WLOCK(object);
+		VI_LOCK(vp);
+		KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs"));
+		vp->v_object = object;
+		object->un_pager.swp.swp_tmpfs = vp;
+		vm_object_set_flag(object, OBJ_TMPFS);
+		VI_UNLOCK(vp);
+		VM_OBJECT_WUNLOCK(object);
+		break;
 	case VDIR:
 		MPASS(node->tn_dir.tn_parent != NULL);
 		if (node->tn_dir.tn_parent == node)
@@ -523,7 +540,6 @@ tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
 		panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type);
 	}
 
-	vnode_pager_setsize(vp, node->tn_size);
 	error = insmntque(vp, mp);
 	if (error)
 		vp = NULL;
@@ -1343,7 +1359,6 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr)
 	TMPFS_UNLOCK(tmp);
 
 	node->tn_size = newsize;
-	vnode_pager_setsize(vp, newsize);
 
 	return (0);
 }
diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c
index b79af38695d1..134f8d9c81cd 100644
--- a/sys/fs/tmpfs/tmpfs_vnops.c
+++ b/sys/fs/tmpfs/tmpfs_vnops.c
@@ -278,8 +278,6 @@ tmpfs_close(struct vop_close_args *v)
 {
 	struct vnode *vp = v->a_vp;
 
-	MPASS(VOP_ISLOCKED(vp));
-
 	/* Update node times. */
 	tmpfs_update(vp);
 
@@ -439,7 +437,6 @@ tmpfs_setattr(struct vop_setattr_args *v)
 	return error;
 }
 
-/* --------------------------------------------------------------------- */
 static int
 tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t idx,
     vm_offset_t offset, size_t tlen, struct uio *uio)
@@ -448,12 +445,35 @@ tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t idx,
 	int error, rv;
 
 	VM_OBJECT_WLOCK(tobj);
-	m = vm_page_grab(tobj, idx, VM_ALLOC_WIRED |
-	    VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
+
+	/*
+	 * The kern_sendfile() code calls vn_rdwr() with the page
+	 * soft-busied. Ignore the soft-busy state here. Parallel
+	 * reads of the page content from disk are prevented by
+	 * VPO_BUSY.
+	 *
+	 * Although the tmpfs vnode lock is held here, it is
+	 * nonetheless safe to sleep waiting for a free page. The
+	 * pageout daemon does not need to acquire the tmpfs vnode
+	 * lock to page out tobj's pages because tobj is a OBJT_SWAP
+	 * type object.
+	 */
+	m = vm_page_grab(tobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_RETRY |
+	    VM_ALLOC_IGN_SBUSY);
 	if (m->valid != VM_PAGE_BITS_ALL) {
 		if (vm_pager_has_page(tobj, idx, NULL, NULL)) {
 			rv = vm_pager_get_pages(tobj, &m, 1, 0);
+			m = vm_page_lookup(tobj, idx);
+			if (m == NULL) {
+				printf(
+				    "tmpfs: vm_obj %p idx %jd null lookup rv %d\n",
+				    tobj, idx, rv);
+				return (EIO);
+			}
 			if (rv != VM_PAGER_OK) {
+				printf(
+				    "tmpfs: vm_obj %p idx %jd valid %x pager error %d\n",
+				    tobj, idx, m->valid, rv);
 				vm_page_lock(m);
 				vm_page_free(m);
 				vm_page_unlock(m);
@@ -463,127 +483,38 @@
 		} else
 			vm_page_zero_invalid(m, TRUE);
 	}
+	vm_page_lock(m);
+	vm_page_hold(m);
+	vm_page_wakeup(m);
+	vm_page_unlock(m);
 	VM_OBJECT_WUNLOCK(tobj);
 	error = uiomove_fromphys(&m, offset, tlen, uio);
 	VM_OBJECT_WLOCK(tobj);
 	vm_page_lock(m);
-	vm_page_unwire(m, TRUE);
+	vm_page_unhold(m);
+	vm_page_deactivate(m);
+	/* Requeue to maintain LRU ordering. */
+	vm_page_requeue(m);
 	vm_page_unlock(m);
-	vm_page_wakeup(m);
 	VM_OBJECT_WUNLOCK(tobj);
 
 	return (error);
 }
 
-static __inline int
-tmpfs_nocacheread_buf(vm_object_t tobj, vm_pindex_t idx,
-    vm_offset_t offset, size_t tlen, void *buf)
-{
-	struct uio uio;
-	struct iovec iov;
-
-	uio.uio_iovcnt = 1;
-	uio.uio_iov = &iov;
-	iov.iov_base = buf;
-	iov.iov_len = tlen;
-
-	uio.uio_offset = 0;
-	uio.uio_resid = tlen;
-	uio.uio_rw = UIO_READ;
-	uio.uio_segflg = UIO_SYSSPACE;
-	uio.uio_td = curthread;
-
-	return (tmpfs_nocacheread(tobj, idx, offset, tlen, &uio));
-}
-
-static int
-tmpfs_mappedread(vm_object_t vobj, vm_object_t tobj, size_t len, struct uio *uio)
-{
-	struct sf_buf *sf;
-	vm_pindex_t idx;
-	vm_page_t m;
-	vm_offset_t offset;
-	off_t addr;
-	size_t tlen;
-	char *ma;
-	int error;
-
-	addr = uio->uio_offset;
-	idx = OFF_TO_IDX(addr);
-	offset = addr & PAGE_MASK;
-	tlen = MIN(PAGE_SIZE - offset, len);
-
-	VM_OBJECT_WLOCK(vobj);
-lookupvpg:
-	if (((m = vm_page_lookup(vobj, idx)) != NULL) &&
-	    vm_page_is_valid(m, offset, tlen)) {
-		if ((m->oflags & VPO_BUSY) != 0) {
-			/*
-			 * Reference the page before unlocking and sleeping so
-			 * that the page daemon is less likely to reclaim it.
-			 */
-			vm_page_reference(m);
-			vm_page_sleep(m, "tmfsmr");
-			goto lookupvpg;
-		}
-		vm_page_busy(m);
-		VM_OBJECT_WUNLOCK(vobj);
-		error = uiomove_fromphys(&m, offset, tlen, uio);
-		VM_OBJECT_WLOCK(vobj);
-		vm_page_wakeup(m);
-		VM_OBJECT_WUNLOCK(vobj);
-		return (error);
-	} else if (m != NULL && uio->uio_segflg == UIO_NOCOPY) {
-		KASSERT(offset == 0,
-		    ("unexpected offset in tmpfs_mappedread for sendfile"));
-		if ((m->oflags & VPO_BUSY) != 0) {
-			/*
-			 * Reference the page before unlocking and sleeping so
-			 * that the page daemon is less likely to reclaim it.
-			 */
-			vm_page_reference(m);
-			vm_page_sleep(m, "tmfsmr");
-			goto lookupvpg;
-		}
-		vm_page_busy(m);
-		VM_OBJECT_WUNLOCK(vobj);
-		sched_pin();
-		sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
-		ma = (char *)sf_buf_kva(sf);
-		error = tmpfs_nocacheread_buf(tobj, idx, 0, tlen, ma);
-		if (error == 0) {
-			if (tlen != PAGE_SIZE)
-				bzero(ma + tlen, PAGE_SIZE - tlen);
-			uio->uio_offset += tlen;
-			uio->uio_resid -= tlen;
-		}
-		sf_buf_free(sf);
-		sched_unpin();
-		VM_OBJECT_WLOCK(vobj);
-		if (error == 0)
-			m->valid = VM_PAGE_BITS_ALL;
-		vm_page_wakeup(m);
-		VM_OBJECT_WUNLOCK(vobj);
-		return (error);
-	}
-	VM_OBJECT_WUNLOCK(vobj);
-	error = tmpfs_nocacheread(tobj, idx, offset, tlen, uio);
-
-	return (error);
-}
-
 static int
 tmpfs_read(struct vop_read_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	struct uio *uio = v->a_uio;
-	struct tmpfs_node *node;
 	vm_object_t uobj;
 	size_t len;
 	int resid;
-	int error = 0;
+	vm_pindex_t idx;
+	vm_offset_t offset;
+	off_t addr;
+	size_t tlen;
 
 	node = VP_TO_TMPFS_NODE(vp);
 
@@ -607,7 +538,11 @@ tmpfs_read(struct vop_read_args *v)
 		len = MIN(node->tn_size - uio->uio_offset, resid);
 		if (len == 0)
 			break;
-		error = tmpfs_mappedread(vp->v_object, uobj, len, uio);
+		addr = uio->uio_offset;
+		idx = OFF_TO_IDX(addr);
+		offset = addr & PAGE_MASK;
+		tlen = MIN(PAGE_SIZE - offset, len);
+		error = tmpfs_nocacheread(uobj, idx, offset, tlen, uio);
 		if ((error != 0) || (resid == uio->uio_resid))
 			break;
 	}
@@ -620,10 +555,10 @@
 /* --------------------------------------------------------------------- */
 
 static int
-tmpfs_mappedwrite(vm_object_t vobj, vm_object_t tobj, size_t len, struct uio *uio)
+tmpfs_mappedwrite(vm_object_t tobj, size_t len, struct uio *uio)
 {
 	vm_pindex_t idx;
-	vm_page_t vpg, tpg;
+	vm_page_t tpg;
 	vm_offset_t offset;
 	off_t addr;
 	size_t tlen;
@@ -636,69 +571,47 @@ tmpfs_mappedwrite(vm_object_t vobj, vm_object_t tobj, size_t len, struct uio *ui
 	offset = addr & PAGE_MASK;
 	tlen = MIN(PAGE_SIZE - offset, len);
 
-	VM_OBJECT_WLOCK(vobj);
-lookupvpg:
-	if (((vpg = vm_page_lookup(vobj, idx)) != NULL) &&
-	    vm_page_is_valid(vpg, offset, tlen)) {
-		if ((vpg->oflags & VPO_BUSY) != 0) {
-			/*
-			 * Reference the page before unlocking and sleeping so
-			 * that the page daemon is less likely to reclaim it.
-			 */
-			vm_page_reference(vpg);
-			vm_page_sleep(vpg, "tmfsmw");
-			goto lookupvpg;
-		}
-		vm_page_busy(vpg);
-		vm_page_undirty(vpg);
-		VM_OBJECT_WUNLOCK(vobj);
-		error = uiomove_fromphys(&vpg, offset, tlen, uio);
-	} else {
-		if (vm_page_is_cached(vobj, idx))
-			vm_page_cache_free(vobj, idx, idx + 1);
-		VM_OBJECT_WUNLOCK(vobj);
-		vpg = NULL;
-	}
 	VM_OBJECT_WLOCK(tobj);
-	tpg = vm_page_grab(tobj, idx, VM_ALLOC_WIRED |
-	    VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
+	tpg = vm_page_grab(tobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 	if (tpg->valid != VM_PAGE_BITS_ALL) {
 		if (vm_pager_has_page(tobj, idx, NULL, NULL)) {
 			rv = vm_pager_get_pages(tobj, &tpg, 1, 0);
+			tpg = vm_page_lookup(tobj, idx);
+			if (tpg == NULL) {
+				printf(
+				    "tmpfs: vm_obj %p idx %jd null lookup rv %d\n",
+				    tobj, idx, rv);
+				return (EIO);
+			}
 			if (rv != VM_PAGER_OK) {
+				printf(
+				    "tmpfs: vm_obj %p idx %jd valid %x pager error %d\n",
+				    tobj, idx, tpg->valid, rv);
 				vm_page_lock(tpg);
 				vm_page_free(tpg);
 				vm_page_unlock(tpg);
-				error = EIO;
-				goto out;
+				VM_OBJECT_WUNLOCK(tobj);
+				return (EIO);
 			}
 		} else
 			vm_page_zero_invalid(tpg, TRUE);
 	}
-	VM_OBJECT_WUNLOCK(tobj);
-	if (vpg == NULL)
-		error = uiomove_fromphys(&tpg, offset, tlen, uio);
-	else {
-		KASSERT(vpg->valid == VM_PAGE_BITS_ALL, ("parts of vpg invalid"));
-		pmap_copy_page(vpg, tpg);
-	}
-	VM_OBJECT_WLOCK(tobj);
-	if (error == 0) {
-		KASSERT(tpg->valid == VM_PAGE_BITS_ALL,
-		    ("parts of tpg invalid"));
-		vm_page_dirty(tpg);
-	}
 	vm_page_lock(tpg);
-	vm_page_unwire(tpg, TRUE);
-	vm_page_unlock(tpg);
+	vm_page_hold(tpg);
 	vm_page_wakeup(tpg);
-out:
+	vm_page_unlock(tpg);
+	VM_OBJECT_WUNLOCK(tobj);
+	error = uiomove_fromphys(&tpg, offset, tlen, uio);
+	VM_OBJECT_WLOCK(tobj);
+	if (error == 0)
+		vm_page_dirty(tpg);
+	vm_page_lock(tpg);
+	vm_page_unhold(tpg);
+	vm_page_deactivate(tpg);
+	/* Requeue to maintain LRU ordering. */
+	vm_page_requeue(tpg);
+	vm_page_unlock(tpg);
 	VM_OBJECT_WUNLOCK(tobj);
-	if (vpg != NULL) {
-		VM_OBJECT_WLOCK(vobj);
-		vm_page_wakeup(vpg);
-		VM_OBJECT_WUNLOCK(vobj);
-	}
 
 	return (error);
 }
@@ -756,7 +669,7 @@ tmpfs_write(struct vop_write_args *v)
 		len = MIN(node->tn_size - uio->uio_offset, resid);
 		if (len == 0)
 			break;
-		error = tmpfs_mappedwrite(vp->v_object, uobj, len, uio);
+		error = tmpfs_mappedwrite(uobj, len, uio);
 		if ((error != 0) || (resid == uio->uio_resid))
 			break;
 	}
@@ -1536,8 +1449,6 @@ tmpfs_inactive(struct vop_inactive_args *v)
 
 	struct tmpfs_node *node;
 
-	MPASS(VOP_ISLOCKED(vp));
-
 	node = VP_TO_TMPFS_NODE(vp);
 
 	if (node->tn_links == 0)
@@ -1555,11 +1466,24 @@ tmpfs_reclaim(struct vop_reclaim_args *v)
 
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *node;
+	vm_object_t obj;
 
 	node = VP_TO_TMPFS_NODE(vp);
 	tmp = VFS_TO_TMPFS(vp->v_mount);
 
-	vnode_destroy_vobject(vp);
+	if (node->tn_type == VREG) {
+		obj = node->tn_reg.tn_aobj;
+		if (obj != NULL) {
+			/* Instead of vnode_destroy_vobject() */
+			VM_OBJECT_WLOCK(obj);
+			VI_LOCK(vp);
+			vm_object_clear_flag(obj, OBJ_TMPFS);
+			obj->un_pager.swp.swp_tmpfs = NULL;
+			VI_UNLOCK(vp);
+			VM_OBJECT_WUNLOCK(obj);
+		}
+	}
+	vp->v_object = NULL;
 	cache_purge(vp);
 
 	TMPFS_NODE_LOCK(node);
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 1f7cb7881a8b..ce81ad55f1c9 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -505,6 +505,7 @@ void
 vm_object_deallocate(vm_object_t object)
 {
 	vm_object_t temp;
+	struct vnode *vp;
 
 	while (object != NULL) {
 		VM_OBJECT_WLOCK(object);
@@ -527,15 +528,36 @@ vm_object_deallocate(vm_object_t object)
 			VM_OBJECT_WUNLOCK(object);
 			return;
 		} else if (object->ref_count == 1) {
+			if (object->type == OBJT_SWAP &&
+			    (object->flags & OBJ_TMPFS) != 0) {
+				vp = object->un_pager.swp.swp_tmpfs;
+				vhold(vp);
+				VM_OBJECT_WUNLOCK(object);
+				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+				vdrop(vp);
+				VM_OBJECT_WLOCK(object);
+				if (object->type == OBJT_DEAD) {
+					VM_OBJECT_WUNLOCK(object);
+					VOP_UNLOCK(vp, 0);
+					return;
+				} else if ((object->flags & OBJ_TMPFS) != 0) {
+					if (object->ref_count == 1)
+						VOP_UNSET_TEXT(vp);
+					VOP_UNLOCK(vp, 0);
+				}
+			}
 			if (object->shadow_count == 0 &&
 			    object->handle == NULL &&
 			    (object->type == OBJT_DEFAULT ||
-			    object->type == OBJT_SWAP)) {
+			    (object->type == OBJT_SWAP &&
+			    (object->flags & OBJ_TMPFS) == 0))) {
 				vm_object_set_flag(object, OBJ_ONEMAPPING);
 			} else if ((object->shadow_count == 1) &&
 			    (object->handle == NULL) &&
 			    (object->type == OBJT_DEFAULT ||
 			    object->type == OBJT_SWAP)) {
+				KASSERT((object->flags & OBJ_TMPFS) == 0,
+				    ("Shadowed tmpfs v_object"));
 				vm_object_t robject;
 
 				robject = LIST_FIRST(&object->shadow_head);
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index 2f2c961eb311..cf5466385fbb 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -154,11 +154,21 @@ struct vm_object {
 		/*
 		 * Swap pager
 		 *
+		 *	swp_tmpfs - back-pointer to the tmpfs vnode,
+		 *		    if any, which uses the vm object
+		 *		    as backing store. The handle
+		 *		    cannot be reused for linking,
+		 *		    because the vnode can be
+		 *		    reclaimed and recreated, making
+		 *		    the handle changed and hash-chain
+		 *		    invalid.
+		 *
 		 *	swp_bcount - number of swap 'swblock' metablocks, each
 		 *		     contains up to 16 swapblk assignments.
 		 *		     see vm/swap_pager.h
 		 */
 		struct {
+			void *swp_tmpfs;
 			int swp_bcount;
 		} swp;
 	} un_pager;
@@ -179,6 +189,7 @@ struct vm_object {
 #define	OBJ_COLORED	0x1000		/* pg_color is defined */
 #define	OBJ_ONEMAPPING	0x2000		/* One USE (a single, non-forked) mapping flag */
 #define	OBJ_DISCONNECTWNT 0x4000	/* disconnect from vnode wanted */
+#define	OBJ_TMPFS	0x8000
 
 #define IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT)
 #define OFF_TO_IDX(off) ((vm_pindex_t)(((vm_ooffset_t)(off)) >> PAGE_SHIFT))