diff --git a/sys/dev/virtio/balloon/virtio_balloon.c b/sys/dev/virtio/balloon/virtio_balloon.c
index 61ae4b127730..d589a733c25c 100644
--- a/sys/dev/virtio/balloon/virtio_balloon.c
+++ b/sys/dev/virtio/balloon/virtio_balloon.c
@@ -122,6 +122,9 @@ static void vtballoon_add_sysctl(struct vtballoon_softc *);
  */
 #define VTBALLOON_PAGES_PER_REQUEST	256
 
+/* Must be able to fit all page frames in one page (segment). */
+CTASSERT(VTBALLOON_PAGES_PER_REQUEST * sizeof(uint32_t) <= PAGE_SIZE);
+
 #define VTBALLOON_MTX(_sc)		&(_sc)->vtballoon_mtx
 #define VTBALLOON_LOCK_INIT(_sc, _name)	mtx_init(VTBALLOON_MTX((_sc)), _name, \
 	"VirtIO Balloon Lock", MTX_SPIN)
@@ -138,7 +141,7 @@ static device_method_t vtballoon_methods[] = {
 	/* VirtIO methods. */
 	DEVMETHOD(virtio_config_change, vtballoon_config_change),
 
-	{ 0, 0 }
+	DEVMETHOD_END
 };
 
 static driver_t vtballoon_driver = {
@@ -402,13 +405,13 @@ vtballoon_send_page_frames(struct vtballoon_softc *sc, struct virtqueue *vq,
 
 	error = virtqueue_enqueue(vq, vq, &sg, 1, 0);
 	KASSERT(error == 0, ("error enqueuing page frames to virtqueue"));
+	virtqueue_notify(vq);
 
 	/*
 	 * Inflate and deflate operations are done synchronously. The
 	 * interrupt handler will wake us up.
 	 */
 	VTBALLOON_LOCK(sc);
-	virtqueue_notify(vq);
 
 	while ((c = virtqueue_dequeue(vq, NULL)) == NULL)
 		msleep_spin(sc, VTBALLOON_MTX(sc), "vtbspf", 0);
diff --git a/sys/dev/virtio/block/virtio_blk.c b/sys/dev/virtio/block/virtio_blk.c
index dca19ba80239..9442a6ddd7e7 100644
--- a/sys/dev/virtio/block/virtio_blk.c
+++ b/sys/dev/virtio/block/virtio_blk.c
@@ -70,8 +70,8 @@ struct vtblk_softc {
 	uint32_t		 vtblk_flags;
#define VTBLK_FLAG_INDIRECT	0x0001
#define VTBLK_FLAG_READONLY	0x0002
-#define VTBLK_FLAG_DETACHING	0x0004
-#define VTBLK_FLAG_SUSPENDED	0x0008
+#define VTBLK_FLAG_DETACH	0x0004
+#define VTBLK_FLAG_SUSPEND	0x0008
#define VTBLK_FLAG_DUMPING	0x0010
 
 	struct virtqueue	*vtblk_vq;
@@ -82,7 +82,7 @@ struct vtblk_softc {
 	TAILQ_HEAD(, vtblk_request)
 				 vtblk_req_free;
 	TAILQ_HEAD(, vtblk_request)
-				vtblk_req_ready;
+				 vtblk_req_ready;
 
 	struct taskqueue	*vtblk_tq;
 	struct task		 vtblk_intr_task;
@@ -116,6 +116,13 @@ static int	vtblk_suspend(device_t);
 static int	vtblk_resume(device_t);
 static int	vtblk_shutdown(device_t);
 
+static int	vtblk_open(struct disk *);
+static int	vtblk_close(struct disk *);
+static int	vtblk_ioctl(struct disk *, u_long, void *, int,
+		    struct thread *);
+static int	vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
+static void	vtblk_strategy(struct bio *);
+
 static void	vtblk_negotiate_features(struct vtblk_softc *);
 static int	vtblk_maximum_segments(struct vtblk_softc *,
 		    struct virtio_blk_config *);
@@ -124,13 +131,7 @@ static void	vtblk_alloc_disk(struct vtblk_softc *,
 		    struct virtio_blk_config *);
 static void	vtblk_create_disk(struct vtblk_softc *);
 
-static int	vtblk_open(struct disk *);
-static int	vtblk_close(struct disk *);
-static int	vtblk_ioctl(struct disk *, u_long, void *, int,
-		    struct thread *);
-static int	vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
-static void	vtblk_strategy(struct bio *);
-
+static int	vtblk_quiesce(struct vtblk_softc *);
 static void	vtblk_startio(struct vtblk_softc *);
 static struct vtblk_request * vtblk_bio_request(struct vtblk_softc *);
 static int	vtblk_execute_request(struct vtblk_softc *,
@@ -148,6 +149,7 @@ static int	vtblk_flush_dump(struct vtblk_softc *);
 static int	vtblk_poll_request(struct vtblk_softc *,
 		    struct vtblk_request *);
 
+static void	vtblk_finish_completed(struct vtblk_softc *);
 static void	vtblk_drain_vq(struct vtblk_softc *, int);
 static void	vtblk_drain(struct vtblk_softc *);
 
@@ -161,7 +163,8 @@ static struct vtblk_request * vtblk_dequeue_ready(struct vtblk_softc *);
 static void	vtblk_enqueue_ready(struct vtblk_softc *,
 		    struct vtblk_request *);
 
-static void	vtblk_bio_error(struct bio *, int);
+static int	vtblk_request_error(struct vtblk_request *);
+static void	vtblk_finish_bio(struct bio *, int);
 
 /* Tunables. */
 static int vtblk_no_ident = 0;
@@ -189,9 +192,8 @@ TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
 	mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)
 
-#define VTBLK_BIO_SEGMENTS(_bp)	sglist_count((_bp)->bio_data, (_bp)->bio_bcount)
-
#define VTBLK_DISK_NAME		"vtbd"
+#define VTBLK_QUIESCE_TIMEOUT	(30 * hz)
 
 /*
  * Each block request uses at least two segments - one for the header
@@ -210,7 +212,7 @@ static device_method_t vtblk_methods[] = {
 	DEVMETHOD(device_resume,	vtblk_resume),
 	DEVMETHOD(device_shutdown,	vtblk_shutdown),
 
-	{ 0, 0 }
+	DEVMETHOD_END
 };
 
 static driver_t vtblk_driver = {
@@ -314,11 +316,13 @@ vtblk_attach(device_t dev)
 	}
 
 	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
+	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
+		error = EINVAL;
+		device_printf(dev, "fewer than minimum number of segments "
+		    "allowed: %d\n", sc->vtblk_max_nsegs);
+		goto fail;
+	}
 
-	/*
-	 * Allocate working sglist. The number of segments may be too
-	 * large to safely store on the stack.
-	 */
 	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
 	if (sc->vtblk_sglist == NULL) {
 		error = ENOMEM;
@@ -376,7 +380,7 @@ vtblk_detach(device_t dev)
 	sc = device_get_softc(dev);
 
 	VTBLK_LOCK(sc);
-	sc->vtblk_flags |= VTBLK_FLAG_DETACHING;
+	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
 	if (device_is_attached(dev))
 		vtblk_stop(sc);
 	VTBLK_UNLOCK(sc);
@@ -408,15 +412,19 @@ static int
 vtblk_suspend(device_t dev)
 {
 	struct vtblk_softc *sc;
+	int error;
 
 	sc = device_get_softc(dev);
 
 	VTBLK_LOCK(sc);
-	sc->vtblk_flags |= VTBLK_FLAG_SUSPENDED;
-	/* TODO Wait for any inflight IO to complete? */
+	sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
+	/* XXX BMV: virtio_stop(), etc needed here? */
+	error = vtblk_quiesce(sc);
+	if (error)
+		sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
 	VTBLK_UNLOCK(sc);
 
-	return (0);
+	return (error);
 }
 
 static int
@@ -427,8 +435,9 @@ vtblk_resume(device_t dev)
 	sc = device_get_softc(dev);
 
 	VTBLK_LOCK(sc);
-	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPENDED;
-	/* TODO Resume IO? */
+	/* XXX BMV: virtio_reinit(), etc needed here? */
+	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
+	vtblk_startio(sc);
 	VTBLK_UNLOCK(sc);
 
 	return (0);
@@ -449,7 +458,7 @@ vtblk_open(struct disk *dp)
 	if ((sc = dp->d_drv1) == NULL)
 		return (ENXIO);
 
-	return (sc->vtblk_flags & VTBLK_FLAG_DETACHING ? ENXIO : 0);
+	return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
 }
 
 static int
@@ -489,6 +498,8 @@ vtblk_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
 	if ((sc = dp->d_drv1) == NULL)
 		return (ENXIO);
 
+	VTBLK_LOCK(sc);
+
 	if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) {
 		vtblk_prepare_dump(sc);
 		sc->vtblk_flags |= VTBLK_FLAG_DUMPING;
@@ -498,6 +509,10 @@ vtblk_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
 		error = vtblk_write_dump(sc, virtual, offset, length);
 	else if (virtual == NULL && offset == 0)
 		error = vtblk_flush_dump(sc);
+	else {
+		error = EINVAL;
+		sc->vtblk_flags &= ~VTBLK_FLAG_DUMPING;
+	}
 
 	VTBLK_UNLOCK(sc);
 
@@ -510,7 +525,7 @@ vtblk_strategy(struct bio *bp)
 	struct vtblk_softc *sc;
 
 	if ((sc = bp->bio_disk->d_drv1) == NULL) {
-		vtblk_bio_error(bp, EINVAL);
+		vtblk_finish_bio(bp, EINVAL);
 		return;
 	}
 
@@ -520,29 +535,37 @@ vtblk_strategy(struct bio *bp)
 	 */
 	if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
 	    (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) {
-		vtblk_bio_error(bp, EROFS);
+		vtblk_finish_bio(bp, EROFS);
 		return;
 	}
 
+#ifdef INVARIANTS
 	/*
 	 * Prevent read/write buffers spanning too many segments from
 	 * getting into the queue. This should only trip if d_maxsize
 	 * was incorrectly set.
 	 */
 	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
+		int nsegs, max_nsegs;
+
+		nsegs = sglist_count(bp->bio_data, bp->bio_bcount);
+		max_nsegs = sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS;
+
-		KASSERT(VTBLK_BIO_SEGMENTS(bp) <= sc->vtblk_max_nsegs -
-		    VTBLK_MIN_SEGMENTS,
+		KASSERT(nsegs <= max_nsegs,
 		    ("bio spanned too many segments: %d, max: %d",
-		    VTBLK_BIO_SEGMENTS(bp),
-		    sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS));
+		    nsegs, max_nsegs));
 	}
+#endif
 
 	VTBLK_LOCK(sc);
-	if ((sc->vtblk_flags & VTBLK_FLAG_DETACHING) == 0) {
+	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
+		vtblk_finish_bio(bp, ENXIO);
+	else {
 		bioq_disksort(&sc->vtblk_bioq, bp);
-		vtblk_startio(sc);
-	} else
-		vtblk_bio_error(bp, ENXIO);
+
+		if ((sc->vtblk_flags & VTBLK_FLAG_SUSPEND) == 0)
+			vtblk_startio(sc);
+	}
 	VTBLK_UNLOCK(sc);
 }
 
@@ -669,6 +692,26 @@ vtblk_create_disk(struct vtblk_softc *sc)
 	disk_create(dp, DISK_VERSION);
 }
 
+static int
+vtblk_quiesce(struct vtblk_softc *sc)
+{
+	int error;
+
+	error = 0;
+
+	VTBLK_LOCK_ASSERT(sc);
+
+	while (!virtqueue_empty(sc->vtblk_vq)) {
+		if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
+		    VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
+			error = EBUSY;
+			break;
+		}
+	}
+
+	return (error);
+}
+
 static void
 vtblk_startio(struct vtblk_softc *sc)
 {
@@ -681,9 +724,6 @@ vtblk_startio(struct vtblk_softc *sc)
 
 	VTBLK_LOCK_ASSERT(sc);
 
-	if (sc->vtblk_flags & VTBLK_FLAG_SUSPENDED)
-		return;
-
 	while (!virtqueue_full(vq)) {
 		if ((req = vtblk_dequeue_ready(sc)) == NULL)
 			req = vtblk_bio_request(sc);
@@ -736,9 +776,8 @@ vtblk_bio_request(struct vtblk_softc *sc)
 		req->vbr_hdr.sector = bp->bio_offset / 512;
 		break;
 	default:
-		KASSERT(0, ("bio with unhandled cmd: %d", bp->bio_cmd));
-		req->vbr_hdr.type = -1;
-		break;
+		panic("%s: bio with unhandled cmd: %d", __FUNCTION__,
+		    bp->bio_cmd);
 	}
 
 	if (bp->bio_flags & BIO_ORDERED)
@@ -752,7 +791,7 @@ vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
 {
 	struct sglist *sg;
 	struct bio *bp;
-	int writable, error;
+	int readable, writable, error;
 
 	sg = sc->vtblk_sglist;
 	bp = req->vbr_bp;
@@ -783,10 +822,9 @@ vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
 	KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS,
 	    ("fewer than min segments: %d", sg->sg_nseg));
 
-	error = virtqueue_enqueue(sc->vtblk_vq, req, sg,
-	    sg->sg_nseg - writable, writable);
+	readable = sg->sg_nseg - writable;
 
-	return (error);
+	return (virtqueue_enqueue(sc->vtblk_vq, req, sg, readable, writable));
 }
 
 static int
@@ -806,37 +844,23 @@ static void
 vtblk_intr_task(void *arg, int pending)
 {
 	struct vtblk_softc *sc;
-	struct vtblk_request *req;
 	struct virtqueue *vq;
-	struct bio *bp;
 
 	sc = arg;
 	vq = sc->vtblk_vq;
 
 	VTBLK_LOCK(sc);
-	if (sc->vtblk_flags & VTBLK_FLAG_DETACHING) {
+	if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
 		VTBLK_UNLOCK(sc);
 		return;
 	}
 
-	while ((req = virtqueue_dequeue(vq, NULL)) != NULL) {
-		bp = req->vbr_bp;
-
-		if (req->vbr_ack == VIRTIO_BLK_S_OK)
-			bp->bio_resid = 0;
-		else {
-			bp->bio_flags |= BIO_ERROR;
-			if (req->vbr_ack == VIRTIO_BLK_S_UNSUPP)
-				bp->bio_error = ENOTSUP;
-			else
-				bp->bio_error = EIO;
-		}
-
-		biodone(bp);
-		vtblk_enqueue_request(sc, req);
-	}
-
-	vtblk_startio(sc);
+	vtblk_finish_completed(sc);
+
+	if ((sc->vtblk_flags & VTBLK_FLAG_SUSPEND) == 0)
+		vtblk_startio(sc);
+	else
+		wakeup(&sc->vtblk_vq);
 
 	if (virtqueue_enable_intr(vq) != 0) {
 		virtqueue_disable_intr(vq);
@@ -973,7 +997,6 @@ vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
 {
 	device_t dev;
 	struct virtqueue *vq;
-	struct vtblk_request *r;
 	int error;
 
 	dev = sc->vtblk_dev;
@@ -988,19 +1011,36 @@ vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
 
 	virtqueue_notify(vq);
 
-	r = virtqueue_poll(vq, NULL);
-	KASSERT(r == req, ("unexpected request response"));
+	req = virtqueue_poll(vq, NULL);
 
-	if (req->vbr_ack != VIRTIO_BLK_S_OK) {
-		error = req->vbr_ack == VIRTIO_BLK_S_UNSUPP ? ENOTSUP : EIO;
-		if (bootverbose)
-			device_printf(dev,
-			    "vtblk_poll_request: IO error: %d\n", error);
+	error = vtblk_request_error(req);
+	if (error && bootverbose) {
+		device_printf(dev, "vtblk_poll_request: IO error: %d\n",
+		    error);
 	}
 
 	return (error);
 }
 
+static void
+vtblk_finish_completed(struct vtblk_softc *sc)
+{
+	struct vtblk_request *req;
+	struct bio *bp;
+	int error;
+
+	while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
+		bp = req->vbr_bp;
+
+		error = vtblk_request_error(req);
+		if (error)
+			disk_err(bp, "hard error", -1, 1);
+
+		vtblk_finish_bio(bp, error);
+		vtblk_enqueue_request(sc, req);
+	}
+}
+
 static void
 vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
 {
@@ -1013,7 +1053,7 @@ vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
 
 	while ((req = virtqueue_drain(vq, &last)) != NULL) {
 		if (!skip_done)
-			vtblk_bio_error(req->vbr_bp, ENXIO);
+			vtblk_finish_bio(req->vbr_bp, ENXIO);
 
 		vtblk_enqueue_request(sc, req);
 	}
@@ -1030,17 +1070,19 @@ vtblk_drain(struct vtblk_softc *sc)
 
 	bioq = &sc->vtblk_bioq;
 
-	if (sc->vtblk_vq != NULL)
+	if (sc->vtblk_vq != NULL) {
+		vtblk_finish_completed(sc);
 		vtblk_drain_vq(sc, 0);
+	}
 
 	while ((req = vtblk_dequeue_ready(sc)) != NULL) {
-		vtblk_bio_error(req->vbr_bp, ENXIO);
+		vtblk_finish_bio(req->vbr_bp, ENXIO);
 		vtblk_enqueue_request(sc, req);
 	}
 
 	while (bioq_first(bioq) != NULL) {
 		bp = bioq_takefirst(bioq);
-		vtblk_bio_error(bp, ENXIO);
+		vtblk_finish_bio(bp, ENXIO);
 	}
 
 	vtblk_free_requests(sc);
@@ -1050,9 +1092,9 @@ static int
 vtblk_alloc_requests(struct vtblk_softc *sc)
 {
 	struct vtblk_request *req;
-	int i, size;
+	int i, nreqs;
 
-	size = virtqueue_size(sc->vtblk_vq);
+	nreqs = virtqueue_size(sc->vtblk_vq);
 
 	/*
 	 * Preallocate sufficient requests to keep the virtqueue full. Each
@@ -1060,9 +1102,9 @@ vtblk_alloc_requests(struct vtblk_softc *sc)
 	 * the number allocated when indirect descriptors are not available.
 	 */
 	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
-		size /= VTBLK_MIN_SEGMENTS;
+		nreqs /= VTBLK_MIN_SEGMENTS;
 
-	for (i = 0; i < size; i++) {
+	for (i = 0; i < nreqs; i++) {
 		req = uma_zalloc(vtblk_req_zone, M_NOWAIT);
 		if (req == NULL)
 			return (ENOMEM);
@@ -1079,6 +1121,9 @@ vtblk_free_requests(struct vtblk_softc *sc)
 {
 	struct vtblk_request *req;
 
+	KASSERT(TAILQ_EMPTY(&sc->vtblk_req_ready),
+	    ("ready requests left on queue"));
+
 	while ((req = vtblk_dequeue_request(sc)) != NULL) {
 		sc->vtblk_request_count--;
 		uma_zfree(vtblk_req_zone, req);
@@ -1126,9 +1171,35 @@ vtblk_enqueue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
 	TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
 }
 
+static int
+vtblk_request_error(struct vtblk_request *req)
+{
+	int error;
+
+	switch (req->vbr_ack) {
+	case VIRTIO_BLK_S_OK:
+		error = 0;
+		break;
+	case VIRTIO_BLK_S_UNSUPP:
+		error = ENOTSUP;
+		break;
+	default:
+		error = EIO;
+		break;
+	}
+
+	return (error);
+}
+
 static void
-vtblk_bio_error(struct bio *bp, int error)
+vtblk_finish_bio(struct bio *bp, int error)
 {
-	biofinish(bp, NULL, error);
+	if (error) {
+		bp->bio_resid = bp->bio_bcount;
+		bp->bio_error = error;
+		bp->bio_flags |= BIO_ERROR;
+	}
+
+	biodone(bp);
 }
diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c
index ca57aec1c91e..64a82ac028fe 100644
--- a/sys/dev/virtio/network/if_vtnet.c
+++ b/sys/dev/virtio/network/if_vtnet.c
@@ -223,7 +223,7 @@ static device_method_t vtnet_methods[] = {
 	/* VirtIO methods. */
 	DEVMETHOD(virtio_config_change, vtnet_config_change),
 
-	{ 0, 0 }
+	DEVMETHOD_END
 };
 
 static driver_t vtnet_driver = {
diff --git a/sys/dev/virtio/pci/virtio_pci.c b/sys/dev/virtio/pci/virtio_pci.c
index c71b5a9fe9b1..56813e4a1a7b 100644
--- a/sys/dev/virtio/pci/virtio_pci.c
+++ b/sys/dev/virtio/pci/virtio_pci.c
@@ -189,7 +189,7 @@ static device_method_t vtpci_methods[] = {
 	DEVMETHOD(virtio_bus_read_device_config, vtpci_read_dev_config),
 	DEVMETHOD(virtio_bus_write_device_config, vtpci_write_dev_config),
 
-	{ 0, 0 }
+	DEVMETHOD_END
 };
 
 static driver_t vtpci_driver = {
diff --git a/sys/dev/virtio/virtio_ring.h b/sys/dev/virtio/virtio_ring.h
index b0ab94651a9e..52580856a864 100644
--- a/sys/dev/virtio/virtio_ring.h
+++ b/sys/dev/virtio/virtio_ring.h
@@ -103,6 +103,7 @@ struct vring {
  *	__u16 avail_flags;
  *	__u16 avail_idx;
  *	__u16 available[num];
+ *	__u16 used_event_idx;
  *
  *	// Padding to the next align boundary.
  *	char pad[];
@@ -111,11 +112,19 @@ struct vring {
  *	__u16 used_flags;
  *	__u16 used_idx;
  *	struct vring_used_elem used[num];
+ *	__u16 avail_event_idx;
  * };
 *
 * NOTE: for VirtIO PCI, align is 4096.
 */
 
+/*
+ * We publish the used event index at the end of the available ring, and vice
+ * versa. They are at the end for backwards compatibility.
+ */
+#define vring_used_event(vr)	((vr)->avail->ring[(vr)->num])
+#define vring_avail_event(vr)	(*(uint16_t *)&(vr)->used->ring[(vr)->num])
+
 static inline int
 vring_size(unsigned int num, unsigned long align)
 {
@@ -140,4 +149,18 @@ vring_init(struct vring *vr, unsigned int num, uint8_t *p,
 	vr->used = (void *)
 	    (((unsigned long) &vr->avail->ring[num] + align-1) & ~(align-1));
 }
+
+/*
+ * The following is used with VIRTIO_RING_F_EVENT_IDX.
+ *
+ * Assuming a given event_idx value from the other side, if we have
+ * just incremented index from old to new_idx, should we trigger an
+ * event?
+ */
+static inline int
+vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
+{
+
+	return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
+}
 #endif /* VIRTIO_RING_H */
diff --git a/sys/dev/virtio/virtqueue.c b/sys/dev/virtio/virtqueue.c
index 1fb182e3dae2..31f47d0380dc 100644
--- a/sys/dev/virtio/virtqueue.c
+++ b/sys/dev/virtio/virtqueue.c
@@ -60,6 +60,7 @@ struct virtqueue {
 	uint16_t	 vq_nentries;
 	uint32_t	 vq_flags;
#define VIRTQUEUE_FLAG_INDIRECT	 0x0001
+#define VIRTQUEUE_FLAG_EVENT_IDX 0x0002
 	int		 vq_alignment;
 	int		 vq_ring_size;
 
@@ -126,7 +127,8 @@ static uint16_t	vq_ring_enqueue_segments(struct virtqueue *,
 static int	vq_ring_use_indirect(struct virtqueue *, int);
 static void	vq_ring_enqueue_indirect(struct virtqueue *, void *,
 		    struct sglist *, int, int);
-static void	vq_ring_notify_host(struct virtqueue *, int);
+static int	vq_ring_must_notify_host(struct virtqueue *);
+static void	vq_ring_notify_host(struct virtqueue *);
 static void	vq_ring_free_chain(struct virtqueue *, uint16_t);
 
 uint64_t
@@ -136,6 +138,7 @@ virtqueue_filter_features(uint64_t features)
 
 	mask = (1 << VIRTIO_TRANSPORT_F_START) - 1;
 	mask |= VIRTIO_RING_F_INDIRECT_DESC;
+	mask |= VIRTIO_RING_F_EVENT_IDX;
 
 	return (features & mask);
 }
@@ -184,6 +187,9 @@ virtqueue_alloc(device_t dev, uint16_t queue, uint16_t size, int align,
 	vq->vq_intrhand = info->vqai_intr;
 	vq->vq_intrhand_arg = info->vqai_intr_arg;
 
+	if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_EVENT_IDX) != 0)
+		vq->vq_flags |= VIRTQUEUE_FLAG_EVENT_IDX;
+
 	if (info->vqai_maxindirsz > 1) {
 		error = virtqueue_init_indirect(vq, info->vqai_maxindirsz);
 		if (error)
@@ -384,9 +390,12 @@ virtqueue_full(struct virtqueue *vq)
 
 void
 virtqueue_notify(struct virtqueue *vq)
 {
+	/* Ensure updated avail->idx is visible to host. */
+	mb();
+	if (vq_ring_must_notify_host(vq))
+		vq_ring_notify_host(vq);
 	vq->vq_queued_cnt = 0;
-	vq_ring_notify_host(vq, 0);
 }
 
 int
@@ -395,11 +404,8 @@ virtqueue_nused(struct virtqueue *vq)
 	uint16_t used_idx, nused;
 
 	used_idx = vq->vq_ring.used->idx;
-	if (used_idx >= vq->vq_used_cons_idx)
-		nused = used_idx - vq->vq_used_cons_idx;
-	else
-		nused = UINT16_MAX - vq->vq_used_cons_idx +
-		    used_idx + 1;
+
+	nused = (uint16_t)(used_idx - vq->vq_used_cons_idx);
 	VQASSERT(vq, nused <= vq->vq_nentries, "used more than available");
 
 	return (nused);
@@ -427,6 +433,9 @@ virtqueue_enable_intr(struct virtqueue *vq)
 	 * index of what's already been consumed.
 	 */
-	vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
+	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX)
+		vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx;
+	else
+		vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
 
 	mb();
@@ -441,6 +451,37 @@ virtqueue_enable_intr(struct virtqueue *vq)
 	return (0);
 }
 
+int
+virtqueue_postpone_intr(struct virtqueue *vq)
+{
+	uint16_t ndesc;
+
+	/*
+	 * Postpone until at least half of the available descriptors
+	 * have been consumed.
+	 *
+	 * XXX Adaptive factor? (Linux uses 3/4)
+	 */
+	ndesc = (uint16_t)(vq->vq_ring.avail->idx - vq->vq_used_cons_idx) / 2;
+
+	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX)
+		vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx + ndesc;
+	else
+		vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
+
+	mb();
+
+	/*
+	 * Enough items may have already been consumed to meet our
+	 * threshold since we last checked. Let our caller know so
+	 * it processes the new entries.
+	 */
+	if (virtqueue_nused(vq) > ndesc)
+		return (1);
+
+	return (0);
+}
+
 void
 virtqueue_disable_intr(struct virtqueue *vq)
 {
@@ -448,7 +489,8 @@ virtqueue_disable_intr(struct virtqueue *vq)
 	/*
 	 * Note this is only considered a hint to the host.
 	 */
-	vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
+	if ((vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) == 0)
+		vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
 }
 
 int
@@ -618,7 +660,7 @@ vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx)
 
 	mb();
 	vq->vq_ring.avail->idx++;
 
-	/* Keep pending count until virtqueue_notify() for debugging. */
+	/* Keep pending count until virtqueue_notify(). */
 	vq->vq_queued_cnt++;
 }
 
@@ -709,15 +751,27 @@ vq_ring_enqueue_indirect(struct virtqueue *vq, void *cookie,
 
 	vq_ring_update_avail(vq, head_idx);
 }
 
+static int
+vq_ring_must_notify_host(struct virtqueue *vq)
+{
+	uint16_t new_idx, prev_idx, event_idx;
+
+	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
+		new_idx = vq->vq_ring.avail->idx;
+		prev_idx = new_idx - vq->vq_queued_cnt;
+		event_idx = vring_avail_event(&vq->vq_ring);
+
+		return (vring_need_event(event_idx, new_idx, prev_idx) != 0);
+	}
+
+	return ((vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY) == 0);
+}
+
 static void
-vq_ring_notify_host(struct virtqueue *vq, int force)
+vq_ring_notify_host(struct virtqueue *vq)
 {
-	mb();
-
-	if (force ||
-	    (vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY) == 0)
-		VIRTIO_BUS_NOTIFY_VQ(vq->vq_dev, vq->vq_queue_index);
+	VIRTIO_BUS_NOTIFY_VQ(vq->vq_dev, vq->vq_queue_index);
 }
 
 static void
diff --git a/sys/dev/virtio/virtqueue.h b/sys/dev/virtio/virtqueue.h
index e790e65d51a8..eab57b22a23d 100644
--- a/sys/dev/virtio/virtqueue.h
+++ b/sys/dev/virtio/virtqueue.h
@@ -78,6 +78,7 @@ int	 virtqueue_reinit(struct virtqueue *vq, uint16_t size);
 
 int	 virtqueue_intr(struct virtqueue *vq);
 int	 virtqueue_enable_intr(struct virtqueue *vq);
+int	 virtqueue_postpone_intr(struct virtqueue *vq);
 void	 virtqueue_disable_intr(struct virtqueue *vq);
 
 /* Get physical address of the virtqueue ring. */
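
A note on the subtlest piece of this patch: the VIRTIO_RING_F_EVENT_IDX support hinges on the vring_need_event() comparison imported into virtio_ring.h. The guest only kicks the host when the host's published event index falls inside the window of entries made available since the previous notify, and the unsigned 16-bit subtractions keep that test correct even when the free-running ring indices wrap around. The standalone userland sketch below is not part of the patch; the index values in main() are made up purely for illustration, but the function body is the one added above:

/*
 * Sketch only: demonstrates the vring_need_event() test that
 * vq_ring_must_notify_host() applies when VIRTIO_RING_F_EVENT_IDX
 * has been negotiated.
 */
#include <stdint.h>
#include <stdio.h>

static inline int
vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
{

	/*
	 * Notify only if event_idx lies in the half-open window
	 * (old, new_idx], computed modulo 65536 so wraparound of the
	 * free-running indices is harmless.
	 */
	return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
}

int
main(void)
{
	/* Host asked for an event at index 5; avail->idx moved 4 -> 8. */
	printf("%d\n", vring_need_event(5, 8, 4));		/* 1: notify */

	/* Host's event index (5) is outside (10, 12]; suppress the kick. */
	printf("%d\n", vring_need_event(5, 12, 10));		/* 0: skip */

	/* Wraparound: the window (65534, 1] still contains 65535. */
	printf("%d\n", vring_need_event(65535, 1, 65534));	/* 1: notify */

	return (0);
}

The same comparison, with the roles of the rings reversed via vring_used_event(), is what lets virtqueue_postpone_intr() delay a guest interrupt until roughly half of the outstanding descriptors have been consumed.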