From 310dacd09bdac56bf4b84751a3804785ecbe65ba Mon Sep 17 00:00:00 2001
From: Peter Grehan <grehan@FreeBSD.org>
Date: Wed, 11 Jul 2012 02:57:19 +0000
Subject: [PATCH] Various VirtIO improvements

PCI:
- Properly handle interrupt fallback from MSIX to MSI to legacy.
  The host may not have sufficient resources to support MSIX, so we
  must be able to fall back to legacy interrupts.
- Add an interface to get the (sub) vendor and device IDs.
- Rename flags to VTPCI_FLAG_* like other VirtIO drivers.

Block:
- No longer allocate vtblk_requests from a separate UMA zone.
  malloc(9) from M_DEVBUF is sufficient. Assert segment counts at
  allocation.
- More verbose error and debug messages.

Network:
- Remove a stray write-once variable.

Virtqueue:
- Shuffle code around in preparation for converting the mb()s to the
  appropriate atomic(9) operations.
- Only walk the descriptor chain when freeing if INVARIANTS is
  defined, since the result is only KASSERT()ed.

Submitted by: Bryan Venteicher (bryanv@daemoninthecloset.org)
---
 sys/dev/virtio/balloon/virtio_balloon.c |   1 -
 sys/dev/virtio/block/virtio_blk.c       |  81 ++-
 sys/dev/virtio/network/if_vtnet.c       |   2 -
 sys/dev/virtio/pci/virtio_pci.c         | 627 +++++++++++++++---------
 sys/dev/virtio/pci/virtio_pci.h         |   2 +-
 sys/dev/virtio/virtio.c                 |  37 +-
 sys/dev/virtio/virtio.h                 |  33 +-
 sys/dev/virtio/virtio_ring.h            |   9 +-
 sys/dev/virtio/virtqueue.c              |  87 ++--
 sys/dev/virtio/virtqueue.h              |   6 +-
 10 files changed, 536 insertions(+), 349 deletions(-)

diff --git a/sys/dev/virtio/balloon/virtio_balloon.c b/sys/dev/virtio/balloon/virtio_balloon.c
index d589a733c25c..9b454591a9d0 100644
--- a/sys/dev/virtio/balloon/virtio_balloon.c
+++ b/sys/dev/virtio/balloon/virtio_balloon.c
@@ -412,7 +412,6 @@ vtballoon_send_page_frames(struct vtballoon_softc *sc, struct virtqueue *vq,
 * interrupt handler will wake us up. */ VTBALLOON_LOCK(sc); -
 while ((c = virtqueue_dequeue(vq, NULL)) == NULL)
 msleep_spin(sc, VTBALLOON_MTX(sc), "vtbspf", 0); VTBALLOON_UNLOCK(sc);
diff --git a/sys/dev/virtio/block/virtio_blk.c b/sys/dev/virtio/block/virtio_blk.c
index d05fa7a4ec4a..d35421a7659e 100644
--- a/sys/dev/virtio/block/virtio_blk.c
+++ b/sys/dev/virtio/block/virtio_blk.c
@@ -42,7 +42,6 @@ __FBSDID("$FreeBSD$");
 #include <sys/taskqueue.h>
 #include <geom/geom_disk.h> -#include <vm/uma.h>
 #include <machine/bus.h>
 #include <machine/resource.h>
@@ -119,7 +118,7 @@ static int vtblk_shutdown(device_t);
 static int vtblk_open(struct disk *);
 static int vtblk_close(struct disk *);
 static int vtblk_ioctl(struct disk *, u_long, void *, int, - struct thread *); + struct thread *);
 static int vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
 static void vtblk_strategy(struct bio *);
@@ -193,7 +192,7 @@ TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
 mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)
 #define VTBLK_DISK_NAME "vtbd"
-#define VTBLK_QUIESCE_TIMEOUT (30 * hz) +#define VTBLK_QUIESCE_TIMEOUT (30 * hz)
 /* * Each block request uses at least two segments - one for the header
@@ -201,8 +200,6 @@ TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
 */ #define VTBLK_MIN_SEGMENTS 2
-static uma_zone_t vtblk_req_zone; -
 static device_method_t vtblk_methods[] = {
 	/* Device methods.
*/ DEVMETHOD(device_probe, vtblk_probe), @@ -236,19 +233,8 @@ vtblk_modevent(module_t mod, int type, void *unused) switch (type) { case MOD_LOAD: - vtblk_req_zone = uma_zcreate("vtblk_request", - sizeof(struct vtblk_request), - NULL, NULL, NULL, NULL, 0, 0); - break; case MOD_QUIESCE: case MOD_UNLOAD: - if (uma_zone_get_cur(vtblk_req_zone) > 0) - error = EBUSY; - else if (type == MOD_UNLOAD) { - uma_zdestroy(vtblk_req_zone); - vtblk_req_zone = NULL; - } - break; case MOD_SHUTDOWN: break; default: @@ -316,7 +302,7 @@ vtblk_attach(device_t dev) } sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg); - if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) { + if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) { error = EINVAL; device_printf(dev, "fewer than minimum number of segments " "allowed: %d\n", sc->vtblk_max_nsegs); @@ -493,7 +479,6 @@ vtblk_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset, int error; dp = arg; - error = 0; if ((sc = dp->d_drv1) == NULL) return (ENXIO); @@ -539,7 +524,7 @@ vtblk_strategy(struct bio *bp) return; } -#ifdef INVARIANTS +#ifdef INVARIANTS /* * Prevent read/write buffers spanning too many segments from * getting into the queue. This should only trip if d_maxsize @@ -547,13 +532,13 @@ vtblk_strategy(struct bio *bp) */ if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) { int nsegs, max_nsegs; - + nsegs = sglist_count(bp->bio_data, bp->bio_bcount); max_nsegs = sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS; KASSERT(nsegs <= max_nsegs, - ("bio spanned too many segments: %d, max: %d", - nsegs, max_nsegs)); + ("bio %p spanned too many segments: %d, max: %d", + bp, nsegs, max_nsegs)); } #endif @@ -800,27 +785,22 @@ vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req) VTBLK_LOCK_ASSERT(sc); sglist_reset(sg); - error = sglist_append(sg, &req->vbr_hdr, - sizeof(struct virtio_blk_outhdr)); - KASSERT(error == 0, ("error adding header to sglist")); - KASSERT(sg->sg_nseg == 1, - ("header spanned multiple segments: %d", sg->sg_nseg)); + + sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr)); if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) { error = sglist_append(sg, bp->bio_data, bp->bio_bcount); - KASSERT(error == 0, ("error adding buffer to sglist")); + if (error || sg->sg_nseg == sg->sg_maxseg) + panic("%s: data buffer too big bio:%p error:%d", + __FUNCTION__, bp, error); /* BIO_READ means the host writes into our buffer. 
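 * For a read, the host writes every segment of the data buffer, so all
 * segments appended so far, except the leading header, count as
 * writable below.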
*/ if (bp->bio_cmd == BIO_READ) - writable += sg->sg_nseg - 1; + writable = sg->sg_nseg - 1; } - error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t)); - KASSERT(error == 0, ("error adding ack to sglist")); writable++; - - KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS, - ("fewer than min segments: %d", sg->sg_nseg)); + sglist_append(sg, &req->vbr_ack, sizeof(uint8_t)); readable = sg->sg_nseg - writable; @@ -995,12 +975,10 @@ vtblk_flush_dump(struct vtblk_softc *sc) static int vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req) { - device_t dev; struct virtqueue *vq; struct vtblk_request *r; int error; - dev = sc->vtblk_dev; vq = sc->vtblk_vq; if (!virtqueue_empty(vq)) @@ -1013,12 +991,12 @@ vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req) virtqueue_notify(vq); r = virtqueue_poll(vq, NULL); - KASSERT(r == req, ("unexpected request response")); + KASSERT(r == req, ("unexpected request response: %p/%p", r, req)); error = vtblk_request_error(req); if (error && bootverbose) { - device_printf(dev, "vtblk_poll_request: IO error: %d\n", - error); + device_printf(sc->vtblk_dev, + "%s: IO error: %d\n", __FUNCTION__, error); } return (error); @@ -1090,6 +1068,20 @@ vtblk_drain(struct vtblk_softc *sc) vtblk_free_requests(sc); } +#ifdef INVARIANTS +static void +vtblk_request_invariants(struct vtblk_request *req) +{ + int hdr_nsegs, ack_nsegs; + + hdr_nsegs = sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr)); + ack_nsegs = sglist_count(&req->vbr_ack, sizeof(req->vbr_ack)); + + KASSERT(hdr_nsegs == 1, ("request header crossed page boundary")); + KASSERT(ack_nsegs == 1, ("request ack crossed page boundary")); +} +#endif + static int vtblk_alloc_requests(struct vtblk_softc *sc) { @@ -1107,10 +1099,14 @@ vtblk_alloc_requests(struct vtblk_softc *sc) nreqs /= VTBLK_MIN_SEGMENTS; for (i = 0; i < nreqs; i++) { - req = uma_zalloc(vtblk_req_zone, M_NOWAIT); + req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT); if (req == NULL) return (ENOMEM); +#ifdef INVARIANTS + vtblk_request_invariants(req); +#endif + sc->vtblk_request_count++; vtblk_enqueue_request(sc, req); } @@ -1128,10 +1124,11 @@ vtblk_free_requests(struct vtblk_softc *sc) while ((req = vtblk_dequeue_request(sc)) != NULL) { sc->vtblk_request_count--; - uma_zfree(vtblk_req_zone, req); + free(req, M_DEVBUF); } - KASSERT(sc->vtblk_request_count == 0, ("leaked requests")); + KASSERT(sc->vtblk_request_count == 0, + ("leaked requests: %d", sc->vtblk_request_count)); } static struct vtblk_request * diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c index 64a82ac028fe..e6fef9094db2 100644 --- a/sys/dev/virtio/network/if_vtnet.c +++ b/sys/dev/virtio/network/if_vtnet.c @@ -748,11 +748,9 @@ vtnet_is_link_up(struct vtnet_softc *sc) static void vtnet_update_link_status(struct vtnet_softc *sc) { - device_t dev; struct ifnet *ifp; int link; - dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; link = vtnet_is_link_up(sc); diff --git a/sys/dev/virtio/pci/virtio_pci.c b/sys/dev/virtio/pci/virtio_pci.c index 56813e4a1a7b..917ca84af7f6 100644 --- a/sys/dev/virtio/pci/virtio_pci.c +++ b/sys/dev/virtio/pci/virtio_pci.c @@ -57,12 +57,15 @@ struct vtpci_softc { struct resource *vtpci_msix_res; uint64_t vtpci_features; uint32_t vtpci_flags; -#define VIRTIO_PCI_FLAG_NO_MSI 0x0001 -#define VIRTIO_PCI_FLAG_MSI 0x0002 -#define VIRTIO_PCI_FLAG_NO_MSIX 0x0010 -#define VIRTIO_PCI_FLAG_MSIX 0x0020 -#define VIRTIO_PCI_FLAG_SHARED_MSIX 0x0040 +#define VTPCI_FLAG_NO_MSI 0x0001 +#define VTPCI_FLAG_NO_MSIX 0x0002 
+#define VTPCI_FLAG_LEGACY 0x1000 +#define VTPCI_FLAG_MSI 0x2000 +#define VTPCI_FLAG_MSIX 0x4000 +#define VTPCI_FLAG_SHARED_MSIX 0x8000 +#define VTPCI_FLAG_ITYPE_MASK 0xF000 + /* This "bus" will only ever have one child. */ device_t vtpci_child_dev; struct virtio_feature_desc *vtpci_child_feat_desc; @@ -80,7 +83,8 @@ struct vtpci_softc { int vtpci_nvqs; struct vtpci_virtqueue { struct virtqueue *vq; - + /* Device did not provide a callback for this virtqueue. */ + int no_intr; /* Index into vtpci_intr_res[] below. Unused, then -1. */ int ires_idx; } vtpci_vqx[VIRTIO_MAX_VIRTQUEUES]; @@ -130,24 +134,39 @@ static void vtpci_describe_features(struct vtpci_softc *, const char *, uint64_t); static void vtpci_probe_and_attach_child(struct vtpci_softc *); -static int vtpci_alloc_interrupts(struct vtpci_softc *, int, int, - struct vq_alloc_info *); -static int vtpci_alloc_intr_resources(struct vtpci_softc *, int, - struct vq_alloc_info *); -static int vtpci_alloc_msi(struct vtpci_softc *); -static int vtpci_alloc_msix(struct vtpci_softc *, int); +static int vtpci_alloc_msix(struct vtpci_softc *, int); +static int vtpci_alloc_msi(struct vtpci_softc *); +static int vtpci_alloc_intr_msix_pervq(struct vtpci_softc *); +static int vtpci_alloc_intr_msix_shared(struct vtpci_softc *); +static int vtpci_alloc_intr_msi(struct vtpci_softc *); +static int vtpci_alloc_intr_legacy(struct vtpci_softc *); +static int vtpci_alloc_intr_resources(struct vtpci_softc *); + +static int vtpci_setup_legacy_interrupt(struct vtpci_softc *, + enum intr_type); +static int vtpci_setup_msix_interrupts(struct vtpci_softc *, + enum intr_type); +static int vtpci_setup_interrupts(struct vtpci_softc *, enum intr_type); + static int vtpci_register_msix_vector(struct vtpci_softc *, int, int); +static int vtpci_set_host_msix_vectors(struct vtpci_softc *); +static int vtpci_reinit_virtqueue(struct vtpci_softc *, int); static void vtpci_free_interrupts(struct vtpci_softc *); static void vtpci_free_virtqueues(struct vtpci_softc *); +static void vtpci_cleanup_setup_intr_attempt(struct vtpci_softc *); static void vtpci_release_child_resources(struct vtpci_softc *); static void vtpci_reset(struct vtpci_softc *); +static void vtpci_select_virtqueue(struct vtpci_softc *, int); + static int vtpci_legacy_intr(void *); static int vtpci_vq_shared_intr(void *); static int vtpci_vq_intr(void *); static int vtpci_config_intr(void *); +#define vtpci_setup_msi_interrupt vtpci_setup_legacy_interrupt + /* * I/O port read/write wrappers. 
*/ @@ -252,7 +271,7 @@ vtpci_attach(device_t dev) } if (pci_find_cap(dev, PCIY_MSI, NULL) != 0) - sc->vtpci_flags |= VIRTIO_PCI_FLAG_NO_MSI; + sc->vtpci_flags |= VTPCI_FLAG_NO_MSI; if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) { rid = PCIR_BAR(1); @@ -261,7 +280,7 @@ vtpci_attach(device_t dev) } if (sc->vtpci_msix_res == NULL) - sc->vtpci_flags |= VIRTIO_PCI_FLAG_NO_MSIX; + sc->vtpci_flags |= VTPCI_FLAG_NO_MSIX; vtpci_reset(sc); @@ -372,6 +391,16 @@ vtpci_read_ivar(device_t dev, device_t child, int index, uintptr_t *result) switch (index) { case VIRTIO_IVAR_DEVTYPE: + case VIRTIO_IVAR_SUBDEVICE: + *result = pci_get_subdevice(dev); + break; + case VIRTIO_IVAR_VENDOR: + *result = pci_get_vendor(dev); + break; + case VIRTIO_IVAR_DEVICE: + *result = pci_get_device(dev); + break; + case VIRTIO_IVAR_SUBVENDOR: *result = pci_get_subdevice(dev); break; default: @@ -442,102 +471,97 @@ vtpci_alloc_virtqueues(device_t dev, int flags, int nvqs, struct vq_alloc_info *vq_info) { struct vtpci_softc *sc; + struct virtqueue *vq; struct vtpci_virtqueue *vqx; struct vq_alloc_info *info; - int queue, error; - uint16_t vq_size; + int idx, error; + uint16_t size; sc = device_get_softc(dev); + error = 0; - if (sc->vtpci_nvqs != 0 || nvqs <= 0 || - nvqs > VIRTIO_MAX_VIRTQUEUES) + if (sc->vtpci_nvqs != 0) + return (EALREADY); + if (nvqs <= 0 || nvqs > VIRTIO_MAX_VIRTQUEUES) return (EINVAL); - error = vtpci_alloc_interrupts(sc, flags, nvqs, vq_info); - if (error) { - device_printf(dev, "cannot allocate interrupts\n"); - return (error); - } + if (flags & VIRTIO_ALLOC_VQS_DISABLE_MSIX) + sc->vtpci_flags |= VTPCI_FLAG_NO_MSIX; - if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) { - error = vtpci_register_msix_vector(sc, - VIRTIO_MSI_CONFIG_VECTOR, 0); - if (error) - return (error); - } + for (idx = 0; idx < nvqs; idx++) { + vqx = &sc->vtpci_vqx[idx]; + info = &vq_info[idx]; - for (queue = 0; queue < nvqs; queue++) { - vqx = &sc->vtpci_vqx[queue]; - info = &vq_info[queue]; + vtpci_select_virtqueue(sc, idx); + size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM); - vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, queue); - - vq_size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM); - error = virtqueue_alloc(dev, queue, vq_size, - VIRTIO_PCI_VRING_ALIGN, 0xFFFFFFFFUL, info, &vqx->vq); - if (error) - return (error); - - if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) { - error = vtpci_register_msix_vector(sc, - VIRTIO_MSI_QUEUE_VECTOR, vqx->ires_idx); - if (error) - return (error); + error = virtqueue_alloc(dev, idx, size, VIRTIO_PCI_VRING_ALIGN, + 0xFFFFFFFFUL, info, &vq); + if (error) { + device_printf(dev, + "cannot allocate virtqueue %d: %d\n", idx, error); + break; } vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN, - virtqueue_paddr(vqx->vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT); + virtqueue_paddr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT); + + vqx->vq = *info->vqai_vq = vq; + vqx->no_intr = info->vqai_intr == NULL; - *info->vqai_vq = vqx->vq; sc->vtpci_nvqs++; } - return (0); + return (error); } static int vtpci_setup_intr(device_t dev, enum intr_type type) { struct vtpci_softc *sc; - struct vtpci_intr_resource *ires; - struct vtpci_virtqueue *vqx; - int i, flags, error; + int attempt, error; sc = device_get_softc(dev); - flags = type | INTR_MPSAFE; - ires = &sc->vtpci_intr_res[0]; - if ((sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) == 0) { - error = bus_setup_intr(dev, ires->irq, flags, - vtpci_legacy_intr, NULL, sc, &ires->intrhand); + for (attempt = 0; attempt < 5; attempt++) { + /* + * Start with the most desirable interrupt configuration and + 
* fallback towards less desirable ones. + */ + switch (attempt) { + case 0: + error = vtpci_alloc_intr_msix_pervq(sc); + break; + case 1: + error = vtpci_alloc_intr_msix_shared(sc); + break; + case 2: + error = vtpci_alloc_intr_msi(sc); + break; + case 3: + error = vtpci_alloc_intr_legacy(sc); + break; + default: + device_printf(dev, + "exhausted all interrupt allocation attempts\n"); + return (ENXIO); + } - return (error); + if (error == 0 && vtpci_setup_interrupts(sc, type) == 0) + break; + + vtpci_cleanup_setup_intr_attempt(sc); } - error = bus_setup_intr(dev, ires->irq, flags, vtpci_config_intr, - NULL, sc, &ires->intrhand); - if (error) - return (error); - - if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX) { - ires = &sc->vtpci_intr_res[1]; - error = bus_setup_intr(dev, ires->irq, flags, - vtpci_vq_shared_intr, NULL, sc, &ires->intrhand); - - return (error); - } - - /* Setup an interrupt handler for each virtqueue. */ - for (i = 0; i < sc->vtpci_nvqs; i++) { - vqx = &sc->vtpci_vqx[i]; - if (vqx->ires_idx < 1) - continue; - - ires = &sc->vtpci_intr_res[vqx->ires_idx]; - error = bus_setup_intr(dev, ires->irq, flags, - vtpci_vq_intr, NULL, vqx->vq, &ires->intrhand); - if (error) - return (error); + if (bootverbose) { + if (sc->vtpci_flags & VTPCI_FLAG_LEGACY) + device_printf(dev, "using legacy interrupt\n"); + else if (sc->vtpci_flags & VTPCI_FLAG_MSI) + device_printf(dev, "using MSI interrupt\n"); + else if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) + device_printf(dev, "using shared MSIX interrupts\n"); + else + device_printf(dev, "using per VQ MSIX interrupts\n"); } return (0); @@ -554,20 +578,19 @@ static int vtpci_reinit(device_t dev, uint64_t features) { struct vtpci_softc *sc; - struct vtpci_virtqueue *vqx; - struct virtqueue *vq; - int queue, error; - uint16_t vq_size; + int idx, error; sc = device_get_softc(dev); /* - * Redrive the device initialization. This is a bit of an abuse - * of the specification, but both VirtualBox and QEMU/KVM seem - * to play nice. We do not allow the host device to change from - * what was originally negotiated beyond what the guest driver - * changed (MSIX state should not change, number of virtqueues - * and their size remain the same, etc). + * Redrive the device initialization. This is a bit of an abuse of + * the specification, but VirtualBox, QEMU/KVM, and BHyVe seem to + * play nice. + * + * We do not allow the host device to change from what was originally + * negotiated beyond what the guest driver changed. MSIX state should + * not change, number of virtqueues and their size remain the same, etc. + * This will need to be rethought when we want to support migration. 
*/ if (vtpci_get_status(dev) != VIRTIO_CONFIG_STATUS_RESET) @@ -582,34 +605,16 @@ vtpci_reinit(device_t dev, uint64_t features) vtpci_negotiate_features(dev, features); - if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) { - error = vtpci_register_msix_vector(sc, - VIRTIO_MSI_CONFIG_VECTOR, 0); + for (idx = 0; idx < sc->vtpci_nvqs; idx++) { + error = vtpci_reinit_virtqueue(sc, idx); if (error) return (error); } - for (queue = 0; queue < sc->vtpci_nvqs; queue++) { - vqx = &sc->vtpci_vqx[queue]; - vq = vqx->vq; - - KASSERT(vq != NULL, ("vq %d not allocated", queue)); - vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, queue); - - vq_size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM); - error = virtqueue_reinit(vq, vq_size); + if (sc->vtpci_flags & VTPCI_FLAG_MSIX) { + error = vtpci_set_host_msix_vectors(sc); if (error) return (error); - - if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) { - error = vtpci_register_msix_vector(sc, - VIRTIO_MSI_QUEUE_VECTOR, vqx->ires_idx); - if (error) - return (error); - } - - vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN, - virtqueue_paddr(vqx->vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT); } return (0); @@ -744,7 +749,6 @@ vtpci_probe_and_attach_child(struct vtpci_softc *sc) vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED); vtpci_reset(sc); vtpci_release_child_resources(sc); - /* Reset status for future attempt. */ vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK); } else @@ -752,42 +756,126 @@ vtpci_probe_and_attach_child(struct vtpci_softc *sc) } static int -vtpci_alloc_interrupts(struct vtpci_softc *sc, int flags, int nvqs, - struct vq_alloc_info *vq_info) +vtpci_alloc_msix(struct vtpci_softc *sc, int nvectors) { - int i, nvectors, error; + device_t dev; + int nmsix, cnt, required; - /* - * Only allocate a vector for virtqueues that are actually - * expecting an interrupt. - */ - for (nvectors = 0, i = 0; i < nvqs; i++) - if (vq_info[i].vqai_intr != NULL) - nvectors++; + dev = sc->vtpci_dev; - if (vtpci_disable_msix != 0 || - sc->vtpci_flags & VIRTIO_PCI_FLAG_NO_MSIX || - flags & VIRTIO_ALLOC_VQS_DISABLE_MSIX || - vtpci_alloc_msix(sc, nvectors) != 0) { - /* - * Use MSI interrupts if available. Otherwise, we fallback - * to legacy interrupts. - */ - if ((sc->vtpci_flags & VIRTIO_PCI_FLAG_NO_MSI) == 0 && - vtpci_alloc_msi(sc) == 0) - sc->vtpci_flags |= VIRTIO_PCI_FLAG_MSI; + /* Allocate an additional vector for the config changes. 
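 * Vector 0 is always used for the configuration change notifications;
 * the interrupting virtqueues are mapped onto the remaining vectors.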
*/ + required = nvectors + 1; - sc->vtpci_nintr_res = 1; + nmsix = pci_msix_count(dev); + if (nmsix < required) + return (1); + + cnt = required; + if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) { + sc->vtpci_nintr_res = required; + return (0); } - error = vtpci_alloc_intr_resources(sc, nvqs, vq_info); + pci_release_msi(dev); - return (error); + return (1); } static int -vtpci_alloc_intr_resources(struct vtpci_softc *sc, int nvqs, - struct vq_alloc_info *vq_info) +vtpci_alloc_msi(struct vtpci_softc *sc) +{ + device_t dev; + int nmsi, cnt, required; + + dev = sc->vtpci_dev; + required = 1; + + nmsi = pci_msi_count(dev); + if (nmsi < required) + return (1); + + cnt = required; + if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) { + sc->vtpci_nintr_res = required; + return (0); + } + + pci_release_msi(dev); + + return (1); +} + +static int +vtpci_alloc_intr_msix_pervq(struct vtpci_softc *sc) +{ + int i, nvectors, error; + + if (vtpci_disable_msix != 0 || + sc->vtpci_flags & VTPCI_FLAG_NO_MSIX) + return (ENOTSUP); + + for (nvectors = 0, i = 0; i < sc->vtpci_nvqs; i++) { + if (sc->vtpci_vqx[i].no_intr == 0) + nvectors++; + } + + error = vtpci_alloc_msix(sc, nvectors); + if (error) + return (error); + + sc->vtpci_flags |= VTPCI_FLAG_MSIX; + + return (0); +} + +static int +vtpci_alloc_intr_msix_shared(struct vtpci_softc *sc) +{ + int error; + + if (vtpci_disable_msix != 0 || + sc->vtpci_flags & VTPCI_FLAG_NO_MSIX) + return (ENOTSUP); + + error = vtpci_alloc_msix(sc, 1); + if (error) + return (error); + + sc->vtpci_flags |= VTPCI_FLAG_MSIX | VTPCI_FLAG_SHARED_MSIX; + + return (0); +} + +static int +vtpci_alloc_intr_msi(struct vtpci_softc *sc) +{ + int error; + + /* Only BHyVe supports MSI. */ + if (sc->vtpci_flags & VTPCI_FLAG_NO_MSI) + return (ENOTSUP); + + error = vtpci_alloc_msi(sc); + if (error) + return (error); + + sc->vtpci_flags |= VTPCI_FLAG_MSI; + + return (0); +} + +static int +vtpci_alloc_intr_legacy(struct vtpci_softc *sc) +{ + + sc->vtpci_flags |= VTPCI_FLAG_LEGACY; + sc->vtpci_nintr_res = 1; + + return (0); +} + +static int +vtpci_alloc_intr_resources(struct vtpci_softc *sc) { device_t dev; struct resource *irq; @@ -795,14 +883,14 @@ vtpci_alloc_intr_resources(struct vtpci_softc *sc, int nvqs, int i, rid, flags, res_idx; dev = sc->vtpci_dev; - flags = RF_ACTIVE; - if ((sc->vtpci_flags & - (VIRTIO_PCI_FLAG_MSI | VIRTIO_PCI_FLAG_MSIX)) == 0) { + if (sc->vtpci_flags & VTPCI_FLAG_LEGACY) { rid = 0; - flags |= RF_SHAREABLE; - } else + flags = RF_ACTIVE | RF_SHAREABLE; + } else { rid = 1; + flags = RF_ACTIVE; + } for (i = 0; i < sc->vtpci_nintr_res; i++) { irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, flags); @@ -814,16 +902,16 @@ vtpci_alloc_intr_resources(struct vtpci_softc *sc, int nvqs, } /* - * Map the virtqueue into the correct index in vq_intr_res[]. Note the - * first index is reserved for configuration changes notifications. + * Map the virtqueue into the correct index in vq_intr_res[]. The + * first index is reserved for configuration changed notifications. 
*/ - for (i = 0, res_idx = 1; i < nvqs; i++) { + for (i = 0, res_idx = 1; i < sc->vtpci_nvqs; i++) { vqx = &sc->vtpci_vqx[i]; - if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) { - if (vq_info[i].vqai_intr == NULL) + if (sc->vtpci_flags & VTPCI_FLAG_MSIX) { + if (vqx->no_intr != 0) vqx->ires_idx = -1; - else if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX) + else if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) vqx->ires_idx = res_idx; else vqx->ires_idx = res_idx++; @@ -835,110 +923,180 @@ vtpci_alloc_intr_resources(struct vtpci_softc *sc, int nvqs, } static int -vtpci_alloc_msi(struct vtpci_softc *sc) +vtpci_setup_legacy_interrupt(struct vtpci_softc *sc, enum intr_type type) { device_t dev; - int nmsi, cnt; + struct vtpci_intr_resource *ires; + int error; dev = sc->vtpci_dev; - nmsi = pci_msi_count(dev); - if (nmsi < 1) - return (1); + ires = &sc->vtpci_intr_res[0]; + error = bus_setup_intr(dev, ires->irq, type, vtpci_legacy_intr, NULL, + sc, &ires->intrhand); - cnt = 1; - if (pci_alloc_msi(dev, &cnt) == 0 && cnt == 1) - return (0); - - return (1); + return (error); } static int -vtpci_alloc_msix(struct vtpci_softc *sc, int nvectors) +vtpci_setup_msix_interrupts(struct vtpci_softc *sc, enum intr_type type) { device_t dev; - int nmsix, cnt, required; + struct vtpci_intr_resource *ires; + struct vtpci_virtqueue *vqx; + int i, error; dev = sc->vtpci_dev; - nmsix = pci_msix_count(dev); - if (nmsix < 1) - return (1); + /* + * The first resource is used for configuration changed interrupts. + */ + ires = &sc->vtpci_intr_res[0]; + error = bus_setup_intr(dev, ires->irq, type, vtpci_config_intr, + NULL, sc, &ires->intrhand); + if (error) + return (error); - /* An additional vector is needed for the config changes. */ - required = nvectors + 1; - if (nmsix >= required) { - cnt = required; - if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) - goto out; + if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) { + ires = &sc->vtpci_intr_res[1]; - pci_release_msi(dev); - } + error = bus_setup_intr(dev, ires->irq, type, + vtpci_vq_shared_intr, NULL, sc, &ires->intrhand); + if (error) + return (error); + } else { + /* + * Each remaining resource is assigned to a specific virtqueue. + */ + for (i = 0; i < sc->vtpci_nvqs; i++) { + vqx = &sc->vtpci_vqx[i]; + if (vqx->ires_idx < 1) + continue; - /* Attempt shared MSIX configuration. 
*/ - required = 2; - if (nmsix >= required) { - cnt = required; - if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) { - sc->vtpci_flags |= VIRTIO_PCI_FLAG_SHARED_MSIX; - goto out; + ires = &sc->vtpci_intr_res[vqx->ires_idx]; + error = bus_setup_intr(dev, ires->irq, type, + vtpci_vq_intr, NULL, vqx->vq, &ires->intrhand); + if (error) + return (error); } - - pci_release_msi(dev); } - return (1); - -out: - sc->vtpci_nintr_res = required; - sc->vtpci_flags |= VIRTIO_PCI_FLAG_MSIX; - - if (bootverbose) { - if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX) - device_printf(dev, "using shared virtqueue MSIX\n"); - else - device_printf(dev, "using per virtqueue MSIX\n"); - } + error = vtpci_set_host_msix_vectors(sc); + if (error) + return (error); return (0); } +static int +vtpci_setup_interrupts(struct vtpci_softc *sc, enum intr_type type) +{ + int error; + + type |= INTR_MPSAFE; + KASSERT(sc->vtpci_flags & VTPCI_FLAG_ITYPE_MASK, + ("no interrupt type selected: %#x", sc->vtpci_flags)); + + error = vtpci_alloc_intr_resources(sc); + if (error) + return (error); + + if (sc->vtpci_flags & VTPCI_FLAG_LEGACY) + error = vtpci_setup_legacy_interrupt(sc, type); + else if (sc->vtpci_flags & VTPCI_FLAG_MSI) + error = vtpci_setup_msi_interrupt(sc, type); + else + error = vtpci_setup_msix_interrupts(sc, type); + + return (error); +} + static int vtpci_register_msix_vector(struct vtpci_softc *sc, int offset, int res_idx) { device_t dev; - uint16_t vector; + uint16_t vector, rdvector; dev = sc->vtpci_dev; - if (offset != VIRTIO_MSI_CONFIG_VECTOR && - offset != VIRTIO_MSI_QUEUE_VECTOR) - return (EINVAL); - if (res_idx != -1) { - /* Map from rid to host vector. */ + /* Map from guest rid to host vector. */ vector = sc->vtpci_intr_res[res_idx].rid - 1; } else vector = VIRTIO_MSI_NO_VECTOR; - /* The first resource is special; make sure it is used correctly. */ + /* + * Assert the first resource is always used for the configuration + * changed interrupts. + */ if (res_idx == 0) { - KASSERT(vector == 0, ("unexpected config vector")); - KASSERT(offset == VIRTIO_MSI_CONFIG_VECTOR, - ("unexpected config offset")); - } + KASSERT(vector == 0 && offset == VIRTIO_MSI_CONFIG_VECTOR, + ("bad first res use vector:%d offset:%d", vector, offset)); + } else + KASSERT(offset == VIRTIO_MSI_QUEUE_VECTOR, ("bad offset")); vtpci_write_config_2(sc, offset, vector); - if (vtpci_read_config_2(sc, offset) != vector) { - device_printf(dev, "insufficient host resources for " - "MSIX interrupts\n"); + /* Read vector to determine if the host had sufficient resources. 
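 * Per the VirtIO PCI specification, the device hands back
 * VIRTIO_MSI_NO_VECTOR on this read when it could not map the vector.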
*/ + rdvector = vtpci_read_config_2(sc, offset); + if (rdvector != vector) { + device_printf(dev, + "insufficient host resources for MSIX interrupts\n"); return (ENODEV); } return (0); } +static int +vtpci_set_host_msix_vectors(struct vtpci_softc *sc) +{ + struct vtpci_virtqueue *vqx; + int idx, error; + + error = vtpci_register_msix_vector(sc, VIRTIO_MSI_CONFIG_VECTOR, 0); + if (error) + return (error); + + for (idx = 0; idx < sc->vtpci_nvqs; idx++) { + vqx = &sc->vtpci_vqx[idx]; + + vtpci_select_virtqueue(sc, idx); + error = vtpci_register_msix_vector(sc, VIRTIO_MSI_QUEUE_VECTOR, + vqx->ires_idx); + if (error) + return (error); + } + + return (0); +} + +static int +vtpci_reinit_virtqueue(struct vtpci_softc *sc, int idx) +{ + struct vtpci_virtqueue *vqx; + struct virtqueue *vq; + int error; + uint16_t size; + + vqx = &sc->vtpci_vqx[idx]; + vq = vqx->vq; + + KASSERT(vq != NULL, ("vq %d not allocated", idx)); + + vtpci_select_virtqueue(sc, idx); + size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM); + + error = virtqueue_reinit(vq, size); + if (error) + return (error); + + vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN, + virtqueue_paddr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT); + + return (0); +} + static void vtpci_free_interrupts(struct vtpci_softc *sc) { @@ -947,15 +1105,8 @@ vtpci_free_interrupts(struct vtpci_softc *sc) int i; dev = sc->vtpci_dev; - sc->vtpci_nintr_res = 0; - if (sc->vtpci_flags & (VIRTIO_PCI_FLAG_MSI | VIRTIO_PCI_FLAG_MSIX)) { - pci_release_msi(dev); - sc->vtpci_flags &= ~(VIRTIO_PCI_FLAG_MSI | - VIRTIO_PCI_FLAG_MSIX | VIRTIO_PCI_FLAG_SHARED_MSIX); - } - - for (i = 0; i < 1 + VIRTIO_MAX_VIRTQUEUES; i++) { + for (i = 0; i < sc->vtpci_nintr_res; i++) { ires = &sc->vtpci_intr_res[i]; if (ires->intrhand != NULL) { @@ -971,6 +1122,12 @@ vtpci_free_interrupts(struct vtpci_softc *sc) ires->rid = -1; } + + if (sc->vtpci_flags & (VTPCI_FLAG_MSI | VTPCI_FLAG_MSIX)) + pci_release_msi(dev); + + sc->vtpci_nintr_res = 0; + sc->vtpci_flags &= ~VTPCI_FLAG_ITYPE_MASK; } static void @@ -979,16 +1136,33 @@ vtpci_free_virtqueues(struct vtpci_softc *sc) struct vtpci_virtqueue *vqx; int i; - sc->vtpci_nvqs = 0; - - for (i = 0; i < VIRTIO_MAX_VIRTQUEUES; i++) { + for (i = 0; i < sc->vtpci_nvqs; i++) { vqx = &sc->vtpci_vqx[i]; - if (vqx->vq != NULL) { - virtqueue_free(vqx->vq); - vqx->vq = NULL; + virtqueue_free(vqx->vq); + vqx->vq = NULL; + } + + sc->vtpci_nvqs = 0; +} + +static void +vtpci_cleanup_setup_intr_attempt(struct vtpci_softc *sc) +{ + int idx; + + if (sc->vtpci_flags & VTPCI_FLAG_MSIX) { + vtpci_write_config_2(sc, VIRTIO_MSI_CONFIG_VECTOR, + VIRTIO_MSI_NO_VECTOR); + + for (idx = 0; idx < sc->vtpci_nvqs; idx++) { + vtpci_select_virtqueue(sc, idx); + vtpci_write_config_2(sc, VIRTIO_MSI_QUEUE_VECTOR, + VIRTIO_MSI_NO_VECTOR); } } + + vtpci_free_interrupts(sc); } static void @@ -1010,6 +1184,13 @@ vtpci_reset(struct vtpci_softc *sc) vtpci_set_status(sc->vtpci_dev, VIRTIO_CONFIG_STATUS_RESET); } +static void +vtpci_select_virtqueue(struct vtpci_softc *sc, int idx) +{ + + vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, idx); +} + static int vtpci_legacy_intr(void *xsc) { diff --git a/sys/dev/virtio/pci/virtio_pci.h b/sys/dev/virtio/pci/virtio_pci.h index d8daa31e2d14..485cf4ff5b1c 100644 --- a/sys/dev/virtio/pci/virtio_pci.h +++ b/sys/dev/virtio/pci/virtio_pci.h @@ -73,7 +73,7 @@ * configuration space. */ #define VIRTIO_PCI_CONFIG(sc) \ - (((sc)->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) ? 24 : 20) + (((sc)->vtpci_flags & VTPCI_FLAG_MSIX) ? 
24 : 20) /* * How many bits to shift physical queue address written to QUEUE_PFN. diff --git a/sys/dev/virtio/virtio.c b/sys/dev/virtio/virtio.c index e38557542441..1cac3c7d15a6 100644 --- a/sys/dev/virtio/virtio.c +++ b/sys/dev/virtio/virtio.c @@ -86,28 +86,6 @@ virtio_device_name(uint16_t devid) return (NULL); } -int -virtio_get_device_type(device_t dev) -{ - uintptr_t devtype; - - devtype = -1; - - BUS_READ_IVAR(device_get_parent(dev), dev, - VIRTIO_IVAR_DEVTYPE, &devtype); - - return ((int) devtype); -} - -void -virtio_set_feature_desc(device_t dev, - struct virtio_feature_desc *feature_desc) -{ - - BUS_WRITE_IVAR(device_get_parent(dev), dev, - VIRTIO_IVAR_FEATURE_DESC, (uintptr_t) feature_desc); -} - void virtio_describe(device_t dev, const char *msg, uint64_t features, struct virtio_feature_desc *feature_desc) @@ -184,6 +162,21 @@ virtio_feature_name(uint64_t val, struct virtio_feature_desc *feature_desc) * VirtIO bus method wrappers. */ +void +virtio_read_ivar(device_t dev, int ivar, uintptr_t *val) +{ + + *val = -1; + BUS_READ_IVAR(device_get_parent(dev), dev, ivar, val); +} + +void +virtio_write_ivar(device_t dev, int ivar, uintptr_t val) +{ + + BUS_WRITE_IVAR(device_get_parent(dev), dev, ivar, val); +} + uint64_t virtio_negotiate_features(device_t dev, uint64_t child_features) { diff --git a/sys/dev/virtio/virtio.h b/sys/dev/virtio/virtio.h index e0ecfb9c0dd9..8c22b12e7c9f 100644 --- a/sys/dev/virtio/virtio.h +++ b/sys/dev/virtio/virtio.h @@ -91,6 +91,10 @@ struct vq_alloc_info; */ #define VIRTIO_IVAR_DEVTYPE 1 #define VIRTIO_IVAR_FEATURE_DESC 2 +#define VIRTIO_IVAR_VENDOR 3 +#define VIRTIO_IVAR_DEVICE 4 +#define VIRTIO_IVAR_SUBVENDOR 5 +#define VIRTIO_IVAR_SUBDEVICE 6 struct virtio_feature_desc { uint64_t vfd_val; @@ -98,15 +102,14 @@ struct virtio_feature_desc { }; const char *virtio_device_name(uint16_t devid); -int virtio_get_device_type(device_t dev); -void virtio_set_feature_desc(device_t dev, - struct virtio_feature_desc *feature_desc); void virtio_describe(device_t dev, const char *msg, uint64_t features, struct virtio_feature_desc *feature_desc); /* * VirtIO Bus Methods. 
*/ +void virtio_read_ivar(device_t dev, int ivar, uintptr_t *val); +void virtio_write_ivar(device_t dev, int ivar, uintptr_t val); uint64_t virtio_negotiate_features(device_t dev, uint64_t child_features); int virtio_alloc_virtqueues(device_t dev, int flags, int nvqs, struct vq_alloc_info *info); @@ -148,4 +151,28 @@ VIRTIO_RDWR_DEVICE_CONFIG(1, uint8_t); VIRTIO_RDWR_DEVICE_CONFIG(2, uint16_t); VIRTIO_RDWR_DEVICE_CONFIG(4, uint32_t); +#define VIRTIO_READ_IVAR(name, ivar) \ +static inline int \ +__CONCAT(virtio_get_,name)(device_t dev) \ +{ \ + uintptr_t val; \ + virtio_read_ivar(dev, ivar, &val); \ + return ((int) val); \ +} + +VIRTIO_READ_IVAR(device_type, VIRTIO_IVAR_DEVTYPE); +VIRTIO_READ_IVAR(vendor, VIRTIO_IVAR_VENDOR); +VIRTIO_READ_IVAR(device, VIRTIO_IVAR_DEVICE); +VIRTIO_READ_IVAR(subvendor, VIRTIO_IVAR_SUBVENDOR); +VIRTIO_READ_IVAR(subdevice, VIRTIO_IVAR_SUBDEVICE); + +#define VIRTIO_WRITE_IVAR(name, ivar) \ +static inline void \ +__CONCAT(virtio_set_,name)(device_t dev, void *val) \ +{ \ + virtio_write_ivar(dev, ivar, (uintptr_t) val); \ +} + +VIRTIO_WRITE_IVAR(feature_desc, VIRTIO_IVAR_FEATURE_DESC); + #endif /* _VIRTIO_H_ */ diff --git a/sys/dev/virtio/virtio_ring.h b/sys/dev/virtio/virtio_ring.h index 0d0001334aa6..8e902ab59c2b 100644 --- a/sys/dev/virtio/virtio_ring.h +++ b/sys/dev/virtio/virtio_ring.h @@ -120,8 +120,8 @@ struct vring { * We publish the used event index at the end of the available ring, and vice * versa. They are at the end for backwards compatibility. */ -#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num]) -#define vring_avail_event(vr) (*(uint16_t *)&(vr)->used->ring[(vr)->num]) +#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num]) +#define vring_avail_event(vr) (*(uint16_t *)&(vr)->used->ring[(vr)->num]) static inline int vring_size(unsigned int num, unsigned long align) @@ -129,10 +129,11 @@ vring_size(unsigned int num, unsigned long align) int size; size = num * sizeof(struct vring_desc); - size += sizeof(struct vring_avail) + (num * sizeof(uint16_t)); + size += sizeof(struct vring_avail) + (num * sizeof(uint16_t)) + + sizeof(uint16_t); size = (size + align - 1) & ~(align - 1); size += sizeof(struct vring_used) + - (num * sizeof(struct vring_used_elem)); + (num * sizeof(struct vring_used_elem)) + sizeof(uint16_t); return (size); } diff --git a/sys/dev/virtio/virtqueue.c b/sys/dev/virtio/virtqueue.c index 31f47d0380dc..83d39ba2ed94 100644 --- a/sys/dev/virtio/virtqueue.c +++ b/sys/dev/virtio/virtqueue.c @@ -127,6 +127,7 @@ static uint16_t vq_ring_enqueue_segments(struct virtqueue *, static int vq_ring_use_indirect(struct virtqueue *, int); static void vq_ring_enqueue_indirect(struct virtqueue *, void *, struct sglist *, int, int); +static int vq_ring_enable_interrupt(struct virtqueue *, uint16_t); static int vq_ring_must_notify_host(struct virtqueue *); static void vq_ring_notify_host(struct virtqueue *); static void vq_ring_free_chain(struct virtqueue *, uint16_t); @@ -311,7 +312,7 @@ virtqueue_reinit(struct virtqueue *vq, uint16_t size) /* Warn if the virtqueue was not properly cleaned up. */ if (vq->vq_free_cnt != vq->vq_nentries) { device_printf(vq->vq_dev, - "%s: warning, '%s' virtqueue not empty, " + "%s: warning '%s' virtqueue not empty, " "leaking %d entries\n", __func__, vq->vq_name, vq->vq_nentries - vq->vq_free_cnt); } @@ -390,6 +391,7 @@ virtqueue_full(struct virtqueue *vq) void virtqueue_notify(struct virtqueue *vq) { + /* Ensure updated avail->idx is visible to host. 
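 * The mb() orders the avail->idx store ahead of the queue notify that
 * follows; it is one of the barriers slated to become the matching
 * atomic(9) operation.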
*/ mb(); @@ -428,58 +430,22 @@ int virtqueue_enable_intr(struct virtqueue *vq) { - /* - * Enable interrupts, making sure we get the latest - * index of what's already been consumed. - */ - vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; - if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) - vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx; - else - vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; - - mb(); - - /* - * Additional items may have been consumed in the time between - * since we last checked and enabled interrupts above. Let our - * caller know so it processes the new entries. - */ - if (vq->vq_used_cons_idx != vq->vq_ring.used->idx) - return (1); - - return (0); + return (vq_ring_enable_interrupt(vq, 0)); } int virtqueue_postpone_intr(struct virtqueue *vq) { - uint16_t ndesc; + uint16_t ndesc, avail_idx; /* - * Postpone until at least half of the available descriptors - * have been consumed. - * - * XXX Adaptive factor? (Linux uses 3/4) + * Request the next interrupt be postponed until at least half + * of the available descriptors have been consumed. */ - ndesc = (uint16_t)(vq->vq_ring.avail->idx - vq->vq_used_cons_idx) / 2; + avail_idx = vq->vq_ring.avail->idx; + ndesc = (uint16_t)(avail_idx - vq->vq_used_cons_idx) / 2; - if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) - vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx + ndesc; - else - vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; - - mb(); - - /* - * Enough items may have already been consumed to meet our - * threshold since we last checked. Let our caller know so - * it processes the new entries. - */ - if (virtqueue_nused(vq) > ndesc) - return (1); - - return (0); + return (vq_ring_enable_interrupt(vq, ndesc)); } void @@ -751,6 +717,32 @@ vq_ring_enqueue_indirect(struct virtqueue *vq, void *cookie, vq_ring_update_avail(vq, head_idx); } +static int +vq_ring_enable_interrupt(struct virtqueue *vq, uint16_t ndesc) +{ + + /* + * Enable interrupts, making sure we get the latest index of + * what's already been consumed. + */ + if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) + vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx + ndesc; + else + vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; + + mb(); + + /* + * Enough items may have already been consumed to meet our threshold + * since we last checked. Let our caller know so it processes the new + * entries. + */ + if (virtqueue_nused(vq) > ndesc) + return (1); + + return (0); +} + static int vq_ring_must_notify_host(struct virtqueue *vq) { @@ -788,8 +780,8 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx) VQ_RING_ASSERT_CHAIN_TERM(vq); vq->vq_free_cnt += dxp->ndescs; - dxp->ndescs--; +#ifdef INVARIANTS if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) { while (dp->flags & VRING_DESC_F_NEXT) { VQ_RING_ASSERT_VALID_IDX(vq, dp->next); @@ -797,7 +789,10 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx) dxp->ndescs--; } } - VQASSERT(vq, dxp->ndescs == 0, "failed to free entire desc chain"); + VQASSERT(vq, dxp->ndescs == 1, + "failed to free entire desc chain, remaining: %d", dxp->ndescs); +#endif + dxp->ndescs = 0; /* * We must append the existing free chain, if any, to the end of diff --git a/sys/dev/virtio/virtqueue.h b/sys/dev/virtio/virtqueue.h index a84d7a144698..0296b8c2fba4 100644 --- a/sys/dev/virtio/virtqueue.h +++ b/sys/dev/virtio/virtqueue.h @@ -35,11 +35,7 @@ struct sglist; /* Support for indirect buffer descriptors. 
*/ #define VIRTIO_RING_F_INDIRECT_DESC (1 << 28) -/* The guest publishes the used index for which it expects an interrupt - * at the end of the avail ring. Host should ignore the avail->flags field. - * The host publishes the avail index for which it expects a kick - * at the end of the used ring. Guest should ignore the used->flags field. - */ +/* Support to suppress interrupt until specific index is reached. */ #define VIRTIO_RING_F_EVENT_IDX (1 << 29) /* Device callback for a virtqueue interrupt. */
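
Notes:

With the reworked interrupt code, a child driver makes a single
virtio_setup_intr() call and the PCI bus driver walks the fallback
chain (per-VQ MSIX, shared MSIX, MSI, then legacy INTx) on its behalf.
A minimal sketch of the attach-side sequence, assuming a hypothetical
vtfoo driver and softc layout:

	static void	vtfoo_intr(void *);

	static int
	vtfoo_attach(device_t dev)
	{
		struct vtfoo_softc *sc = device_get_softc(dev);
		struct vq_alloc_info vq_info;
		int error;

		VQ_ALLOC_INFO_INIT(&vq_info, 0, vtfoo_intr, sc,
		    &sc->vtfoo_vq, "%s requests",
		    device_get_nameunit(dev));

		/*
		 * Allocate the virtqueues first: the per-VQ MSIX attempt
		 * sizes its vector request from the queues that supplied
		 * an interrupt callback (no_intr == 0).
		 */
		error = virtio_alloc_virtqueues(dev, 0, 1, &vq_info);
		if (error)
			return (error);

		/* One call; the bus falls back MSIX -> MSI -> INTx. */
		return (virtio_setup_intr(dev, INTR_TYPE_BIO));
	}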
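The VIRTIO_READ_IVAR() expansions above generate
virtio_get_device_type(), virtio_get_vendor(), virtio_get_device(),
virtio_get_subvendor(), and virtio_get_subdevice(), which is why the
out-of-line virtio_get_device_type() could be removed from virtio.c.
A sketch of a probe routine using the accessors; the vtfoo names are
hypothetical, and 0x1af4 is the standard VirtIO PCI vendor ID:

	static int
	vtfoo_probe(device_t dev)
	{
		if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
			return (ENXIO);

		/* VirtIO PCI devices carry vendor ID 0x1af4. */
		if (virtio_get_vendor(dev) != 0x1af4)
			device_printf(dev, "unexpected vendor ID\n");

		device_set_desc(dev, "VirtIO example adapter");
		return (BUS_PROBE_DEFAULT);
	}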
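The vring_size() change accounts for the two optional event-index
words: with VIRTIO_RING_F_EVENT_IDX, used_event sits after the avail
ring and avail_event after the used ring, so each part of the layout
grows by sizeof(uint16_t). Worked through for the legacy PCI values
(num = 256, align = VIRTIO_PCI_VRING_ALIGN = 4096):

	desc:  256 * 16                      = 4096 bytes
	avail: 4 + 256 * 2 + 2 (used_event)  =  518 bytes
	       4096 + 518 = 4614, rounded up to 8192
	used:  4 + 256 * 8 + 2 (avail_event) = 2054 bytes
	total: 8192 + 2054                   = 10246 bytes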
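vq_ring_enable_interrupt() now backs both virtqueue_enable_intr()
(ndesc = 0) and virtqueue_postpone_intr(), and its nonzero return keeps
the old contract: entries may have been posted between the caller's
last dequeue and interrupts being re-enabled, so the caller must drain
again. A sketch of the consumer loop that contract supports, where
process() is a hypothetical completion handler:

	for (;;) {
		void *cookie;
		uint32_t len;

		while ((cookie = virtqueue_dequeue(vq, &len)) != NULL)
			process(cookie, len);

		/* Nonzero: entries raced in; disable and drain again. */
		if (virtqueue_enable_intr(vq) == 0)
			break;
		virtqueue_disable_intr(vq);
	}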