From 310dacd09bdac56bf4b84751a3804785ecbe65ba Mon Sep 17 00:00:00 2001
From: Peter Grehan <grehan@FreeBSD.org>
Date: Wed, 11 Jul 2012 02:57:19 +0000
Subject: [PATCH] Various VirtIO improvements

      PCI:
        - Properly handle interrupt fallback from MSIX to MSI to legacy.
          The host may not have sufficient resources to support MSIX,
          so we must be able to fall back to legacy interrupts (see the
          sketch after this list).
        - Add an interface to get the (sub)vendor and (sub)device IDs.
        - Rename flags to VTPCI_FLAG_* like other VirtIO drivers.
      Block:
        - No longer allocate vtblk_requests from a separate UMA zone.
          malloc(9) from M_DEVBUF is sufficient. Assert segment counts
          at allocation.
        - More verbose error and debug messages.
      Network:
        - Remove a stray write-only variable.
      Virtqueue:
        - Shuffle code around in preparation for converting the mb()s to
          the appropriate atomic(9) operations.
        - Only walk the descriptor chain when freeing if INVARIANTS is
          defined, since the result is only KASSERT()ed.

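      As a rough sketch only (the vtfoo_* names are hypothetical; the
      bus methods are the real ones from virtio.h), a child driver
      reaches the new fallback path like this:

	static int
	vtfoo_setup(device_t dev, struct vtfoo_softc *sc)
	{
		struct vq_alloc_info vq_info;
		int error;

		/* A NULL callback would mark the virtqueue 'no_intr'. */
		VQ_ALLOC_INFO_INIT(&vq_info, 0, vtfoo_vq_intr, sc,
		    &sc->vtfoo_vq, "%s request", device_get_nameunit(dev));

		error = virtio_alloc_virtqueues(dev, 0, 1, &vq_info);
		if (error)
			return (error);

		/* Tries per-VQ MSIX, shared MSIX, MSI, then legacy INTx. */
		return (virtio_setup_intr(dev, INTR_TYPE_MISC));
	}
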
Submitted by:	Bryan Venteicher (bryanv@daemoninthecloset.org)
---
 sys/dev/virtio/balloon/virtio_balloon.c |   1 -
 sys/dev/virtio/block/virtio_blk.c       |  81 ++-
 sys/dev/virtio/network/if_vtnet.c       |   2 -
 sys/dev/virtio/pci/virtio_pci.c         | 627 +++++++++++++++---------
 sys/dev/virtio/pci/virtio_pci.h         |   2 +-
 sys/dev/virtio/virtio.c                 |  37 +-
 sys/dev/virtio/virtio.h                 |  33 +-
 sys/dev/virtio/virtio_ring.h            |   9 +-
 sys/dev/virtio/virtqueue.c              |  87 ++--
 sys/dev/virtio/virtqueue.h              |   6 +-
 10 files changed, 536 insertions(+), 349 deletions(-)

diff --git a/sys/dev/virtio/balloon/virtio_balloon.c b/sys/dev/virtio/balloon/virtio_balloon.c
index d589a733c25c..9b454591a9d0 100644
--- a/sys/dev/virtio/balloon/virtio_balloon.c
+++ b/sys/dev/virtio/balloon/virtio_balloon.c
@@ -412,7 +412,6 @@ vtballoon_send_page_frames(struct vtballoon_softc *sc, struct virtqueue *vq,
 	 * interrupt handler will wake us up.
 	 */
 	VTBALLOON_LOCK(sc);
-
 	while ((c = virtqueue_dequeue(vq, NULL)) == NULL)
 		msleep_spin(sc, VTBALLOON_MTX(sc), "vtbspf", 0);
 	VTBALLOON_UNLOCK(sc);
diff --git a/sys/dev/virtio/block/virtio_blk.c b/sys/dev/virtio/block/virtio_blk.c
index d05fa7a4ec4a..d35421a7659e 100644
--- a/sys/dev/virtio/block/virtio_blk.c
+++ b/sys/dev/virtio/block/virtio_blk.c
@@ -42,7 +42,6 @@ __FBSDID("$FreeBSD$");
 #include <sys/taskqueue.h>
 
 #include <geom/geom_disk.h>
-#include <vm/uma.h>
 
 #include <machine/bus.h>
 #include <machine/resource.h>
@@ -119,7 +118,7 @@ static int	vtblk_shutdown(device_t);
 static int	vtblk_open(struct disk *);
 static int	vtblk_close(struct disk *);
 static int	vtblk_ioctl(struct disk *, u_long, void *, int,
-	            struct thread *);
+		    struct thread *);
 static int	vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
 static void	vtblk_strategy(struct bio *);
 
@@ -193,7 +192,7 @@ TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
 				mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)
 
 #define VTBLK_DISK_NAME		"vtbd"
-#define	VTBLK_QUIESCE_TIMEOUT	(30 * hz)
+#define VTBLK_QUIESCE_TIMEOUT	(30 * hz)
 
 /*
  * Each block request uses at least two segments - one for the header
@@ -201,8 +200,6 @@ TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
  */
 #define VTBLK_MIN_SEGMENTS	2
 
-static uma_zone_t vtblk_req_zone;
-
 static device_method_t vtblk_methods[] = {
 	/* Device methods. */
 	DEVMETHOD(device_probe,		vtblk_probe),
@@ -236,19 +233,8 @@ vtblk_modevent(module_t mod, int type, void *unused)
 
 	switch (type) {
 	case MOD_LOAD:
-		vtblk_req_zone = uma_zcreate("vtblk_request",
-		    sizeof(struct vtblk_request),
-		    NULL, NULL, NULL, NULL, 0, 0);
-		break;
 	case MOD_QUIESCE:
 	case MOD_UNLOAD:
-		if (uma_zone_get_cur(vtblk_req_zone) > 0)
-			error = EBUSY;
-		else if (type == MOD_UNLOAD) {
-			uma_zdestroy(vtblk_req_zone);
-			vtblk_req_zone = NULL;
-		}
-		break;
 	case MOD_SHUTDOWN:
 		break;
 	default:
@@ -316,7 +302,7 @@ vtblk_attach(device_t dev)
 	}
 
 	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
-        if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
+	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
 		error = EINVAL;
 		device_printf(dev, "fewer than minimum number of segments "
 		    "allowed: %d\n", sc->vtblk_max_nsegs);
@@ -493,7 +479,6 @@ vtblk_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
 	int error;
 
 	dp = arg;
-	error = 0;
 
 	if ((sc = dp->d_drv1) == NULL)
 		return (ENXIO);
@@ -539,7 +524,7 @@ vtblk_strategy(struct bio *bp)
 		return;
 	}
 
-#ifdef	INVARIANTS
+#ifdef INVARIANTS
 	/*
 	 * Prevent read/write buffers spanning too many segments from
 	 * getting into the queue. This should only trip if d_maxsize
@@ -547,13 +532,13 @@ vtblk_strategy(struct bio *bp)
 	 */
 	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
 		int nsegs, max_nsegs;
-		
+
 		nsegs = sglist_count(bp->bio_data, bp->bio_bcount);
 		max_nsegs = sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS;
 
 		KASSERT(nsegs <= max_nsegs,
-		    ("bio spanned too many segments: %d, max: %d",
-		    nsegs, max_nsegs));
+		    ("bio %p spanned too many segments: %d, max: %d",
+		    bp, nsegs, max_nsegs));
 	}
 #endif
 
@@ -800,27 +785,22 @@ vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
 	VTBLK_LOCK_ASSERT(sc);
 
 	sglist_reset(sg);
-	error = sglist_append(sg, &req->vbr_hdr,
-	    sizeof(struct virtio_blk_outhdr));
-	KASSERT(error == 0, ("error adding header to sglist"));
-	KASSERT(sg->sg_nseg == 1,
-	    ("header spanned multiple segments: %d", sg->sg_nseg));
+
+	sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr));
 
 	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
 		error = sglist_append(sg, bp->bio_data, bp->bio_bcount);
-		KASSERT(error == 0, ("error adding buffer to sglist"));
+		if (error || sg->sg_nseg == sg->sg_maxseg)
+			panic("%s: data buffer too big bio:%p error:%d",
+			    __FUNCTION__, bp, error);
 
 		/* BIO_READ means the host writes into our buffer. */
 		if (bp->bio_cmd == BIO_READ)
-			writable += sg->sg_nseg - 1;
+			writable = sg->sg_nseg - 1;
 	}
 
-	error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
-	KASSERT(error == 0, ("error adding ack to sglist"));
 	writable++;
-
-	KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS,
-	    ("fewer than min segments: %d", sg->sg_nseg));
+	sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
 
 	readable = sg->sg_nseg - writable;
 
@@ -995,12 +975,10 @@ vtblk_flush_dump(struct vtblk_softc *sc)
 static int
 vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
 {
-	device_t dev;
 	struct virtqueue *vq;
 	struct vtblk_request *r;
 	int error;
 
-	dev = sc->vtblk_dev;
 	vq = sc->vtblk_vq;
 
 	if (!virtqueue_empty(vq))
@@ -1013,12 +991,12 @@ vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
 	virtqueue_notify(vq);
 
 	r = virtqueue_poll(vq, NULL);
-	KASSERT(r == req, ("unexpected request response"));
+	KASSERT(r == req, ("unexpected request response: %p/%p", r, req));
 
 	error = vtblk_request_error(req);
 	if (error && bootverbose) {
-		device_printf(dev, "vtblk_poll_request: IO error: %d\n",
-		    error);
+		device_printf(sc->vtblk_dev,
+		    "%s: IO error: %d\n", __FUNCTION__, error);
 	}
 
 	return (error);
@@ -1090,6 +1068,20 @@ vtblk_drain(struct vtblk_softc *sc)
 	vtblk_free_requests(sc);
 }
 
+#ifdef INVARIANTS
+static void
+vtblk_request_invariants(struct vtblk_request *req)
+{
+	int hdr_nsegs, ack_nsegs;
+
+	hdr_nsegs = sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr));
+	ack_nsegs = sglist_count(&req->vbr_ack, sizeof(req->vbr_ack));
+
+	KASSERT(hdr_nsegs == 1, ("request header crossed page boundary"));
+	KASSERT(ack_nsegs == 1, ("request ack crossed page boundary"));
+}
+#endif
+
 static int
 vtblk_alloc_requests(struct vtblk_softc *sc)
 {
@@ -1107,10 +1099,14 @@ vtblk_alloc_requests(struct vtblk_softc *sc)
 		nreqs /= VTBLK_MIN_SEGMENTS;
 
 	for (i = 0; i < nreqs; i++) {
-		req = uma_zalloc(vtblk_req_zone, M_NOWAIT);
+		req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
 		if (req == NULL)
 			return (ENOMEM);
 
+#ifdef INVARIANTS
+		vtblk_request_invariants(req);
+#endif
+
 		sc->vtblk_request_count++;
 		vtblk_enqueue_request(sc, req);
 	}
@@ -1128,10 +1124,11 @@ vtblk_free_requests(struct vtblk_softc *sc)
 
 	while ((req = vtblk_dequeue_request(sc)) != NULL) {
 		sc->vtblk_request_count--;
-		uma_zfree(vtblk_req_zone, req);
+		free(req, M_DEVBUF);
 	}
 
-	KASSERT(sc->vtblk_request_count == 0, ("leaked requests"));
+	KASSERT(sc->vtblk_request_count == 0,
+	    ("leaked requests: %d", sc->vtblk_request_count));
 }
 
 static struct vtblk_request *
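
A sketch of the segment geometry vtblk_execute_request() builds and the
new vtblk_request_invariants() protects; this mirrors the driver code
rather than adding to it:

	/*
	 * seg 0:       vbr_hdr  (host-readable; must fit in 1 segment)
	 * seg 1..n-2:  bio data (host-writable only for BIO_READ)
	 * seg n-1:     vbr_ack  (host-writable; must fit in 1 segment)
	 */
	readable = sg->sg_nseg - writable;
	error = virtqueue_enqueue(vq, req, sg, readable, writable);
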
diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c
index 64a82ac028fe..e6fef9094db2 100644
--- a/sys/dev/virtio/network/if_vtnet.c
+++ b/sys/dev/virtio/network/if_vtnet.c
@@ -748,11 +748,9 @@ vtnet_is_link_up(struct vtnet_softc *sc)
 static void
 vtnet_update_link_status(struct vtnet_softc *sc)
 {
-	device_t dev;
 	struct ifnet *ifp;
 	int link;
 
-	dev = sc->vtnet_dev;
 	ifp = sc->vtnet_ifp;
 
 	link = vtnet_is_link_up(sc);
diff --git a/sys/dev/virtio/pci/virtio_pci.c b/sys/dev/virtio/pci/virtio_pci.c
index 56813e4a1a7b..917ca84af7f6 100644
--- a/sys/dev/virtio/pci/virtio_pci.c
+++ b/sys/dev/virtio/pci/virtio_pci.c
@@ -57,12 +57,15 @@ struct vtpci_softc {
 	struct resource			*vtpci_msix_res;
 	uint64_t			 vtpci_features;
 	uint32_t			 vtpci_flags;
-#define VIRTIO_PCI_FLAG_NO_MSI		 0x0001
-#define VIRTIO_PCI_FLAG_MSI		 0x0002
-#define VIRTIO_PCI_FLAG_NO_MSIX		 0x0010
-#define VIRTIO_PCI_FLAG_MSIX		 0x0020
-#define VIRTIO_PCI_FLAG_SHARED_MSIX	 0x0040
+#define VTPCI_FLAG_NO_MSI		0x0001
+#define VTPCI_FLAG_NO_MSIX		0x0002
+#define VTPCI_FLAG_LEGACY		0x1000
+#define VTPCI_FLAG_MSI			0x2000
+#define VTPCI_FLAG_MSIX			0x4000
+#define VTPCI_FLAG_SHARED_MSIX		0x8000
+#define VTPCI_FLAG_ITYPE_MASK		0xF000
 
+	/* This "bus" will only ever have one child. */
 	device_t			 vtpci_child_dev;
 	struct virtio_feature_desc	*vtpci_child_feat_desc;
 
@@ -80,7 +83,8 @@ struct vtpci_softc {
 	int				 vtpci_nvqs;
 	struct vtpci_virtqueue {
 		struct virtqueue *vq;
-
+		/* Device did not provide a callback for this virtqueue. */
+		int		  no_intr;
 		/* Index into vtpci_intr_res[] below. Unused, then -1. */
 		int		  ires_idx;
 	} vtpci_vqx[VIRTIO_MAX_VIRTQUEUES];
@@ -130,24 +134,39 @@ static void	vtpci_describe_features(struct vtpci_softc *, const char *,
 		    uint64_t);
 static void	vtpci_probe_and_attach_child(struct vtpci_softc *);
 
-static int	vtpci_alloc_interrupts(struct vtpci_softc *, int, int,
-		    struct vq_alloc_info *);
-static int	vtpci_alloc_intr_resources(struct vtpci_softc *, int,
-		    struct vq_alloc_info *);
-static int	vtpci_alloc_msi(struct vtpci_softc *);
-static int	vtpci_alloc_msix(struct vtpci_softc *, int);
+static int 	vtpci_alloc_msix(struct vtpci_softc *, int);
+static int 	vtpci_alloc_msi(struct vtpci_softc *);
+static int 	vtpci_alloc_intr_msix_pervq(struct vtpci_softc *);
+static int 	vtpci_alloc_intr_msix_shared(struct vtpci_softc *);
+static int 	vtpci_alloc_intr_msi(struct vtpci_softc *);
+static int 	vtpci_alloc_intr_legacy(struct vtpci_softc *);
+static int	vtpci_alloc_intr_resources(struct vtpci_softc *);
+
+static int 	vtpci_setup_legacy_interrupt(struct vtpci_softc *,
+		    enum intr_type);
+static int 	vtpci_setup_msix_interrupts(struct vtpci_softc *,
+		    enum intr_type);
+static int 	vtpci_setup_interrupts(struct vtpci_softc *, enum intr_type);
+
 static int	vtpci_register_msix_vector(struct vtpci_softc *, int, int);
+static int 	vtpci_set_host_msix_vectors(struct vtpci_softc *);
+static int 	vtpci_reinit_virtqueue(struct vtpci_softc *, int);
 
 static void	vtpci_free_interrupts(struct vtpci_softc *);
 static void	vtpci_free_virtqueues(struct vtpci_softc *);
+static void 	vtpci_cleanup_setup_intr_attempt(struct vtpci_softc *);
 static void	vtpci_release_child_resources(struct vtpci_softc *);
 static void	vtpci_reset(struct vtpci_softc *);
 
+static void	vtpci_select_virtqueue(struct vtpci_softc *, int);
+
 static int	vtpci_legacy_intr(void *);
 static int	vtpci_vq_shared_intr(void *);
 static int	vtpci_vq_intr(void *);
 static int	vtpci_config_intr(void *);
 
+#define vtpci_setup_msi_interrupt vtpci_setup_legacy_interrupt
+
 /*
  * I/O port read/write wrappers.
  */
@@ -252,7 +271,7 @@ vtpci_attach(device_t dev)
 	}
 
 	if (pci_find_cap(dev, PCIY_MSI, NULL) != 0)
-		sc->vtpci_flags |= VIRTIO_PCI_FLAG_NO_MSI;
+		sc->vtpci_flags |= VTPCI_FLAG_NO_MSI;
 
 	if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) {
 		rid = PCIR_BAR(1);
@@ -261,7 +280,7 @@ vtpci_attach(device_t dev)
 	}
 
 	if (sc->vtpci_msix_res == NULL)
-		sc->vtpci_flags |= VIRTIO_PCI_FLAG_NO_MSIX;
+		sc->vtpci_flags |= VTPCI_FLAG_NO_MSIX;
 
 	vtpci_reset(sc);
 
@@ -372,6 +391,16 @@ vtpci_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
 
 	switch (index) {
 	case VIRTIO_IVAR_DEVTYPE:
+	case VIRTIO_IVAR_SUBDEVICE:
+		*result = pci_get_subdevice(dev);
+		break;
+	case VIRTIO_IVAR_VENDOR:
+		*result = pci_get_vendor(dev);
+		break;
+	case VIRTIO_IVAR_DEVICE:
+		*result = pci_get_device(dev);
+		break;
+	case VIRTIO_IVAR_SUBVENDOR:
 		*result = pci_get_subdevice(dev);
 		break;
 	default:
@@ -442,102 +471,97 @@ vtpci_alloc_virtqueues(device_t dev, int flags, int nvqs,
     struct vq_alloc_info *vq_info)
 {
 	struct vtpci_softc *sc;
+	struct virtqueue *vq;
 	struct vtpci_virtqueue *vqx;
 	struct vq_alloc_info *info;
-	int queue, error;
-	uint16_t vq_size;
+	int idx, error;
+	uint16_t size;
 
 	sc = device_get_softc(dev);
+	error = 0;
 
-	if (sc->vtpci_nvqs != 0 || nvqs <= 0 ||
-	    nvqs > VIRTIO_MAX_VIRTQUEUES)
+	if (sc->vtpci_nvqs != 0)
+		return (EALREADY);
+	if (nvqs <= 0 || nvqs > VIRTIO_MAX_VIRTQUEUES)
 		return (EINVAL);
 
-	error = vtpci_alloc_interrupts(sc, flags, nvqs, vq_info);
-	if (error) {
-		device_printf(dev, "cannot allocate interrupts\n");
-		return (error);
-	}
+	if (flags & VIRTIO_ALLOC_VQS_DISABLE_MSIX)
+		sc->vtpci_flags |= VTPCI_FLAG_NO_MSIX;
 
-	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
-		error = vtpci_register_msix_vector(sc,
-		    VIRTIO_MSI_CONFIG_VECTOR, 0);
-		if (error)
-			return (error);
-	}
+	for (idx = 0; idx < nvqs; idx++) {
+		vqx = &sc->vtpci_vqx[idx];
+		info = &vq_info[idx];
 
-	for (queue = 0; queue < nvqs; queue++) {
-		vqx = &sc->vtpci_vqx[queue];
-		info = &vq_info[queue];
+		vtpci_select_virtqueue(sc, idx);
+		size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
 
-		vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, queue);
-
-		vq_size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
-		error = virtqueue_alloc(dev, queue, vq_size,
-		    VIRTIO_PCI_VRING_ALIGN, 0xFFFFFFFFUL, info, &vqx->vq);
-		if (error)
-			return (error);
-
-		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
-			error = vtpci_register_msix_vector(sc,
-			    VIRTIO_MSI_QUEUE_VECTOR, vqx->ires_idx);
-			if (error)
-				return (error);
+		error = virtqueue_alloc(dev, idx, size, VIRTIO_PCI_VRING_ALIGN,
+		    0xFFFFFFFFUL, info, &vq);
+		if (error) {
+			device_printf(dev,
+			    "cannot allocate virtqueue %d: %d\n", idx, error);
+			break;
 		}
 
 		vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN,
-		    virtqueue_paddr(vqx->vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
+		    virtqueue_paddr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
+
+		vqx->vq = *info->vqai_vq = vq;
+		vqx->no_intr = info->vqai_intr == NULL;
 
-		*info->vqai_vq = vqx->vq;
 		sc->vtpci_nvqs++;
 	}
 
-	return (0);
+	return (error);
 }
 
 static int
 vtpci_setup_intr(device_t dev, enum intr_type type)
 {
 	struct vtpci_softc *sc;
-	struct vtpci_intr_resource *ires;
-	struct vtpci_virtqueue *vqx;
-	int i, flags, error;
+	int attempt, error;
 
 	sc = device_get_softc(dev);
-	flags = type | INTR_MPSAFE;
-	ires = &sc->vtpci_intr_res[0];
 
-	if ((sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) == 0) {
-		error = bus_setup_intr(dev, ires->irq, flags,
-		    vtpci_legacy_intr, NULL, sc, &ires->intrhand);
+	for (attempt = 0; attempt < 5; attempt++) {
+		/*
+		 * Start with the most desirable interrupt configuration and
+		 * fallback towards less desirable ones.
+		 * fall back to less desirable ones.
+		switch (attempt) {
+		case 0:
+			error = vtpci_alloc_intr_msix_pervq(sc);
+			break;
+		case 1:
+			error = vtpci_alloc_intr_msix_shared(sc);
+			break;
+		case 2:
+			error = vtpci_alloc_intr_msi(sc);
+			break;
+		case 3:
+			error = vtpci_alloc_intr_legacy(sc);
+			break;
+		default:
+			device_printf(dev,
+			    "exhausted all interrupt allocation attempts\n");
+			return (ENXIO);
+		}
 
-		return (error);
+		if (error == 0 && vtpci_setup_interrupts(sc, type) == 0)
+			break;
+
+		vtpci_cleanup_setup_intr_attempt(sc);
 	}
 
-	error = bus_setup_intr(dev, ires->irq, flags, vtpci_config_intr,
-	    NULL, sc, &ires->intrhand);
-	if (error)
-		return (error);
-
-	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX) {
-		ires = &sc->vtpci_intr_res[1];
-		error = bus_setup_intr(dev, ires->irq, flags,
-		    vtpci_vq_shared_intr, NULL, sc, &ires->intrhand);
-
-		return (error);
-	}
-
-	/* Setup an interrupt handler for each virtqueue. */
-	for (i = 0; i < sc->vtpci_nvqs; i++) {
-		vqx = &sc->vtpci_vqx[i];
-		if (vqx->ires_idx < 1)
-			continue;
-
-		ires = &sc->vtpci_intr_res[vqx->ires_idx];
-		error = bus_setup_intr(dev, ires->irq, flags,
-		    vtpci_vq_intr, NULL, vqx->vq, &ires->intrhand);
-		if (error)
-			return (error);
+	if (bootverbose) {
+		if (sc->vtpci_flags & VTPCI_FLAG_LEGACY)
+			device_printf(dev, "using legacy interrupt\n");
+		else if (sc->vtpci_flags & VTPCI_FLAG_MSI)
+			device_printf(dev, "using MSI interrupt\n");
+		else if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX)
+			device_printf(dev, "using shared MSIX interrupts\n");
+		else
+			device_printf(dev, "using per VQ MSIX interrupts\n");
 	}
 
 	return (0);
@@ -554,20 +578,19 @@ static int
 vtpci_reinit(device_t dev, uint64_t features)
 {
 	struct vtpci_softc *sc;
-	struct vtpci_virtqueue *vqx;
-	struct virtqueue *vq;
-	int queue, error;
-	uint16_t vq_size;
+	int idx, error;
 
 	sc = device_get_softc(dev);
 
 	/*
-	 * Redrive the device initialization. This is a bit of an abuse
-	 * of the specification, but both VirtualBox and QEMU/KVM seem
-	 * to play nice. We do not allow the host device to change from
-	 * what was originally negotiated beyond what the guest driver
-	 * changed (MSIX state should not change, number of virtqueues
-	 * and their size remain the same, etc).
+	 * Redrive the device initialization. This is a bit of an abuse of
+	 * the specification, but VirtualBox, QEMU/KVM, and BHyVe seem to
+	 * play nice.
+	 *
+	 * We do not allow the host device to change from what was originally
+	 * negotiated beyond what the guest driver changed. MSIX state should
+	 * not change, number of virtqueues and their size remain the same, etc.
+	 * This will need to be rethought when we want to support migration.
 	 */
 
 	if (vtpci_get_status(dev) != VIRTIO_CONFIG_STATUS_RESET)
@@ -582,34 +605,16 @@ vtpci_reinit(device_t dev, uint64_t features)
 
 	vtpci_negotiate_features(dev, features);
 
-	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
-		error = vtpci_register_msix_vector(sc,
-		    VIRTIO_MSI_CONFIG_VECTOR, 0);
+	for (idx = 0; idx < sc->vtpci_nvqs; idx++) {
+		error = vtpci_reinit_virtqueue(sc, idx);
 		if (error)
 			return (error);
 	}
 
-	for (queue = 0; queue < sc->vtpci_nvqs; queue++) {
-		vqx = &sc->vtpci_vqx[queue];
-		vq = vqx->vq;
-
-		KASSERT(vq != NULL, ("vq %d not allocated", queue));
-		vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, queue);
-
-		vq_size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
-		error = virtqueue_reinit(vq, vq_size);
+	if (sc->vtpci_flags & VTPCI_FLAG_MSIX) {
+		error = vtpci_set_host_msix_vectors(sc);
 		if (error)
 			return (error);
-
-		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
-			error = vtpci_register_msix_vector(sc,
-			    VIRTIO_MSI_QUEUE_VECTOR, vqx->ires_idx);
-			if (error)
-				return (error);
-		}
-
-		vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN,
-		    virtqueue_paddr(vqx->vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
 	}
 
 	return (0);
@@ -744,7 +749,6 @@ vtpci_probe_and_attach_child(struct vtpci_softc *sc)
 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED);
 		vtpci_reset(sc);
 		vtpci_release_child_resources(sc);
-
 		/* Reset status for future attempt. */
 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
 	} else
@@ -752,42 +756,126 @@ vtpci_probe_and_attach_child(struct vtpci_softc *sc)
 }
 
 static int
-vtpci_alloc_interrupts(struct vtpci_softc *sc, int flags, int nvqs,
-    struct vq_alloc_info *vq_info)
+vtpci_alloc_msix(struct vtpci_softc *sc, int nvectors)
 {
-	int i, nvectors, error;
+	device_t dev;
+	int nmsix, cnt, required;
 
-	/*
-	 * Only allocate a vector for virtqueues that are actually
-	 * expecting an interrupt.
-	 */
-	for (nvectors = 0, i = 0; i < nvqs; i++)
-		if (vq_info[i].vqai_intr != NULL)
-			nvectors++;
+	dev = sc->vtpci_dev;
 
-	if (vtpci_disable_msix != 0 ||
-	    sc->vtpci_flags & VIRTIO_PCI_FLAG_NO_MSIX ||
-	    flags & VIRTIO_ALLOC_VQS_DISABLE_MSIX ||
-	    vtpci_alloc_msix(sc, nvectors) != 0) {
-		/*
-		 * Use MSI interrupts if available. Otherwise, we fallback
-		 * to legacy interrupts.
-		 */
-		if ((sc->vtpci_flags & VIRTIO_PCI_FLAG_NO_MSI) == 0 &&
-		    vtpci_alloc_msi(sc) == 0)
-			sc->vtpci_flags |= VIRTIO_PCI_FLAG_MSI;
+	/* Allocate an additional vector for the config changes. */
+	required = nvectors + 1;
 
-		sc->vtpci_nintr_res = 1;
+	nmsix = pci_msix_count(dev);
+	if (nmsix < required)
+		return (1);
+
+	cnt = required;
+	if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
+		sc->vtpci_nintr_res = required;
+		return (0);
 	}
 
-	error = vtpci_alloc_intr_resources(sc, nvqs, vq_info);
+	pci_release_msi(dev);
 
-	return (error);
+	return (1);
 }
 
 static int
-vtpci_alloc_intr_resources(struct vtpci_softc *sc, int nvqs,
-    struct vq_alloc_info *vq_info)
+vtpci_alloc_msi(struct vtpci_softc *sc)
+{
+	device_t dev;
+	int nmsi, cnt, required;
+
+	dev = sc->vtpci_dev;
+	required = 1;
+
+	nmsi = pci_msi_count(dev);
+	if (nmsi < required)
+		return (1);
+
+	cnt = required;
+	if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) {
+		sc->vtpci_nintr_res = required;
+		return (0);
+	}
+
+	pci_release_msi(dev);
+
+	return (1);
+}
+
+static int
+vtpci_alloc_intr_msix_pervq(struct vtpci_softc *sc)
+{
+	int i, nvectors, error;
+
+	if (vtpci_disable_msix != 0 ||
+	    sc->vtpci_flags & VTPCI_FLAG_NO_MSIX)
+		return (ENOTSUP);
+
+	for (nvectors = 0, i = 0; i < sc->vtpci_nvqs; i++) {
+		if (sc->vtpci_vqx[i].no_intr == 0)
+			nvectors++;
+	}
+
+	error = vtpci_alloc_msix(sc, nvectors);
+	if (error)
+		return (error);
+
+	sc->vtpci_flags |= VTPCI_FLAG_MSIX;
+
+	return (0);
+}
+
+static int
+vtpci_alloc_intr_msix_shared(struct vtpci_softc *sc)
+{
+	int error;
+
+	if (vtpci_disable_msix != 0 ||
+	    sc->vtpci_flags & VTPCI_FLAG_NO_MSIX)
+		return (ENOTSUP);
+
+	error = vtpci_alloc_msix(sc, 1);
+	if (error)
+		return (error);
+
+	sc->vtpci_flags |= VTPCI_FLAG_MSIX | VTPCI_FLAG_SHARED_MSIX;
+
+	return (0);
+}
+
+static int
+vtpci_alloc_intr_msi(struct vtpci_softc *sc)
+{
+	int error;
+
+	/* Only BHyVe supports MSI. */
+	if (sc->vtpci_flags & VTPCI_FLAG_NO_MSI)
+		return (ENOTSUP);
+
+	error = vtpci_alloc_msi(sc);
+	if (error)
+		return (error);
+
+	sc->vtpci_flags |= VTPCI_FLAG_MSI;
+
+	return (0);
+}
+
+static int
+vtpci_alloc_intr_legacy(struct vtpci_softc *sc)
+{
+
+	sc->vtpci_flags |= VTPCI_FLAG_LEGACY;
+	sc->vtpci_nintr_res = 1;
+
+	return (0);
+}
+
+static int
+vtpci_alloc_intr_resources(struct vtpci_softc *sc)
 {
 	device_t dev;
 	struct resource *irq;
@@ -795,14 +883,14 @@ vtpci_alloc_intr_resources(struct vtpci_softc *sc, int nvqs,
 	int i, rid, flags, res_idx;
 
 	dev = sc->vtpci_dev;
-	flags = RF_ACTIVE;
 
-	if ((sc->vtpci_flags &
-	    (VIRTIO_PCI_FLAG_MSI | VIRTIO_PCI_FLAG_MSIX)) == 0) {
+	if (sc->vtpci_flags & VTPCI_FLAG_LEGACY) {
 		rid = 0;
-		flags |= RF_SHAREABLE;
-	} else
+		flags = RF_ACTIVE | RF_SHAREABLE;
+	} else {
 		rid = 1;
+		flags = RF_ACTIVE;
+	}
 
 	for (i = 0; i < sc->vtpci_nintr_res; i++) {
 		irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, flags);
@@ -814,16 +902,16 @@ vtpci_alloc_intr_resources(struct vtpci_softc *sc, int nvqs,
 	}
 
 	/*
-	 * Map the virtqueue into the correct index in vq_intr_res[]. Note the
-	 * first index is reserved for configuration changes notifications.
+	 * Map the virtqueue into the correct index in vq_intr_res[]. The
+	 * first index is reserved for configuration change notifications.
 	 */
-	for (i = 0, res_idx = 1; i < nvqs; i++) {
+	for (i = 0, res_idx = 1; i < sc->vtpci_nvqs; i++) {
 		vqx = &sc->vtpci_vqx[i];
 
-		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
-			if (vq_info[i].vqai_intr == NULL)
+		if (sc->vtpci_flags & VTPCI_FLAG_MSIX) {
+			if (vqx->no_intr != 0)
 				vqx->ires_idx = -1;
-			else if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX)
+			else if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX)
 				vqx->ires_idx = res_idx;
 			else
 				vqx->ires_idx = res_idx++;
@@ -835,110 +923,180 @@ vtpci_alloc_intr_resources(struct vtpci_softc *sc, int nvqs,
 }
 
 static int
-vtpci_alloc_msi(struct vtpci_softc *sc)
+vtpci_setup_legacy_interrupt(struct vtpci_softc *sc, enum intr_type type)
 {
 	device_t dev;
-	int nmsi, cnt;
+	struct vtpci_intr_resource *ires;
+	int error;
 
 	dev = sc->vtpci_dev;
-	nmsi = pci_msi_count(dev);
 
-	if (nmsi < 1)
-		return (1);
+	ires = &sc->vtpci_intr_res[0];
+	error = bus_setup_intr(dev, ires->irq, type, vtpci_legacy_intr, NULL,
+	    sc, &ires->intrhand);
 
-	cnt = 1;
-	if (pci_alloc_msi(dev, &cnt) == 0 && cnt == 1)
-		return (0);
-
-	return (1);
+	return (error);
 }
 
 static int
-vtpci_alloc_msix(struct vtpci_softc *sc, int nvectors)
+vtpci_setup_msix_interrupts(struct vtpci_softc *sc, enum intr_type type)
 {
 	device_t dev;
-	int nmsix, cnt, required;
+	struct vtpci_intr_resource *ires;
+	struct vtpci_virtqueue *vqx;
+	int i, error;
 
 	dev = sc->vtpci_dev;
 
-	nmsix = pci_msix_count(dev);
-	if (nmsix < 1)
-		return (1);
+	/*
+	 * The first resource is used for configuration change interrupts.
+	 */
+	ires = &sc->vtpci_intr_res[0];
+	error = bus_setup_intr(dev, ires->irq, type, vtpci_config_intr,
+	    NULL, sc, &ires->intrhand);
+	if (error)
+		return (error);
 
-	/* An additional vector is needed for the config changes. */
-	required = nvectors + 1;
-	if (nmsix >= required) {
-		cnt = required;
-		if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required)
-			goto out;
+	if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) {
+		ires = &sc->vtpci_intr_res[1];
 
-		pci_release_msi(dev);
-	}
+		error = bus_setup_intr(dev, ires->irq, type,
+		    vtpci_vq_shared_intr, NULL, sc, &ires->intrhand);
+		if (error)
+			return (error);
+	} else {
+		/*
+		 * Each remaining resource is assigned to a specific virtqueue.
+		 */
+		for (i = 0; i < sc->vtpci_nvqs; i++) {
+			vqx = &sc->vtpci_vqx[i];
+			if (vqx->ires_idx < 1)
+				continue;
 
-	/* Attempt shared MSIX configuration. */
-	required = 2;
-	if (nmsix >= required) {
-		cnt = required;
-		if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
-			sc->vtpci_flags |= VIRTIO_PCI_FLAG_SHARED_MSIX;
-			goto out;
+			ires = &sc->vtpci_intr_res[vqx->ires_idx];
+			error = bus_setup_intr(dev, ires->irq, type,
+			    vtpci_vq_intr, NULL, vqx->vq, &ires->intrhand);
+			if (error)
+				return (error);
 		}
-
-		pci_release_msi(dev);
 	}
 
-	return (1);
-
-out:
-	sc->vtpci_nintr_res = required;
-	sc->vtpci_flags |= VIRTIO_PCI_FLAG_MSIX;
-
-	if (bootverbose) {
-		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX)
-			device_printf(dev, "using shared virtqueue MSIX\n");
-		else
-			device_printf(dev, "using per virtqueue MSIX\n");
-	}
+	error = vtpci_set_host_msix_vectors(sc);
+	if (error)
+		return (error);
 
 	return (0);
 }
 
+static int
+vtpci_setup_interrupts(struct vtpci_softc *sc, enum intr_type type)
+{
+	int error;
+
+	type |= INTR_MPSAFE;
+	KASSERT(sc->vtpci_flags & VTPCI_FLAG_ITYPE_MASK,
+	    ("no interrupt type selected: %#x", sc->vtpci_flags));
+
+	error = vtpci_alloc_intr_resources(sc);
+	if (error)
+		return (error);
+
+	if (sc->vtpci_flags & VTPCI_FLAG_LEGACY)
+		error = vtpci_setup_legacy_interrupt(sc, type);
+	else if (sc->vtpci_flags & VTPCI_FLAG_MSI)
+		error = vtpci_setup_msi_interrupt(sc, type);
+	else
+		error = vtpci_setup_msix_interrupts(sc, type);
+
+	return (error);
+}
+
 static int
 vtpci_register_msix_vector(struct vtpci_softc *sc, int offset, int res_idx)
 {
 	device_t dev;
-	uint16_t vector;
+	uint16_t vector, rdvector;
 
 	dev = sc->vtpci_dev;
 
-	if (offset != VIRTIO_MSI_CONFIG_VECTOR &&
-	    offset != VIRTIO_MSI_QUEUE_VECTOR)
-		return (EINVAL);
-
 	if (res_idx != -1) {
-		/* Map from rid to host vector. */
+		/* Map from guest rid to host vector. */
 		vector = sc->vtpci_intr_res[res_idx].rid - 1;
 	} else
 		vector = VIRTIO_MSI_NO_VECTOR;
 
-	/* The first resource is special; make sure it is used correctly. */
+	/*
+	 * Assert that the first resource is always used for
+	 * configuration change interrupts.
+	 */
 	if (res_idx == 0) {
-		KASSERT(vector == 0, ("unexpected config vector"));
-		KASSERT(offset == VIRTIO_MSI_CONFIG_VECTOR,
-		    ("unexpected config offset"));
-	}
+		KASSERT(vector == 0 && offset == VIRTIO_MSI_CONFIG_VECTOR,
+		    ("bad first res use vector:%d offset:%d", vector, offset));
+	} else
+		KASSERT(offset == VIRTIO_MSI_QUEUE_VECTOR, ("bad offset"));
 
 	vtpci_write_config_2(sc, offset, vector);
 
-	if (vtpci_read_config_2(sc, offset) != vector) {
-		device_printf(dev, "insufficient host resources for "
-		    "MSIX interrupts\n");
+	/* Read vector to determine if the host had sufficient resources. */
+	rdvector = vtpci_read_config_2(sc, offset);
+	if (rdvector != vector) {
+		device_printf(dev,
+		    "insufficient host resources for MSIX interrupts\n");
 		return (ENODEV);
 	}
 
 	return (0);
 }
 
+static int
+vtpci_set_host_msix_vectors(struct vtpci_softc *sc)
+{
+	struct vtpci_virtqueue *vqx;
+	int idx, error;
+
+	error = vtpci_register_msix_vector(sc, VIRTIO_MSI_CONFIG_VECTOR, 0);
+	if (error)
+		return (error);
+
+	for (idx = 0; idx < sc->vtpci_nvqs; idx++) {
+		vqx = &sc->vtpci_vqx[idx];
+
+		vtpci_select_virtqueue(sc, idx);
+		error = vtpci_register_msix_vector(sc, VIRTIO_MSI_QUEUE_VECTOR,
+		    vqx->ires_idx);
+		if (error)
+			return (error);
+	}
+
+	return (0);
+}
+
+static int
+vtpci_reinit_virtqueue(struct vtpci_softc *sc, int idx)
+{
+	struct vtpci_virtqueue *vqx;
+	struct virtqueue *vq;
+	int error;
+	uint16_t size;
+
+	vqx = &sc->vtpci_vqx[idx];
+	vq = vqx->vq;
+
+	KASSERT(vq != NULL, ("vq %d not allocated", idx));
+
+	vtpci_select_virtqueue(sc, idx);
+	size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
+
+	error = virtqueue_reinit(vq, size);
+	if (error)
+		return (error);
+
+	vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN,
+	    virtqueue_paddr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
+
+	return (0);
+}
+
 static void
 vtpci_free_interrupts(struct vtpci_softc *sc)
 {
@@ -947,15 +1105,8 @@ vtpci_free_interrupts(struct vtpci_softc *sc)
 	int i;
 
 	dev = sc->vtpci_dev;
-	sc->vtpci_nintr_res = 0;
 
-	if (sc->vtpci_flags & (VIRTIO_PCI_FLAG_MSI | VIRTIO_PCI_FLAG_MSIX)) {
-		pci_release_msi(dev);
-		sc->vtpci_flags &= ~(VIRTIO_PCI_FLAG_MSI |
-		    VIRTIO_PCI_FLAG_MSIX | VIRTIO_PCI_FLAG_SHARED_MSIX);
-	}
-
-	for (i = 0; i < 1 + VIRTIO_MAX_VIRTQUEUES; i++) {
+	for (i = 0; i < sc->vtpci_nintr_res; i++) {
 		ires = &sc->vtpci_intr_res[i];
 
 		if (ires->intrhand != NULL) {
@@ -971,6 +1122,12 @@ vtpci_free_interrupts(struct vtpci_softc *sc)
 
 		ires->rid = -1;
 	}
+
+	if (sc->vtpci_flags & (VTPCI_FLAG_MSI | VTPCI_FLAG_MSIX))
+		pci_release_msi(dev);
+
+	sc->vtpci_nintr_res = 0;
+	sc->vtpci_flags &= ~VTPCI_FLAG_ITYPE_MASK;
 }
 
 static void
@@ -979,16 +1136,33 @@ vtpci_free_virtqueues(struct vtpci_softc *sc)
 	struct vtpci_virtqueue *vqx;
 	int i;
 
-	sc->vtpci_nvqs = 0;
-
-	for (i = 0; i < VIRTIO_MAX_VIRTQUEUES; i++) {
+	for (i = 0; i < sc->vtpci_nvqs; i++) {
 		vqx = &sc->vtpci_vqx[i];
 
-		if (vqx->vq != NULL) {
-			virtqueue_free(vqx->vq);
-			vqx->vq = NULL;
+		virtqueue_free(vqx->vq);
+		vqx->vq = NULL;
+	}
+
+	sc->vtpci_nvqs = 0;
+}
+
+static void
+vtpci_cleanup_setup_intr_attempt(struct vtpci_softc *sc)
+{
+	int idx;
+
+	if (sc->vtpci_flags & VTPCI_FLAG_MSIX) {
+		vtpci_write_config_2(sc, VIRTIO_MSI_CONFIG_VECTOR,
+		    VIRTIO_MSI_NO_VECTOR);
+
+		for (idx = 0; idx < sc->vtpci_nvqs; idx++) {
+			vtpci_select_virtqueue(sc, idx);
+			vtpci_write_config_2(sc, VIRTIO_MSI_QUEUE_VECTOR,
+			    VIRTIO_MSI_NO_VECTOR);
 		}
 	}
+
+	vtpci_free_interrupts(sc);
 }
 
 static void
@@ -1010,6 +1184,13 @@ vtpci_reset(struct vtpci_softc *sc)
 	vtpci_set_status(sc->vtpci_dev, VIRTIO_CONFIG_STATUS_RESET);
 }
 
+static void
+vtpci_select_virtqueue(struct vtpci_softc *sc, int idx)
+{
+
+	vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, idx);
+}
+
 static int
 vtpci_legacy_intr(void *xsc)
 {
diff --git a/sys/dev/virtio/pci/virtio_pci.h b/sys/dev/virtio/pci/virtio_pci.h
index d8daa31e2d14..485cf4ff5b1c 100644
--- a/sys/dev/virtio/pci/virtio_pci.h
+++ b/sys/dev/virtio/pci/virtio_pci.h
@@ -73,7 +73,7 @@
  * configuration space.
  */
 #define VIRTIO_PCI_CONFIG(sc) \
-    (((sc)->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) ? 24 : 20)
+    (((sc)->vtpci_flags & VTPCI_FLAG_MSIX) ? 24 : 20)
 
 /*
  * How many bits to shift physical queue address written to QUEUE_PFN.
diff --git a/sys/dev/virtio/virtio.c b/sys/dev/virtio/virtio.c
index e38557542441..1cac3c7d15a6 100644
--- a/sys/dev/virtio/virtio.c
+++ b/sys/dev/virtio/virtio.c
@@ -86,28 +86,6 @@ virtio_device_name(uint16_t devid)
 	return (NULL);
 }
 
-int
-virtio_get_device_type(device_t dev)
-{
-	uintptr_t devtype;
-
-	devtype = -1;
-
-	BUS_READ_IVAR(device_get_parent(dev), dev,
-	    VIRTIO_IVAR_DEVTYPE, &devtype);
-
-	return ((int) devtype);
-}
-
-void
-virtio_set_feature_desc(device_t dev,
-    struct virtio_feature_desc *feature_desc)
-{
-
-	BUS_WRITE_IVAR(device_get_parent(dev), dev,
-	    VIRTIO_IVAR_FEATURE_DESC, (uintptr_t) feature_desc);
-}
-
 void
 virtio_describe(device_t dev, const char *msg,
     uint64_t features, struct virtio_feature_desc *feature_desc)
@@ -184,6 +162,21 @@ virtio_feature_name(uint64_t val, struct virtio_feature_desc *feature_desc)
  * VirtIO bus method wrappers.
  */
 
+void
+virtio_read_ivar(device_t dev, int ivar, uintptr_t *val)
+{
+
+	*val = -1;
+	BUS_READ_IVAR(device_get_parent(dev), dev, ivar, val);
+}
+
+void
+virtio_write_ivar(device_t dev, int ivar, uintptr_t val)
+{
+
+	BUS_WRITE_IVAR(device_get_parent(dev), dev, ivar, val);
+}
+
 uint64_t
 virtio_negotiate_features(device_t dev, uint64_t child_features)
 {
diff --git a/sys/dev/virtio/virtio.h b/sys/dev/virtio/virtio.h
index e0ecfb9c0dd9..8c22b12e7c9f 100644
--- a/sys/dev/virtio/virtio.h
+++ b/sys/dev/virtio/virtio.h
@@ -91,6 +91,10 @@ struct vq_alloc_info;
  */
 #define VIRTIO_IVAR_DEVTYPE		1
 #define VIRTIO_IVAR_FEATURE_DESC	2
+#define VIRTIO_IVAR_VENDOR		3
+#define VIRTIO_IVAR_DEVICE		4
+#define VIRTIO_IVAR_SUBVENDOR		5
+#define VIRTIO_IVAR_SUBDEVICE		6
 
 struct virtio_feature_desc {
 	uint64_t	 vfd_val;
@@ -98,15 +102,14 @@ struct virtio_feature_desc {
 };
 
 const char *virtio_device_name(uint16_t devid);
-int	 virtio_get_device_type(device_t dev);
-void	 virtio_set_feature_desc(device_t dev,
-	     struct virtio_feature_desc *feature_desc);
 void	 virtio_describe(device_t dev, const char *msg,
 	     uint64_t features, struct virtio_feature_desc *feature_desc);
 
 /*
  * VirtIO Bus Methods.
  */
+void	 virtio_read_ivar(device_t dev, int ivar, uintptr_t *val);
+void	 virtio_write_ivar(device_t dev, int ivar, uintptr_t val);
 uint64_t virtio_negotiate_features(device_t dev, uint64_t child_features);
 int	 virtio_alloc_virtqueues(device_t dev, int flags, int nvqs,
 	     struct vq_alloc_info *info);
@@ -148,4 +151,28 @@ VIRTIO_RDWR_DEVICE_CONFIG(1, uint8_t);
 VIRTIO_RDWR_DEVICE_CONFIG(2, uint16_t);
 VIRTIO_RDWR_DEVICE_CONFIG(4, uint32_t);
 
+#define VIRTIO_READ_IVAR(name, ivar)					\
+static inline int							\
+__CONCAT(virtio_get_,name)(device_t dev)				\
+{									\
+	uintptr_t val;							\
+	virtio_read_ivar(dev, ivar, &val);				\
+	return ((int) val);						\
+}
+
+VIRTIO_READ_IVAR(device_type,	VIRTIO_IVAR_DEVTYPE);
+VIRTIO_READ_IVAR(vendor,	VIRTIO_IVAR_VENDOR);
+VIRTIO_READ_IVAR(device,	VIRTIO_IVAR_DEVICE);
+VIRTIO_READ_IVAR(subvendor,	VIRTIO_IVAR_SUBVENDOR);
+VIRTIO_READ_IVAR(subdevice,	VIRTIO_IVAR_SUBDEVICE);
+
+#define VIRTIO_WRITE_IVAR(name, ivar)					\
+static inline void							\
+__CONCAT(virtio_set_,name)(device_t dev, void *val)			\
+{									\
+	virtio_write_ivar(dev, ivar, (uintptr_t) val);			\
+}
+
+VIRTIO_WRITE_IVAR(feature_desc,	VIRTIO_IVAR_FEATURE_DESC);
+
 #endif /* _VIRTIO_H_ */
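
The generated accessors make the new ivars one-liners to consume. A
hypothetical child-driver fragment (0x1AF4 is the VirtIO PCI vendor ID;
vtfoo_feature_desc is a placeholder):

	static void
	vtfoo_identify(device_t dev)
	{
		/* virtio_get_* come from VIRTIO_READ_IVAR above. */
		if (virtio_get_vendor(dev) == 0x1AF4)
			device_printf(dev, "virtio device 0x%04x, type %d\n",
			    virtio_get_device(dev),
			    virtio_get_device_type(dev));

		/* virtio_set_feature_desc() is now macro-generated too. */
		virtio_set_feature_desc(dev, vtfoo_feature_desc);
	}
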
diff --git a/sys/dev/virtio/virtio_ring.h b/sys/dev/virtio/virtio_ring.h
index 0d0001334aa6..8e902ab59c2b 100644
--- a/sys/dev/virtio/virtio_ring.h
+++ b/sys/dev/virtio/virtio_ring.h
@@ -120,8 +120,8 @@ struct vring {
  * We publish the used event index at the end of the available ring, and vice
  * versa. They are at the end for backwards compatibility.
  */
-#define	vring_used_event(vr)	((vr)->avail->ring[(vr)->num])
-#define	vring_avail_event(vr)	(*(uint16_t *)&(vr)->used->ring[(vr)->num])
+#define vring_used_event(vr)	((vr)->avail->ring[(vr)->num])
+#define vring_avail_event(vr)	(*(uint16_t *)&(vr)->used->ring[(vr)->num])
 
 static inline int
 vring_size(unsigned int num, unsigned long align)
@@ -129,10 +129,11 @@ vring_size(unsigned int num, unsigned long align)
 	int size;
 
 	size = num * sizeof(struct vring_desc);
-	size += sizeof(struct vring_avail) + (num * sizeof(uint16_t));
+	size += sizeof(struct vring_avail) + (num * sizeof(uint16_t)) +
+	    sizeof(uint16_t);
 	size = (size + align - 1) & ~(align - 1);
 	size += sizeof(struct vring_used) +
-	    (num * sizeof(struct vring_used_elem));
+	    (num * sizeof(struct vring_used_elem)) + sizeof(uint16_t);
 	return (size);
 }
 
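The two added sizeof(uint16_t) terms reserve room for the used_event and
avail_event fields that trail the avail and used rings when
VIRTIO_RING_F_EVENT_IDX is negotiated. A worked example, assuming
num = 256 and align = 4096:

	/*
	 * descriptors: 256 * 16                = 4096
	 * avail: 4 + 256 * 2 + 2 (used_event)  =  518 -> 4614
	 * aligned up to 4096                   -> 8192
	 * used:  4 + 256 * 8 + 2 (avail_event) = 2054 -> 10246 total
	 */
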
diff --git a/sys/dev/virtio/virtqueue.c b/sys/dev/virtio/virtqueue.c
index 31f47d0380dc..83d39ba2ed94 100644
--- a/sys/dev/virtio/virtqueue.c
+++ b/sys/dev/virtio/virtqueue.c
@@ -127,6 +127,7 @@ static uint16_t	vq_ring_enqueue_segments(struct virtqueue *,
 static int	vq_ring_use_indirect(struct virtqueue *, int);
 static void	vq_ring_enqueue_indirect(struct virtqueue *, void *,
 		    struct sglist *, int, int);
+static int 	vq_ring_enable_interrupt(struct virtqueue *, uint16_t);
 static int	vq_ring_must_notify_host(struct virtqueue *);
 static void	vq_ring_notify_host(struct virtqueue *);
 static void	vq_ring_free_chain(struct virtqueue *, uint16_t);
@@ -311,7 +312,7 @@ virtqueue_reinit(struct virtqueue *vq, uint16_t size)
 	/* Warn if the virtqueue was not properly cleaned up. */
 	if (vq->vq_free_cnt != vq->vq_nentries) {
 		device_printf(vq->vq_dev,
-		    "%s: warning, '%s' virtqueue not empty, "
+		    "%s: warning '%s' virtqueue not empty, "
 		    "leaking %d entries\n", __func__, vq->vq_name,
 		    vq->vq_nentries - vq->vq_free_cnt);
 	}
@@ -390,6 +391,7 @@ virtqueue_full(struct virtqueue *vq)
 void
 virtqueue_notify(struct virtqueue *vq)
 {
+
 	/* Ensure updated avail->idx is visible to host. */
 	mb();
 
@@ -428,58 +430,22 @@ int
 virtqueue_enable_intr(struct virtqueue *vq)
 {
 
-	/*
-	 * Enable interrupts, making sure we get the latest
-	 * index of what's already been consumed.
-	 */
-	vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
-	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX)
-		vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx;
-       else
-	       vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
-
-	mb();
-
-	/*
-	 * Additional items may have been consumed in the time between
-	 * since we last checked and enabled interrupts above. Let our
-	 * caller know so it processes the new entries.
-	 */
-	if (vq->vq_used_cons_idx != vq->vq_ring.used->idx)
-		return (1);
-
-	return (0);
+	return (vq_ring_enable_interrupt(vq, 0));
 }
 
 int
 virtqueue_postpone_intr(struct virtqueue *vq)
 {
-	uint16_t ndesc;
+	uint16_t ndesc, avail_idx;
 
 	/*
-	 * Postpone until at least half of the available descriptors
-	 * have been consumed.
-	 *
-	 * XXX Adaptive factor? (Linux uses 3/4)
+	 * Request the next interrupt be postponed until at least half
+	 * of the available descriptors have been consumed.
 	 */
-	ndesc = (uint16_t)(vq->vq_ring.avail->idx - vq->vq_used_cons_idx) / 2;
+	avail_idx = vq->vq_ring.avail->idx;
+	ndesc = (uint16_t)(avail_idx - vq->vq_used_cons_idx) / 2;
 
-	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX)
-		vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx + ndesc;
-	else
-		vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
-
-	mb();
-
-	/*
-	 * Enough items may have already been consumed to meet our
-	 * threshold since we last checked. Let our caller know so
-	 * it processes the new entries.
-	 */
-	if (virtqueue_nused(vq) > ndesc)
-		return (1);
-
-	return (0);
+	return (vq_ring_enable_interrupt(vq, ndesc));
 }
 
 void
@@ -751,6 +717,32 @@ vq_ring_enqueue_indirect(struct virtqueue *vq, void *cookie,
 	vq_ring_update_avail(vq, head_idx);
 }
 
+static int
+vq_ring_enable_interrupt(struct virtqueue *vq, uint16_t ndesc)
+{
+
+	/*
+	 * Enable interrupts, making sure we get the latest index of
+	 * what's already been consumed.
+	 */
+	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX)
+		vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx + ndesc;
+	else
+		vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
+
+	mb();
+
+	/*
+	 * Enough items may have already been consumed to meet our threshold
+	 * since we last checked. Let our caller know so it processes the new
+	 * entries.
+	 */
+	if (virtqueue_nused(vq) > ndesc)
+		return (1);
+
+	return (0);
+}
+
 static int
 vq_ring_must_notify_host(struct virtqueue *vq)
 {
@@ -788,8 +780,8 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
 		VQ_RING_ASSERT_CHAIN_TERM(vq);
 
 	vq->vq_free_cnt += dxp->ndescs;
-	dxp->ndescs--;
 
+#ifdef INVARIANTS
 	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
 		while (dp->flags & VRING_DESC_F_NEXT) {
 			VQ_RING_ASSERT_VALID_IDX(vq, dp->next);
@@ -797,7 +789,10 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
 			dxp->ndescs--;
 		}
 	}
-	VQASSERT(vq, dxp->ndescs == 0, "failed to free entire desc chain");
+	VQASSERT(vq, dxp->ndescs == 1,
+	    "failed to free entire desc chain, remaining: %d", dxp->ndescs);
+#endif
+	dxp->ndescs = 0;
 
 	/*
 	 * We must append the existing free chain, if any, to the end of
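
Both virtqueue_enable_intr() and virtqueue_postpone_intr() now funnel
into vq_ring_enable_interrupt() and keep its return-1-if-entries-raced
contract. A sketch of the consumer loop that contract supports (the
vtfoo_* names and process_one() are placeholders):

	static void
	vtfoo_vq_drain(struct vtfoo_softc *sc, struct virtqueue *vq)
	{
		void *cookie;

		for (;;) {
			while ((cookie = virtqueue_dequeue(vq, NULL)) != NULL)
				process_one(sc, cookie);

			/* A return of 1 means new entries raced the enable. */
			if (virtqueue_enable_intr(vq) == 0)
				break;
			virtqueue_disable_intr(vq);
		}
	}
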
diff --git a/sys/dev/virtio/virtqueue.h b/sys/dev/virtio/virtqueue.h
index a84d7a144698..0296b8c2fba4 100644
--- a/sys/dev/virtio/virtqueue.h
+++ b/sys/dev/virtio/virtqueue.h
@@ -35,11 +35,7 @@ struct sglist;
 /* Support for indirect buffer descriptors. */
 #define VIRTIO_RING_F_INDIRECT_DESC	(1 << 28)
 
-/* The guest publishes the used index for which it expects an interrupt
- * at the end of the avail ring. Host should ignore the avail->flags field.
- * The host publishes the avail index for which it expects a kick
- * at the end of the used ring. Guest should ignore the used->flags field.
- */
+/* Support for suppressing interrupts until a specific index is reached. */
 #define VIRTIO_RING_F_EVENT_IDX		(1 << 29)
 
 /* Device callback for a virtqueue interrupt. */