From d49fc192c14136825eb5c5fd1ebb957c0b909cc0 Mon Sep 17 00:00:00 2001 From: Justin Hibbits Date: Sat, 2 Feb 2019 04:15:16 +0000 Subject: [PATCH] powerpc/powernv: Add a driver for the POWER9 XIVE interrupt controller The XIVE (External Interrupt Virtualization Engine) is a new interrupt controller present in IBM's POWER9 processor. It's a very powerful, very complex device using queues and shared memory to improve interrupt dispatch performance in a virtualized environment. This yields a ~10% performance improvment over the XICS emulation mode, measured in both buildworld, and 'dd' from nvme to /dev/null. Currently, this only supports native access. MFC after: 1 month --- sys/conf/files.powerpc | 1 + sys/powerpc/powernv/opal.h | 26 + sys/powerpc/powernv/platform_powernv.c | 6 +- sys/powerpc/powernv/xive.c | 764 +++++++++++++++++++++++++ sys/powerpc/pseries/xics.c | 15 +- 5 files changed, 804 insertions(+), 8 deletions(-) create mode 100644 sys/powerpc/powernv/xive.c diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc index 60c6f2304bdd..47287515b5b2 100644 --- a/sys/conf/files.powerpc +++ b/sys/conf/files.powerpc @@ -201,6 +201,7 @@ powerpc/powernv/opalcall.S optional powernv powerpc/powernv/platform_powernv.c optional powernv powerpc/powernv/powernv_centaur.c optional powernv powerpc/powernv/powernv_xscom.c optional powernv +powerpc/powernv/xive.c optional powernv powerpc/powerpc/altivec.c optional powerpc | powerpc64 powerpc/powerpc/autoconf.c standard powerpc/powerpc/bus_machdep.c standard diff --git a/sys/powerpc/powernv/opal.h b/sys/powerpc/powernv/opal.h index fafeec390e68..043b61a291a3 100644 --- a/sys/powerpc/powernv/opal.h +++ b/sys/powerpc/powernv/opal.h @@ -82,6 +82,20 @@ int opal_call(uint64_t token, ...); #define OPAL_INT_SET_MFRR 125 #define OPAL_PCI_TCE_KILL 126 #define OPAL_XIVE_RESET 128 +#define OPAL_XIVE_GET_IRQ_INFO 129 +#define OPAL_XIVE_GET_IRQ_CONFIG 130 +#define OPAL_XIVE_SET_IRQ_CONFIG 131 +#define OPAL_XIVE_GET_QUEUE_INFO 132 +#define OPAL_XIVE_SET_QUEUE_INFO 133 +#define OPAL_XIVE_DONATE_PAGE 134 +#define OPAL_XIVE_ALLOCATE_VP_BLOCK 135 +#define OPAL_XIVE_FREE_VP_BLOCK 136 +#define OPAL_XIVE_GET_VP_INFO 137 +#define OPAL_XIVE_SET_VP_INFO 138 +#define OPAL_XIVE_ALLOCATE_IRQ 139 +#define OPAL_XIVE_FREE_IRQ 140 +#define OPAL_XIVE_SYNC 141 +#define OPAL_XIVE_DUMP 142 #define OPAL_SENSOR_GROUP_CLEAR 156 #define OPAL_SENSOR_READ_U64 162 #define OPAL_SENSOR_GROUP_ENABLE 163 @@ -114,6 +128,18 @@ int opal_call(uint64_t token, ...); #define OPAL_BUSY_EVENT -12 #define OPAL_ASYNC_COMPLETION -15 #define OPAL_EMPTY -16 +#define OPAL_XIVE_PROVISIONING -31 +#define OPAL_XIVE_FREE_ACTIVE -32 + +#define OPAL_XIVE_XICS_MODE_EMU 0 +#define OPAL_XIVE_XICS_MODE_EXP 1 + +#define OPAL_XIVE_VP_ENABLED 0x00000001 +#define OPAL_XIVE_VP_SINGLE_ESCALATION 0x00000002 + +#define OPAL_XIVE_EQ_ENABLED 0x00000001 +#define OPAL_XIVE_EQ_ALWAYS_NOTIFY 0x00000002 +#define OPAL_XIVE_EQ_ESCALATE 0x00000004 struct opal_msg { uint32_t msg_type; diff --git a/sys/powerpc/powernv/platform_powernv.c b/sys/powerpc/powernv/platform_powernv.c index afcb3d5b90b8..1291eb02e4e5 100644 --- a/sys/powerpc/powernv/platform_powernv.c +++ b/sys/powerpc/powernv/platform_powernv.c @@ -59,7 +59,8 @@ __FBSDID("$FreeBSD$"); extern void *ap_pcpu; #endif -extern void xicp_smp_cpu_startup(void); +void (*powernv_smp_ap_extra_init)(void); + static int powernv_probe(platform_t); static int powernv_attach(platform_t); void powernv_mem_regions(platform_t, struct mem_region *phys, int *physsz, @@ -473,7 +474,8 @@ static void powernv_smp_ap_init(platform_t platform) { - xicp_smp_cpu_startup(); + if (powernv_smp_ap_extra_init != NULL) + powernv_smp_ap_extra_init(); } static void diff --git a/sys/powerpc/powernv/xive.c b/sys/powerpc/powernv/xive.c new file mode 100644 index 000000000000..0c1406199ac5 --- /dev/null +++ b/sys/powerpc/powernv/xive.c @@ -0,0 +1,764 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright 2019 Justin Hibbits + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_platform.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include + +#ifdef POWERNV +#include +#endif + +#include "pic_if.h" + +#define XIVE_PRIORITY 7 /* Random non-zero number */ +#define MAX_XIVE_IRQS (1<<24) /* 24-bit XIRR field */ + +/* Registers */ +#define XIVE_TM_QW1_OS 0x010 /* Guest OS registers */ +#define XIVE_TM_QW2_HV_POOL 0x020 /* Hypervisor pool registers */ +#define XIVE_TM_QW3_HV 0x030 /* Hypervisor registers */ + +#define XIVE_TM_NSR 0x00 +#define XIVE_TM_CPPR 0x01 +#define XIVE_TM_IPB 0x02 +#define XIVE_TM_LSMFB 0x03 +#define XIVE_TM_ACK_CNT 0x04 +#define XIVE_TM_INC 0x05 +#define XIVE_TM_AGE 0x06 +#define XIVE_TM_PIPR 0x07 + +#define TM_WORD0 0x0 +#define TM_WORD2 0x8 +#define TM_QW2W2_VP 0x80000000 + +#define XIVE_TM_SPC_ACK 0x800 +#define TM_QW3NSR_HE_SHIFT 14 +#define TM_QW3_NSR_HE_NONE 0 +#define TM_QW3_NSR_HE_POOL 1 +#define TM_QW3_NSR_HE_PHYS 2 +#define TM_QW3_NSR_HE_LSI 3 +#define XIVE_TM_SPC_PULL_POOL_CTX 0x828 + +#define XIVE_IRQ_LOAD_EOI 0x000 +#define XIVE_IRQ_STORE_EOI 0x400 +#define XIVE_IRQ_PQ_00 0xc00 +#define XIVE_IRQ_PQ_01 0xd00 + +#define XIVE_IRQ_VAL_P 0x02 +#define XIVE_IRQ_VAL_Q 0x01 + +struct xive_softc; +struct xive_irq; + +extern void (*powernv_smp_ap_extra_init)(void); + +/* Private support */ +static void xive_setup_cpu(void); +static void xive_smp_cpu_startup(void); +static void xive_init_irq(struct xive_irq *irqd, u_int irq); +static struct xive_irq *xive_configure_irq(u_int irq); +static int xive_provision_page(struct xive_softc *sc); + + +/* Interfaces */ +static int xive_probe(device_t); +static int xive_attach(device_t); +static int xics_probe(device_t); +static int xics_attach(device_t); + +static void xive_bind(device_t, u_int, cpuset_t, void **); +static void xive_dispatch(device_t, struct trapframe *); +static void xive_enable(device_t, u_int, u_int, void **); +static void xive_eoi(device_t, u_int, void *); +static void xive_ipi(device_t, u_int); +static void xive_mask(device_t, u_int, void *); +static void xive_unmask(device_t, u_int, void *); +static void xive_translate_code(device_t dev, u_int irq, int code, + enum intr_trigger *trig, enum intr_polarity *pol); + +static device_method_t xive_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, xive_probe), + DEVMETHOD(device_attach, xive_attach), + + /* PIC interface */ + DEVMETHOD(pic_bind, xive_bind), + DEVMETHOD(pic_dispatch, xive_dispatch), + DEVMETHOD(pic_enable, xive_enable), + DEVMETHOD(pic_eoi, xive_eoi), + DEVMETHOD(pic_ipi, xive_ipi), + DEVMETHOD(pic_mask, xive_mask), + DEVMETHOD(pic_unmask, xive_unmask), + DEVMETHOD(pic_translate_code, xive_translate_code), + + DEVMETHOD_END +}; + +static device_method_t xics_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, xics_probe), + DEVMETHOD(device_attach, xics_attach), + + DEVMETHOD_END +}; + +struct xive_softc { + struct mtx sc_mtx; + struct resource *sc_mem; + vm_size_t sc_prov_page_size; + uint32_t sc_offset; +}; + +struct xive_queue { + uint32_t *q_page; + uint32_t *q_eoi_page; + uint32_t q_toggle; + uint32_t q_size; + uint32_t q_index; + uint32_t q_mask; +}; + +struct xive_irq { + uint32_t girq; + uint32_t lirq; + uint64_t vp; + uint64_t flags; +#define OPAL_XIVE_IRQ_EOI_VIA_FW 0x00000020 +#define OPAL_XIVE_IRQ_MASK_VIA_FW 0x00000010 +#define OPAL_XIVE_IRQ_SHIFT_BUG 0x00000008 +#define OPAL_XIVE_IRQ_LSI 0x00000004 +#define OPAL_XIVE_IRQ_STORE_EOI 0x00000002 +#define OPAL_XIVE_IRQ_TRIGGER_PAGE 0x00000001 + uint8_t prio; + vm_offset_t eoi_page; + vm_offset_t trig_page; + vm_size_t esb_size; + int chip; +}; + +struct xive_cpu { + uint64_t vp; + uint64_t flags; + struct xive_irq ipi_data; + struct xive_queue queue; /* We only use a single queue for now. */ + uint64_t cam; + uint32_t chip; +}; + +static driver_t xive_driver = { + "xive", + xive_methods, + sizeof(struct xive_softc) +}; + +static driver_t xics_driver = { + "xivevc", + xics_methods, + 0 +}; + +static devclass_t xive_devclass; +static devclass_t xics_devclass; + +EARLY_DRIVER_MODULE(xive, ofwbus, xive_driver, xive_devclass, 0, 0, + BUS_PASS_INTERRUPT-1); +EARLY_DRIVER_MODULE(xivevc, ofwbus, xics_driver, xics_devclass, 0, 0, + BUS_PASS_INTERRUPT); + +MALLOC_DEFINE(M_XIVE, "xive", "XIVE Memory"); + +DPCPU_DEFINE_STATIC(struct xive_cpu, xive_cpu_data); + +static int xive_ipi_vector = -1; + +/* + * XIVE Exploitation mode driver. + * + * The XIVE, present in the POWER9 CPU, can run in two modes: XICS emulation + * mode, and "Exploitation mode". XICS emulation mode is compatible with the + * POWER8 and earlier XICS interrupt controller, using OPAL calls to emulate + * hypervisor calls and memory accesses. Exploitation mode gives us raw access + * to the XIVE MMIO, improving performance significantly. + * + * The XIVE controller is a very bizarre interrupt controller. It uses queues + * in memory to pass interrupts around, and maps itself into 512GB of physical + * device address space, giving each interrupt in the system one or more pages + * of address space. An IRQ is tied to a virtual processor, which could be a + * physical CPU thread, or a guest CPU thread (LPAR running on a physical + * thread). Thus, the controller can route interrupts directly to guest OSes + * bypassing processing by the hypervisor, thereby improving performance of the + * guest OS. + * + * An IRQ, in addition to being tied to a virtual processor, has one or two + * page mappings: an EOI page, and an optional trigger page. The trigger page + * could be the same as the EOI page. Level-sensitive interrupts (LSIs) don't + * have a trigger page, as they're external interrupts controlled by physical + * lines. MSIs and IPIs have trigger pages. An IPI is really just another IRQ + * in the XIVE, which is triggered by software. + * + * An interesting behavior of the XIVE controller is that oftentimes the + * contents of an address location don't actually matter, but the direction of + * the action is the signifier (read vs write), and the address is significant. + * Hence, masking and unmasking an interrupt is done by reading different + * addresses in the EOI page, and triggering an interrupt consists of writing to + * the trigger page. + * + * Additionally, the MMIO region mapped is CPU-sensitive, just like the + * per-processor register space (private access) in OpenPIC. In order for a CPU + * to receive interrupts it must itself configure its CPPR (Current Processor + * Priority Register), it cannot be set by any other processor. This + * necessitates the xive_smp_cpu_startup() function. + * + * Queues are pages of memory, sized powers-of-two, that are shared with the + * XIVE. The XIVE writes into the queue with an alternating polarity bit, which + * flips when the queue wraps. + */ + +/* + * Offset-based read/write interfaces. + */ +static uint16_t +xive_read_2(struct xive_softc *sc, bus_size_t offset) +{ + + return (bus_read_2(sc->sc_mem, sc->sc_offset + offset)); +} + +static void +xive_write_1(struct xive_softc *sc, bus_size_t offset, uint8_t val) +{ + + bus_write_1(sc->sc_mem, sc->sc_offset + offset, val); +} + +/* EOI and Trigger page access interfaces. */ +static uint64_t +xive_read_mmap8(vm_offset_t addr) +{ + return (*(volatile uint64_t *)addr); +} + +static void +xive_write_mmap8(vm_offset_t addr, uint64_t val) +{ + *(uint64_t *)(addr) = val; +} + + +/* Device interfaces. */ +static int +xive_probe(device_t dev) +{ + + if (!ofw_bus_is_compatible(dev, "ibm,opal-xive-pe")) + return (ENXIO); + + device_set_desc(dev, "External Interrupt Virtualization Engine"); + + /* Make sure we always win against the xicp driver. */ + return (BUS_PROBE_DEFAULT); +} + +static int +xics_probe(device_t dev) +{ + + if (!ofw_bus_is_compatible(dev, "ibm,opal-xive-vc")) + return (ENXIO); + + device_set_desc(dev, "External Interrupt Virtualization Engine Root"); + return (BUS_PROBE_DEFAULT); +} + +static int +xive_attach(device_t dev) +{ + struct xive_softc *sc = device_get_softc(dev); + struct xive_cpu *xive_cpud; + phandle_t phandle = ofw_bus_get_node(dev); + int64_t vp_block; + int error; + int rid; + int i, order; + uint64_t vp_id; + int64_t ipi_irq; + + opal_call(OPAL_XIVE_RESET, OPAL_XIVE_XICS_MODE_EXP); + + error = OF_getencprop(phandle, "ibm,xive-provision-page-size", + (pcell_t *)&sc->sc_prov_page_size, sizeof(sc->sc_prov_page_size)); + + rid = 1; /* Get the Hypervisor-level register set. */ + sc->sc_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &rid, RF_ACTIVE); + sc->sc_offset = XIVE_TM_QW3_HV; + + mtx_init(&sc->sc_mtx, "XIVE", NULL, MTX_DEF); + + order = fls(mp_maxid + (mp_maxid - 1)) - 1; + + do { + vp_block = opal_call(OPAL_XIVE_ALLOCATE_VP_BLOCK, order); + if (vp_block == OPAL_BUSY) + DELAY(10); + else if (vp_block == OPAL_XIVE_PROVISIONING) + xive_provision_page(sc); + else + break; + } while (1); + + if (vp_block < 0) { + device_printf(dev, + "Unable to allocate VP block. Opal error %d\n", + (int)vp_block); + bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->sc_mem); + return (ENXIO); + } + + /* + * Set up the VPs. Try to do as much as we can in attach, to lessen + * what's needed at AP spawn time. + */ + CPU_FOREACH(i) { + vp_id = pcpu_find(i)->pc_hwref; + + xive_cpud = DPCPU_ID_PTR(i, xive_cpu_data); + xive_cpud->vp = vp_id + vp_block; + opal_call(OPAL_XIVE_GET_VP_INFO, xive_cpud->vp, NULL, + vtophys(&xive_cpud->cam), NULL, vtophys(&xive_cpud->chip)); + + /* Allocate the queue page and populate the queue state data. */ + xive_cpud->queue.q_page = contigmalloc(PAGE_SIZE, M_XIVE, + M_ZERO | M_WAITOK, 0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); + xive_cpud->queue.q_size = 1 << PAGE_SHIFT; + xive_cpud->queue.q_mask = + ((xive_cpud->queue.q_size / sizeof(int)) - 1); + xive_cpud->queue.q_toggle = 0; + xive_cpud->queue.q_index = 0; + do { + error = opal_call(OPAL_XIVE_SET_VP_INFO, xive_cpud->vp, + OPAL_XIVE_VP_ENABLED, 0); + } while (error == OPAL_BUSY); + error = opal_call(OPAL_XIVE_SET_QUEUE_INFO, vp_id, + XIVE_PRIORITY, vtophys(xive_cpud->queue.q_page), PAGE_SHIFT, + OPAL_XIVE_EQ_ALWAYS_NOTIFY | OPAL_XIVE_EQ_ENABLED); + + do { + ipi_irq = opal_call(OPAL_XIVE_ALLOCATE_IRQ, + xive_cpud->chip); + } while (ipi_irq == OPAL_BUSY); + + if (ipi_irq < 0) + device_printf(root_pic, + "Failed allocating IPI. OPAL error %d\n", + (int)ipi_irq); + else { + xive_init_irq(&xive_cpud->ipi_data, ipi_irq); + xive_cpud->ipi_data.vp = vp_id; + xive_cpud->ipi_data.lirq = MAX_XIVE_IRQS; + opal_call(OPAL_XIVE_SET_IRQ_CONFIG, ipi_irq, + xive_cpud->ipi_data.vp, XIVE_PRIORITY, + MAX_XIVE_IRQS); + } + } + + powerpc_register_pic(dev, OF_xref_from_node(phandle), MAX_XIVE_IRQS, + 1 /* Number of IPIs */, FALSE); + root_pic = dev; + + xive_setup_cpu(); + powernv_smp_ap_extra_init = xive_smp_cpu_startup; + + return (0); +} + +static int +xics_attach(device_t dev) +{ + phandle_t phandle = ofw_bus_get_node(dev); + + /* The XIVE (root PIC) will handle all our interrupts */ + powerpc_register_pic(root_pic, OF_xref_from_node(phandle), + MAX_XIVE_IRQS, 1 /* Number of IPIs */, FALSE); + + return (0); +} + +/* + * PIC I/F methods. + */ + +static void +xive_bind(device_t dev, u_int irq, cpuset_t cpumask, void **priv) +{ + struct xive_irq *irqd; + int cpu; + int ncpus, i, error; + + if (*priv == NULL) + *priv = xive_configure_irq(irq); + + irqd = *priv; + + /* + * This doesn't appear to actually support affinity groups, so pick a + * random CPU. + */ + ncpus = 0; + CPU_FOREACH(cpu) + if (CPU_ISSET(cpu, &cpumask)) ncpus++; + + i = mftb() % ncpus; + ncpus = 0; + CPU_FOREACH(cpu) { + if (!CPU_ISSET(cpu, &cpumask)) + continue; + if (ncpus == i) + break; + ncpus++; + } + + opal_call(OPAL_XIVE_SYNC); + + irqd->vp = pcpu_find(cpu)->pc_hwref; + error = opal_call(OPAL_XIVE_SET_IRQ_CONFIG, irq, irqd->vp, + XIVE_PRIORITY, irqd->lirq); + + if (error < 0) + panic("Cannot bind interrupt %d to CPU %d", irq, cpu); + + xive_eoi(dev, irq, irqd); +} + +/* Read the next entry in the queue page and update the index. */ +static int +xive_read_eq(struct xive_queue *q) +{ + uint32_t i = be32toh(q->q_page[q->q_index]); + + /* Check validity, using current queue polarity. */ + if ((i >> 31) == q->q_toggle) + return (0); + + q->q_index = (q->q_index + 1) & q->q_mask; + + if (q->q_index == 0) + q->q_toggle ^= 1; + + return (i & 0x7fffffff); +} + +static void +xive_dispatch(device_t dev, struct trapframe *tf) +{ + struct xive_softc *sc; + struct xive_cpu *xive_cpud; + uint32_t vector; + uint16_t ack; + uint8_t cppr, he; + + sc = device_get_softc(dev); + + for (;;) { + ack = xive_read_2(sc, XIVE_TM_SPC_ACK); + cppr = (ack & 0xff); + + he = ack >> TM_QW3NSR_HE_SHIFT; + + if (he == TM_QW3_NSR_HE_NONE) + break; + switch (he) { + case TM_QW3_NSR_HE_NONE: + goto end; + case TM_QW3_NSR_HE_POOL: + case TM_QW3_NSR_HE_LSI: + device_printf(dev, + "Unexpected interrupt he type: %d\n", he); + goto end; + case TM_QW3_NSR_HE_PHYS: + break; + } + + xive_cpud = DPCPU_PTR(xive_cpu_data); + xive_write_1(sc, XIVE_TM_CPPR, cppr); + + for (;;) { + vector = xive_read_eq(&xive_cpud->queue); + + if (vector == 0) + break; + + if (vector == MAX_XIVE_IRQS) + vector = xive_ipi_vector; + + powerpc_dispatch_intr(vector, tf); + } + } +end: + xive_write_1(sc, XIVE_TM_CPPR, 0xff); +} + +static void +xive_enable(device_t dev, u_int irq, u_int vector, void **priv) +{ + struct xive_irq *irqd; + cell_t status, cpu; + + if (irq == MAX_XIVE_IRQS) { + if (xive_ipi_vector == -1) + xive_ipi_vector = vector; + return; + } + if (*priv == NULL) + *priv = xive_configure_irq(irq); + + irqd = *priv; + + /* Bind to this CPU to start */ + cpu = PCPU_GET(hwref); + irqd->lirq = vector; + + for (;;) { + status = opal_call(OPAL_XIVE_SET_IRQ_CONFIG, irq, cpu, + XIVE_PRIORITY, vector); + if (status != OPAL_BUSY) + break; + DELAY(10); + } + + if (status != 0) + panic("OPAL_SET_XIVE IRQ %d -> cpu %d failed: %d", irq, + cpu, status); + + xive_unmask(dev, irq, *priv); +} + +static void +xive_eoi(device_t dev, u_int irq, void *priv) +{ + struct xive_irq *rirq; + struct xive_cpu *cpud; + uint8_t eoi_val; + + if (irq == MAX_XIVE_IRQS) { + cpud = DPCPU_PTR(xive_cpu_data); + rirq = &cpud->ipi_data; + } else + rirq = priv; + + if (rirq->flags & OPAL_XIVE_IRQ_EOI_VIA_FW) + opal_call(OPAL_INT_EOI, irq); + else if (rirq->flags & OPAL_XIVE_IRQ_STORE_EOI) + xive_write_mmap8(rirq->eoi_page + XIVE_IRQ_STORE_EOI, 0); + else if (rirq->flags & OPAL_XIVE_IRQ_LSI) + xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_LOAD_EOI); + else { + eoi_val = xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_PQ_00); + if ((eoi_val & XIVE_IRQ_VAL_Q) && rirq->trig_page != 0) + xive_write_mmap8(rirq->trig_page, 0); + } +} + +static void +xive_ipi(device_t dev, u_int cpu) +{ + struct xive_cpu *xive_cpud; + + xive_cpud = DPCPU_ID_PTR(cpu, xive_cpu_data); + + if (xive_cpud->ipi_data.trig_page == 0) + return; + xive_write_mmap8(xive_cpud->ipi_data.trig_page, 0); +} + +static void +xive_mask(device_t dev, u_int irq, void *priv) +{ + struct xive_irq *rirq; + + /* Never mask IPIs */ + if (irq == MAX_XIVE_IRQS) + return; + + rirq = priv; + + if (!(rirq->flags & OPAL_XIVE_IRQ_LSI)) + return; + xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_PQ_01); +} + +static void +xive_unmask(device_t dev, u_int irq, void *priv) +{ + struct xive_irq *rirq; + + rirq = priv; + + xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_PQ_00); +} + +static void +xive_translate_code(device_t dev, u_int irq, int code, + enum intr_trigger *trig, enum intr_polarity *pol) +{ + switch (code) { + case 0: + /* L to H edge */ + *trig = INTR_TRIGGER_EDGE; + *pol = INTR_POLARITY_HIGH; + break; + case 1: + /* Active L level */ + *trig = INTR_TRIGGER_LEVEL; + *pol = INTR_POLARITY_LOW; + break; + default: + *trig = INTR_TRIGGER_CONFORM; + *pol = INTR_POLARITY_CONFORM; + } +} + +/* Private functions. */ +/* + * Setup the current CPU. Called by the BSP at driver attachment, and by each + * AP at wakeup (via xive_smp_cpu_startup()). + */ +static void +xive_setup_cpu(void) +{ + struct xive_softc *sc; + struct xive_cpu *cpup; + uint32_t val; + + cpup = DPCPU_PTR(xive_cpu_data); + + sc = device_get_softc(root_pic); + + val = bus_read_4(sc->sc_mem, XIVE_TM_QW2_HV_POOL + TM_WORD2); + if (val & TM_QW2W2_VP) + bus_read_8(sc->sc_mem, XIVE_TM_SPC_PULL_POOL_CTX); + + bus_write_4(sc->sc_mem, XIVE_TM_QW2_HV_POOL + TM_WORD0, 0xff); + bus_write_4(sc->sc_mem, XIVE_TM_QW2_HV_POOL + TM_WORD2, + TM_QW2W2_VP | cpup->cam); + + xive_unmask(root_pic, cpup->ipi_data.girq, &cpup->ipi_data); + xive_write_1(sc, XIVE_TM_CPPR, 0xff); +} + +/* Populate an IRQ structure, mapping the EOI and trigger pages. */ +static void +xive_init_irq(struct xive_irq *irqd, u_int irq) +{ + uint64_t eoi_phys, trig_phys; + uint32_t esb_shift; + + opal_call(OPAL_XIVE_GET_IRQ_INFO, irq, + vtophys(&irqd->flags), vtophys(&eoi_phys), + vtophys(&trig_phys), vtophys(&esb_shift), + vtophys(&irqd->chip)); + + irqd->girq = irq; + irqd->esb_size = 1 << esb_shift; + irqd->eoi_page = (vm_offset_t)pmap_mapdev(eoi_phys, irqd->esb_size); + + if (eoi_phys == trig_phys) + irqd->trig_page = irqd->eoi_page; + else if (trig_phys != 0) + irqd->trig_page = (vm_offset_t)pmap_mapdev(trig_phys, + irqd->esb_size); + else + irqd->trig_page = 0; + + opal_call(OPAL_XIVE_GET_IRQ_CONFIG, irq, vtophys(&irqd->vp), + vtophys(&irqd->prio), vtophys(&irqd->lirq)); +} + +/* Allocate an IRQ struct before populating it. */ +static struct xive_irq * +xive_configure_irq(u_int irq) +{ + struct xive_irq *irqd; + + irqd = malloc(sizeof(struct xive_irq), M_XIVE, M_WAITOK); + + xive_init_irq(irqd, irq); + + return (irqd); +} + +/* + * Part of the OPAL API. OPAL_XIVE_ALLOCATE_VP_BLOCK might require more pages, + * provisioned through this call. + */ +static int +xive_provision_page(struct xive_softc *sc) +{ + void *prov_page; + int error; + + do { + prov_page = contigmalloc(sc->sc_prov_page_size, M_XIVE, 0, + 0, BUS_SPACE_MAXADDR, + sc->sc_prov_page_size, sc->sc_prov_page_size); + + error = opal_call(OPAL_XIVE_DONATE_PAGE, -1, + vtophys(prov_page)); + } while (error == OPAL_XIVE_PROVISIONING); + + return (0); +} + +/* The XIVE_TM_CPPR register must be set by each thread */ +static void +xive_smp_cpu_startup(void) +{ + + xive_setup_cpu(); +} diff --git a/sys/powerpc/pseries/xics.c b/sys/powerpc/pseries/xics.c index 4dbfcfbd30cb..fc9a82dd2b4d 100644 --- a/sys/powerpc/pseries/xics.c +++ b/sys/powerpc/pseries/xics.c @@ -61,9 +61,6 @@ __FBSDID("$FreeBSD$"); #define XICP_IPI 2 #define MAX_XICP_IRQS (1<<24) /* 24-bit XIRR field */ -#define XIVE_XICS_MODE_EMU 0 -#define XIVE_XICS_MODE_EXP 1 - static int xicp_probe(device_t); static int xicp_attach(device_t); static int xics_probe(device_t); @@ -78,7 +75,8 @@ static void xicp_mask(device_t, u_int, void *priv); static void xicp_unmask(device_t, u_int, void *priv); #ifdef POWERNV -void xicp_smp_cpu_startup(void); +extern void (*powernv_smp_ap_extra_init)(void); +static void xicp_smp_cpu_startup(void); #endif static device_method_t xicp_methods[] = { @@ -238,7 +236,7 @@ xicp_attach(device_t dev) * compatibility mode. */ sc->xics_emu = true; - opal_call(OPAL_XIVE_RESET, XIVE_XICS_MODE_EMU); + opal_call(OPAL_XIVE_RESET, OPAL_XIVE_XICS_MODE_EMU); #endif } else { sc->cpu_range[0] = 0; @@ -280,6 +278,11 @@ xicp_attach(device_t dev) 1 /* Number of IPIs */, FALSE); root_pic = dev; +#ifdef POWERNV + if (sc->xics_emu) + powernv_smp_ap_extra_init = xicp_smp_cpu_startup; +#endif + return (0); } @@ -556,7 +559,7 @@ xicp_unmask(device_t dev, u_int irq, void *priv) #ifdef POWERNV /* This is only used on POWER9 systems with the XIVE's XICS emulation. */ -void +static void xicp_smp_cpu_startup(void) { struct xicp_softc *sc;