From 355a415e92af5ecfdc7fca9b15073426f95faac6 Mon Sep 17 00:00:00 2001 From: Pyun YongHyeon Date: Wed, 16 Nov 2011 19:25:26 +0000 Subject: [PATCH] Enable 64bit DMA addressing support for all msk(4) controllers. The unnecessarily complex LE format used on Marvell controllers was the main reason not to enable 64bit DMA addressing in the driver. If the high 32 bits of the DMA address of a TX/RX buffer change, the driver has to generate a new LE. In the TX path, the driver keeps track of the last used high 32 bits of the DMA address and generates a new LE whenever it sees the high address change. In the RX path, the driver always uses two LEs to specify the 64bit DMA address of an RX buffer. If the high 32 bits of the DMA address of an RX buffer are the same as those of the previous RX buffer, the driver would not have to use two LEs, but it does so anyway for simplicity in RX ring management. One drawback of switching to 64bit DMA addressing is that a large number of LEs are used to specify 64bit DMA addresses, so the number of LEs available for TX/RX buffers is considerably reduced. To mitigate the issue, increase the number of available LEs from 256 to 384 for TX and from 256 to 512 for RX. On 32bit architectures, msk(4) does not use 64bit DMA addressing, to save resources. 
Tested by: das --- sys/dev/msk/if_msk.c | 192 +++++++++++++++++++++++++++++----------- sys/dev/msk/if_mskreg.h | 52 +++++++---- 2 files changed, 177 insertions(+), 67 deletions(-) diff --git a/sys/dev/msk/if_msk.c b/sys/dev/msk/if_msk.c index b060949a8592..36407a2c1a14 100644 --- a/sys/dev/msk/if_msk.c +++ b/sys/dev/msk/if_msk.c @@ -700,7 +700,7 @@ msk_init_rx_ring(struct msk_if_softc *sc_if) { struct msk_ring_data *rd; struct msk_rxdesc *rxd; - int i, prod; + int i, nbuf, prod; MSK_IF_LOCK_ASSERT(sc_if); @@ -710,11 +710,18 @@ msk_init_rx_ring(struct msk_if_softc *sc_if) rd = &sc_if->msk_rdata; bzero(rd->msk_rx_ring, sizeof(struct msk_rx_desc) * MSK_RX_RING_CNT); - prod = sc_if->msk_cdata.msk_rx_prod; - i = 0; + for (i = prod = 0; i < MSK_RX_RING_CNT; i++) { + rxd = &sc_if->msk_cdata.msk_rxdesc[prod]; + rxd->rx_m = NULL; + rxd->rx_le = &rd->msk_rx_ring[prod]; + MSK_INC(prod, MSK_RX_RING_CNT); + } + nbuf = MSK_RX_BUF_CNT; + prod = 0; /* Have controller know how to compute Rx checksum. */ if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && (sc_if->msk_ifp->if_capenable & IFCAP_RXCSUM) != 0) { +#ifdef MSK_64BIT_DMA rxd = &sc_if->msk_cdata.msk_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_rx_ring[prod]; @@ -723,15 +730,21 @@ msk_init_rx_ring(struct msk_if_softc *sc_if) rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER); MSK_INC(prod, MSK_RX_RING_CNT); MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT); - i++; - } - for (; i < MSK_RX_RING_CNT; i++) { +#endif rxd = &sc_if->msk_cdata.msk_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_rx_ring[prod]; + rxd->rx_le->msk_addr = htole32(ETHER_HDR_LEN << 16 | + ETHER_HDR_LEN); + rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER); + MSK_INC(prod, MSK_RX_RING_CNT); + MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT); + nbuf--; + } + for (i = 0; i < nbuf; i++) { if (msk_newbuf(sc_if, prod) != 0) return (ENOBUFS); - MSK_INC(prod, MSK_RX_RING_CNT); + MSK_RX_INC(prod, MSK_RX_RING_CNT); } 
bus_dmamap_sync(sc_if->msk_cdata.msk_rx_ring_tag, @@ -739,10 +752,11 @@ msk_init_rx_ring(struct msk_if_softc *sc_if) BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Update prefetch unit. */ - sc_if->msk_cdata.msk_rx_prod = MSK_RX_RING_CNT - 1; + sc_if->msk_cdata.msk_rx_prod = prod; CSR_WRITE_2(sc_if->msk_softc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_PUT_IDX_REG), - sc_if->msk_cdata.msk_rx_prod); + (sc_if->msk_cdata.msk_rx_prod + MSK_RX_RING_CNT - 1) % + MSK_RX_RING_CNT); if (msk_rx_fill(sc_if, 0) != 0) return (ENOBUFS); return (0); @@ -753,7 +767,7 @@ msk_init_jumbo_rx_ring(struct msk_if_softc *sc_if) { struct msk_ring_data *rd; struct msk_rxdesc *rxd; - int i, prod; + int i, nbuf, prod; MSK_IF_LOCK_ASSERT(sc_if); @@ -764,11 +778,18 @@ msk_init_jumbo_rx_ring(struct msk_if_softc *sc_if) rd = &sc_if->msk_rdata; bzero(rd->msk_jumbo_rx_ring, sizeof(struct msk_rx_desc) * MSK_JUMBO_RX_RING_CNT); - prod = sc_if->msk_cdata.msk_rx_prod; - i = 0; + for (i = prod = 0; i < MSK_JUMBO_RX_RING_CNT; i++) { + rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[prod]; + rxd->rx_m = NULL; + rxd->rx_le = &rd->msk_jumbo_rx_ring[prod]; + MSK_INC(prod, MSK_JUMBO_RX_RING_CNT); + } + nbuf = MSK_RX_BUF_CNT; + prod = 0; /* Have controller know how to compute Rx checksum. 
*/ if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && (sc_if->msk_ifp->if_capenable & IFCAP_RXCSUM) != 0) { +#ifdef MSK_64BIT_DMA rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_jumbo_rx_ring[prod]; @@ -777,25 +798,33 @@ msk_init_jumbo_rx_ring(struct msk_if_softc *sc_if) rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER); MSK_INC(prod, MSK_JUMBO_RX_RING_CNT); MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT); - i++; - } - for (; i < MSK_JUMBO_RX_RING_CNT; i++) { +#endif rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_jumbo_rx_ring[prod]; + rxd->rx_le->msk_addr = htole32(ETHER_HDR_LEN << 16 | + ETHER_HDR_LEN); + rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER); + MSK_INC(prod, MSK_JUMBO_RX_RING_CNT); + MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT); + nbuf--; + } + for (i = 0; i < nbuf; i++) { if (msk_jumbo_newbuf(sc_if, prod) != 0) return (ENOBUFS); - MSK_INC(prod, MSK_JUMBO_RX_RING_CNT); + MSK_RX_INC(prod, MSK_JUMBO_RX_RING_CNT); } bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_ring_tag, sc_if->msk_cdata.msk_jumbo_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - sc_if->msk_cdata.msk_rx_prod = MSK_JUMBO_RX_RING_CNT - 1; + /* Update prefetch unit. 
*/ + sc_if->msk_cdata.msk_rx_prod = prod; CSR_WRITE_2(sc_if->msk_softc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_PUT_IDX_REG), - sc_if->msk_cdata.msk_rx_prod); + (sc_if->msk_cdata.msk_rx_prod + MSK_JUMBO_RX_RING_CNT - 1) % + MSK_JUMBO_RX_RING_CNT); if (msk_rx_fill(sc_if, 1) != 0) return (ENOBUFS); return (0); @@ -813,6 +842,7 @@ msk_init_tx_ring(struct msk_if_softc *sc_if) sc_if->msk_cdata.msk_tx_prod = 0; sc_if->msk_cdata.msk_tx_cons = 0; sc_if->msk_cdata.msk_tx_cnt = 0; + sc_if->msk_cdata.msk_tx_high_addr = 0; rd = &sc_if->msk_rdata; bzero(rd->msk_tx_ring, sizeof(struct msk_tx_desc) * MSK_TX_RING_CNT); @@ -834,6 +864,12 @@ msk_discard_rxbuf(struct msk_if_softc *sc_if, int idx) struct msk_rxdesc *rxd; struct mbuf *m; +#ifdef MSK_64BIT_DMA + rxd = &sc_if->msk_cdata.msk_rxdesc[idx]; + rx_le = rxd->rx_le; + rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); + MSK_INC(idx, MSK_RX_RING_CNT); +#endif rxd = &sc_if->msk_cdata.msk_rxdesc[idx]; m = rxd->rx_m; rx_le = rxd->rx_le; @@ -847,6 +883,12 @@ msk_discard_jumbo_rxbuf(struct msk_if_softc *sc_if, int idx) struct msk_rxdesc *rxd; struct mbuf *m; +#ifdef MSK_64BIT_DMA + rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx]; + rx_le = rxd->rx_le; + rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); + MSK_INC(idx, MSK_JUMBO_RX_RING_CNT); +#endif rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx]; m = rxd->rx_m; rx_le = rxd->rx_le; @@ -884,10 +926,18 @@ msk_newbuf(struct msk_if_softc *sc_if, int idx) KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); rxd = &sc_if->msk_cdata.msk_rxdesc[idx]; +#ifdef MSK_64BIT_DMA + rx_le = rxd->rx_le; + rx_le->msk_addr = htole32(MSK_ADDR_HI(segs[0].ds_addr)); + rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); + MSK_INC(idx, MSK_RX_RING_CNT); + rxd = &sc_if->msk_cdata.msk_rxdesc[idx]; +#endif if (rxd->rx_m != NULL) { bus_dmamap_sync(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap); + rxd->rx_m = NULL; } 
map = rxd->rx_dmamap; rxd->rx_dmamap = sc_if->msk_cdata.msk_rx_sparemap; @@ -937,11 +987,19 @@ msk_jumbo_newbuf(struct msk_if_softc *sc_if, int idx) KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx]; +#ifdef MSK_64BIT_DMA + rx_le = rxd->rx_le; + rx_le->msk_addr = htole32(MSK_ADDR_HI(segs[0].ds_addr)); + rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); + MSK_INC(idx, MSK_JUMBO_RX_RING_CNT); + rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx]; +#endif if (rxd->rx_m != NULL) { bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc_if->msk_cdata.msk_jumbo_rx_tag, rxd->rx_dmamap); + rxd->rx_m = NULL; } map = rxd->rx_dmamap; rxd->rx_dmamap = sc_if->msk_cdata.msk_jumbo_rx_sparemap; @@ -1472,7 +1530,7 @@ mskc_reset(struct msk_softc *sc) /* Clear status list. */ bzero(sc->msk_stat_ring, - sizeof(struct msk_stat_desc) * MSK_STAT_RING_CNT); + sizeof(struct msk_stat_desc) * sc->msk_stat_count); sc->msk_stat_cons = 0; bus_dmamap_sync(sc->msk_stat_tag, sc->msk_stat_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); @@ -1483,7 +1541,7 @@ mskc_reset(struct msk_softc *sc) CSR_WRITE_4(sc, STAT_LIST_ADDR_LO, MSK_ADDR_LO(addr)); CSR_WRITE_4(sc, STAT_LIST_ADDR_HI, MSK_ADDR_HI(addr)); /* Set the status list last index. */ - CSR_WRITE_2(sc, STAT_LAST_IDX, MSK_STAT_RING_CNT - 1); + CSR_WRITE_2(sc, STAT_LAST_IDX, sc->msk_stat_count - 1); if (sc->msk_hw_id == CHIP_ID_YUKON_EC && sc->msk_hw_rev == CHIP_REV_YU_EC_A1) { /* WA for dev. #4.3 */ @@ -2083,17 +2141,29 @@ static int msk_status_dma_alloc(struct msk_softc *sc) { struct msk_dmamap_arg ctx; - int error; + bus_size_t stat_sz; + int count, error; + /* + * It seems controller requires number of status LE entries + * is power of 2 and the maximum number of status LE entries + * is 4096. For dual-port controllers, the number of status + * LE entries should be large enough to hold both port's + * status updates. 
+ */ + count = 3 * MSK_RX_RING_CNT + MSK_TX_RING_CNT; + count = imin(4096, roundup2(count, 1024)); + sc->msk_stat_count = count; + stat_sz = count * sizeof(struct msk_stat_desc); error = bus_dma_tag_create( bus_get_dma_tag(sc->msk_dev), /* parent */ MSK_STAT_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ - MSK_STAT_RING_SZ, /* maxsize */ + stat_sz, /* maxsize */ 1, /* nsegments */ - MSK_STAT_RING_SZ, /* maxsegsize */ + stat_sz, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->msk_stat_tag); @@ -2114,9 +2184,8 @@ msk_status_dma_alloc(struct msk_softc *sc) } ctx.msk_busaddr = 0; - error = bus_dmamap_load(sc->msk_stat_tag, - sc->msk_stat_map, sc->msk_stat_ring, MSK_STAT_RING_SZ, - msk_dmamap_cb, &ctx, 0); + error = bus_dmamap_load(sc->msk_stat_tag, sc->msk_stat_map, + sc->msk_stat_ring, stat_sz, msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->msk_dev, "failed to load DMA'able memory for status ring\n"); @@ -2157,27 +2226,10 @@ msk_txrx_dma_alloc(struct msk_if_softc *sc_if) int error, i; /* Create parent DMA tag. */ - /* - * XXX - * It seems that Yukon II supports full 64bits DMA operations. But - * it needs two descriptors(list elements) for 64bits DMA operations. - * Since we don't know what DMA address mappings(32bits or 64bits) - * would be used in advance for each mbufs, we limits its DMA space - * to be in range of 32bits address space. Otherwise, we should check - * what DMA address is used and chain another descriptor for the - * 64bits DMA operation. This also means descriptor ring size is - * variable. Limiting DMA address to be in 32bit address space greatly - * simplifies descriptor handling and possibly would increase - * performance a bit due to efficient handling of descriptors. 
- * Apart from harassing checksum offloading mechanisms, it seems - * it's really bad idea to use a separate descriptor for 64bit - * DMA operation to save small descriptor memory. Anyway, I've - * never seen these exotic scheme on ethernet interface hardware. - */ error = bus_dma_tag_create( bus_get_dma_tag(sc_if->msk_if_dev), /* parent */ 1, 0, /* alignment, boundary */ - BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ + BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT, /* maxsize */ @@ -2283,7 +2335,7 @@ msk_txrx_dma_alloc(struct msk_if_softc *sc_if) ctx.msk_busaddr = 0; error = bus_dmamap_load(sc_if->msk_cdata.msk_tx_ring_tag, sc_if->msk_cdata.msk_tx_ring_map, sc_if->msk_rdata.msk_tx_ring, - MSK_TX_RING_SZ, msk_dmamap_cb, &ctx, 0); + MSK_TX_RING_SZ, msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to load DMA'able memory for Tx ring\n"); @@ -2304,7 +2356,7 @@ msk_txrx_dma_alloc(struct msk_if_softc *sc_if) ctx.msk_busaddr = 0; error = bus_dmamap_load(sc_if->msk_cdata.msk_rx_ring_tag, sc_if->msk_cdata.msk_rx_ring_map, sc_if->msk_rdata.msk_rx_ring, - MSK_RX_RING_SZ, msk_dmamap_cb, &ctx, 0); + MSK_RX_RING_SZ, msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to load DMA'able memory for Rx ring\n"); @@ -2421,7 +2473,7 @@ msk_rx_dma_jalloc(struct msk_if_softc *sc_if) error = bus_dmamap_load(sc_if->msk_cdata.msk_jumbo_rx_ring_tag, sc_if->msk_cdata.msk_jumbo_rx_ring_map, sc_if->msk_rdata.msk_jumbo_rx_ring, MSK_JUMBO_RX_RING_SZ, - msk_dmamap_cb, &ctx, 0); + msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to load DMA'able memory for jumbo Rx ring\n"); @@ -2781,6 +2833,18 @@ msk_encap(struct msk_if_softc *sc_if, struct mbuf **m_head) } } +#ifdef MSK_64BIT_DMA + if (MSK_ADDR_HI(txsegs[0].ds_addr) != + sc_if->msk_cdata.msk_tx_high_addr) { + 
sc_if->msk_cdata.msk_tx_high_addr = + MSK_ADDR_HI(txsegs[0].ds_addr); + tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; + tx_le->msk_addr = htole32(MSK_ADDR_HI(txsegs[0].ds_addr)); + tx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); + sc_if->msk_cdata.msk_tx_cnt++; + MSK_INC(prod, MSK_TX_RING_CNT); + } +#endif si = prod; tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_addr = htole32(MSK_ADDR_LO(txsegs[0].ds_addr)); @@ -2795,6 +2859,20 @@ msk_encap(struct msk_if_softc *sc_if, struct mbuf **m_head) for (i = 1; i < nseg; i++) { tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; +#ifdef MSK_64BIT_DMA + if (MSK_ADDR_HI(txsegs[i].ds_addr) != + sc_if->msk_cdata.msk_tx_high_addr) { + sc_if->msk_cdata.msk_tx_high_addr = + MSK_ADDR_HI(txsegs[i].ds_addr); + tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; + tx_le->msk_addr = + htole32(MSK_ADDR_HI(txsegs[i].ds_addr)); + tx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); + sc_if->msk_cdata.msk_tx_cnt++; + MSK_INC(prod, MSK_TX_RING_CNT); + tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; + } +#endif tx_le->msk_addr = htole32(MSK_ADDR_LO(txsegs[i].ds_addr)); tx_le->msk_control = htole32(txsegs[i].ds_len | control | OP_BUFFER | HW_OWNER); @@ -3147,7 +3225,12 @@ msk_rxeof(struct msk_if_softc *sc_if, uint32_t status, uint32_t control, msk_discard_rxbuf(sc_if, cons); break; } +#ifdef MSK_64BIT_DMA + rxd = &sc_if->msk_cdata.msk_rxdesc[(cons + 1) % + MSK_RX_RING_CNT]; +#else rxd = &sc_if->msk_cdata.msk_rxdesc[cons]; +#endif m = rxd->rx_m; if (msk_newbuf(sc_if, cons) != 0) { ifp->if_iqdrops++; @@ -3175,8 +3258,8 @@ msk_rxeof(struct msk_if_softc *sc_if, uint32_t status, uint32_t control, MSK_IF_LOCK(sc_if); } while (0); - MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT); - MSK_INC(sc_if->msk_cdata.msk_rx_prod, MSK_RX_RING_CNT); + MSK_RX_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT); + MSK_RX_INC(sc_if->msk_cdata.msk_rx_prod, MSK_RX_RING_CNT); } static void @@ -3207,7 +3290,12 @@ msk_jumbo_rxeof(struct msk_if_softc *sc_if, uint32_t 
status, uint32_t control, msk_discard_jumbo_rxbuf(sc_if, cons); break; } +#ifdef MSK_64BIT_DMA + jrxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[(cons + 1) % + MSK_JUMBO_RX_RING_CNT]; +#else jrxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[cons]; +#endif m = jrxd->rx_m; if (msk_jumbo_newbuf(sc_if, cons) != 0) { ifp->if_iqdrops++; @@ -3235,8 +3323,8 @@ msk_jumbo_rxeof(struct msk_if_softc *sc_if, uint32_t status, uint32_t control, MSK_IF_LOCK(sc_if); } while (0); - MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT); - MSK_INC(sc_if->msk_cdata.msk_rx_prod, MSK_JUMBO_RX_RING_CNT); + MSK_RX_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT); + MSK_RX_INC(sc_if->msk_cdata.msk_rx_prod, MSK_JUMBO_RX_RING_CNT); } static void @@ -3581,7 +3669,7 @@ msk_handle_events(struct msk_softc *sc) control & STLE_OP_MASK); break; } - MSK_INC(cons, MSK_STAT_RING_CNT); + MSK_INC(cons, sc->msk_stat_count); if (rxprog > sc->msk_process_limit) break; } diff --git a/sys/dev/msk/if_mskreg.h b/sys/dev/msk/if_mskreg.h index 583f5e88ef80..5465deb1bf37 100644 --- a/sys/dev/msk/if_mskreg.h +++ b/sys/dev/msk/if_mskreg.h @@ -2315,35 +2315,48 @@ struct msk_stat_desc { #define BMU_UDP_CHECK (0x57<<16) /* Descr with UDP ext (YUKON only) */ #define BMU_BBC 0xffff /* Bit 15.. 0: Buffer Byte Counter */ +/* + * Controller requires an additional LE op code for 64bit DMA operation. + * Driver uses fixed number of RX buffers such that this limitation + * reduces number of available RX buffers with 64bit DMA so double + * number of RX buffers on platforms that support 64bit DMA. For TX + * side, controller requires an additional OP_ADDR64 op code if a TX + * buffer uses different high address value than previously used one. + * Driver monitors high DMA address change in TX and inserts an + * OP_ADDR64 op code if the high DMA address is changed. Driver + * allocates 50% more total TX buffers on platforms that support 64bit + * DMA. 
+ */ +#if (BUS_SPACE_MAXADDR > 0xFFFFFFFF) +#define MSK_64BIT_DMA +#define MSK_TX_RING_CNT 384 +#define MSK_RX_RING_CNT 512 +#else +#undef MSK_64BIT_DMA #define MSK_TX_RING_CNT 256 #define MSK_RX_RING_CNT 256 +#endif #define MSK_RX_BUF_ALIGN 8 #define MSK_JUMBO_RX_RING_CNT MSK_RX_RING_CNT -#define MSK_STAT_RING_CNT ((1 + 3) * (MSK_TX_RING_CNT + MSK_RX_RING_CNT)) #define MSK_MAXTXSEGS 32 #define MSK_TSO_MAXSGSIZE 4096 #define MSK_TSO_MAXSIZE (65535 + sizeof(struct ether_vlan_header)) /* - * It seems that the hardware requires extra decriptors(LEs) to offload - * TCP/UDP checksum, VLAN hardware tag inserstion and TSO. + * It seems that the hardware requires extra descriptors(LEs) to offload + * TCP/UDP checksum, VLAN hardware tag insertion and TSO. * * 1 descriptor for TCP/UDP checksum offload. * 1 descriptor VLAN hardware tag insertion. * 1 descriptor for TSO(TCP Segmentation Offload) - * 1 descriptor for 64bits DMA : Not applicatable due to the use of - * BUS_SPACE_MAXADDR_32BIT in parent DMA tag creation. + * 1 descriptor for each 64bits DMA transfers */ +#ifdef MSK_64BIT_DMA +#define MSK_RESERVED_TX_DESC_CNT (MSK_MAXTXSEGS + 3) +#else #define MSK_RESERVED_TX_DESC_CNT 3 +#endif -/* - * Jumbo buffer stuff. Note that we must allocate more jumbo - * buffers than there are descriptors in the receive ring. This - * is because we don't know how long it will take for a packet - * to be released after we hand it off to the upper protocol - * layers. To be safe, we allocate 1.5 times the number of - * receive descriptors. 
- */ #define MSK_JUMBO_FRAMELEN 9022 #define MSK_JUMBO_MTU (MSK_JUMBO_FRAMELEN-ETHER_HDR_LEN-ETHER_CRC_LEN) #define MSK_MAX_FRAMELEN \ @@ -2380,6 +2393,7 @@ struct msk_chain_data { bus_dmamap_t msk_jumbo_rx_sparemap; uint16_t msk_tso_mtu; uint32_t msk_last_csum; + uint32_t msk_tx_high_addr; int msk_tx_prod; int msk_tx_cons; int msk_tx_cnt; @@ -2411,10 +2425,17 @@ struct msk_ring_data { (sizeof(struct msk_rx_desc) * MSK_RX_RING_CNT) #define MSK_JUMBO_RX_RING_SZ \ (sizeof(struct msk_rx_desc) * MSK_JUMBO_RX_RING_CNT) -#define MSK_STAT_RING_SZ \ - (sizeof(struct msk_stat_desc) * MSK_STAT_RING_CNT) #define MSK_INC(x, y) (x) = (x + 1) % y +#ifdef MSK_64BIT_DMA +#define MSK_RX_INC(x, y) (x) = (x + 2) % y +#define MSK_RX_BUF_CNT (MSK_RX_RING_CNT / 2) +#define MSK_JUMBO_RX_BUF_CNT (MSK_JUMBO_RX_RING_CNT / 2) +#else +#define MSK_RX_INC(x, y) (x) = (x + 1) % y +#define MSK_RX_BUF_CNT MSK_RX_RING_CNT +#define MSK_JUMBO_RX_BUF_CNT MSK_JUMBO_RX_RING_CNT +#endif #define MSK_PCI_BUS 0 #define MSK_PCIX_BUS 1 @@ -2519,6 +2540,7 @@ struct msk_softc { int msk_int_holdoff; int msk_process_limit; int msk_stat_cons; + int msk_stat_count; struct mtx msk_mtx; };