From dbb95048da3ac080e2f3f83bcb730fe83661962b Mon Sep 17 00:00:00 2001 From: Marcel Moolenaar Date: Mon, 18 May 2009 18:37:18 +0000 Subject: [PATCH] Add cpu_flush_dcache() for use after non-DMA based I/O so that a possible future I-cache coherency operation can succeed. On ARM, for example, the L1 cache can be (and is) virtually mapped, which means that any I/O that uses temporary mappings will not see the I-cache made coherent. On ia64 similar behaviour has been observed. By flushing the D-cache, execution of binaries backed by md(4) and/or NFS works reliably. For Book-E (powerpc), execution over NFS exhibits SIGILL once in a while as well, though cpu_flush_dcache() hasn't been implemented yet. Doing an explicit D-cache flush as part of the non-DMA based I/O read operation eliminates the need to do it as part of the I-cache coherency operation itself and as such avoids pessimizing the DMA-based I/O read operations for which the D-cache is already flushed/invalidated. It also allows future optimizations whereby the bcopy() followed by the D-cache flush can be integrated in a single operation, which could be implemented using on-chip DMA engines, bypassing the D-cache altogether. 
--- sys/amd64/amd64/machdep.c | 10 ++++++++++ sys/arm/arm/machdep.c | 12 ++++++++++++ sys/dev/md/md.c | 9 ++++++--- sys/i386/i386/machdep.c | 10 ++++++++++ sys/ia64/ia64/machdep.c | 15 +++++++++++++++ sys/mips/mips/machdep.c | 10 ++++++++++ sys/nfs/nfs_common.c | 5 +++-- sys/pc98/pc98/machdep.c | 10 ++++++++++ sys/powerpc/aim/machdep.c | 10 ++++++++++ sys/powerpc/booke/machdep.c | 10 ++++++++++ sys/sparc64/sparc64/machdep.c | 10 ++++++++++ sys/sun4v/sun4v/machdep.c | 10 ++++++++++ sys/sys/systm.h | 1 + 13 files changed, 117 insertions(+), 5 deletions(-) diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index eb1e72247307..6993dea8e584 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -506,6 +506,16 @@ cpu_boot(int howto) { } +/* + * Flush the D-cache for non-DMA I/O so that the I-cache can + * be made coherent later. + */ +void +cpu_flush_dcache(void *ptr, size_t len) +{ + /* Not applicable */ +} + /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) diff --git a/sys/arm/arm/machdep.c b/sys/arm/arm/machdep.c index 0f574c1e46ae..597cdf541063 100644 --- a/sys/arm/arm/machdep.c +++ b/sys/arm/arm/machdep.c @@ -316,6 +316,18 @@ cpu_startup(void *dummy) SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); +/* + * Flush the D-cache for non-DMA I/O so that the I-cache can + * be made coherent later. + */ +void +cpu_flush_dcache(void *ptr, size_t len) +{ + + cpu_dcache_wb_range((uintptr_t)ptr, len); + cpu_l2cache_wb_range((uintptr_t)ptr, len); +} + /* Get current clock frequency for the given cpu id. 
*/ int cpu_est_clockrate(int cpu_id, uint64_t *rate) diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c index 48d48fdeec0d..a03b0784a26e 100644 --- a/sys/dev/md/md.c +++ b/sys/dev/md/md.c @@ -436,10 +436,11 @@ mdstart_malloc(struct md_s *sc, struct bio *bp) if (osp == 0) bzero(dst, sc->sectorsize); else if (osp <= 255) - for (i = 0; i < sc->sectorsize; i++) - dst[i] = osp; - else + memset(dst, osp, sc->sectorsize); + else { bcopy((void *)osp, dst, sc->sectorsize); + cpu_flush_dcache(dst, sc->sectorsize); + } osp = 0; } else if (bp->bio_cmd == BIO_WRITE) { if (sc->flags & MD_COMPRESS) { @@ -491,6 +492,7 @@ mdstart_preload(struct md_s *sc, struct bio *bp) case BIO_READ: bcopy(sc->pl_ptr + bp->bio_offset, bp->bio_data, bp->bio_length); + cpu_flush_dcache(bp->bio_data, bp->bio_length); break; case BIO_WRITE: bcopy(bp->bio_data, sc->pl_ptr + bp->bio_offset, @@ -633,6 +635,7 @@ mdstart_swap(struct md_s *sc, struct bio *bp) break; } bcopy((void *)(sf_buf_kva(sf) + offs), p, len); + cpu_flush_dcache(p, len); } else if (bp->bio_cmd == BIO_WRITE) { if (len != PAGE_SIZE && m->valid != VM_PAGE_BITS_ALL) rv = vm_pager_get_pages(sc->object, &m, 1, 0); diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index a74db11cdc2c..e64bcd2b66c9 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -1113,6 +1113,16 @@ cpu_boot(int howto) { } +/* + * Flush the D-cache for non-DMA I/O so that the I-cache can + * be made coherent later. + */ +void +cpu_flush_dcache(void *ptr, size_t len) +{ + /* Not applicable */ +} + /* Get current clock frequency for the given cpu id. 
*/ int cpu_est_clockrate(int cpu_id, uint64_t *rate) diff --git a/sys/ia64/ia64/machdep.c b/sys/ia64/ia64/machdep.c index 3c6e61d9872b..8ffdcf23ff1f 100644 --- a/sys/ia64/ia64/machdep.c +++ b/sys/ia64/ia64/machdep.c @@ -311,6 +311,21 @@ cpu_boot(int howto) efi_reset_system(); } +void +cpu_flush_dcache(void *ptr, size_t len) +{ + vm_offset_t lim, va; + + va = (uintptr_t)ptr & ~31; + lim = (uintptr_t)ptr + len; + while (va < lim) { + ia64_fc(va); + va += 32; + } + + ia64_srlz_d(); +} + /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) diff --git a/sys/mips/mips/machdep.c b/sys/mips/mips/machdep.c index 9aa3044dc81c..6bd518018e7c 100644 --- a/sys/mips/mips/machdep.c +++ b/sys/mips/mips/machdep.c @@ -200,6 +200,16 @@ cpu_reset(void) platform_reset(); } +/* + * Flush the D-cache for non-DMA I/O so that the I-cache can + * be made coherent later. + */ +void +cpu_flush_dcache(void *ptr, size_t len) +{ + /* TBD */ +} + /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) diff --git a/sys/nfs/nfs_common.c b/sys/nfs/nfs_common.c index 4c0a2c994c9b..faa9302c31d1 100644 --- a/sys/nfs/nfs_common.c +++ b/sys/nfs/nfs_common.c @@ -127,9 +127,10 @@ nfsm_mbuftouio(struct mbuf **mrep, struct uio *uiop, int siz, caddr_t *dpos) (mbufcp, uiocp, xfer); else #endif - if (uiop->uio_segflg == UIO_SYSSPACE) + if (uiop->uio_segflg == UIO_SYSSPACE) { bcopy(mbufcp, uiocp, xfer); - else + cpu_flush_dcache(uiocp, xfer); + } else copyout(mbufcp, uiocp, xfer); left -= xfer; len -= xfer; diff --git a/sys/pc98/pc98/machdep.c b/sys/pc98/pc98/machdep.c index ba3d04796c62..0025a8aa52d5 100644 --- a/sys/pc98/pc98/machdep.c +++ b/sys/pc98/pc98/machdep.c @@ -1050,6 +1050,16 @@ cpu_boot(int howto) { } +/* + * Flush the D-cache for non-DMA I/O so that the I-cache can + * be made coherent later. 
+ */ +void +cpu_flush_dcache(void *ptr, size_t len) +{ + /* Not applicable */ +} + /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) diff --git a/sys/powerpc/aim/machdep.c b/sys/powerpc/aim/machdep.c index 590741ce4cbe..662fd1445420 100644 --- a/sys/powerpc/aim/machdep.c +++ b/sys/powerpc/aim/machdep.c @@ -864,6 +864,16 @@ cpu_boot(int howto) { } +/* + * Flush the D-cache for non-DMA I/O so that the I-cache can + * be made coherent later. + */ +void +cpu_flush_dcache(void *ptr, size_t len) +{ + /* TBD */ +} + void cpu_initclocks(void) { diff --git a/sys/powerpc/booke/machdep.c b/sys/powerpc/booke/machdep.c index 3d3dcadc6749..6d37653fcbc4 100644 --- a/sys/powerpc/booke/machdep.c +++ b/sys/powerpc/booke/machdep.c @@ -556,6 +556,16 @@ fill_fpregs(struct thread *td, struct fpreg *fpregs) return (0); } +/* + * Flush the D-cache for non-DMA I/O so that the I-cache can + * be made coherent later. + */ +void +cpu_flush_dcache(void *ptr, size_t len) +{ + /* TBD */ +} + /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) diff --git a/sys/sparc64/sparc64/machdep.c b/sys/sparc64/sparc64/machdep.c index 4eb8d9c4cb04..164a8b391fea 100644 --- a/sys/sparc64/sparc64/machdep.c +++ b/sys/sparc64/sparc64/machdep.c @@ -742,6 +742,16 @@ cpu_shutdown(void *args) ofw_exit(args); } +/* + * Flush the D-cache for non-DMA I/O so that the I-cache can + * be made coherent later. + */ +void +cpu_flush_dcache(void *ptr, size_t len) +{ + /* TBD */ +} + /* Get current clock frequency for the given CPU ID. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) diff --git a/sys/sun4v/sun4v/machdep.c b/sys/sun4v/sun4v/machdep.c index b7c0869c36c5..79750408c01f 100644 --- a/sys/sun4v/sun4v/machdep.c +++ b/sys/sun4v/sun4v/machdep.c @@ -767,6 +767,16 @@ cpu_shutdown(void *args) hv_mach_exit(0); } +/* + * Flush the D-cache for non-DMA I/O so that the I-cache can + * be made coherent later. 
+ */ +void +cpu_flush_dcache(void *ptr, size_t len) +{ + /* TBD */ +} + /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) diff --git a/sys/sys/systm.h b/sys/sys/systm.h index 31fb750a206a..c20536a01a3c 100644 --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -147,6 +147,7 @@ void panic(const char *, ...) __dead2 __printflike(1, 2); #endif void cpu_boot(int); +void cpu_flush_dcache(void *, size_t); void cpu_rootconf(void); void critical_enter(void); void critical_exit(void);