From 4e612cddb918e81e48897f8c3fff2aa8c048eba1 Mon Sep 17 00:00:00 2001 From: Tijl Coosemans Date: Sun, 2 Nov 2014 11:26:37 +0000 Subject: [PATCH] In agp(4) avoid the need to flush all CPU caches with wbinvd between updating the GTT and flushing the AGP TLB by storing the GTT in write-combining memory. On x86, flushing the AGP TLB is done by an I/O operation or a store to an MMIO register in uncacheable memory. Both cases imply that WC buffers are flushed, so no memory barriers are needed. On powerpc there is no WC memory type. It maps to uncacheable memory, and two stores to uncacheable memory, such as to the GTT and then to an MMIO register, are strongly ordered, so no memory barriers are needed either. MFC after: 1 month --- sys/dev/agp/agp.c | 31 +++++++------------------------ sys/dev/agp/agp_amd.c | 29 ++++++++++++++--------------- sys/dev/agp/agp_amd64.c | 2 -- sys/dev/agp/agp_apple.c | 4 ---- sys/dev/agp/agp_ati.c | 25 +++++++++++++------------ sys/dev/agp/agp_i810.c | 13 ++++++------- sys/dev/agp/agppriv.h | 1 - 7 files changed, 40 insertions(+), 65 deletions(-) diff --git a/sys/dev/agp/agp.c b/sys/dev/agp/agp.c index d0cebb001d9c..566a0d97f9bb 100644 --- a/sys/dev/agp/agp.c +++ b/sys/dev/agp/agp.c @@ -50,6 +50,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include +#include #include #include #include @@ -84,14 +86,6 @@ static devclass_t agp_devclass; /* Helper functions for implementing chipset mini drivers. 
*/ -void -agp_flush_cache() -{ -#if defined(__i386__) || defined(__amd64__) - wbinvd(); -#endif -} - u_int8_t agp_find_caps(device_t dev) { @@ -158,17 +152,16 @@ agp_alloc_gatt(device_t dev) return 0; gatt->ag_entries = entries; - gatt->ag_virtual = contigmalloc(entries * sizeof(u_int32_t), M_AGP, 0, - 0, ~0, PAGE_SIZE, 0); + gatt->ag_virtual = (void *)kmem_alloc_contig(kernel_arena, + entries * sizeof(u_int32_t), M_NOWAIT | M_ZERO, 0, ~0, PAGE_SIZE, + 0, VM_MEMATTR_WRITE_COMBINING); if (!gatt->ag_virtual) { if (bootverbose) device_printf(dev, "contiguous allocation failed\n"); free(gatt, M_AGP); return 0; } - bzero(gatt->ag_virtual, entries * sizeof(u_int32_t)); gatt->ag_physical = vtophys((vm_offset_t) gatt->ag_virtual); - agp_flush_cache(); return gatt; } @@ -176,8 +169,8 @@ agp_alloc_gatt(device_t dev) void agp_free_gatt(struct agp_gatt *gatt) { - contigfree(gatt->ag_virtual, - gatt->ag_entries * sizeof(u_int32_t), M_AGP); + kmem_free(kernel_arena, (vm_offset_t)gatt->ag_virtual, + gatt->ag_entries * sizeof(u_int32_t)); free(gatt, M_AGP); } @@ -280,7 +273,6 @@ agp_free_res(device_t dev) bus_release_resource(dev, SYS_RES_MEMORY, sc->as_aperture_rid, sc->as_aperture); mtx_destroy(&sc->as_lock); - agp_flush_cache(); } int @@ -604,12 +596,6 @@ agp_generic_bind_memory(device_t dev, struct agp_memory *mem, } VM_OBJECT_WUNLOCK(mem->am_obj); - /* - * Flush the cpu cache since we are providing a new mapping - * for these pages. - */ - agp_flush_cache(); - /* * Make sure the chipset gets the new mappings. 
*/ @@ -668,7 +654,6 @@ agp_generic_unbind_memory(device_t dev, struct agp_memory *mem) } VM_OBJECT_WUNLOCK(mem->am_obj); - agp_flush_cache(); AGP_FLUSH_TLB(dev); mem->am_offset = 0; @@ -1040,7 +1025,6 @@ agp_bind_pages(device_t dev, vm_page_t *pages, vm_size_t size, } } - agp_flush_cache(); AGP_FLUSH_TLB(dev); mtx_unlock(&sc->as_lock); @@ -1063,7 +1047,6 @@ agp_unbind_pages(device_t dev, vm_size_t size, vm_offset_t offset) for (i = 0; i < size; i += AGP_PAGE_SIZE) AGP_UNBIND_PAGE(dev, offset + i); - agp_flush_cache(); AGP_FLUSH_TLB(dev); mtx_unlock(&sc->as_lock); diff --git a/sys/dev/agp/agp_amd.c b/sys/dev/agp/agp_amd.c index 2b58a3eff0b4..3023213cdfd2 100644 --- a/sys/dev/agp/agp_amd.c +++ b/sys/dev/agp/agp_amd.c @@ -43,6 +43,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include +#include #include #include #include @@ -92,34 +94,35 @@ agp_amd_alloc_gatt(device_t dev) /* * The AMD751 uses a page directory to map a non-contiguous - * gatt so we don't need to use contigmalloc. - * Malloc individual gatt pages and map them into the page + * gatt so we don't need to use kmem_alloc_contig. + * Allocate individual GATT pages and map them into the page * directory. */ gatt->ag_entries = entries; - gatt->ag_virtual = malloc(entries * sizeof(u_int32_t), - M_AGP, M_NOWAIT); + gatt->ag_virtual = (void *)kmem_alloc_attr(kernel_arena, + entries * sizeof(u_int32_t), M_NOWAIT | M_ZERO, 0, ~0, + VM_MEMATTR_WRITE_COMBINING); if (!gatt->ag_virtual) { if (bootverbose) device_printf(dev, "allocation failed\n"); free(gatt, M_AGP); return 0; } - bzero(gatt->ag_virtual, entries * sizeof(u_int32_t)); /* * Allocate the page directory. 
*/ - gatt->ag_vdir = malloc(AGP_PAGE_SIZE, M_AGP, M_NOWAIT); + gatt->ag_vdir = (void *)kmem_alloc_attr(kernel_arena, AGP_PAGE_SIZE, + M_NOWAIT | M_ZERO, 0, ~0, VM_MEMATTR_WRITE_COMBINING); if (!gatt->ag_vdir) { if (bootverbose) device_printf(dev, "failed to allocate page directory\n"); - free(gatt->ag_virtual, M_AGP); + kmem_free(kernel_arena, (vm_offset_t)gatt->ag_virtual, + entries * sizeof(u_int32_t)); free(gatt, M_AGP); return 0; } - bzero(gatt->ag_vdir, AGP_PAGE_SIZE); gatt->ag_pdir = vtophys((vm_offset_t) gatt->ag_vdir); if(bootverbose) @@ -158,19 +161,15 @@ agp_amd_alloc_gatt(device_t dev) gatt->ag_vdir[i + pdir_offset] = pa | 1; } - /* - * Make sure the chipset can see everything. - */ - agp_flush_cache(); - return gatt; } static void agp_amd_free_gatt(struct agp_amd_gatt *gatt) { - free(gatt->ag_virtual, M_AGP); - free(gatt->ag_vdir, M_AGP); + kmem_free(kernel_arena, (vm_offset_t)gatt->ag_vdir, AGP_PAGE_SIZE); + kmem_free(kernel_arena, (vm_offset_t)gatt->ag_virtual, + gatt->ag_entries * sizeof(u_int32_t)); free(gatt, M_AGP); } diff --git a/sys/dev/agp/agp_amd64.c b/sys/dev/agp/agp_amd64.c index 5423d5a9c25f..56784f0ee543 100644 --- a/sys/dev/agp/agp_amd64.c +++ b/sys/dev/agp/agp_amd64.c @@ -241,8 +241,6 @@ agp_amd64_attach(device_t dev) 4); } - agp_flush_cache(); - return (0); } diff --git a/sys/dev/agp/agp_apple.c b/sys/dev/agp/agp_apple.c index 263631d7391c..cc3723b321d6 100644 --- a/sys/dev/agp/agp_apple.c +++ b/sys/dev/agp/agp_apple.c @@ -224,8 +224,6 @@ agp_apple_bind_page(device_t dev, vm_offset_t offset, vm_offset_t physical) return EINVAL; sc->gatt->ag_virtual[offset >> AGP_PAGE_SHIFT] = physical; - __asm __volatile("dcbst 0,%0; sync" :: - "r"(&sc->gatt->ag_virtual[offset >> AGP_PAGE_SHIFT]) : "memory"); return (0); } @@ -238,8 +236,6 @@ agp_apple_unbind_page(device_t dev, vm_offset_t offset) return EINVAL; sc->gatt->ag_virtual[offset >> AGP_PAGE_SHIFT] = 0; - __asm __volatile("dcbst 0,%0; sync" :: - "r"(&sc->gatt->ag_virtual[offset >> 
AGP_PAGE_SHIFT]) : "memory"); return (0); } diff --git a/sys/dev/agp/agp_ati.c b/sys/dev/agp/agp_ati.c index c8e35ef50888..eaf0efa80f97 100644 --- a/sys/dev/agp/agp_ati.c +++ b/sys/dev/agp/agp_ati.c @@ -45,6 +45,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include +#include #include #include #include @@ -129,20 +131,23 @@ agp_ati_alloc_gatt(device_t dev) /* Alloc the GATT -- pointers to pages of AGP memory */ sc->ag_entries = entries; - sc->ag_virtual = malloc(entries * sizeof(u_int32_t), M_AGP, - M_NOWAIT | M_ZERO); + sc->ag_virtual = (void *)kmem_alloc_attr(kernel_arena, + entries * sizeof(u_int32_t), M_NOWAIT | M_ZERO, 0, ~0, + VM_MEMATTR_WRITE_COMBINING); if (sc->ag_virtual == NULL) { if (bootverbose) - device_printf(dev, "aperture allocation failed\n"); + device_printf(dev, "GATT allocation failed\n"); return ENOMEM; } /* Alloc the page directory -- pointers to each page of the GATT */ - sc->ag_vdir = malloc(AGP_PAGE_SIZE, M_AGP, M_NOWAIT | M_ZERO); + sc->ag_vdir = (void *)kmem_alloc_attr(kernel_arena, AGP_PAGE_SIZE, + M_NOWAIT | M_ZERO, 0, ~0, VM_MEMATTR_WRITE_COMBINING); if (sc->ag_vdir == NULL) { if (bootverbose) device_printf(dev, "pagedir allocation failed\n"); - free(sc->ag_virtual, M_AGP); + kmem_free(kernel_arena, (vm_offset_t)sc->ag_virtual, + entries * sizeof(u_int32_t)); return ENOMEM; } sc->ag_pdir = vtophys((vm_offset_t)sc->ag_vdir); @@ -158,11 +163,6 @@ agp_ati_alloc_gatt(device_t dev) sc->ag_vdir[apbase_offset + i] = pa | 1; } - /* - * Make sure the chipset can see everything. 
- */ - agp_flush_cache(); - return 0; } @@ -264,8 +264,9 @@ agp_ati_detach(device_t dev) temp = pci_read_config(dev, apsize_reg, 4); pci_write_config(dev, apsize_reg, temp & ~1, 4); - free(sc->ag_vdir, M_AGP); - free(sc->ag_virtual, M_AGP); + kmem_free(kernel_arena, (vm_offset_t)sc->ag_vdir, AGP_PAGE_SIZE); + kmem_free(kernel_arena, (vm_offset_t)sc->ag_virtual, + sc->ag_entries * sizeof(u_int32_t)); bus_release_resource(dev, SYS_RES_MEMORY, ATI_GART_MMADDR, sc->regs); agp_free_res(dev); diff --git a/sys/dev/agp/agp_i810.c b/sys/dev/agp/agp_i810.c index dbf34e326583..9afd201d2054 100644 --- a/sys/dev/agp/agp_i810.c +++ b/sys/dev/agp/agp_i810.c @@ -66,6 +66,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include +#include #include #include #include @@ -1388,17 +1390,16 @@ agp_i810_install_gatt(device_t dev) sc->dcache_size = 0; /* According to the specs the gatt on the i810 must be 64k. */ - sc->gatt->ag_virtual = contigmalloc(64 * 1024, M_AGP, 0, 0, ~0, - PAGE_SIZE, 0); + sc->gatt->ag_virtual = (void *)kmem_alloc_contig(kernel_arena, + 64 * 1024, M_NOWAIT | M_ZERO, 0, ~0, PAGE_SIZE, + 0, VM_MEMATTR_WRITE_COMBINING); if (sc->gatt->ag_virtual == NULL) { if (bootverbose) device_printf(dev, "contiguous allocation failed\n"); return (ENOMEM); } - bzero(sc->gatt->ag_virtual, sc->gatt->ag_entries * sizeof(u_int32_t)); sc->gatt->ag_physical = vtophys((vm_offset_t)sc->gatt->ag_virtual); - agp_flush_cache(); /* Install the GATT. 
*/ bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, sc->gatt->ag_physical | 1); @@ -1497,7 +1498,7 @@ agp_i810_deinstall_gatt(device_t dev) sc = device_get_softc(dev); bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, 0); - contigfree(sc->gatt->ag_virtual, 64 * 1024, M_AGP); + kmem_free(kernel_arena, (vm_offset_t)sc->gatt->ag_virtual, 64 * 1024); } static void @@ -2052,7 +2053,6 @@ agp_i810_bind_memory(device_t dev, struct agp_memory *mem, vm_offset_t offset) sc->match->driver->install_gtt_pte(dev, (offset + i) >> AGP_PAGE_SHIFT, mem->am_physical + i, 0); } - agp_flush_cache(); mem->am_offset = offset; mem->am_is_bound = 1; mtx_unlock(&sc->agp.as_lock); @@ -2093,7 +2093,6 @@ agp_i810_unbind_memory(device_t dev, struct agp_memory *mem) sc->match->driver->install_gtt_pte(dev, (mem->am_offset + i) >> AGP_PAGE_SHIFT, 0, 0); } - agp_flush_cache(); mem->am_is_bound = 0; mtx_unlock(&sc->agp.as_lock); return (0); diff --git a/sys/dev/agp/agppriv.h b/sys/dev/agp/agppriv.h index 00e7dc13b8d1..2436af29d57a 100644 --- a/sys/dev/agp/agppriv.h +++ b/sys/dev/agp/agppriv.h @@ -83,7 +83,6 @@ struct agp_gatt { vm_offset_t ag_physical; }; -void agp_flush_cache(void); u_int8_t agp_find_caps(device_t dev); struct agp_gatt *agp_alloc_gatt(device_t dev); void agp_set_aperture_resource(device_t dev, int rid);