Add cache handling code for sparc64.

svn path=/head/; revision=86221
2024-12-17 10:26:15 +00:00 · 2001-11-09 19:12:06 +00:00 · 2001-11-09 19:12:06 +00:00 · 3c78eb652c · 2020-12-20 02:59:44 +00:00
commit 3c78eb652c
parent 8838ea170e
2 changed files with 531 additions and 0 deletions
--- a/sys/sparc64/include/cache.h
+++ b/sys/sparc64/include/cache.h
@ -0,0 +1,126 @@
 /*
 * Copyright (c) 1996
 * 	The President and Fellows of Harvard College. All rights reserved.
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Aaron Brown and
 *	Harvard University.
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)cache.h	8.1 (Berkeley) 6/11/93
 *	from: NetBSD: cache.h,v 1.3 2000/08/01 00:28:02 eeh Exp
 *
 * $FreeBSD$
 */
 #ifndef _MACHINE_CACHE_H_
 #define _MACHINE_CACHE_H_
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <dev/ofw/openfirm.h>
 /*
 * Cache diagnostic access definitions.
 */
 /* ASI offsets for I$ diagnostic access */
 #define	ICDA_SET_SHIFT		13
 #define	ICDA_SET_MASK		(1UL << 13)
 #define	ICDA_SET(a)		(((a) << ICDA_SET_SHIFT) & ICDA_SET_MASK)
 /* I$ tag/valid format */
 #define	ICDT_TAG_SHIFT		8
 #define	ICDT_TAG_BITS		28
 #define	ICDT_TAG_MASK		(((1UL << ICDT_TAG_BITS) - 1) << ICDT_TAG_SHIFT)
 #define	ICDT_TAG(x)		(((x) & ICDT_TAG_MASK) >> ICDT_TAG_SHIFT)
 #define	ICDT_VALID		(1UL << 36)
 /* D$ tag/valid format */
 #define	DCDT_TAG_SHIFT		2
 #define	DCDT_TAG_BITS		28
 #define	DCDT_TAG_MASK		(((1UL << DCDT_TAG_BITS) - 1) << DCDT_TAG_SHIFT)
 #define	DCDT_TAG(x)		(((x) & DCDT_TAG_MASK) >> DCDT_TAG_SHIFT)
 #define	DCDT_VALID_BITS		2
 #define	DCDT_VALID_MASK		((1UL << DCDT_VALID_BITS) - 1)
 /* E$ ASI_ECACHE_W/ASI_ECACHE_R address flags */
 #define	ECDA_DATA		(1UL << 39)
 #define	ECDA_TAG		(1UL << 40)
 /* E$ tag/state/parity format */
 #define	ECDT_TAG_BITS		13
 #define	ECDT_TAG_SIZE		(1UL << ECDT_TAG_BITS)
 #define	ECDT_TAG_MASK		(ECDT_TAG_SIZE - 1)
 /*
 * Do two virtual addresses (at which the same page is mapped) form and illegal
 * alias in D$? XXX: should use cache.dc_size here.
 */
 #define	DCACHE_BOUNDARY		0x4000
 #define	DCACHE_BMASK		(DCACHE_BOUNDARY - 1)
 #define	CACHE_BADALIAS(v1, v2) \
 	(((v1) & DCACHE_BMASK) != ((v2) & DCACHE_BMASK))
 /*
 * Routines for dealing with the cache.
 */
 void	cache_init __P((phandle_t));		/* turn it on */
 void	icache_flush __P((vm_offset_t, vm_offset_t));
 void	icache_inval_phys __P((vm_offset_t, vm_offset_t));
 void	dcache_flush __P((vm_offset_t, vm_offset_t));
 void	dcache_inval __P((pmap_t, vm_offset_t, vm_offset_t));
 void	dcache_blast __P((void));
 void	ecache_flush __P((vm_offset_t, vm_offset_t));
 #if 0
 void	ecache_inval_phys __P((vm_offset_t, vm_offset_t));
 #endif
 /*
 * Cache control information.
 */
 struct cacheinfo {
 	int	c_enabled;		/* true => cache is enabled */
 	int 	ic_size;		/* instruction cache */
 	int	ic_l2set;
 	int 	ic_assoc;
 	int 	ic_linesize;
 	int 	dc_size;		/* data cache */
 	int	dc_l2size;
 	int 	dc_assoc;
 	int 	dc_linesize;
 	int	ec_size;		/* external cache info */
 	int 	ec_assoc;
 	int	ec_l2set;
 	int	ec_linesize;
 	int	ec_l2linesize;
 };
 #endif	/* !_MACHINE_CACHE_H_ */
--- a/sys/sparc64/sparc64/cache.c
+++ b/sys/sparc64/sparc64/cache.c
@ -0,0 +1,405 @@
 /*
 * Copyright (c) 1996
 *	The President and Fellows of Harvard College. All rights reserved.
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 *	This product includes software developed by Harvard University.
 *	This product includes software developed by the University of
 *	California, Lawrence Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Aaron Brown and
 *	Harvard University.
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
 /*-
 * Copyright (c) 2001 by Thomas Moestl <tmm@FreeBSD.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *	from: @(#)cache.c	8.2 (Berkeley) 10/30/93
 *	from: NetBSD: cache.c,v 1.5 2000/12/06 01:47:50 mrg Exp
 *
 * $FreeBSD$
 */
 /*
 * Cache routines.
 *
 * UltraSPARCs have an virtually indexed, physically tagged (VIPT) level 1 data
 * cache (D$) and physically indexed, physically tagged (PIPT) level 1
 * instruction (I$) and Level 2 (E$) caches.
 * D$ is directly mapped, I$ is pseudo 2-way associative. The Level 2 cache (E$)
 * is documented to be directly mapped on the UltraSPARC IIi, but there are
 * apparently models (using the IIe version) that have a 4-way associative E$.
 *
 * D$ uses a write-through model, while E$ uses write-back and is
 * write-allocating. The lines present in D$ are forced to be a subset of those
 * in E$.
 * This means that lines that are present in D$ always have an identical
 * corresponding (sub-) line in E$.
 *
 * The term "main memory" is used in the following to refer to the non-cache
 * memory as well as to memory-mapped device i/o space.
 *
 * There are 3 documented ways to flush the D$ and E$ caches:
 * - displacement flushing (a sequence of loads of addresses that alias to
 *   to-be-flushed ones in the caches). This only works for directly mapped
 *   caches, and is recommended to flush D$ and E$ in the IIi manual. It is not
 *   used to flush E$ because of the aforementioned models that have a
 *   multiple-associative E$. Displacement flushing invalidates the cache
 *   entries and writes modified lines back to main memory.
 * - diagnostic acceses can be used to invalidate cache pages. All lines
 *   are discarded, which means that changes in D$/E$ that have not been
 *   committed to main memory are lost.
 * - block-commit stores. Those use the block transfer ASIs to load a
 *   64-byte block to a set of FP registers and store them back using a
 *   special ASI that will cause the data to be immediately committed to main
 *   memory. This method has the same properties as the first method, but
 *   (hopefully) works regardless of the associativity of the caches. It is
 *   expected to be slow.
 *
 * I$ can be handled using the flush instruction.
 *
 * Some usage guidelines:
 *
 * The inval functions are variants of the flush ones that discard modified
 * cache lines.
 * PCI DMA transactions are cache-coherent and do not require flushing
 * before DMA reads or after DMA writes. It is unclear from the manual
 * how far this applies to UPA transactions.
 *
 * icache_flush(): needed before code that has been written to memory is
 *	executed, because I$ is not necessarily consistent with D$, E$, or
 *	main memory. An exception is that I$ snoops DMA transfers, so no
 *	flush is required after to-be-executed data has been fetched this way.
 * icache_inval_phys(): has roughly same effect as icache_flush() since there
 *	are no writes to I$.
 *
 * dcache_flush(): required when a page mapping is changed from cacheable to
 *	noncacheable, or to resolve illegal aliases. Both cases should happen
 *	seldom. Mapping address changes do not require this, since D$ is VIPT.
 * dcache_inval(): has roughly same effect as dcache_flush() since D$ is
 *	write-through.
 * dcache_blast(): discards all lines in D$.
 *
 * ecache_flush(): needed to commit modified lines to main memory, and to make
 *	sure that no stale data is used when the main memory has changed without
 *	the cache controller noticing. This is e.g. needed for device i/o space.
 *	It is usually better to use a non-cacheable mapping in this case.
 *	ecache_flush() is guaranteed to also flush the relevant addresses out of
 *	D$.
 * ecache_inval_phys():  like ecache_flush(), but invalidates a physical range
 *	in the cache. This function is usually dangerous and should not be used.
 *
 * All operations have a line size granularity!
 *
 * All flush methods tend to be expensive, so unnecessary flushes should be
 * avoided.
 */
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/systm.h>
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <machine/cache.h>
 #include <machine/cpufunc.h>
 #include <machine/fp.h>
 #include <machine/pcb.h>
 #include <machine/pmap.h>
 #include <machine/tte.h>
 #include <machine/vmparam.h>
 static struct cacheinfo cache;
 #define	CDIAG_CLR(asi, addr) \
 	__asm __volatile("stxa %%g0, [%0] %1" : : "r" (addr), "I" (asi))
 /* Read to %g0, needed for E$ access. */
 #define	CDIAG_RDG0(asi, addr) \
 	__asm __volatile("ldxa [%0] %1, %%g0" : : "r" (addr), "I" (asi))
 #define	CDIAG_RD(asi, addr, r) \
 	__asm __volatile("ldxa [%1] %2, %0" : "=r" (r) : "r" (addr), "I" (asi))
 /* Sigh. I$ diagnostic registers want ldda. */
 #define	ICDIAG_RD(asi, addr, r) \
 	__asm __volatile("ldda [%1] %2, %%o4; mov %%o5, %0" : "=r" (r) : \
 	    "r" (addr), "I" (asi) : "%o4", "%o5");
 #define	OF_GET(h, n, v)	OF_getprop((h), (n), &(v), sizeof(v))
 /*
 * Fill in the cache parameters using the cpu node.
 */
 void
 cache_init(phandle_t node)
 {
 	u_long set;
 	if (OF_GET(node, "icache-size", cache.ic_size) == -1 ||
 	    OF_GET(node, "icache-line-size", cache.ic_linesize) == -1 ||
 	    OF_GET(node, "icache-associativity", cache.ic_assoc) == -1 ||
 	    OF_GET(node, "dcache-size", cache.dc_size) == -1 ||
 	    OF_GET(node, "dcache-line-size", cache.dc_linesize) == -1 ||
 	    OF_GET(node, "dcache-associativity", cache.dc_assoc) == -1 ||
 	    OF_GET(node, "ecache-size", cache.ec_size) == -1 ||
 	    OF_GET(node, "ecache-line-size", cache.ec_linesize) == -1 ||
 	    OF_GET(node, "ecache-associativity", cache.ec_assoc) == -1)
 		panic("cache_init: could not retrieve cache parameters");
 	set = cache.ic_size / cache.ic_assoc;
 	cache.ic_l2set = ffs(set) - 1;
 	if ((set & ~(1UL << cache.ic_l2set)) != 0)
 		panic("cache_init: I$ set size not a power of 2");
 	cache.dc_l2size = ffs(cache.dc_size) - 1;
 	if ((cache.dc_size & ~(1UL << cache.dc_l2size)) != 0)
 		panic("cache_init: D$ size not a power of 2");
 	if (cache.dc_assoc != 1)
 		panic("cache_init: D$ is not directly mapped!");
 	set = cache.ec_size / cache.ec_assoc;
 	cache.ec_l2set = ffs(set) - 1;
 	if ((set & ~(1UL << cache.ec_l2set)) != 0)
 		panic("cache_init: E$ set size not a power of 2");
 	cache.c_enabled = 1; /* enable cache flushing */
 }
 /* Flush a range of addresses from I$ using the flush instruction. */
 void
 icache_flush(vm_offset_t start, vm_offset_t end)
 {
 	char *p, *ep;
 	int ls;
 	if (!cache.c_enabled)
 		return;
 	ls = cache.ic_linesize;
 	ep = (char *)ulmin(end, start + cache.ic_size);
 	for (p = (char *)start; p < ep; p += ls)
 		flush(p);
 }
 /*
 * Blast a I$ physical range using diagnostic accesses.
 * NOTE: there is a race between checking the tag and invalidating it. It
 * cannot be closed by disabling interrupts, since the fetch for the next
 * instruction may be in that line, so we don't even bother.
 * Since blasting a line does not discard data, this has no ill effect except
 * a minor slowdown.
 */
 void
 icache_inval_phys(vm_offset_t start, vm_offset_t end)
 {
 	vm_offset_t addr, ica;
 	u_long tag;
 	u_long j;
 	if (!cache.c_enabled)
 		return;
 	for (addr = start & ~(cache.ic_linesize - 1); addr <= end;
 	     addr += cache.ic_linesize) {
 		for (j = 0; j < 2; j++) {
 			ica = (addr & (cache.ic_size - 1)) | ICDA_SET(j);
 			ICDIAG_RD(ASI_ICACHE_TAG, ica, tag);
 			if (ICDT_TAG(tag) != addr >> cache.ic_l2set)
 				continue;
 			CDIAG_CLR(ASI_ICACHE_TAG, ica);
 		}
 	}
 }
 /*
 * Flush a range of addresses from D$ using displacement flushes. This does
 * not necessarily flush E$, because we do not take care of flushing the
 * correct physical colors and E$ may not be directly mapped.
 */
 void
 dcache_flush(vm_offset_t start, vm_offset_t end)
 {
 	int j;
 	vm_offset_t baseoff;
 	u_long i, mask;
 	char *kp;
 	if (!cache.c_enabled)
 		return;
 	mask = cache.dc_size - 1;
 	/* No need to flush lines more than once. */
 	baseoff = start & mask;
 	/*
 	 * Use a locked page for flushing. D$ should be smaller than 4M, which
 	 * is somewhat likely...
 	 */
 	kp = (char *)KERNBASE;
 	j = 0;
 	for (i = 0; i <= ulmin((end - start), cache.dc_size);
 	     i += cache.dc_linesize)
 		j += kp[(baseoff + i) & mask];
 }
 /*
 * Blast a D$ range using diagnostic accesses.
 * This has the same (harmless) races as icache_blast().
 * Assumes a page in the kernel map.
 */
 void
 dcache_inval(pmap_t pmap, vm_offset_t start, vm_offset_t end)
 {
 	vm_offset_t va, pa, offs, dca;
 	u_long tag;
 	if (!cache.c_enabled)
 		return;
 	for (va = start & ~(cache.dc_linesize - 1); va <= end;
 	     va = (va + PAGE_SIZE_MIN) & ~PAGE_MASK_MIN) {
 		if ((pa = pmap_extract(pmap, va)) == 0)
 			continue;
 		for (offs = start & PAGE_MASK_MIN;
 		     offs < ulmin(PAGE_SIZE_MIN, end - va + 1);
 		     offs += cache.dc_linesize) {
 			dca = (va + offs) & (cache.dc_size - 1);
 			CDIAG_RD(ASI_DCACHE_TAG, dca, tag);
 			if (DCDT_TAG(tag) != (pa + offs) >> PAGE_SHIFT_MIN)
 				continue;
 			CDIAG_CLR(ASI_DCACHE_TAG, dca);
 		}
 	}
 }
 /* Discard all lines in D$. */
 void
 dcache_blast()
 {
 	vm_offset_t dca;
 	if (!cache.c_enabled)
 		return;
 	for (dca = 0; dca < cache.dc_size; dca += cache.dc_linesize)
 		CDIAG_CLR(ASI_DCACHE_TAG, dca);
 }
 /* Flush a E$ physical range using block commit stores. */
 void
 ecache_flush(vm_offset_t start, vm_offset_t end)
 {
 	vm_offset_t addr;
 	if (!cache.c_enabled)
 		return;
 	/* XXX: not needed in all cases, provide a wrapper in fp.c */
 	savefpctx(&curthread->td_pcb->pcb_fpstate);
 	wr(fprs, 0, FPRS_FEF);
 	for (addr = start & ~(cache.ec_linesize - 1); addr <= end;
 	     addr += cache.ec_linesize) {
 		__asm __volatile("ldda [%0] %1, %%f0; membar #Sync; "
 		    "stda %%f0, [%0] %2" : : "r" (addr), "I" (ASI_BLK_S),
 		    "I" (ASI_BLK_COMMIT_S));
 	}
 	membar(Sync);
 	restorefpctx(&curthread->td_pcb->pcb_fpstate);
 }
 #if 0
 /*
 * Blast a E$ range using diagnostic accesses.
 * This is disabled: it suffers from the same races as dcache_blast() and
 * icache_blast_phys(), but they may be fatal here because blasting an E$ line
 * can discard modified data.
 * There is no really use for this anyway.
 */
 void
 ecache_inval_phys(vm_offset_t start, vm_offset_t end)
 {
 	vm_offset_t addr, eca;
 	critical_t c;
 	u_long tag, j;
 	if (!cache.c_enabled)
 		return;
 	for (addr = start & ~(cache.ec_linesize - 1); addr <= end;
 	     addr += cache.ec_linesize) {
 		for (j = 0; j < cache.ec_assoc; j++) {
 			/* XXX: guesswork... */
 			eca = (addr & (cache.ec_size - 1)) |
 			    (j << (cache.ec_l2set));
 			c = critical_enter();
 			/*
 			 * Retrieve the tag:
 			 * A read from the appropriate VA in ASI_ECACHE_R
 			 * will transfer the tag from the tag RAM to the
 			 * data register (ASI_ECACHE_TAG_DATA, VA 0).
 			 */
 			CDIAG_RDG0(ASI_ECACHE_R, ECDA_TAG | eca);
 			CDIAG_RD(ASI_ECACHE_TAG_DATA, 0, tag);
 			if ((addr & ~cache.ec_size) >> cache.ec_l2set ==
 			    (tag & ECDT_TAG_MASK)) {
 				/*
 				 * Clear. Works like retrieving the tag, but
 				 * the other way round.
 				 */
 				CDIAG_CLR(ASI_ECACHE_TAG_DATA, 0);
 				CDIAG_CLR(ASI_ECACHE_W, ECDA_TAG | eca);
 			}
 		}
 	}
 }
 #endif