From 2f9e4e8025d489474ae350ac90f08d1879050dc0 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Mon, 20 Aug 2001 00:41:12 +0000 Subject: [PATCH] Limit the amount of KVM reserved for the buffer cache and for swap-meta information. The default limits only affect machines with > 1GB of ram and can be overridden with two new kernel conf variables VM_SWZONE_SIZE_MAX and VM_BCACHE_SIZE_MAX, or with loader variables kern.maxswzone and kern.maxbcache. This has the effect of leaving more KVM available for sizing NMBCLUSTERS and 'maxusers' and should avoid tripups where a sysad adds memory to a machine and then sees the kernel panic on boot due to running out of KVM. Also change the default swap-meta auto-sizing calculation to allocate half of what it was previously allocating. The prior defaults were way too high. Note that we cannot afford to run out of swap-meta structures so we still stay somewhat conservative here. --- sys/amd64/amd64/machdep.c | 7 ++++++- sys/boot/common/loader.8 | 27 +++++++++++++++++++++++++++ sys/conf/options | 2 ++ sys/i386/i386/machdep.c | 7 ++++++- sys/i386/include/param.h | 16 ++++++++++++++++ sys/kern/subr_param.c | 6 ++++++ sys/sys/buf.h | 2 ++ sys/vm/swap_pager.c | 7 +++++-- 8 files changed, 70 insertions(+), 4 deletions(-) diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 489e27787462..7bc1d3dffd56 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -299,7 +299,9 @@ cpu_startup(dummy) * The nominal buffer size (and minimum KVA allocation) is BKVASIZE. * For the first 64MB of ram nominally allocate sufficient buffers to * cover 1/4 of our ram. Beyond the first 64MB allocate additional - * buffers to cover 1/20 of our ram over 64MB. + * buffers to cover 1/20 of our ram over 64MB. When auto-sizing + * the buffer cache we limit the eventual kva reservation to + * maxbcache bytes. * * factor represents the 1/4 x ram conversion.
*/ @@ -312,6 +314,9 @@ cpu_startup(dummy) 16384 / factor); if (physmem_est > 16384) nbuf += (physmem_est - 16384) * 2 / (factor * 5); + + if (maxbcache && nbuf > maxbcache / BKVASIZE) + nbuf = maxbcache / BKVASIZE; } /* diff --git a/sys/boot/common/loader.8 b/sys/boot/common/loader.8 index 730f925ae8d5..2ee850b0d461 100644 --- a/sys/boot/common/loader.8 +++ b/sys/boot/common/loader.8 @@ -409,6 +409,33 @@ This overrides completely the value determined when the kernel was compiled. Modifies .Va VM_KMEM_SIZE . +.It Va kern.maxswzone +Limits the amount of KVM to be used to hold swap +meta information, which directly governs the +maximum amount of swap the system can support. +This value is specified in bytes of KVA space +and defaults to around 70MBytes. Care should be taken +to not reduce this value such that the actual +amount of configured swap exceeds 1/2 the +kernel-supported swap. The default 70MB allows +the kernel to support a maximum of (approximately) +14GB of configured swap. Only mess around with +this parameter if you need to greatly extend the +KVM reservation for other resources such as the +buffer cache or NMBCLUSTERS. Modifies +.Va VM_SWZONE_SIZE_MAX +.It Va kern.maxbcache +Limits the amount of KVM reserved for use by the +buffer cache, specified in bytes. The default +maximum is 200MB. This parameter is used to +prevent the buffer cache from eating too much +KVM in large-memory machine configurations. +Only mess around with this parameter if you need to +greatly extend the KVM reservation for other resources +such as the swap zone or NMBCLUSTERS. Note that +the NBUF parameter will override this limit. +Modifies +.Va VM_BCACHE_SIZE_MAX .It Va machdep.pccard.pcic_irq Overrides the IRQ normally assigned to a PCCARD controller.
Typically the first available interrupt will be allocated, diff --git a/sys/conf/options b/sys/conf/options index 9721faa20624..6e62f52c87cd 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -173,6 +173,8 @@ MAXFILES opt_param.h NBUF opt_param.h NMBCLUSTERS opt_param.h NSFBUFS opt_param.h +VM_BCACHE_SIZE_MAX opt_param.h +VM_SWZONE_SIZE_MAX opt_param.h MAXUSERS # Generic SCSI options. diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index 489e27787462..7bc1d3dffd56 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -299,7 +299,9 @@ cpu_startup(dummy) * The nominal buffer size (and minimum KVA allocation) is BKVASIZE. * For the first 64MB of ram nominally allocate sufficient buffers to * cover 1/4 of our ram. Beyond the first 64MB allocate additional - * buffers to cover 1/20 of our ram over 64MB. + * buffers to cover 1/20 of our ram over 64MB. When auto-sizing + * the buffer cache we limit the eventual kva reservation to + * maxbcache bytes. * * factor represents the 1/4 x ram conversion. */ @@ -312,6 +314,9 @@ cpu_startup(dummy) 16384 / factor); if (physmem_est > 16384) nbuf += (physmem_est - 16384) * 2 / (factor * 5); + + if (maxbcache && nbuf > maxbcache / BKVASIZE) + nbuf = maxbcache / BKVASIZE; } /* diff --git a/sys/i386/include/param.h b/sys/i386/include/param.h index dffc6485be4f..624138944889 100644 --- a/sys/i386/include/param.h +++ b/sys/i386/include/param.h @@ -112,6 +112,22 @@ #define IOPAGES 2 /* pages of i/o permission bitmap */ #define UPAGES 2 /* pages of u-area */ +/* + * Ceiling on amount of swblock kva space. + */ +#ifndef VM_SWZONE_SIZE_MAX +#define VM_SWZONE_SIZE_MAX (70 * 1024 * 1024) +#endif + +/* + * Ceiling on size of buffer cache (really only affects write queueing, + * the VM page cache is not affected). + */ +#ifndef VM_BCACHE_SIZE_MAX +#define VM_BCACHE_SIZE_MAX (200 * 1024 * 1024) +#endif + + /* * Constants related to network buffer management. * MCLBYTES must be no larger than PAGE_SIZE.
diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c index 272ca3c13a19..4f755badf2c0 100644 --- a/sys/kern/subr_param.c +++ b/sys/kern/subr_param.c @@ -72,6 +72,8 @@ int maxfilesperproc; /* per-proc open files limit */ int ncallout; /* maximum # of timer events */ int nbuf; int nswbuf; +int maxswzone; /* max swmeta KVA storage */ +int maxbcache; /* max buffer cache KVA storage */ /* * These have to be allocated somewhere; allocating @@ -114,6 +116,10 @@ init_param(void) /* Cannot be changed after boot */ nbuf = NBUF; TUNABLE_INT_FETCH("kern.nbuf", &nbuf); + maxswzone = VM_SWZONE_SIZE_MAX; + TUNABLE_INT_FETCH("kern.maxswzone", &maxswzone); + maxbcache = VM_BCACHE_SIZE_MAX; + TUNABLE_INT_FETCH("kern.maxbcache", &maxbcache); ncallout = 16 + maxproc + maxfiles; TUNABLE_INT_FETCH("kern.ncallout", &ncallout); } diff --git a/sys/sys/buf.h b/sys/sys/buf.h index b285db226952..30fcd13ac271 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -499,6 +499,8 @@ buf_countdeps(struct buf *bp, int i) #ifdef _KERNEL extern int nbuf; /* The number of buffer headers */ +extern int maxswzone; /* Max KVA for swap structures */ +extern int maxbcache; /* Max KVA for buffer cache */ extern int runningbufspace; extern int buf_maxio; /* nominal maximum I/O for buffer */ extern struct buf *buf; /* The buffer headers. */ diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index f2d606185095..e25a5567cad1 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -323,10 +323,13 @@ swap_pager_swap_init() /* * Initialize our zone. Right now I'm just guessing on the number * we need based on the number of pages in the system. Each swblock - * can hold 16 pages, so this is probably overkill. + * can hold 16 pages, so this is probably overkill. This reservation + * is typically limited to around 70MB by default. 
*/ - n = min(cnt.v_page_count, (kernel_map->max_offset - kernel_map->min_offset) / PAGE_SIZE) * 2; + n = cnt.v_page_count; + if (maxswzone && n > maxswzone / sizeof(struct swblock)) + n = maxswzone / sizeof(struct swblock); n2 = n; do {