mirror of
https://git.FreeBSD.org/src.git
synced 2025-01-20 15:43:16 +00:00
Implement the concept of the unmapped VMIO buffers, i.e. buffers which
do not map the b_pages pages into buffer_map KVA. The use of unmapped buffers eliminates the need to perform a TLB shootdown for the mapping on buffer creation and reuse, greatly reducing the number of IPIs for shootdown on big-SMP machines and eliminating up to 25-30% of the system time on i/o intensive workloads. An unmapped buffer should be explicitly requested by the consumer with the GB_UNMAPPED flag. For an unmapped buffer, no KVA reservation is performed at all. With the GB_KVAALLOC flag, the consumer may request an unmapped buffer which does have a KVA reservation, so that it can map the buffer manually without recursing into the buffer cache and blocking. When a mapped buffer is requested and an unmapped buffer already exists, the cache performs an upgrade, possibly reusing the KVA reservation. An unmapped buffer is translated into an unmapped bio in g_vfs_strategy(). An unmapped bio carries a pointer to the vm_page_t array, an offset and a length instead of the data pointer. The provider which processes the bio should explicitly declare its readiness to accept unmapped bios; otherwise the g_down geom thread performs a transient upgrade of the bio request by mapping the pages into the new bio_transient_map KVA submap. The bio_transient_map submap claims up to 10% of the buffer map, and the total buffer_map + bio_transient_map KVA usage stays the same. Still, it can be manually tuned with the kern.bio_transient_maxcnt tunable, in units of transient mappings. Eventually, the bio_transient_map could be removed once all geom classes and drivers can accept unmapped i/o requests. Unmapped support can be turned off with the vfs.unmapped_buf_allowed tunable; disabling it makes buffer (or cluster) creation requests ignore the GB_UNMAPPED and GB_KVAALLOC flags. Unmapped buffers are only enabled by default on the architectures where pmap_copy_page() was implemented and tested. In the rework, filesystem metadata is no longer subject to the maxbufspace limit.
Since the metadata buffers are always mapped, they still have to fit into the buffer map, which provides a reasonable (but practically unreachable) upper bound on it. The non-metadata buffer allocations, both mapped and unmapped, are accounted against maxbufspace, as before. Effectively, this means that maxbufspace is enforced on mapped and unmapped buffers separately. The pre-patch bufspace limiting code did not work, because buffer_map fragmentation does not allow the limit to be reached. At Jeff Roberson's request, the getnewbuf() function was split into smaller single-purpose functions. Sponsored by: The FreeBSD Foundation Discussed with: jeff (previous version) Tested by: pho, scottl (previous version), jhb, bf MFC after: 2 weeks
This commit is contained in:
parent
093012686d
commit
ee75e7de7b
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=248508
@ -4235,6 +4235,8 @@ pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
|
||||
pagecopy((void *)src, (void *)dst);
|
||||
}
|
||||
|
||||
int unmapped_buf_allowed = 1;
|
||||
|
||||
void
|
||||
pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
|
||||
vm_offset_t b_offset, int xfersize)
|
||||
|
@ -3312,6 +3312,8 @@ pmap_copy_page_generic(vm_paddr_t src, vm_paddr_t dst)
|
||||
mtx_unlock(&cmtx);
|
||||
}
|
||||
|
||||
int unmapped_buf_allowed = 1;
|
||||
|
||||
void
|
||||
pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
|
||||
vm_offset_t b_offset, int xfersize)
|
||||
|
@ -4428,6 +4428,8 @@ pmap_copy_page(vm_page_t src, vm_page_t dst)
|
||||
#endif
|
||||
}
|
||||
|
||||
int unmapped_buf_allowed = 1;
|
||||
|
||||
void
|
||||
pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
|
||||
vm_offset_t b_offset, int xfersize)
|
||||
|
@ -205,6 +205,7 @@ struct g_provider {
|
||||
u_int flags;
|
||||
#define G_PF_WITHER 0x2
|
||||
#define G_PF_ORPHAN 0x4
|
||||
#define G_PF_ACCEPT_UNMAPPED 0x8
|
||||
|
||||
/* Two fields for the implementing class to use */
|
||||
void *private;
|
||||
|
@ -1,6 +1,7 @@
|
||||
/*-
|
||||
* Copyright (c) 2002 Poul-Henning Kamp
|
||||
* Copyright (c) 2002 Networks Associates Technology, Inc.
|
||||
* Copyright (c) 2013 The FreeBSD Foundation
|
||||
* All rights reserved.
|
||||
*
|
||||
* This software was developed for the FreeBSD Project by Poul-Henning Kamp
|
||||
@ -8,6 +9,9 @@
|
||||
* under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
|
||||
* DARPA CHATS research program.
|
||||
*
|
||||
* Portions of this software were developed by Konstantin Belousov
|
||||
* under sponsorship from the FreeBSD Foundation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
@ -44,6 +48,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/ktr.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/stack.h>
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
#include <sys/errno.h>
|
||||
#include <geom/geom.h>
|
||||
@ -51,6 +56,13 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/devicestat.h>
|
||||
|
||||
#include <vm/uma.h>
|
||||
#include <vm/vm.h>
|
||||
#include <vm/vm_param.h>
|
||||
#include <vm/vm_kern.h>
|
||||
#include <vm/vm_page.h>
|
||||
#include <vm/vm_object.h>
|
||||
#include <vm/vm_extern.h>
|
||||
#include <vm/vm_map.h>
|
||||
|
||||
static struct g_bioq g_bio_run_down;
|
||||
static struct g_bioq g_bio_run_up;
|
||||
@ -180,12 +192,17 @@ g_clone_bio(struct bio *bp)
|
||||
/*
|
||||
* BIO_ORDERED flag may be used by disk drivers to enforce
|
||||
* ordering restrictions, so this flag needs to be cloned.
|
||||
* BIO_UNMAPPED should be inherited, to properly indicate
|
||||
* which way the buffer is passed.
|
||||
* Other bio flags are not suitable for cloning.
|
||||
*/
|
||||
bp2->bio_flags = bp->bio_flags & BIO_ORDERED;
|
||||
bp2->bio_flags = bp->bio_flags & (BIO_ORDERED | BIO_UNMAPPED);
|
||||
bp2->bio_length = bp->bio_length;
|
||||
bp2->bio_offset = bp->bio_offset;
|
||||
bp2->bio_data = bp->bio_data;
|
||||
bp2->bio_ma = bp->bio_ma;
|
||||
bp2->bio_ma_n = bp->bio_ma_n;
|
||||
bp2->bio_ma_offset = bp->bio_ma_offset;
|
||||
bp2->bio_attribute = bp->bio_attribute;
|
||||
/* Inherit classification info from the parent */
|
||||
bp2->bio_classifier1 = bp->bio_classifier1;
|
||||
@ -210,11 +227,15 @@ g_duplicate_bio(struct bio *bp)
|
||||
struct bio *bp2;
|
||||
|
||||
bp2 = uma_zalloc(biozone, M_WAITOK | M_ZERO);
|
||||
bp2->bio_flags = bp->bio_flags & BIO_UNMAPPED;
|
||||
bp2->bio_parent = bp;
|
||||
bp2->bio_cmd = bp->bio_cmd;
|
||||
bp2->bio_length = bp->bio_length;
|
||||
bp2->bio_offset = bp->bio_offset;
|
||||
bp2->bio_data = bp->bio_data;
|
||||
bp2->bio_ma = bp->bio_ma;
|
||||
bp2->bio_ma_n = bp->bio_ma_n;
|
||||
bp2->bio_ma_offset = bp->bio_ma_offset;
|
||||
bp2->bio_attribute = bp->bio_attribute;
|
||||
bp->bio_children++;
|
||||
#ifdef KTR
|
||||
@ -575,6 +596,83 @@ g_io_deliver(struct bio *bp, int error)
|
||||
return;
|
||||
}
|
||||
|
||||
SYSCTL_DECL(_kern_geom);
|
||||
|
||||
static long transient_maps;
|
||||
SYSCTL_LONG(_kern_geom, OID_AUTO, transient_maps, CTLFLAG_RD,
|
||||
&transient_maps, 0,
|
||||
"Total count of the transient mapping requests");
|
||||
u_int transient_map_retries = 10;
|
||||
SYSCTL_UINT(_kern_geom, OID_AUTO, transient_map_retries, CTLFLAG_RW,
|
||||
&transient_map_retries, 0,
|
||||
"Max count of retries used before giving up on creating transient map");
|
||||
int transient_map_hard_failures;
|
||||
SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_hard_failures, CTLFLAG_RD,
|
||||
&transient_map_hard_failures, 0,
|
||||
"Failures to establish the transient mapping due to retry attempts "
|
||||
"exhausted");
|
||||
int transient_map_soft_failures;
|
||||
SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_soft_failures, CTLFLAG_RD,
|
||||
&transient_map_soft_failures, 0,
|
||||
"Count of retried failures to establish the transient mapping");
|
||||
int inflight_transient_maps;
|
||||
SYSCTL_INT(_kern_geom, OID_AUTO, inflight_transient_maps, CTLFLAG_RD,
|
||||
&inflight_transient_maps, 0,
|
||||
"Current count of the active transient maps");
|
||||
|
||||
static int
|
||||
g_io_transient_map_bio(struct bio *bp)
|
||||
{
|
||||
vm_offset_t addr;
|
||||
long size;
|
||||
u_int retried;
|
||||
int rv;
|
||||
|
||||
size = round_page(bp->bio_ma_offset + bp->bio_length);
|
||||
KASSERT(size / PAGE_SIZE == bp->bio_ma_n, ("Bio too short %p", bp));
|
||||
addr = 0;
|
||||
retried = 0;
|
||||
atomic_add_long(&transient_maps, 1);
|
||||
retry:
|
||||
vm_map_lock(bio_transient_map);
|
||||
if (vm_map_findspace(bio_transient_map, vm_map_min(bio_transient_map),
|
||||
size, &addr)) {
|
||||
vm_map_unlock(bio_transient_map);
|
||||
if (transient_map_retries != 0 &&
|
||||
retried >= transient_map_retries) {
|
||||
g_io_deliver(bp, EDEADLK/* XXXKIB */);
|
||||
CTR2(KTR_GEOM, "g_down cannot map bp %p provider %s",
|
||||
bp, bp->bio_to->name);
|
||||
atomic_add_int(&transient_map_hard_failures, 1);
|
||||
return (1);
|
||||
} else {
|
||||
/*
|
||||
* Naive attempt to quisce the I/O to get more
|
||||
* in-flight requests completed and defragment
|
||||
* the bio_transient_map.
|
||||
*/
|
||||
CTR3(KTR_GEOM, "g_down retrymap bp %p provider %s r %d",
|
||||
bp, bp->bio_to->name, retried);
|
||||
pause("g_d_tra", hz / 10);
|
||||
retried++;
|
||||
atomic_add_int(&transient_map_soft_failures, 1);
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
rv = vm_map_insert(bio_transient_map, NULL, 0, addr, addr + size,
|
||||
VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT);
|
||||
KASSERT(rv == KERN_SUCCESS,
|
||||
("vm_map_insert(bio_transient_map) rv %d %jx %lx",
|
||||
rv, (uintmax_t)addr, size));
|
||||
vm_map_unlock(bio_transient_map);
|
||||
atomic_add_int(&inflight_transient_maps, 1);
|
||||
pmap_qenter((vm_offset_t)addr, bp->bio_ma, OFF_TO_IDX(size));
|
||||
bp->bio_data = (caddr_t)addr + bp->bio_ma_offset;
|
||||
bp->bio_flags |= BIO_TRANSIENT_MAPPING;
|
||||
bp->bio_flags &= ~BIO_UNMAPPED;
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
g_io_schedule_down(struct thread *tp __unused)
|
||||
{
|
||||
@ -636,6 +734,12 @@ g_io_schedule_down(struct thread *tp __unused)
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if ((bp->bio_flags & BIO_UNMAPPED) != 0 &&
|
||||
(bp->bio_to->flags & G_PF_ACCEPT_UNMAPPED) == 0 &&
|
||||
(bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
|
||||
if (g_io_transient_map_bio(bp))
|
||||
continue;
|
||||
}
|
||||
THREAD_NO_SLEEPING();
|
||||
CTR4(KTR_GEOM, "g_down starting bp %p provider %s off %ld "
|
||||
"len %ld", bp, bp->bio_to->name, bp->bio_offset,
|
||||
|
@ -188,14 +188,14 @@ g_vfs_strategy(struct bufobj *bo, struct buf *bp)
|
||||
bip = g_alloc_bio();
|
||||
bip->bio_cmd = bp->b_iocmd;
|
||||
bip->bio_offset = bp->b_iooffset;
|
||||
bip->bio_data = bp->b_data;
|
||||
bip->bio_done = g_vfs_done;
|
||||
bip->bio_caller2 = bp;
|
||||
bip->bio_length = bp->b_bcount;
|
||||
if (bp->b_flags & B_BARRIER) {
|
||||
bdata2bio(bp, bip);
|
||||
if ((bp->b_flags & B_BARRIER) != 0) {
|
||||
bip->bio_flags |= BIO_ORDERED;
|
||||
bp->b_flags &= ~B_BARRIER;
|
||||
}
|
||||
bip->bio_done = g_vfs_done;
|
||||
bip->bio_caller2 = bp;
|
||||
g_io_request(bip, cp);
|
||||
}
|
||||
|
||||
|
@ -4205,6 +4205,8 @@ pmap_copy_page(vm_page_t src, vm_page_t dst)
|
||||
mtx_unlock(&sysmaps->lock);
|
||||
}
|
||||
|
||||
int unmapped_buf_allowed = 1;
|
||||
|
||||
void
|
||||
pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
|
||||
vm_offset_t b_offset, int xfersize)
|
||||
|
@ -3448,6 +3448,8 @@ pmap_copy_page(vm_page_t src, vm_page_t dst)
|
||||
mtx_unlock(&sysmaps->lock);
|
||||
}
|
||||
|
||||
int unmapped_buf_allowed = 1;
|
||||
|
||||
void
|
||||
pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
|
||||
vm_offset_t b_offset, int xfersize)
|
||||
|
@ -2014,6 +2014,8 @@ pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
|
||||
bcopy(src, dst, PAGE_SIZE);
|
||||
}
|
||||
|
||||
int unmapped_buf_allowed;
|
||||
|
||||
void
|
||||
pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
|
||||
vm_offset_t b_offset, int xfersize)
|
||||
|
@ -126,11 +126,28 @@ static int
|
||||
_bus_dmamap_load_bio(bus_dma_tag_t dmat, bus_dmamap_t map, struct bio *bio,
|
||||
int *nsegs, int flags)
|
||||
{
|
||||
int error;
|
||||
vm_paddr_t paddr;
|
||||
bus_size_t len, tlen;
|
||||
int error, i, ma_offs;
|
||||
|
||||
error = _bus_dmamap_load_buffer(dmat, map, bio->bio_data,
|
||||
bio->bio_bcount, kernel_pmap, flags, NULL, nsegs);
|
||||
if ((bio->bio_flags & BIO_UNMAPPED) == 0) {
|
||||
error = _bus_dmamap_load_buffer(dmat, map, bio->bio_data,
|
||||
bio->bio_bcount, kernel_pmap, flags, NULL, nsegs);
|
||||
return (error);
|
||||
}
|
||||
|
||||
error = 0;
|
||||
tlen = bio->bio_bcount;
|
||||
ma_offs = bio->bio_ma_offset;
|
||||
for (i = 0; tlen > 0; i++, tlen -= len) {
|
||||
len = min(PAGE_SIZE - ma_offs, tlen);
|
||||
paddr = VM_PAGE_TO_PHYS(bio->bio_ma[i]) + ma_offs;
|
||||
error = _bus_dmamap_load_phys(dmat, map, paddr, len,
|
||||
flags, NULL, nsegs);
|
||||
if (error != 0)
|
||||
break;
|
||||
ma_offs = 0;
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
|
@ -92,6 +92,7 @@ int maxfiles; /* sys. wide open files limit */
|
||||
int maxfilesperproc; /* per-proc open files limit */
|
||||
int msgbufsize; /* size of kernel message buffer */
|
||||
int nbuf;
|
||||
int bio_transient_maxcnt;
|
||||
int ngroups_max; /* max # groups per process */
|
||||
int nswbuf;
|
||||
pid_t pid_max = PID_MAX;
|
||||
@ -118,6 +119,9 @@ SYSCTL_LONG(_kern, OID_AUTO, maxswzone, CTLFLAG_RDTUN, &maxswzone, 0,
|
||||
"Maximum memory for swap metadata");
|
||||
SYSCTL_LONG(_kern, OID_AUTO, maxbcache, CTLFLAG_RDTUN, &maxbcache, 0,
|
||||
"Maximum value of vfs.maxbufspace");
|
||||
SYSCTL_INT(_kern, OID_AUTO, bio_transient_maxcnt, CTLFLAG_RDTUN,
|
||||
&bio_transient_maxcnt, 0,
|
||||
"Maximum number of transient BIOs mappings");
|
||||
SYSCTL_ULONG(_kern, OID_AUTO, maxtsiz, CTLFLAG_RW | CTLFLAG_TUN, &maxtsiz, 0,
|
||||
"Maximum text size");
|
||||
SYSCTL_ULONG(_kern, OID_AUTO, dfldsiz, CTLFLAG_RW | CTLFLAG_TUN, &dfldsiz, 0,
|
||||
@ -266,6 +270,8 @@ init_param1(void)
|
||||
pid_max = PID_MAX;
|
||||
else if (pid_max < 300)
|
||||
pid_max = 300;
|
||||
|
||||
TUNABLE_INT_FETCH("vfs.unmapped_buf_allowed", &unmapped_buf_allowed);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -322,6 +328,7 @@ init_param2(long physpages)
|
||||
*/
|
||||
nbuf = NBUF;
|
||||
TUNABLE_INT_FETCH("kern.nbuf", &nbuf);
|
||||
TUNABLE_INT_FETCH("kern.bio_transient_maxcnt", &bio_transient_maxcnt);
|
||||
|
||||
/*
|
||||
* The default for maxpipekva is min(1/64 of the kernel address space,
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -61,11 +61,11 @@ SYSCTL_INT(_debug, OID_AUTO, rcluster, CTLFLAG_RW, &rcluster, 0,
|
||||
|
||||
static MALLOC_DEFINE(M_SEGMENT, "cl_savebuf", "cluster_save buffer");
|
||||
|
||||
static struct cluster_save *
|
||||
cluster_collectbufs(struct vnode *vp, struct buf *last_bp);
|
||||
static struct buf *
|
||||
cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn,
|
||||
daddr_t blkno, long size, int run, struct buf *fbp);
|
||||
static struct cluster_save *cluster_collectbufs(struct vnode *vp,
|
||||
struct buf *last_bp, int gbflags);
|
||||
static struct buf *cluster_rbuild(struct vnode *vp, u_quad_t filesize,
|
||||
daddr_t lbn, daddr_t blkno, long size, int run, int gbflags,
|
||||
struct buf *fbp);
|
||||
static void cluster_callback(struct buf *);
|
||||
|
||||
static int write_behind = 1;
|
||||
@ -97,6 +97,8 @@ cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size,
|
||||
|
||||
error = 0;
|
||||
bo = &vp->v_bufobj;
|
||||
if (!unmapped_buf_allowed)
|
||||
gbflags &= ~GB_UNMAPPED;
|
||||
|
||||
/*
|
||||
* Try to limit the amount of read-ahead by a few
|
||||
@ -112,7 +114,7 @@ cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size,
|
||||
/*
|
||||
* get the requested block
|
||||
*/
|
||||
*bpp = reqbp = bp = getblk(vp, lblkno, size, 0, 0, 0);
|
||||
*bpp = reqbp = bp = getblk(vp, lblkno, size, 0, 0, gbflags);
|
||||
origblkno = lblkno;
|
||||
|
||||
/*
|
||||
@ -203,7 +205,7 @@ cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size,
|
||||
if (ncontig < nblks)
|
||||
nblks = ncontig;
|
||||
bp = cluster_rbuild(vp, filesize, lblkno,
|
||||
blkno, size, nblks, bp);
|
||||
blkno, size, nblks, gbflags, bp);
|
||||
lblkno += (bp->b_bufsize / size);
|
||||
} else {
|
||||
bp->b_flags |= B_RAM;
|
||||
@ -247,14 +249,14 @@ cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size,
|
||||
if (ncontig) {
|
||||
ncontig = min(ncontig + 1, racluster);
|
||||
rbp = cluster_rbuild(vp, filesize, lblkno, blkno,
|
||||
size, ncontig, NULL);
|
||||
size, ncontig, gbflags, NULL);
|
||||
lblkno += (rbp->b_bufsize / size);
|
||||
if (rbp->b_flags & B_DELWRI) {
|
||||
bqrelse(rbp);
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
rbp = getblk(vp, lblkno, size, 0, 0, 0);
|
||||
rbp = getblk(vp, lblkno, size, 0, 0, gbflags);
|
||||
lblkno += 1;
|
||||
if (rbp->b_flags & B_DELWRI) {
|
||||
bqrelse(rbp);
|
||||
@ -293,14 +295,8 @@ cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size,
|
||||
* and then parcel them up into logical blocks in the buffer hash table.
|
||||
*/
|
||||
static struct buf *
|
||||
cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
|
||||
struct vnode *vp;
|
||||
u_quad_t filesize;
|
||||
daddr_t lbn;
|
||||
daddr_t blkno;
|
||||
long size;
|
||||
int run;
|
||||
struct buf *fbp;
|
||||
cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn,
|
||||
daddr_t blkno, long size, int run, int gbflags, struct buf *fbp)
|
||||
{
|
||||
struct bufobj *bo;
|
||||
struct buf *bp, *tbp;
|
||||
@ -324,7 +320,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
|
||||
tbp = fbp;
|
||||
tbp->b_iocmd = BIO_READ;
|
||||
} else {
|
||||
tbp = getblk(vp, lbn, size, 0, 0, 0);
|
||||
tbp = getblk(vp, lbn, size, 0, 0, gbflags);
|
||||
if (tbp->b_flags & B_CACHE)
|
||||
return tbp;
|
||||
tbp->b_flags |= B_ASYNC | B_RAM;
|
||||
@ -345,9 +341,14 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
|
||||
* address may not be either. Inherit the b_data offset
|
||||
* from the original buffer.
|
||||
*/
|
||||
bp->b_data = (char *)((vm_offset_t)bp->b_data |
|
||||
((vm_offset_t)tbp->b_data & PAGE_MASK));
|
||||
bp->b_flags = B_ASYNC | B_CLUSTER | B_VMIO;
|
||||
if ((gbflags & GB_UNMAPPED) != 0) {
|
||||
bp->b_flags |= B_UNMAPPED;
|
||||
bp->b_data = unmapped_buf;
|
||||
} else {
|
||||
bp->b_data = (char *)((vm_offset_t)bp->b_data |
|
||||
((vm_offset_t)tbp->b_data & PAGE_MASK));
|
||||
}
|
||||
bp->b_iocmd = BIO_READ;
|
||||
bp->b_iodone = cluster_callback;
|
||||
bp->b_blkno = blkno;
|
||||
@ -371,7 +372,8 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
|
||||
break;
|
||||
}
|
||||
|
||||
tbp = getblk(vp, lbn + i, size, 0, 0, GB_LOCK_NOWAIT);
|
||||
tbp = getblk(vp, lbn + i, size, 0, 0, GB_LOCK_NOWAIT |
|
||||
(gbflags & GB_UNMAPPED));
|
||||
|
||||
/* Don't wait around for locked bufs. */
|
||||
if (tbp == NULL)
|
||||
@ -493,8 +495,10 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
|
||||
bp->b_bufsize, bp->b_kvasize);
|
||||
bp->b_kvasize = bp->b_bufsize;
|
||||
|
||||
pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
|
||||
(vm_page_t *)bp->b_pages, bp->b_npages);
|
||||
if ((bp->b_flags & B_UNMAPPED) == 0) {
|
||||
pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
|
||||
(vm_page_t *)bp->b_pages, bp->b_npages);
|
||||
}
|
||||
return (bp);
|
||||
}
|
||||
|
||||
@ -517,7 +521,10 @@ cluster_callback(bp)
|
||||
if (bp->b_ioflags & BIO_ERROR)
|
||||
error = bp->b_error;
|
||||
|
||||
pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
|
||||
if ((bp->b_flags & B_UNMAPPED) == 0) {
|
||||
pmap_qremove(trunc_page((vm_offset_t) bp->b_data),
|
||||
bp->b_npages);
|
||||
}
|
||||
/*
|
||||
* Move memory from the large cluster buffer into the component
|
||||
* buffers and mark IO as done on these.
|
||||
@ -559,7 +566,8 @@ cluster_callback(bp)
|
||||
*/
|
||||
|
||||
static __inline int
|
||||
cluster_wbuild_wb(struct vnode *vp, long size, daddr_t start_lbn, int len)
|
||||
cluster_wbuild_wb(struct vnode *vp, long size, daddr_t start_lbn, int len,
|
||||
int gbflags)
|
||||
{
|
||||
int r = 0;
|
||||
|
||||
@ -570,7 +578,7 @@ cluster_wbuild_wb(struct vnode *vp, long size, daddr_t start_lbn, int len)
|
||||
start_lbn -= len;
|
||||
/* FALLTHROUGH */
|
||||
case 1:
|
||||
r = cluster_wbuild(vp, size, start_lbn, len, 0);
|
||||
r = cluster_wbuild(vp, size, start_lbn, len, gbflags);
|
||||
/* FALLTHROUGH */
|
||||
default:
|
||||
/* FALLTHROUGH */
|
||||
@ -598,6 +606,9 @@ cluster_write(struct vnode *vp, struct buf *bp, u_quad_t filesize, int seqcount,
|
||||
int lblocksize;
|
||||
int async;
|
||||
|
||||
if (!unmapped_buf_allowed)
|
||||
gbflags &= ~GB_UNMAPPED;
|
||||
|
||||
if (vp->v_type == VREG) {
|
||||
async = DOINGASYNC(vp);
|
||||
lblocksize = vp->v_mount->mnt_stat.f_iosize;
|
||||
@ -637,13 +648,13 @@ cluster_write(struct vnode *vp, struct buf *bp, u_quad_t filesize, int seqcount,
|
||||
lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) {
|
||||
if (!async && seqcount > 0) {
|
||||
cluster_wbuild_wb(vp, lblocksize,
|
||||
vp->v_cstart, cursize);
|
||||
vp->v_cstart, cursize, gbflags);
|
||||
}
|
||||
} else {
|
||||
struct buf **bpp, **endbp;
|
||||
struct cluster_save *buflist;
|
||||
|
||||
buflist = cluster_collectbufs(vp, bp);
|
||||
buflist = cluster_collectbufs(vp, bp, gbflags);
|
||||
endbp = &buflist->bs_children
|
||||
[buflist->bs_nchildren - 1];
|
||||
if (VOP_REALLOCBLKS(vp, buflist)) {
|
||||
@ -662,7 +673,7 @@ cluster_write(struct vnode *vp, struct buf *bp, u_quad_t filesize, int seqcount,
|
||||
if (seqcount > 1) {
|
||||
cluster_wbuild_wb(vp,
|
||||
lblocksize, vp->v_cstart,
|
||||
cursize);
|
||||
cursize, gbflags);
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
@ -710,8 +721,10 @@ cluster_write(struct vnode *vp, struct buf *bp, u_quad_t filesize, int seqcount,
|
||||
* update daemon handle it.
|
||||
*/
|
||||
bdwrite(bp);
|
||||
if (seqcount > 1)
|
||||
cluster_wbuild_wb(vp, lblocksize, vp->v_cstart, vp->v_clen + 1);
|
||||
if (seqcount > 1) {
|
||||
cluster_wbuild_wb(vp, lblocksize, vp->v_cstart,
|
||||
vp->v_clen + 1, gbflags);
|
||||
}
|
||||
vp->v_clen = 0;
|
||||
vp->v_cstart = lbn + 1;
|
||||
} else if (vm_page_count_severe()) {
|
||||
@ -746,6 +759,9 @@ cluster_wbuild(struct vnode *vp, long size, daddr_t start_lbn, int len,
|
||||
int totalwritten = 0;
|
||||
int dbsize = btodb(size);
|
||||
|
||||
if (!unmapped_buf_allowed)
|
||||
gbflags &= ~GB_UNMAPPED;
|
||||
|
||||
bo = &vp->v_bufobj;
|
||||
while (len > 0) {
|
||||
/*
|
||||
@ -824,10 +840,16 @@ cluster_wbuild(struct vnode *vp, long size, daddr_t start_lbn, int len,
|
||||
* address may not be either. Inherit the b_data offset
|
||||
* from the original buffer.
|
||||
*/
|
||||
bp->b_data = (char *)((vm_offset_t)bp->b_data |
|
||||
((vm_offset_t)tbp->b_data & PAGE_MASK));
|
||||
bp->b_flags |= B_CLUSTER |
|
||||
(tbp->b_flags & (B_VMIO | B_NEEDCOMMIT));
|
||||
if ((gbflags & GB_UNMAPPED) == 0 ||
|
||||
(tbp->b_flags & B_VMIO) == 0) {
|
||||
bp->b_data = (char *)((vm_offset_t)bp->b_data |
|
||||
((vm_offset_t)tbp->b_data & PAGE_MASK));
|
||||
} else {
|
||||
bp->b_flags |= B_UNMAPPED;
|
||||
bp->b_data = unmapped_buf;
|
||||
}
|
||||
bp->b_flags |= B_CLUSTER | (tbp->b_flags & (B_VMIO |
|
||||
B_NEEDCOMMIT));
|
||||
bp->b_iodone = cluster_callback;
|
||||
pbgetvp(vp, bp);
|
||||
/*
|
||||
@ -954,8 +976,10 @@ cluster_wbuild(struct vnode *vp, long size, daddr_t start_lbn, int len,
|
||||
tbp, b_cluster.cluster_entry);
|
||||
}
|
||||
finishcluster:
|
||||
pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
|
||||
(vm_page_t *) bp->b_pages, bp->b_npages);
|
||||
if ((bp->b_flags & B_UNMAPPED) == 0) {
|
||||
pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
|
||||
(vm_page_t *)bp->b_pages, bp->b_npages);
|
||||
}
|
||||
if (bp->b_bufsize > bp->b_kvasize)
|
||||
panic(
|
||||
"cluster_wbuild: b_bufsize(%ld) > b_kvasize(%d)\n",
|
||||
@ -976,9 +1000,7 @@ cluster_wbuild(struct vnode *vp, long size, daddr_t start_lbn, int len,
|
||||
* Plus add one additional buffer.
|
||||
*/
|
||||
static struct cluster_save *
|
||||
cluster_collectbufs(vp, last_bp)
|
||||
struct vnode *vp;
|
||||
struct buf *last_bp;
|
||||
cluster_collectbufs(struct vnode *vp, struct buf *last_bp, int gbflags)
|
||||
{
|
||||
struct cluster_save *buflist;
|
||||
struct buf *bp;
|
||||
@ -991,7 +1013,8 @@ cluster_collectbufs(vp, last_bp)
|
||||
buflist->bs_nchildren = 0;
|
||||
buflist->bs_children = (struct buf **) (buflist + 1);
|
||||
for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++) {
|
||||
(void) bread(vp, lbn, last_bp->b_bcount, NOCRED, &bp);
|
||||
(void)bread_gb(vp, lbn, last_bp->b_bcount, NOCRED,
|
||||
gbflags, &bp);
|
||||
buflist->bs_children[i] = bp;
|
||||
if (bp->b_blkno == bp->b_lblkno)
|
||||
VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno,
|
||||
|
@ -2576,6 +2576,8 @@ pmap_copy_page(vm_page_t src, vm_page_t dst)
|
||||
}
|
||||
}
|
||||
|
||||
int unmapped_buf_allowed;
|
||||
|
||||
void
|
||||
pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
|
||||
vm_offset_t b_offset, int xfersize)
|
||||
|
@ -648,6 +648,14 @@ moea64_setup_direct_map(mmu_t mmup, vm_offset_t kernelstart,
|
||||
moea64_kenter(mmup, pa, pa);
|
||||
}
|
||||
ENABLE_TRANS(msr);
|
||||
|
||||
/*
|
||||
* Allow user to override unmapped_buf_allowed for testing.
|
||||
* XXXKIB Only direct map implementation was tested.
|
||||
*/
|
||||
if (!TUNABLE_INT_FETCH("vfs.unmapped_buf_allowed",
|
||||
&unmapped_buf_allowed))
|
||||
unmapped_buf_allowed = hw_direct_map;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -574,3 +574,5 @@ pmap_mmu_install(char *name, int prio)
|
||||
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
int unmapped_buf_allowed;
|
||||
|
@ -1918,6 +1918,8 @@ pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
|
||||
}
|
||||
}
|
||||
|
||||
int unmapped_buf_allowed;
|
||||
|
||||
void
|
||||
pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
|
||||
vm_offset_t b_offset, int xfersize)
|
||||
|
@ -55,10 +55,13 @@
|
||||
#define BIO_DONE 0x02
|
||||
#define BIO_ONQUEUE 0x04
|
||||
#define BIO_ORDERED 0x08
|
||||
#define BIO_UNMAPPED 0x10
|
||||
#define BIO_TRANSIENT_MAPPING 0x20
|
||||
|
||||
#ifdef _KERNEL
|
||||
struct disk;
|
||||
struct bio;
|
||||
struct vm_map;
|
||||
|
||||
/* Empty classifier tag, to prevent further classification. */
|
||||
#define BIO_NOTCLASSIFIED (void *)(~0UL)
|
||||
@ -78,6 +81,9 @@ struct bio {
|
||||
off_t bio_offset; /* Offset into file. */
|
||||
long bio_bcount; /* Valid bytes in buffer. */
|
||||
caddr_t bio_data; /* Memory, superblocks, indirect etc. */
|
||||
struct vm_page **bio_ma; /* Or unmapped. */
|
||||
int bio_ma_offset; /* Offset in the first page of bio_ma. */
|
||||
int bio_ma_n; /* Number of pages in bio_ma. */
|
||||
int bio_error; /* Errno for BIO_ERROR. */
|
||||
long bio_resid; /* Remaining I/O in bytes. */
|
||||
void (*bio_done)(struct bio *);
|
||||
@ -121,6 +127,9 @@ struct bio_queue_head {
|
||||
struct bio *insert_point;
|
||||
};
|
||||
|
||||
extern struct vm_map *bio_transient_map;
|
||||
extern int bio_transient_maxcnt;
|
||||
|
||||
void biodone(struct bio *bp);
|
||||
void biofinish(struct bio *bp, struct devstat *stat, int error);
|
||||
int biowait(struct bio *bp, const char *wchan);
|
||||
|
@ -117,6 +117,7 @@ struct buf {
|
||||
long b_bufsize; /* Allocated buffer size. */
|
||||
long b_runningbufspace; /* when I/O is running, pipelining */
|
||||
caddr_t b_kvabase; /* base kva for buffer */
|
||||
caddr_t b_kvaalloc; /* allocated kva for B_KVAALLOC */
|
||||
int b_kvasize; /* size of kva for buffer */
|
||||
daddr_t b_lblkno; /* Logical block number. */
|
||||
struct vnode *b_vp; /* Device vnode. */
|
||||
@ -202,8 +203,8 @@ struct buf {
|
||||
#define B_PERSISTENT 0x00000100 /* Perm. ref'ed while EXT2FS mounted. */
|
||||
#define B_DONE 0x00000200 /* I/O completed. */
|
||||
#define B_EINTR 0x00000400 /* I/O was interrupted */
|
||||
#define B_00000800 0x00000800 /* Available flag. */
|
||||
#define B_00001000 0x00001000 /* Available flag. */
|
||||
#define B_UNMAPPED 0x00000800 /* KVA is not mapped. */
|
||||
#define B_KVAALLOC 0x00001000 /* But allocated. */
|
||||
#define B_INVAL 0x00002000 /* Does not contain valid info. */
|
||||
#define B_BARRIER 0x00004000 /* Write this and all preceeding first. */
|
||||
#define B_NOCACHE 0x00008000 /* Do not cache block after use. */
|
||||
@ -453,7 +454,9 @@ buf_countdeps(struct buf *bp, int i)
|
||||
*/
|
||||
#define GB_LOCK_NOWAIT 0x0001 /* Fail if we block on a buf lock. */
|
||||
#define GB_NOCREAT 0x0002 /* Don't create a buf if not found. */
|
||||
#define GB_NOWAIT_BD 0x0004 /* Do not wait for bufdaemon */
|
||||
#define GB_NOWAIT_BD 0x0004 /* Do not wait for bufdaemon. */
|
||||
#define GB_UNMAPPED 0x0008 /* Do not mmap buffer pages. */
|
||||
#define GB_KVAALLOC 0x0010 /* But allocate KVA. */
|
||||
|
||||
#ifdef _KERNEL
|
||||
extern int nbuf; /* The number of buffer headers */
|
||||
@ -470,11 +473,13 @@ extern struct buf *swbuf; /* Swap I/O buffer headers. */
|
||||
extern int nswbuf; /* Number of swap I/O buffer headers. */
|
||||
extern int cluster_pbuf_freecnt; /* Number of pbufs for clusters */
|
||||
extern int vnode_pbuf_freecnt; /* Number of pbufs for vnode pager */
|
||||
extern caddr_t unmapped_buf;
|
||||
|
||||
void runningbufwakeup(struct buf *);
|
||||
void waitrunningbufspace(void);
|
||||
caddr_t kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est);
|
||||
void bufinit(void);
|
||||
void bdata2bio(struct buf *bp, struct bio *bip);
|
||||
void bwillwrite(void);
|
||||
int buf_dirty_count_severe(void);
|
||||
void bremfree(struct buf *);
|
||||
|
@ -138,6 +138,7 @@ extern char **kenvp;
|
||||
|
||||
extern const void *zero_region; /* address space maps to a zeroed page */
|
||||
|
||||
extern int unmapped_buf_allowed;
|
||||
extern int iosize_max_clamp;
|
||||
#define IOSIZE_MAX (iosize_max_clamp ? INT_MAX : SSIZE_MAX)
|
||||
|
||||
|
@ -136,6 +136,8 @@ struct kva_md_info {
|
||||
vm_offset_t clean_eva;
|
||||
vm_offset_t pager_sva;
|
||||
vm_offset_t pager_eva;
|
||||
vm_offset_t bio_transient_sva;
|
||||
vm_offset_t bio_transient_eva;
|
||||
};
|
||||
|
||||
extern struct kva_md_info kmi;
|
||||
|
@ -184,10 +184,15 @@ vm_ksubmap_init(struct kva_md_info *kmi)
|
||||
panic("startup: table size inconsistency");
|
||||
|
||||
clean_map = kmem_suballoc(kernel_map, &kmi->clean_sva, &kmi->clean_eva,
|
||||
(long)nbuf * BKVASIZE + (long)nswbuf * MAXPHYS, TRUE);
|
||||
(long)nbuf * BKVASIZE + (long)nswbuf * MAXPHYS +
|
||||
(long)bio_transient_maxcnt * MAXPHYS, TRUE);
|
||||
buffer_map = kmem_suballoc(clean_map, &kmi->buffer_sva,
|
||||
&kmi->buffer_eva, (long)nbuf * BKVASIZE, FALSE);
|
||||
buffer_map->system_map = 1;
|
||||
bio_transient_map = kmem_suballoc(clean_map, &kmi->bio_transient_sva,
|
||||
&kmi->bio_transient_eva, (long)bio_transient_maxcnt * MAXPHYS,
|
||||
FALSE);
|
||||
bio_transient_map->system_map = 1;
|
||||
pager_map = kmem_suballoc(clean_map, &kmi->pager_sva, &kmi->pager_eva,
|
||||
(long)nswbuf * MAXPHYS, FALSE);
|
||||
pager_map->system_map = 1;
|
||||
|
@ -90,6 +90,7 @@ vm_map_t kmem_map;
|
||||
vm_map_t exec_map;
|
||||
vm_map_t pipe_map;
|
||||
vm_map_t buffer_map;
|
||||
vm_map_t bio_transient_map;
|
||||
|
||||
const void *zero_region;
|
||||
CTASSERT((ZERO_REGION_SIZE & PAGE_MASK) == 0);
|
||||
|
Loading…
Reference in New Issue
Block a user