
Right now, for non-coherent DMARs, the page table update code flushes the
cache for the whole page containing the modified pte, and, worse, only the
last page in a series of consecutive pages is flushed (i.e. flushes are
only missed when the affected mappings are larger than 2MB).

Avoid the excessive flushing, and perform the flushes that were missed, by
splitting invalidation from unmapping.  For now, flush exactly the range
of the changed pte.  This is still somewhat larger than necessary, since a
pte is 8 bytes, while a cache flush line is at least 32 bytes.
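
To illustrate the granularity involved, here is a minimal userland sketch
of flushing only the cache lines covering an 8-byte descriptor, rather than
the whole 4K page containing it.  This is not the kernel change itself
(which relies on pmap_invalidate_cache_range()); the names, the 64-byte
line size, and the CLFLUSH-based loop are assumptions made for the example.

#include <stddef.h>
#include <stdint.h>
#include <emmintrin.h>		/* _mm_clflush(), _mm_mfence() */

#define	FLUSH_LINE	64	/* assumed cache-line size */

/*
 * Flush only the cache lines covering the sz bytes at dst, instead of the
 * whole 4K page that contains them.  For an 8-byte pte this is one line.
 */
static void
flush_entry_to_ram(const void *dst, size_t sz)
{
	uintptr_t p, end;

	p = (uintptr_t)dst & ~((uintptr_t)FLUSH_LINE - 1);
	end = (uintptr_t)dst + sz;
	for (; p < end; p += FLUSH_LINE)
		_mm_clflush((const void *)p);
	_mm_mfence();		/* order the write-backs before device access */
}

int
main(void)
{
	static uint64_t ptes[512];	/* stand-in for one page-table page */

	ptes[7] = 0x1000 | 0x3;		/* pretend this pte was just stored */
	flush_entry_to_ram(&ptes[7], sizeof(ptes[7]));
	return (0);
}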

The originator of the issue reports that after the change,
'dmar_bus_dmamap_unload went from 13,288 cycles down to
3,257. dmar_bus_dmamap_load_buffer went from 9,686 cycles down to
3,517.  and I am now able to get line 1GbE speed with Netperf TCP
(even with 1K message size).'

Diagnosed and tested by:	Nadav Amit <nadav.amit@gmail.com>
Sponsored by:	The FreeBSD Foundation
MFC after:	1 week
commit 6b7c46afec
parent 516f1b89f1
Author: Konstantin Belousov
Date:   2015-01-11 20:27:15 +00:00
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=277023
4 changed files with 54 additions and 21 deletions

sys/x86/iommu/intel_ctx.c

@@ -97,7 +97,8 @@ dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
 	re += bus;
 	dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
 	    VM_PAGE_TO_PHYS(ctxm)));
-	dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar));
+	dmar_flush_root_to_ram(dmar, re);
+	dmar_unmap_pgtbl(sf);
 	TD_PINNED_ASSERT;
 }
@@ -158,6 +159,7 @@ ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp)
 		    (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) |
 		    DMAR_CTX1_P);
 	}
+	dmar_flush_ctx_to_ram(unit, ctxp);
 }
 
 static int
@@ -364,7 +366,7 @@ dmar_get_ctx(struct dmar_unit *dmar, device_t dev, uint16_t rid, bool id_mapped,
 		ctx->domain = alloc_unrl(dmar->domids);
 		if (ctx->domain == -1) {
 			DMAR_UNLOCK(dmar);
-			dmar_unmap_pgtbl(sf, true);
+			dmar_unmap_pgtbl(sf);
 			dmar_ctx_dtr(ctx, true, true);
 			TD_PINNED_ASSERT;
 			return (NULL);
@@ -389,7 +391,7 @@ dmar_get_ctx(struct dmar_unit *dmar, device_t dev, uint16_t rid, bool id_mapped,
 		} else {
 			dmar_ctx_dtr(ctx1, true, true);
 		}
-		dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar));
+		dmar_unmap_pgtbl(sf);
 	}
 	ctx->refs++;
 	if ((ctx->flags & DMAR_CTX_RMRR) != 0)
@@ -480,7 +482,7 @@ dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
 	if (ctx->refs > 1) {
 		ctx->refs--;
 		DMAR_UNLOCK(dmar);
-		dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar));
+		dmar_unmap_pgtbl(sf);
 		TD_PINNED_ASSERT;
 		return;
 	}
@@ -496,6 +498,7 @@ dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
 	 */
 	dmar_pte_clear(&ctxp->ctx1);
 	ctxp->ctx2 = 0;
+	dmar_flush_ctx_to_ram(dmar, ctxp);
 	dmar_inv_ctx_glob(dmar);
 	if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) {
 		if (dmar->qi_enabled)
@@ -513,7 +516,7 @@ dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
 	taskqueue_drain(dmar->delayed_taskqueue, &ctx->unload_task);
 	KASSERT(TAILQ_EMPTY(&ctx->unload_entries),
 	    ("unfinished unloads %p", ctx));
-	dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar));
+	dmar_unmap_pgtbl(sf);
 	free_unr(dmar->domids, ctx->domain);
 	dmar_ctx_dtr(ctx, true, true);
 	TD_PINNED_ASSERT;

sys/x86/iommu/intel_dmar.h

@@ -228,11 +228,14 @@ struct vm_page *dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags);
 void dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags);
 void *dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
     struct sf_buf **sf);
-void dmar_unmap_pgtbl(struct sf_buf *sf, bool coherent);
+void dmar_unmap_pgtbl(struct sf_buf *sf);
 int dmar_load_root_entry_ptr(struct dmar_unit *unit);
 int dmar_inv_ctx_glob(struct dmar_unit *unit);
 int dmar_inv_iotlb_glob(struct dmar_unit *unit);
 int dmar_flush_write_bufs(struct dmar_unit *unit);
+void dmar_flush_pte_to_ram(struct dmar_unit *unit, dmar_pte_t *dst);
+void dmar_flush_ctx_to_ram(struct dmar_unit *unit, dmar_ctx_entry_t *dst);
+void dmar_flush_root_to_ram(struct dmar_unit *unit, dmar_root_entry_t *dst);
 int dmar_enable_translation(struct dmar_unit *unit);
 int dmar_disable_translation(struct dmar_unit *unit);
 bool dmar_barrier_enter(struct dmar_unit *dmar, u_int barrier_id);

sys/x86/iommu/intel_idpgtbl.c

@@ -146,7 +146,7 @@ ctx_idmap_nextlvl(struct idpgtbl *tbl, int lvl, vm_pindex_t idx,
 		}
 	}
 	/* ctx_get_idmap_pgtbl flushes CPU cache if needed. */
-	dmar_unmap_pgtbl(sf, true);
+	dmar_unmap_pgtbl(sf);
 	VM_OBJECT_WLOCK(tbl->pgtbl_obj);
 }
@@ -361,7 +361,7 @@ ctx_pgtbl_map_pte(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl, int flags,
 		pte = (dmar_pte_t *)sf_buf_kva(*sf);
 	} else {
 		if (*sf != NULL)
-			dmar_unmap_pgtbl(*sf, DMAR_IS_COHERENT(ctx->dmar));
+			dmar_unmap_pgtbl(*sf);
 		*idxp = idx;
 retry:
 		pte = dmar_map_pgtbl(ctx->pgtbl_obj, idx, flags, sf);
@@ -397,9 +397,10 @@ ctx_pgtbl_map_pte(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl, int flags,
 			}
 			dmar_pte_store(&ptep->pte, DMAR_PTE_R | DMAR_PTE_W |
 			    VM_PAGE_TO_PHYS(m));
+			dmar_flush_pte_to_ram(ctx->dmar, ptep);
 			sf_buf_page(sfp)->wire_count += 1;
 			m->wire_count--;
-			dmar_unmap_pgtbl(sfp, DMAR_IS_COHERENT(ctx->dmar));
+			dmar_unmap_pgtbl(sfp);
 			/* Only executed once. */
 			goto retry;
 		}
@@ -467,20 +468,19 @@ ctx_map_buf_locked(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size,
 		if (pte == NULL) {
 			KASSERT((flags & DMAR_PGF_WAITOK) == 0,
 			    ("failed waitable pte alloc %p", ctx));
-			if (sf != NULL) {
-				dmar_unmap_pgtbl(sf,
-				    DMAR_IS_COHERENT(ctx->dmar));
-			}
+			if (sf != NULL)
+				dmar_unmap_pgtbl(sf);
 			ctx_unmap_buf_locked(ctx, base1, base - base1, flags);
 			TD_PINNED_ASSERT;
 			return (ENOMEM);
 		}
 		dmar_pte_store(&pte->pte, VM_PAGE_TO_PHYS(ma[pi]) | pflags |
 		    (superpage ? DMAR_PTE_SP : 0));
+		dmar_flush_pte_to_ram(ctx->dmar, pte);
 		sf_buf_page(sf)->wire_count += 1;
 	}
 	if (sf != NULL)
-		dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(ctx->dmar));
+		dmar_unmap_pgtbl(sf);
 	TD_PINNED_ASSERT;
 	return (0);
 }
@@ -567,9 +567,10 @@ ctx_unmap_clear_pte(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl,
 	vm_page_t m;
 	dmar_pte_clear(&pte->pte);
+	dmar_flush_pte_to_ram(ctx->dmar, pte);
 	m = sf_buf_page(*sf);
 	if (free_sf) {
-		dmar_unmap_pgtbl(*sf, DMAR_IS_COHERENT(ctx->dmar));
+		dmar_unmap_pgtbl(*sf);
 		*sf = NULL;
 	}
 	m->wire_count--;
@@ -651,7 +652,7 @@ ctx_unmap_buf_locked(struct dmar_ctx *ctx, dmar_gaddr_t base,
 		    (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz));
 	}
 	if (sf != NULL)
-		dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(ctx->dmar));
+		dmar_unmap_pgtbl(sf);
 	/*
 	 * See 11.1 Write Buffer Flushing for an explanation why RWBF
 	 * can be ignored there.

sys/x86/iommu/intel_utils.c

@@ -354,20 +354,46 @@ dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
 }
 
 void
-dmar_unmap_pgtbl(struct sf_buf *sf, bool coherent)
+dmar_unmap_pgtbl(struct sf_buf *sf)
 {
-	vm_page_t m;
 
-	m = sf_buf_page(sf);
 	sf_buf_free(sf);
 	sched_unpin();
+}
+
+static void
+dmar_flush_transl_to_ram(struct dmar_unit *unit, void *dst, size_t sz)
+{
 
+	if (DMAR_IS_COHERENT(unit))
+		return;
 	/*
 	 * If DMAR does not snoop paging structures accesses, flush
 	 * CPU cache to memory.
 	 */
-	if (!coherent)
-		pmap_invalidate_cache_pages(&m, 1);
+	pmap_invalidate_cache_range((uintptr_t)dst, (uintptr_t)dst + sz,
+	    TRUE);
+}
+
+void
+dmar_flush_pte_to_ram(struct dmar_unit *unit, dmar_pte_t *dst)
+{
+
+	dmar_flush_transl_to_ram(unit, dst, sizeof(*dst));
+}
+
+void
+dmar_flush_ctx_to_ram(struct dmar_unit *unit, dmar_ctx_entry_t *dst)
+{
+
+	dmar_flush_transl_to_ram(unit, dst, sizeof(*dst));
+}
+
+void
+dmar_flush_root_to_ram(struct dmar_unit *unit, dmar_root_entry_t *dst)
+{
+
+	dmar_flush_transl_to_ram(unit, dst, sizeof(*dst));
+}
 
 /*
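
Taken together, the new call-site contract is: store the descriptor, flush
exactly that descriptor with one of the helpers above, then unmap the
page-table page (dmar_unmap_pgtbl() no longer does any cache maintenance).
A schematic, self-contained sketch of that ordering (the lowercase names
below are mock stand-ins for illustration, not the kernel API):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t pte_t;			/* simplified descriptor */

static int coherent;			/* stands in for DMAR_IS_COHERENT(unit) */

static void
flush_pte_to_ram(pte_t *pte)
{
	if (coherent)
		return;			/* hardware snoops page-table walks */
	printf("flush cache line covering %p\n", (void *)pte);
}

static void
unmap_pgtbl(void)
{
	printf("unmap page-table page (no flushing here anymore)\n");
}

int
main(void)
{
	static pte_t pte;

	pte = 0x1000 | 0x3;		/* 1. store the entry */
	flush_pte_to_ram(&pte);		/* 2. flush exactly that entry */
	unmap_pgtbl();			/* 3. drop the temporary mapping */
	return (0);
}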