mirror of
https://git.FreeBSD.org/src.git
synced 2024-12-15 10:17:20 +00:00
Right now, for non-coherent DMARs, page table update code flushes the
cache for whole page containing modified pte, and more, only last page in the series of the consecutive pages is flushed (i.e. the affected mappings should be larger than 2MB). Avoid excessive flushing and do missed necessary flushing, by splitting invalidation and unmapping. For now, flush exactly the range of the changed pte. This is still somewhat bigger than necessary, since pte is 8 bytes, while cache flush line is at least 32 bytes. The originator of the issue reports that after the change, 'dmar_bus_dmamap_unload went from 13,288 cycles down to 3,257. dmar_bus_dmamap_load_buffer went from 9,686 cycles down to 3,517. and I am now able to get 1GbE line speed with Netperf TCP (even with 1K message size).' Diagnosed and tested by: Nadav Amit <nadav.amit@gmail.com> Sponsored by: The FreeBSD Foundation MFC after: 1 week
This commit is contained in:
parent
516f1b89f1
commit
6b7c46afec
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=277023
@ -97,7 +97,8 @@ dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
|
||||
re += bus;
|
||||
dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
|
||||
VM_PAGE_TO_PHYS(ctxm)));
|
||||
dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar));
|
||||
dmar_flush_root_to_ram(dmar, re);
|
||||
dmar_unmap_pgtbl(sf);
|
||||
TD_PINNED_ASSERT;
|
||||
}
|
||||
|
||||
@ -158,6 +159,7 @@ ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp)
|
||||
(DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) |
|
||||
DMAR_CTX1_P);
|
||||
}
|
||||
dmar_flush_ctx_to_ram(unit, ctxp);
|
||||
}
|
||||
|
||||
static int
|
||||
@ -364,7 +366,7 @@ dmar_get_ctx(struct dmar_unit *dmar, device_t dev, uint16_t rid, bool id_mapped,
|
||||
ctx->domain = alloc_unrl(dmar->domids);
|
||||
if (ctx->domain == -1) {
|
||||
DMAR_UNLOCK(dmar);
|
||||
dmar_unmap_pgtbl(sf, true);
|
||||
dmar_unmap_pgtbl(sf);
|
||||
dmar_ctx_dtr(ctx, true, true);
|
||||
TD_PINNED_ASSERT;
|
||||
return (NULL);
|
||||
@ -389,7 +391,7 @@ dmar_get_ctx(struct dmar_unit *dmar, device_t dev, uint16_t rid, bool id_mapped,
|
||||
} else {
|
||||
dmar_ctx_dtr(ctx1, true, true);
|
||||
}
|
||||
dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar));
|
||||
dmar_unmap_pgtbl(sf);
|
||||
}
|
||||
ctx->refs++;
|
||||
if ((ctx->flags & DMAR_CTX_RMRR) != 0)
|
||||
@ -480,7 +482,7 @@ dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
|
||||
if (ctx->refs > 1) {
|
||||
ctx->refs--;
|
||||
DMAR_UNLOCK(dmar);
|
||||
dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar));
|
||||
dmar_unmap_pgtbl(sf);
|
||||
TD_PINNED_ASSERT;
|
||||
return;
|
||||
}
|
||||
@ -496,6 +498,7 @@ dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
|
||||
*/
|
||||
dmar_pte_clear(&ctxp->ctx1);
|
||||
ctxp->ctx2 = 0;
|
||||
dmar_flush_ctx_to_ram(dmar, ctxp);
|
||||
dmar_inv_ctx_glob(dmar);
|
||||
if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) {
|
||||
if (dmar->qi_enabled)
|
||||
@ -513,7 +516,7 @@ dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
|
||||
taskqueue_drain(dmar->delayed_taskqueue, &ctx->unload_task);
|
||||
KASSERT(TAILQ_EMPTY(&ctx->unload_entries),
|
||||
("unfinished unloads %p", ctx));
|
||||
dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar));
|
||||
dmar_unmap_pgtbl(sf);
|
||||
free_unr(dmar->domids, ctx->domain);
|
||||
dmar_ctx_dtr(ctx, true, true);
|
||||
TD_PINNED_ASSERT;
|
||||
|
@ -228,11 +228,14 @@ struct vm_page *dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags);
|
||||
void dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags);
|
||||
void *dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
|
||||
struct sf_buf **sf);
|
||||
void dmar_unmap_pgtbl(struct sf_buf *sf, bool coherent);
|
||||
void dmar_unmap_pgtbl(struct sf_buf *sf);
|
||||
int dmar_load_root_entry_ptr(struct dmar_unit *unit);
|
||||
int dmar_inv_ctx_glob(struct dmar_unit *unit);
|
||||
int dmar_inv_iotlb_glob(struct dmar_unit *unit);
|
||||
int dmar_flush_write_bufs(struct dmar_unit *unit);
|
||||
void dmar_flush_pte_to_ram(struct dmar_unit *unit, dmar_pte_t *dst);
|
||||
void dmar_flush_ctx_to_ram(struct dmar_unit *unit, dmar_ctx_entry_t *dst);
|
||||
void dmar_flush_root_to_ram(struct dmar_unit *unit, dmar_root_entry_t *dst);
|
||||
int dmar_enable_translation(struct dmar_unit *unit);
|
||||
int dmar_disable_translation(struct dmar_unit *unit);
|
||||
bool dmar_barrier_enter(struct dmar_unit *dmar, u_int barrier_id);
|
||||
|
@ -146,7 +146,7 @@ ctx_idmap_nextlvl(struct idpgtbl *tbl, int lvl, vm_pindex_t idx,
|
||||
}
|
||||
}
|
||||
/* ctx_get_idmap_pgtbl flushes CPU cache if needed. */
|
||||
dmar_unmap_pgtbl(sf, true);
|
||||
dmar_unmap_pgtbl(sf);
|
||||
VM_OBJECT_WLOCK(tbl->pgtbl_obj);
|
||||
}
|
||||
|
||||
@ -361,7 +361,7 @@ ctx_pgtbl_map_pte(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl, int flags,
|
||||
pte = (dmar_pte_t *)sf_buf_kva(*sf);
|
||||
} else {
|
||||
if (*sf != NULL)
|
||||
dmar_unmap_pgtbl(*sf, DMAR_IS_COHERENT(ctx->dmar));
|
||||
dmar_unmap_pgtbl(*sf);
|
||||
*idxp = idx;
|
||||
retry:
|
||||
pte = dmar_map_pgtbl(ctx->pgtbl_obj, idx, flags, sf);
|
||||
@ -397,9 +397,10 @@ ctx_pgtbl_map_pte(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl, int flags,
|
||||
}
|
||||
dmar_pte_store(&ptep->pte, DMAR_PTE_R | DMAR_PTE_W |
|
||||
VM_PAGE_TO_PHYS(m));
|
||||
dmar_flush_pte_to_ram(ctx->dmar, ptep);
|
||||
sf_buf_page(sfp)->wire_count += 1;
|
||||
m->wire_count--;
|
||||
dmar_unmap_pgtbl(sfp, DMAR_IS_COHERENT(ctx->dmar));
|
||||
dmar_unmap_pgtbl(sfp);
|
||||
/* Only executed once. */
|
||||
goto retry;
|
||||
}
|
||||
@ -467,20 +468,19 @@ ctx_map_buf_locked(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size,
|
||||
if (pte == NULL) {
|
||||
KASSERT((flags & DMAR_PGF_WAITOK) == 0,
|
||||
("failed waitable pte alloc %p", ctx));
|
||||
if (sf != NULL) {
|
||||
dmar_unmap_pgtbl(sf,
|
||||
DMAR_IS_COHERENT(ctx->dmar));
|
||||
}
|
||||
if (sf != NULL)
|
||||
dmar_unmap_pgtbl(sf);
|
||||
ctx_unmap_buf_locked(ctx, base1, base - base1, flags);
|
||||
TD_PINNED_ASSERT;
|
||||
return (ENOMEM);
|
||||
}
|
||||
dmar_pte_store(&pte->pte, VM_PAGE_TO_PHYS(ma[pi]) | pflags |
|
||||
(superpage ? DMAR_PTE_SP : 0));
|
||||
dmar_flush_pte_to_ram(ctx->dmar, pte);
|
||||
sf_buf_page(sf)->wire_count += 1;
|
||||
}
|
||||
if (sf != NULL)
|
||||
dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(ctx->dmar));
|
||||
dmar_unmap_pgtbl(sf);
|
||||
TD_PINNED_ASSERT;
|
||||
return (0);
|
||||
}
|
||||
@ -567,9 +567,10 @@ ctx_unmap_clear_pte(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl,
|
||||
vm_page_t m;
|
||||
|
||||
dmar_pte_clear(&pte->pte);
|
||||
dmar_flush_pte_to_ram(ctx->dmar, pte);
|
||||
m = sf_buf_page(*sf);
|
||||
if (free_sf) {
|
||||
dmar_unmap_pgtbl(*sf, DMAR_IS_COHERENT(ctx->dmar));
|
||||
dmar_unmap_pgtbl(*sf);
|
||||
*sf = NULL;
|
||||
}
|
||||
m->wire_count--;
|
||||
@ -651,7 +652,7 @@ ctx_unmap_buf_locked(struct dmar_ctx *ctx, dmar_gaddr_t base,
|
||||
(uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz));
|
||||
}
|
||||
if (sf != NULL)
|
||||
dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(ctx->dmar));
|
||||
dmar_unmap_pgtbl(sf);
|
||||
/*
|
||||
* See 11.1 Write Buffer Flushing for an explanation why RWBF
|
||||
* can be ignored there.
|
||||
|
@ -354,20 +354,46 @@ dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
|
||||
}
|
||||
|
||||
void
|
||||
dmar_unmap_pgtbl(struct sf_buf *sf, bool coherent)
|
||||
dmar_unmap_pgtbl(struct sf_buf *sf)
|
||||
{
|
||||
vm_page_t m;
|
||||
|
||||
m = sf_buf_page(sf);
|
||||
sf_buf_free(sf);
|
||||
sched_unpin();
|
||||
}
|
||||
|
||||
static void
|
||||
dmar_flush_transl_to_ram(struct dmar_unit *unit, void *dst, size_t sz)
|
||||
{
|
||||
|
||||
if (DMAR_IS_COHERENT(unit))
|
||||
return;
|
||||
/*
|
||||
* If DMAR does not snoop paging structures accesses, flush
|
||||
* CPU cache to memory.
|
||||
*/
|
||||
if (!coherent)
|
||||
pmap_invalidate_cache_pages(&m, 1);
|
||||
pmap_invalidate_cache_range((uintptr_t)dst, (uintptr_t)dst + sz,
|
||||
TRUE);
|
||||
}
|
||||
|
||||
void
|
||||
dmar_flush_pte_to_ram(struct dmar_unit *unit, dmar_pte_t *dst)
|
||||
{
|
||||
|
||||
dmar_flush_transl_to_ram(unit, dst, sizeof(*dst));
|
||||
}
|
||||
|
||||
void
|
||||
dmar_flush_ctx_to_ram(struct dmar_unit *unit, dmar_ctx_entry_t *dst)
|
||||
{
|
||||
|
||||
dmar_flush_transl_to_ram(unit, dst, sizeof(*dst));
|
||||
}
|
||||
|
||||
void
|
||||
dmar_flush_root_to_ram(struct dmar_unit *unit, dmar_root_entry_t *dst)
|
||||
{
|
||||
|
||||
dmar_flush_transl_to_ram(unit, dst, sizeof(*dst));
|
||||
}
|
||||
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user