xen-kernel: apply XSA-{237..244}
Approved by:	bapt (implicit)
MFH:		2017Q4
Sponsored by:	Citrix Systems R&D
parent 0e31c2371d
commit aac1e9a2f0

Notes (svn2git):
    svn path=/head/; revision=451882
emulators/xen-kernel/Makefile
@@ -2,7 +2,7 @@
 
 PORTNAME=	xen
 PORTVERSION=	4.7.2
-PORTREVISION=	5
+PORTREVISION=	6
 CATEGORIES=	emulators
 MASTER_SITES=	http://downloads.xenproject.org/release/xen/${PORTVERSION}/
 PKGNAMESUFFIX=	-kernel
@@ -67,7 +67,20 @@ EXTRA_PATCHES=	${FILESDIR}/0001-xen-logdirty-prevent-preemption-if-finished.patc
 		${FILESDIR}/xsa231-4.7.patch:-p1 \
 		${FILESDIR}/xsa232.patch:-p1 \
 		${FILESDIR}/xsa233.patch:-p1 \
-		${FILESDIR}/xsa234-4.8.patch:-p1
+		${FILESDIR}/xsa234-4.8.patch:-p1 \
+		${FILESDIR}/0001-x86-dont-allow-MSI-pIRQ-mapping-on-unowned-device.patch:-p1 \
+		${FILESDIR}/0002-x86-enforce-proper-privilege-when-mapping-pIRQ-s.patch:-p1 \
+		${FILESDIR}/0003-x86-MSI-disallow-redundant-enabling.patch:-p1 \
+		${FILESDIR}/0004-x86-IRQ-conditionally-preserve-irq-pirq-mapping-on-error.patch:-p1 \
+		${FILESDIR}/0005-x86-FLASK-fix-unmap-domain-IRQ-XSM-hook.patch:-p1 \
+		${FILESDIR}/xsa238.patch:-p1 \
+		${FILESDIR}/xsa239.patch:-p1 \
+		${FILESDIR}/0001-x86-limit-linear-page-table-use-to-a-single-level.patch:-p1 \
+		${FILESDIR}/0002-x86-mm-Disable-PV-linear-pagetables-by-default.patch:-p1 \
+		${FILESDIR}/xsa241-4.8.patch:-p1 \
+		${FILESDIR}/xsa242-4.9.patch:-p1 \
+		${FILESDIR}/xsa243-4.7.patch:-p1 \
+		${FILESDIR}/xsa244-4.7.patch:-p1
 
 .include <bsd.port.options.mk>
 
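The `:-p1` suffix on each EXTRA_PATCHES entry selects the strip level the ports framework hands to patch(1) when it applies the file. A rough hand-run equivalent, for illustration only (the framework's real invocation adds further options), would be:

	cd ${WRKSRC} && /usr/bin/patch -p1 < ${FILESDIR}/xsa238.patch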
emulators/xen-kernel/files/0001-x86-dont-allow-MSI-pIRQ-mapping-on-unowned-device.patch (new file, 27 lines)
@@ -0,0 +1,27 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86: don't allow MSI pIRQ mapping on unowned device

MSI setup should be permitted only for existing devices owned by the
respective guest (the operation may still be carried out by the domain
controlling that guest).

This is part of XSA-237.

Reported-by: HW42 <hw42@ipsumj.de>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -1964,7 +1964,10 @@ int map_domain_pirq(
         if ( !cpu_has_apic )
             goto done;
 
-        pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
+        pdev = pci_get_pdev_by_domain(d, msi->seg, msi->bus, msi->devfn);
+        if ( !pdev )
+            goto done;
+
         ret = pci_enable_msi(msi, &msi_desc);
         if ( ret )
         {
emulators/xen-kernel/files/0001-x86-limit-linear-page-table-use-to-a-single-level.patch (new file, 494 lines)
@@ -0,0 +1,494 @@
From ea7513a3e3f28cfec59dda6e128b6b4968685762 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Thu, 28 Sep 2017 15:17:27 +0100
Subject: [PATCH 1/2] x86: limit linear page table use to a single level

That's the only way that they're meant to be used. Without such a
restriction arbitrarily long chains of same-level page tables can be
built, tearing down of which may then cause arbitrarily deep recursion,
causing a stack overflow. To facilitate this restriction, a counter is
being introduced to track both the number of same-level entries in a
page table as well as the number of uses of a page table in another
same-level one (counting into positive and negative direction
respectively, utilizing the fact that both counts can't be non-zero at
the same time).

Note that the added accounting introduces a restriction on the number
of times a page can be used in other same-level page tables - more than
32k of such uses are no longer possible.

Note also that some put_page_and_type[_preemptible]() calls are
replaced with open-coded equivalents. This seemed preferrable to
adding "parent_table" to the matrix of functions.

Note further that cross-domain same-level page table references are no
longer permitted (they probably never should have been).

This is XSA-240.

Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
---
 xen/arch/x86/domain.c        |   1 +
 xen/arch/x86/mm.c            | 171 ++++++++++++++++++++++++++++++++++++++-----
 xen/include/asm-x86/domain.h |   2 +
 xen/include/asm-x86/mm.h     |  25 +++++--
 4 files changed, 175 insertions(+), 24 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 452748dd5b..44ed2ccd0a 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1237,6 +1237,7 @@ int arch_set_info_guest(
         case -EINTR:
             rc = -ERESTART;
         case -ERESTART:
+            v->arch.old_guest_ptpg = NULL;
             v->arch.old_guest_table =
                 pagetable_get_page(v->arch.guest_table);
             v->arch.guest_table = pagetable_null();
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index e97ecccd93..e81a461b91 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -732,6 +732,61 @@ static void put_data_page(
         put_page(page);
 }
 
+static bool_t inc_linear_entries(struct page_info *pg)
+{
+    typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc;
+
+    do {
+        /*
+         * The check below checks for the "linear use" count being non-zero
+         * as well as overflow. Signed integer overflow is undefined behavior
+         * according to the C spec. However, as long as linear_pt_count is
+         * smaller in size than 'int', the arithmetic operation of the
+         * increment below won't overflow; rather the result will be truncated
+         * when stored. Ensure that this is always true.
+         */
+        BUILD_BUG_ON(sizeof(nc) >= sizeof(int));
+        oc = nc++;
+        if ( nc <= 0 )
+            return 0;
+        nc = cmpxchg(&pg->linear_pt_count, oc, nc);
+    } while ( oc != nc );
+
+    return 1;
+}
+
+static void dec_linear_entries(struct page_info *pg)
+{
+    typeof(pg->linear_pt_count) oc;
+
+    oc = arch_fetch_and_add(&pg->linear_pt_count, -1);
+    ASSERT(oc > 0);
+}
+
+static bool_t inc_linear_uses(struct page_info *pg)
+{
+    typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc;
+
+    do {
+        /* See the respective comment in inc_linear_entries(). */
+        BUILD_BUG_ON(sizeof(nc) >= sizeof(int));
+        oc = nc--;
+        if ( nc >= 0 )
+            return 0;
+        nc = cmpxchg(&pg->linear_pt_count, oc, nc);
+    } while ( oc != nc );
+
+    return 1;
+}
+
+static void dec_linear_uses(struct page_info *pg)
+{
+    typeof(pg->linear_pt_count) oc;
+
+    oc = arch_fetch_and_add(&pg->linear_pt_count, 1);
+    ASSERT(oc < 0);
+}
+
 /*
  * We allow root tables to map each other (a.k.a. linear page tables). It
  * needs some special care with reference counts and access permissions:
@@ -761,15 +816,35 @@ get_##level##_linear_pagetable(                           \
                                                                             \
     if ( (pfn = level##e_get_pfn(pde)) != pde_pfn )                         \
     {                                                                       \
+        struct page_info *ptpg = mfn_to_page(pde_pfn);                      \
+                                                                            \
+        /* Make sure the page table belongs to the correct domain. */      \
+        if ( unlikely(page_get_owner(ptpg) != d) )                          \
+            return 0;                                                       \
+                                                                            \
         /* Make sure the mapped frame belongs to the correct domain. */    \
         if ( unlikely(!get_page_from_pagenr(pfn, d)) )                      \
             return 0;                                                       \
                                                                             \
         /*                                                                  \
-         * Ensure that the mapped frame is an already-validated page table. \
+         * Ensure that the mapped frame is an already-validated page table \
+         * and is not itself having linear entries, as well as that the    \
+         * containing page table is not iself in use as a linear page table \
+         * elsewhere.                                                       \
          * If so, atomically increment the count (checking for overflow).  \
          */                                                                 \
         page = mfn_to_page(pfn);                                            \
+        if ( !inc_linear_entries(ptpg) )                                    \
+        {                                                                   \
+            put_page(page);                                                 \
+            return 0;                                                       \
+        }                                                                   \
+        if ( !inc_linear_uses(page) )                                       \
+        {                                                                   \
+            dec_linear_entries(ptpg);                                       \
+            put_page(page);                                                 \
+            return 0;                                                       \
+        }                                                                   \
         y = page->u.inuse.type_info;                                        \
         do {                                                                \
             x = y;                                                          \
@@ -777,6 +852,8 @@ get_##level##_linear_pagetable(                            \
             unlikely((x & (PGT_type_mask|PGT_validated)) !=                 \
                      (PGT_##level##_page_table|PGT_validated)) )            \
         {                                                                   \
+            dec_linear_uses(page);                                          \
+            dec_linear_entries(ptpg);                                       \
             put_page(page);                                                 \
             return 0;                                                       \
         }                                                                   \
@@ -1201,6 +1278,9 @@ get_page_from_l4e(
             l3e_remove_flags((pl3e), _PAGE_USER|_PAGE_RW|_PAGE_ACCESSED);   \
     } while ( 0 )
 
+static int _put_page_type(struct page_info *page, bool_t preemptible,
+                          struct page_info *ptpg);
+
 void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner)
 {
     unsigned long pfn = l1e_get_pfn(l1e);
@@ -1270,17 +1350,22 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
     if ( l2e_get_flags(l2e) & _PAGE_PSE )
         put_superpage(l2e_get_pfn(l2e));
     else
-        put_page_and_type(l2e_get_page(l2e));
+    {
+        struct page_info *pg = l2e_get_page(l2e);
+        int rc = _put_page_type(pg, 0, mfn_to_page(pfn));
+
+        ASSERT(!rc);
+        put_page(pg);
+    }
 
     return 0;
 }
 
-static int __put_page_type(struct page_info *, int preemptible);
-
 static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
                              int partial, bool_t defer)
 {
     struct page_info *pg;
+    int rc;
 
     if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) )
         return 1;
@@ -1303,21 +1388,28 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
     if ( unlikely(partial > 0) )
     {
         ASSERT(!defer);
-        return __put_page_type(pg, 1);
+        return _put_page_type(pg, 1, mfn_to_page(pfn));
     }
 
     if ( defer )
     {
+        current->arch.old_guest_ptpg = mfn_to_page(pfn);
         current->arch.old_guest_table = pg;
         return 0;
     }
 
-    return put_page_and_type_preemptible(pg);
+    rc = _put_page_type(pg, 1, mfn_to_page(pfn));
+    if ( likely(!rc) )
+        put_page(pg);
+
+    return rc;
 }
 
 static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
                              int partial, bool_t defer)
 {
+    int rc = 1;
+
     if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
          (l4e_get_pfn(l4e) != pfn) )
     {
@@ -1326,18 +1418,22 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
         if ( unlikely(partial > 0) )
         {
             ASSERT(!defer);
-            return __put_page_type(pg, 1);
+            return _put_page_type(pg, 1, mfn_to_page(pfn));
         }
 
         if ( defer )
         {
+            current->arch.old_guest_ptpg = mfn_to_page(pfn);
             current->arch.old_guest_table = pg;
             return 0;
         }
 
-        return put_page_and_type_preemptible(pg);
+        rc = _put_page_type(pg, 1, mfn_to_page(pfn));
+        if ( likely(!rc) )
+            put_page(pg);
     }
-    return 1;
+
+    return rc;
 }
 
 static int alloc_l1_table(struct page_info *page)
@@ -1535,6 +1631,7 @@ static int alloc_l3_table(struct page_info *page)
         {
             page->nr_validated_ptes = i;
             page->partial_pte = 0;
+            current->arch.old_guest_ptpg = NULL;
             current->arch.old_guest_table = page;
         }
         while ( i-- > 0 )
@@ -1627,6 +1724,7 @@ static int alloc_l4_table(struct page_info *page)
         {
             if ( current->arch.old_guest_table )
                 page->nr_validated_ptes++;
+            current->arch.old_guest_ptpg = NULL;
             current->arch.old_guest_table = page;
         }
     }
@@ -2369,14 +2467,20 @@ int free_page_type(struct page_info *pag
 }
 
 
-static int __put_final_page_type(
-    struct page_info *page, unsigned long type, int preemptible)
+static int _put_final_page_type(struct page_info *page, unsigned long type,
+                                bool_t preemptible, struct page_info *ptpg)
 {
     int rc = free_page_type(page, type, preemptible);
 
     /* No need for atomic update of type_info here: noone else updates it. */
     if ( rc == 0 )
     {
+        if ( ptpg && PGT_type_equal(type, ptpg->u.inuse.type_info) )
+        {
+            dec_linear_uses(page);
+            dec_linear_entries(ptpg);
+        }
+        ASSERT(!page->linear_pt_count || page_get_owner(page)->is_dying);
         /*
          * Record TLB information for flush later. We do not stamp page tables
          * when running in shadow mode:
@@ -2412,8 +2516,8 @@ static int __put_final_page_type(
 }
 
 
-static int __put_page_type(struct page_info *page,
-                           int preemptible)
+static int _put_page_type(struct page_info *page, bool_t preemptible,
+                          struct page_info *ptpg)
 {
     unsigned long nx, x, y = page->u.inuse.type_info;
     int rc = 0;
@@ -2440,12 +2544,28 @@ static int __put_page_type(struct page_info *page,
                                            x, nx)) != x) )
             continue;
         /* We cleared the 'valid bit' so we do the clean up. */
-        rc = __put_final_page_type(page, x, preemptible);
+        rc = _put_final_page_type(page, x, preemptible, ptpg);
+        ptpg = NULL;
         if ( x & PGT_partial )
             put_page(page);
         break;
     }
 
+    if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) )
+    {
+        /*
+         * page_set_tlbflush_timestamp() accesses the same union
+         * linear_pt_count lives in. Unvalidated page table pages,
+         * however, should occur during domain destruction only
+         * anyway. Updating of linear_pt_count luckily is not
+         * necessary anymore for a dying domain.
+         */
+        ASSERT(page_get_owner(page)->is_dying);
+        ASSERT(page->linear_pt_count < 0);
+        ASSERT(ptpg->linear_pt_count > 0);
+        ptpg = NULL;
+    }
+
     /*
      * Record TLB information for flush later. We do not stamp page
      * tables when running in shadow mode:
@@ -2465,6 +2585,13 @@ static int __put_page_type(struct page_info *page,
         return -EINTR;
     }
 
+    if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) )
+    {
+        ASSERT(!rc);
+        dec_linear_uses(page);
+        dec_linear_entries(ptpg);
+    }
+
     return rc;
 }
 
@@ -2599,6 +2726,7 @@ static int __get_page_type(struct page_info *page, unsigned long type,
             page->nr_validated_ptes = 0;
             page->partial_pte = 0;
         }
+        page->linear_pt_count = 0;
         rc = alloc_page_type(page, type, preemptible);
     }
 
@@ -2610,7 +2738,7 @@ static int __get_page_type(struct page_info *page, unsigned long type,
 
 void put_page_type(struct page_info *page)
 {
-    int rc = __put_page_type(page, 0);
+    int rc = _put_page_type(page, 0, NULL);
     ASSERT(rc == 0);
     (void)rc;
 }
@@ -2626,7 +2754,7 @@ int get_page_type(struct page_info *page, unsigned long type)
 
 int put_page_type_preemptible(struct page_info *page)
 {
-    return __put_page_type(page, 1);
+    return _put_page_type(page, 1, NULL);
 }
 
 int get_page_type_preemptible(struct page_info *page, unsigned long type)
@@ -2832,11 +2960,14 @@ int put_old_guest_table(struct vcpu *v)
     if ( !v->arch.old_guest_table )
         return 0;
 
-    switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table) )
+    switch ( rc = _put_page_type(v->arch.old_guest_table, 1,
+                                 v->arch.old_guest_ptpg) )
     {
     case -EINTR:
     case -ERESTART:
         return -ERESTART;
+    case 0:
+        put_page(v->arch.old_guest_table);
     }
 
     v->arch.old_guest_table = NULL;
@@ -2993,6 +3124,7 @@ int new_guest_cr3(unsigned long mfn)
         rc = -ERESTART;
         /* fallthrough */
     case -ERESTART:
+        curr->arch.old_guest_ptpg = NULL;
         curr->arch.old_guest_table = page;
         break;
     default:
@@ -3260,7 +3392,10 @@ long do_mmuext_op(
                 if ( type == PGT_l1_page_table )
                     put_page_and_type(page);
                 else
+                {
+                    curr->arch.old_guest_ptpg = NULL;
                     curr->arch.old_guest_table = page;
+                }
             }
         }
 
@@ -3293,6 +3428,7 @@ long do_mmuext_op(
             {
             case -EINTR:
             case -ERESTART:
+                curr->arch.old_guest_ptpg = NULL;
                 curr->arch.old_guest_table = page;
                 rc = 0;
                 break;
@@ -3371,6 +3507,7 @@ long do_mmuext_op(
             rc = -ERESTART;
             /* fallthrough */
         case -ERESTART:
+            curr->arch.old_guest_ptpg = NULL;
            curr->arch.old_guest_table = page;
             break;
         default:
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 165e533ab3..5ef761be8b 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -529,6 +529,8 @@ struct arch_vcpu
     pagetable_t guest_table_user;       /* (MFN) x86/64 user-space pagetable */
     pagetable_t guest_table;            /* (MFN) guest notion of cr3 */
     struct page_info *old_guest_table;  /* partially destructed pagetable */
+    struct page_info *old_guest_ptpg;   /* containing page table of the */
+                                        /* former, if any */
     /* guest_table holds a ref to the page, and also a type-count unless
      * shadow refcounts are in use */
     pagetable_t shadow_table[4];        /* (MFN) shadow(s) of guest */
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index a30e76db1e..905c7971f2 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -125,11 +125,11 @@ struct page_info
         u32 tlbflush_timestamp;
 
         /*
-         * When PGT_partial is true then this field is valid and indicates
-         * that PTEs in the range [0, @nr_validated_ptes) have been validated.
-         * An extra page reference must be acquired (or not dropped) whenever
-         * PGT_partial gets set, and it must be dropped when the flag gets
-         * cleared. This is so that a get() leaving a page in partially
+         * When PGT_partial is true then the first two fields are valid and
+         * indicate that PTEs in the range [0, @nr_validated_ptes) have been
+         * validated. An extra page reference must be acquired (or not dropped)
+         * whenever PGT_partial gets set, and it must be dropped when the flag
+         * gets cleared. This is so that a get() leaving a page in partially
          * validated state (where the caller would drop the reference acquired
          * due to the getting of the type [apparently] failing [-ERESTART])
          * would not accidentally result in a page left with zero general
@@ -153,10 +153,18 @@ struct page_info
          * put_page_from_lNe() (due to the apparent failure), and hence it
          * must be dropped when the put operation is resumed (and completes),
          * but it must not be acquired if picking up the page for validation.
+         *
+         * The 3rd field, @linear_pt_count, indicates
+         * - by a positive value, how many same-level page table entries a page
+         *   table has,
+         * - by a negative value, in how many same-level page tables a page is
+         *   in use.
          */
         struct {
-            u16 nr_validated_ptes;
-            s8 partial_pte;
+            u16 nr_validated_ptes:PAGETABLE_ORDER + 1;
+            u16 :16 - PAGETABLE_ORDER - 1 - 2;
+            s16 partial_pte:2;
+            s16 linear_pt_count;
         };
 
         /*
@@ -207,6 +215,9 @@ struct page_info
 #define PGT_count_width   PG_shift(9)
 #define PGT_count_mask    ((1UL<<PGT_count_width)-1)
 
+/* Are the 'type mask' bits identical? */
+#define PGT_type_equal(x, y) (!(((x) ^ (y)) & PGT_type_mask))
+
 /* Cleared when the owning guest 'frees' this page. */
 #define _PGC_allocated    PG_shift(1)
 #define PGC_allocated     PG_mask(1, 1)
-- 
2.14.1

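The counting scheme this patch introduces can be exercised outside Xen. Below is a minimal user-space model of linear_pt_count — an assumption-laden sketch, not Xen code: C11 atomics stand in for Xen's read_atomic()/cmpxchg(), and a plain _Atomic short replaces the struct page_info bitfield. It shows how the two counting directions exclude each other:

#include <stdatomic.h>
#include <stdio.h>

static _Atomic short linear_pt_count;   /* > 0: entries; < 0: uses */

/* Count one more same-level entry; fails if the page is already counted
 * as "in use" (negative) or the positive count would overflow. */
static int inc_linear_entries(void)
{
    short oc = atomic_load(&linear_pt_count), nc;

    do {
        nc = oc + 1;   /* truncation on store models Xen's overflow check */
        if ( nc <= 0 )
            return 0;
    } while ( !atomic_compare_exchange_weak(&linear_pt_count, &oc, nc) );

    return 1;
}

/* Count one more use in another same-level table; fails if the page
 * already has same-level entries of its own (positive count). */
static int inc_linear_uses(void)
{
    short oc = atomic_load(&linear_pt_count), nc;

    do {
        nc = oc - 1;
        if ( nc >= 0 )
            return 0;
    } while ( !atomic_compare_exchange_weak(&linear_pt_count, &oc, nc) );

    return 1;
}

int main(void)
{
    printf("entries: %d\n", inc_linear_entries()); /* 1: count goes 0 -> +1 */
    printf("uses:    %d\n", inc_linear_uses());    /* 0: count is positive */
    return 0;
}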
emulators/xen-kernel/files/0002-x86-enforce-proper-privilege-when-mapping-pIRQ-s.patch (new file, 66 lines)
@@ -0,0 +1,66 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86: enforce proper privilege when (un)mapping pIRQ-s

(Un)mapping of IRQs, just like other RESOURCE__ADD* / RESOURCE__REMOVE*
actions (in FLASK terms) should be XSM_DM_PRIV rather than XSM_TARGET.
This in turn requires bypassing the XSM check in physdev_unmap_pirq()
for the HVM emuirq case just like is being done in physdev_map_pirq().
The primary goal security wise, however, is to no longer allow HVM
guests, by specifying their own domain ID instead of DOMID_SELF, to
enter code paths intended for PV guest and the control domains of HVM
guests only.

This is part of XSA-237.

Reported-by: HW42 <hw42@ipsumj.de>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>

--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -110,7 +110,7 @@ int physdev_map_pirq(domid_t domid, int
     if ( d == NULL )
         return -ESRCH;
 
-    ret = xsm_map_domain_pirq(XSM_TARGET, d);
+    ret = xsm_map_domain_pirq(XSM_DM_PRIV, d);
     if ( ret )
         goto free_domain;
 
@@ -255,13 +255,14 @@ int physdev_map_pirq(domid_t domid, int
 int physdev_unmap_pirq(domid_t domid, int pirq)
 {
     struct domain *d;
-    int ret;
+    int ret = 0;
 
     d = rcu_lock_domain_by_any_id(domid);
     if ( d == NULL )
         return -ESRCH;
 
-    ret = xsm_unmap_domain_pirq(XSM_TARGET, d);
+    if ( domid != DOMID_SELF || !is_hvm_domain(d) )
+        ret = xsm_unmap_domain_pirq(XSM_DM_PRIV, d);
     if ( ret )
         goto free_domain;
 
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -453,7 +453,7 @@ static XSM_INLINE char *xsm_show_irq_sid
 
 static XSM_INLINE int xsm_map_domain_pirq(XSM_DEFAULT_ARG struct domain *d)
 {
-    XSM_ASSERT_ACTION(XSM_TARGET);
+    XSM_ASSERT_ACTION(XSM_DM_PRIV);
     return xsm_default_action(action, current->domain, d);
 }
 
@@ -465,7 +465,7 @@ static XSM_INLINE int xsm_map_domain_irq
 
 static XSM_INLINE int xsm_unmap_domain_pirq(XSM_DEFAULT_ARG struct domain *d)
 {
-    XSM_ASSERT_ACTION(XSM_TARGET);
+    XSM_ASSERT_ACTION(XSM_DM_PRIV);
     return xsm_default_action(action, current->domain, d);
 }
 
emulators/xen-kernel/files/0002-x86-mm-Disable-PV-linear-pagetables-by-default.patch (new file, 82 lines)
@@ -0,0 +1,82 @@
From 9a4b34729f1bb92eea1e1efe52e6face9f0b17ae Mon Sep 17 00:00:00 2001
From: George Dunlap <george.dunlap@citrix.com>
Date: Fri, 22 Sep 2017 11:46:55 +0100
Subject: [PATCH 2/2] x86/mm: Disable PV linear pagetables by default

Allowing pagetables to point to other pagetables of the same level
(often called 'linear pagetables') has been included in Xen since its
inception. But it is not used by the most common PV guests (Linux,
NetBSD, minios), and has been the source of a number of subtle
reference-counting bugs.

Add a command-line option to control whether PV linear pagetables are
allowed (disabled by default).

Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
Changes since v2:
- s/_/-/; in command-line option
- Added __read_mostly
---
 docs/misc/xen-command-line.markdown | 15 +++++++++++++++
 xen/arch/x86/mm.c                   |  9 +++++++++
 2 files changed, 24 insertions(+)

diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
index 73f5265fc6..061aff5edc 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1280,6 +1280,21 @@ The following resources are available:
   CDP, one COS will corespond two CBMs other than one with CAT, due to the
   sum of CBMs is fixed, that means actual `cos_max` in use will automatically
   reduce to half when CDP is enabled.
+
+### pv-linear-pt
+> `= <boolean>`
+
+> Default: `false`
+
+Allow PV guests to have pagetable entries pointing to other pagetables
+of the same level (i.e., allowing L2 PTEs to point to other L2 pages).
+This technique is often called "linear pagetables", and is sometimes
+used to allow operating systems a simple way to consistently map the
+current process's pagetables into its own virtual address space.
+
+None of the most common PV operating systems (Linux, NetBSD, MiniOS)
+use this technique, but there may be custom operating systems which
+do.
 
 ### reboot
 > `= t[riple] | k[bd] | a[cpi] | p[ci] | P[ower] | e[fi] | n[o] [, [w]arm | [c]old]`
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index e81a461b91..f748d4a221 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -799,6 +799,9 @@ static void dec_linear_uses(struct page_info *pg)
 * frame if it is mapped by a different root table. This is sufficient and
 * also necessary to allow validation of a root table mapping itself.
 */
+static bool_t __read_mostly pv_linear_pt_enable = 0;
+boolean_param("pv-linear-pt", pv_linear_pt_enable);
+
 #define define_get_linear_pagetable(level)                                  \
 static int                                                                  \
 get_##level##_linear_pagetable(                                             \
@@ -808,6 +811,12 @@ get_##level##_linear_pagetable(                         \
     struct page_info *page;                                                 \
     unsigned long pfn;                                                      \
                                                                             \
+    if ( !pv_linear_pt_enable )                                             \
+    {                                                                       \
+        MEM_LOG("Attempt to create linear p.t. (feature disabled)");        \
+        return 0;                                                           \
+    }                                                                       \
+                                                                            \
     if ( (level##e_get_flags(pde) & _PAGE_RW) )                             \
     {                                                                       \
         MEM_LOG("Attempt to create linear p.t. with write perms");          \
-- 
2.14.1

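Once a patched xen-kernel package is installed, the new knob is controlled from the hypervisor command line. On a FreeBSD dom0 that line comes from loader.conf(5); a hypothetical entry (the other tokens are placeholders, not recommendations) might look like:

	xen_cmdline="dom0_mem=2048M dom0_max_vcpus=2 pv-linear-pt=true"

Leaving the option at its patched default of `false` is the safe choice unless a guest genuinely relies on same-level ("linear") pagetable entries.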
emulators/xen-kernel/files/0003-x86-MSI-disallow-redundant-enabling.patch (new file, 55 lines)
@@ -0,0 +1,55 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86/MSI: disallow redundant enabling

At the moment, Xen attempts to allow redundant enabling of MSI by
having pci_enable_msi() return 0, and point to the existing MSI
descriptor, when the msi already exists.

Unfortunately, if subsequent errors are encountered, the cleanup
paths assume pci_enable_msi() had done full initialization, and
hence undo everything that was assumed to be done by that
function without also undoing other setup that would normally
occur only after that function was called (in map_domain_pirq()
itself).

Rather than try to make the redundant enabling case work properly, just
forbid it entirely by having pci_enable_msi() return -EEXIST when MSI
is already set up.

This is part of XSA-237.

Reported-by: HW42 <hw42@ipsumj.de>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>

--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -1050,11 +1050,10 @@ static int __pci_enable_msi(struct msi_i
     old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSI);
     if ( old_desc )
     {
-        printk(XENLOG_WARNING "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n",
+        printk(XENLOG_ERR "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n",
                msi->irq, msi->seg, msi->bus,
                PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
-        *desc = old_desc;
-        return 0;
+        return -EEXIST;
     }
 
     old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX);
@@ -1118,11 +1117,10 @@ static int __pci_enable_msix(struct msi_
     old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSIX);
     if ( old_desc )
     {
-        printk(XENLOG_WARNING "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n",
+        printk(XENLOG_ERR "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n",
               msi->irq, msi->seg, msi->bus,
               PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
-        *desc = old_desc;
-        return 0;
+        return -EEXIST;
     }
 
     old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI);
emulators/xen-kernel/files/0004-x86-IRQ-conditionally-preserve-irq-pirq-mapping-on-error.patch (new file, 124 lines)
@@ -0,0 +1,124 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86/IRQ: conditionally preserve irq <-> pirq mapping on map error paths

Mappings that had been set up before should not be torn down when
handling unrelated errors.

This is part of XSA-237.

Reported-by: HW42 <hw42@ipsumj.de>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>

--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -1252,7 +1252,8 @@ static int prepare_domain_irq_pirq(struc
         return -ENOMEM;
     }
     *pinfo = info;
-    return 0;
+
+    return !!err;
 }
 
 static void set_domain_irq_pirq(struct domain *d, int irq, struct pirq *pirq)
@@ -1295,7 +1296,10 @@ int init_domain_irq_mapping(struct domai
             continue;
         err = prepare_domain_irq_pirq(d, i, i, &info);
         if ( err )
+        {
+            ASSERT(err < 0);
             break;
+        }
         set_domain_irq_pirq(d, i, info);
     }
 
@@ -1903,6 +1907,7 @@ int map_domain_pirq(
     struct pirq *info;
     struct irq_desc *desc;
     unsigned long flags;
+    DECLARE_BITMAP(prepared, MAX_MSI_IRQS) = {};
 
     ASSERT(spin_is_locked(&d->event_lock));
 
@@ -1946,8 +1951,10 @@ int map_domain_pirq(
     }
 
     ret = prepare_domain_irq_pirq(d, irq, pirq, &info);
-    if ( ret )
+    if ( ret < 0 )
         goto revoke;
+    if ( !ret )
+        __set_bit(0, prepared);
 
     desc = irq_to_desc(irq);
 
@@ -2019,8 +2026,10 @@ int map_domain_pirq(
             irq = create_irq(NUMA_NO_NODE);
             ret = irq >= 0 ? prepare_domain_irq_pirq(d, irq, pirq + nr, &info)
                            : irq;
-            if ( ret )
+            if ( ret < 0 )
                 break;
+            if ( !ret )
+                __set_bit(nr, prepared);
             msi_desc[nr].irq = irq;
 
             if ( irq_permit_access(d, irq) != 0 )
@@ -2053,15 +2062,15 @@ int map_domain_pirq(
                 desc->msi_desc = NULL;
                 spin_unlock_irqrestore(&desc->lock, flags);
             }
-            while ( nr-- )
+            while ( nr )
             {
                 if ( irq >= 0 && irq_deny_access(d, irq) )
                     printk(XENLOG_G_ERR
                            "dom%d: could not revoke access to IRQ%d (pirq %d)\n",
                            d->domain_id, irq, pirq);
-                if ( info )
+                if ( info && test_bit(nr, prepared) )
                     cleanup_domain_irq_pirq(d, irq, info);
-                info = pirq_info(d, pirq + nr);
+                info = pirq_info(d, pirq + --nr);
                 irq = info->arch.irq;
             }
             msi_desc->irq = -1;
@@ -2077,12 +2086,14 @@ int map_domain_pirq(
         spin_lock_irqsave(&desc->lock, flags);
         set_domain_irq_pirq(d, irq, info);
         spin_unlock_irqrestore(&desc->lock, flags);
+        ret = 0;
     }
 
 done:
     if ( ret )
     {
-        cleanup_domain_irq_pirq(d, irq, info);
+        if ( test_bit(0, prepared) )
+            cleanup_domain_irq_pirq(d, irq, info);
 revoke:
         if ( irq_deny_access(d, irq) )
             printk(XENLOG_G_ERR
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -185,7 +185,7 @@ int physdev_map_pirq(domid_t domid, int
         }
         else if ( type == MAP_PIRQ_TYPE_MULTI_MSI )
         {
-            if ( msi->entry_nr <= 0 || msi->entry_nr > 32 )
+            if ( msi->entry_nr <= 0 || msi->entry_nr > MAX_MSI_IRQS )
                 ret = -EDOM;
             else if ( msi->entry_nr != 1 && !iommu_intremap )
                 ret = -EOPNOTSUPP;
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -55,6 +55,8 @@
 /* MAX fixed pages reserved for mapping MSIX tables. */
 #define FIX_MSIX_MAX_PAGES  512
 
+#define MAX_MSI_IRQS 32 /* limited by MSI capability struct properties */
+
 struct msi_info {
     u16 seg;
     u8 bus;
emulators/xen-kernel/files/0005-x86-FLASK-fix-unmap-domain-IRQ-XSM-hook.patch (new file, 37 lines)
@@ -0,0 +1,37 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86/FLASK: fix unmap-domain-IRQ XSM hook

The caller and the FLASK implementation of xsm_unmap_domain_irq()
disagreed about what the "data" argument points to in the MSI case:
Change both sides to pass/take a PCI device.

This is part of XSA-237.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -2144,7 +2144,8 @@ int unmap_domain_pirq(struct domain *d,
         nr = msi_desc->msi.nvec;
     }
 
-    ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, msi_desc);
+    ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq,
+                               msi_desc ? msi_desc->dev : NULL);
     if ( ret )
         goto done;
 
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -915,8 +915,8 @@ static int flask_unmap_domain_msi (struc
                                    u32 *sid, struct avc_audit_data *ad)
 {
 #ifdef CONFIG_HAS_PCI
-    struct msi_info *msi = data;
-    u32 machine_bdf = (msi->seg << 16) | (msi->bus << 8) | msi->devfn;
+    const struct pci_dev *pdev = data;
+    u32 machine_bdf = (pdev->seg << 16) | (pdev->bus << 8) | pdev->devfn;
 
     AVC_AUDIT_DATA_INIT(ad, DEV);
     ad->device = machine_bdf;
emulators/xen-kernel/files/xsa238.patch (new file, 45 lines)
@@ -0,0 +1,45 @@
From cdc2887076b19b39fab9faec495082586f3113df Mon Sep 17 00:00:00 2001
From: XenProject Security Team <security@xenproject.org>
Date: Tue, 5 Sep 2017 13:41:37 +0200
Subject: x86/ioreq server: correctly handle bogus
 XEN_DMOP_{,un}map_io_range_to_ioreq_server arguments

Misbehaving device model can pass incorrect XEN_DMOP_map/
unmap_io_range_to_ioreq_server arguments, namely end < start when
specifying address range. When this happens we hit ASSERT(s <= e) in
rangeset_contains_range()/rangeset_overlaps_range() with debug builds.
Production builds will not trap right away but may misbehave later
while handling such bogus ranges.

This is XSA-238.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
 xen/arch/x86/hvm/ioreq.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c
index b2a8b0e986..8c8bf1f0ec 100644
--- a/xen/arch/x86/hvm/ioreq.c
+++ b/xen/arch/x86/hvm/ioreq.c
@@ -820,6 +820,9 @@ int hvm_map_io_range_to_ioreq_server(struct domain *d, ioservid_t id,
     struct hvm_ioreq_server *s;
     int rc;
 
+    if ( start > end )
+        return -EINVAL;
+
     spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock);
 
     rc = -ENOENT;
@@ -872,6 +875,9 @@ int hvm_unmap_io_range_from_ioreq_server(struct domain *d, ioservid_t id,
     struct hvm_ioreq_server *s;
     int rc;
 
+    if ( start > end )
+        return -EINVAL;
+
     spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock);
 
     rc = -ENOENT;
emulators/xen-kernel/files/xsa239.patch (new file, 46 lines)
@@ -0,0 +1,46 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86/HVM: prefill partially used variable on emulation paths

Certain handlers ignore the access size (vioapic_write() being the
example this was found with), perhaps leading to subsequent reads
seeing data that wasn't actually written by the guest. For
consistency and extra safety also do this on the read path of
hvm_process_io_intercept(), even if this doesn't directly affect what
guests get to see, as we've supposedly already dealt with read handlers
leaving data completely unitialized.

This is XSA-239.

Reported-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -129,7 +129,7 @@ static int hvmemul_do_io(
         .count = *reps,
         .dir = dir,
         .df = df,
-        .data = data,
+        .data = data_is_addr ? data : 0,
         .data_is_ptr = data_is_addr, /* ioreq_t field name is misleading */
         .state = STATE_IOREQ_READY,
     };
--- a/xen/arch/x86/hvm/intercept.c
+++ b/xen/arch/x86/hvm/intercept.c
@@ -127,6 +127,7 @@ int hvm_process_io_intercept(const struc
                 addr = (p->type == IOREQ_TYPE_COPY) ?
                        p->addr + step * i :
                        p->addr;
+                data = 0;
                 rc = ops->read(handler, addr, p->size, &data);
                 if ( rc != X86EMUL_OKAY )
                     break;
@@ -161,6 +162,7 @@ int hvm_process_io_intercept(const struc
             {
                 if ( p->data_is_ptr )
                 {
+                    data = 0;
                     switch ( hvm_copy_from_guest_phys(&data, p->data + step * i,
                                                       p->size) )
                    {
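The leak the prefill guards against is easy to reproduce in plain C. A minimal user-space model (hypothetical names, not the real Xen handlers; byte_read() stands in for a handler that honours only part of the buffer handed to it):

#include <stdint.h>
#include <stdio.h>

/* Stand-in for a device handler that fills only the low byte,
 * regardless of the (larger) buffer it is given. */
static void byte_read(uint64_t *data)
{
    *(uint8_t *)data = 0xab;               /* low byte only (little-endian) */
}

int main(void)
{
    uint64_t data = 0x1122334455667788ULL; /* stale value from a prior op */

    byte_read(&data);                      /* without the fix: 7 stale bytes leak */
    printf("no prefill: %#llx\n", (unsigned long long)data);

    data = 0;                              /* the XSA-239 approach: prefill */
    byte_read(&data);
    printf("prefilled:  %#llx\n", (unsigned long long)data);
    return 0;
}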
emulators/xen-kernel/files/xsa241-4.8.patch (new file, 120 lines)
@@ -0,0 +1,120 @@
x86: don't store possibly stale TLB flush time stamp

While the timing window is extremely narrow, it is theoretically
possible for an update to the TLB flush clock and a subsequent flush
IPI to happen between the read and write parts of the update of the
per-page stamp. Exclude this possibility by disabling interrupts
across the update, preventing the IPI to be serviced in the middle.

This is XSA-241.

Reported-by: Jann Horn <jannh@google.com>
Suggested-by: George Dunlap <george.dunlap@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>

--- a/xen/arch/arm/smp.c
+++ b/xen/arch/arm/smp.c
@@ -1,4 +1,5 @@
 #include <xen/config.h>
+#include <xen/mm.h>
 #include <asm/system.h>
 #include <asm/smp.h>
 #include <asm/cpregs.h>
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2524,7 +2524,7 @@ static int _put_final_page_type(struct p
          */
         if ( !(shadow_mode_enabled(page_get_owner(page)) &&
                (page->count_info & PGC_page_table)) )
-            page->tlbflush_timestamp = tlbflush_current_time();
+            page_set_tlbflush_timestamp(page);
         wmb();
         page->u.inuse.type_info--;
     }
@@ -2534,7 +2534,7 @@ static int _put_final_page_type(struct p
                 (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
         if ( !(shadow_mode_enabled(page_get_owner(page)) &&
                (page->count_info & PGC_page_table)) )
-            page->tlbflush_timestamp = tlbflush_current_time();
+            page_set_tlbflush_timestamp(page);
         wmb();
         page->u.inuse.type_info |= PGT_validated;
     }
@@ -2588,7 +2588,7 @@ static int _put_page_type(struct page_in
     if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) )
     {
         /*
-         * page_set_tlbflush_timestamp() accesses the same union
+         * set_tlbflush_timestamp() accesses the same union
          * linear_pt_count lives in. Unvalidated page table pages,
         * however, should occur during domain destruction only
         * anyway. Updating of linear_pt_count luckily is not
@@ -2609,7 +2609,7 @@ static int _put_page_type(struct page_in
          */
         if ( !(shadow_mode_enabled(page_get_owner(page)) &&
                (page->count_info & PGC_page_table)) )
-            page->tlbflush_timestamp = tlbflush_current_time();
+            page_set_tlbflush_timestamp(page);
     }
 
     if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -1464,7 +1464,7 @@ void shadow_free(struct domain *d, mfn_t
          * TLBs when we reuse the page. Because the destructors leave the
          * contents of the pages in place, we can delay TLB flushes until
          * just before the allocator hands the page out again. */
-        sp->tlbflush_timestamp = tlbflush_current_time();
+        page_set_tlbflush_timestamp(sp);
         perfc_decr(shadow_alloc_count);
         page_list_add_tail(sp, &d->arch.paging.shadow.freelist);
         sp = next;
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -960,7 +960,7 @@ static void free_heap_pages(
         /* If a page has no owner it will need no safety TLB flush. */
         pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
         if ( pg[i].u.free.need_tlbflush )
-            pg[i].tlbflush_timestamp = tlbflush_current_time();
+            page_set_tlbflush_timestamp(&pg[i]);
 
         /* This page is not a guest frame any more. */
         page_set_owner(&pg[i], NULL); /* set_gpfn_from_mfn snoops pg owner */
--- a/xen/include/asm-arm/flushtlb.h
+++ b/xen/include/asm-arm/flushtlb.h
@@ -12,6 +12,11 @@ static inline void tlbflush_filter(cpuma
 
 #define tlbflush_current_time()                 (0)
 
+static inline void page_set_tlbflush_timestamp(struct page_info *page)
+{
+    page->tlbflush_timestamp = tlbflush_current_time();
+}
+
 #if defined(CONFIG_ARM_32)
 # include <asm/arm32/flushtlb.h>
 #elif defined(CONFIG_ARM_64)
--- a/xen/include/asm-x86/flushtlb.h
+++ b/xen/include/asm-x86/flushtlb.h
@@ -23,6 +23,20 @@ DECLARE_PER_CPU(u32, tlbflush_time);
 
 #define tlbflush_current_time() tlbflush_clock
 
+static inline void page_set_tlbflush_timestamp(struct page_info *page)
+{
+    /*
+     * Prevent storing a stale time stamp, which could happen if an update
+     * to tlbflush_clock plus a subsequent flush IPI happen between the
+     * reading of tlbflush_clock and the writing of the struct page_info
+     * field.
+     */
+    ASSERT(local_irq_is_enabled());
+    local_irq_disable();
+    page->tlbflush_timestamp = tlbflush_current_time();
+    local_irq_enable();
+}
+
 /*
  * @cpu_stamp is the timestamp at last TLB flush for the CPU we are testing.
  * @lastuse_stamp is a timestamp taken when the PFN we are testing was last
emulators/xen-kernel/files/xsa242-4.9.patch (new file, 43 lines)
@@ -0,0 +1,43 @@
From: Jan Beulich <jbeulich@suse.com>
Subject: x86: don't allow page_unlock() to drop the last type reference

Only _put_page_type() does the necessary cleanup, and hence not all
domain pages can be released during guest cleanup (leaving around
zombie domains) if we get this wrong.

This is XSA-242.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1923,7 +1923,11 @@ void page_unlock(struct page_info *page)
 
     do {
         x = y;
+        ASSERT((x & PGT_count_mask) && (x & PGT_locked));
+
         nx = x - (1 | PGT_locked);
+        /* We must not drop the last reference here. */
+        ASSERT(nx & PGT_count_mask);
     } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x );
 }
 
@@ -2611,6 +2615,17 @@ static int _put_page_type(struct page_in
                (page->count_info & PGC_page_table)) )
             page_set_tlbflush_timestamp(page);
     }
+    else if ( unlikely((nx & (PGT_locked | PGT_count_mask)) ==
+                       (PGT_locked | 1)) )
+    {
+        /*
+         * We must not drop the second to last reference when the page is
+         * locked, as page_unlock() doesn't do any cleanup of the type.
+         */
+        cpu_relax();
+        y = page->u.inuse.type_info;
+        continue;
+    }
 
     if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
         break;
emulators/xen-kernel/files/xsa243-4.7.patch (new file, 93 lines)
@@ -0,0 +1,93 @@
From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: x86/shadow: Don't create self-linear shadow mappings for 4-level translated guests

When initially creating a monitor table for 4-level translated guests, don't
install a shadow-linear mapping. This mapping is actually self-linear, and
trips up the writeable heuristic logic into following Xen's mappings, not the
guests' shadows it was expecting to follow.

A consequence of this is that sh_guess_wrmap() needs to cope with there being
no shadow-linear mapping present, which in practice occurs once each time a
vcpu switches to 4-level paging from a different paging mode.

An appropriate shadow-linear slot will be inserted into the monitor table
either while constructing lower level monitor tables, or by sh_update_cr3().

While fixing this, clarify the safety of the other mappings. Despite
appearing unsafe, it is correct to create a guest-linear mapping for
translated domains; this is self-linear and doesn't point into the translated
domain. Drop a dead clause for translate != external guests.

This is XSA-243.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Tim Deegan <tim@xen.org>

diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 428be37..c83932f 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -1472,26 +1472,38 @@ void sh_install_xen_entries_in_l4(struct domain *d, mfn_t gl4mfn, mfn_t sl4mfn)
         sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] = shadow_l4e_empty();
     }
 
-    /* Shadow linear mapping for 4-level shadows.  N.B. for 3-level
-     * shadows on 64-bit xen, this linear mapping is later replaced by the
-     * monitor pagetable structure, which is built in make_monitor_table
-     * and maintained by sh_update_linear_entries. */
-    sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
-        shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR);
-
-    /* Self linear mapping. */
-    if ( shadow_mode_translate(d) && !shadow_mode_external(d) )
+    /*
+     * Linear mapping slots:
+     *
+     * Calling this function with gl4mfn == sl4mfn is used to construct a
+     * monitor table for translated domains. In this case, gl4mfn forms the
+     * self-linear mapping (i.e. not pointing into the translated domain), and
+     * the shadow-linear slot is skipped. The shadow-linear slot is either
+     * filled when constructing lower level monitor tables, or via
+     * sh_update_cr3() for 4-level guests.
+     *
+     * Calling this function with gl4mfn != sl4mfn is used for non-translated
+     * guests, where the shadow-linear slot is actually self-linear, and the
+     * guest-linear slot points into the guests view of its pagetables.
+     */
+    if ( shadow_mode_translate(d) )
     {
-        // linear tables may not be used with translated PV guests
-        sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
+        ASSERT(mfn_x(gl4mfn) == mfn_x(sl4mfn));
+
+        sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
             shadow_l4e_empty();
     }
     else
     {
-        sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
-            shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR);
+        ASSERT(mfn_x(gl4mfn) != mfn_x(sl4mfn));
+
+        sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
+            shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR);
     }
 
+    sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
+        shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR);
+
     unmap_domain_page(sl4e);
 }
 #endif
@@ -4293,6 +4305,11 @@ static int sh_guess_wrmap(struct vcpu *v, unsigned long vaddr, mfn_t gmfn)
 
     /* Carefully look in the shadow linear map for the l1e we expect */
 #if SHADOW_PAGING_LEVELS >= 4
+    /* Is a shadow linear map is installed in the first place? */
+    sl4p = v->arch.paging.shadow.guest_vtable;
+    sl4p += shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START);
+    if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
+        return 0;
     sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr);
     if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
         return 0;
emulators/xen-kernel/files/xsa244-4.7.patch
Normal file
51
emulators/xen-kernel/files/xsa244-4.7.patch
Normal file
@ -0,0 +1,51 @@
|
||||
From: Andrew Cooper <andrew.cooper3@citrix.com>
|
||||
Subject: x86/cpu: fix IST handling during PCPU bringup
|
||||
|
||||
Clear IST references in newly allocated IDTs. Nothing good will come of
|
||||
having them set before the TSS is suitably constructed (although the chances
|
||||
of the CPU surviving such an IST interrupt/exception is extremely slim).
|
||||
|
||||
Uniformly set the IST references after the TSS is in place. This fixes an
|
||||
issue on AMD hardware, where onlining a PCPU while PCPU0 is in HVM context
|
||||
will cause IST_NONE to be copied into the new IDT, making that PCPU vulnerable
|
||||
to privilege escalation from PV guests until it subsequently schedules an HVM
|
||||
guest.
|
||||
|
||||
This is XSA-244.
|
||||
|
||||
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
|
||||
Reviewed-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/arch/x86/cpu/common.c
|
||||
+++ b/xen/arch/x86/cpu/common.c
|
||||
@@ -617,6 +617,7 @@ void __init early_cpu_init(void)
|
||||
* - Sets up TSS with stack pointers, including ISTs
|
||||
* - Inserts TSS selector into regular and compat GDTs
|
||||
* - Loads GDT, IDT, TR then null LDT
|
||||
+ * - Sets up IST references in the IDT
|
||||
*/
|
||||
void load_system_tables(void)
|
||||
{
|
||||
@@ -663,6 +664,10 @@ void load_system_tables(void)
|
||||
asm volatile ("lidt %0" : : "m" (idtr) );
|
||||
asm volatile ("ltr %w0" : : "rm" (TSS_ENTRY << 3) );
|
||||
asm volatile ("lldt %w0" : : "rm" (0) );
|
||||
+
|
||||
+ set_ist(&idt_tables[cpu][TRAP_double_fault], IST_DF);
|
||||
+ set_ist(&idt_tables[cpu][TRAP_nmi], IST_NMI);
|
||||
+ set_ist(&idt_tables[cpu][TRAP_machine_check], IST_MCE);
|
||||
}
|
||||
|
||||
/*
|
||||
--- a/xen/arch/x86/smpboot.c
|
||||
+++ b/xen/arch/x86/smpboot.c
|
||||
@@ -715,6 +715,9 @@ static int cpu_smpboot_alloc(unsigned in
|
||||
if ( idt_tables[cpu] == NULL )
|
||||
goto oom;
|
||||
memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES * sizeof(idt_entry_t));
|
||||
+ set_ist(&idt_tables[cpu][TRAP_double_fault], IST_NONE);
|
||||
+ set_ist(&idt_tables[cpu][TRAP_nmi], IST_NONE);
|
||||
+ set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);
|
||||
|
||||
for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1);
|
||||
i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i )
|