From d665d229cef8b8617a89e94898a4e8d770aedd34 Mon Sep 17 00:00:00 2001 From: Neel Natu Date: Wed, 23 Jul 2014 04:28:51 +0000 Subject: [PATCH] Emulate instructions emitted by OpenBSD/i386 version 5.5: - CMP REG, r/m - MOV AX/EAX/RAX, moffset - MOV moffset, AX/EAX/RAX - PUSH r/m --- lib/libvmmapi/vmmapi.c | 40 ++- lib/libvmmapi/vmmapi.h | 9 +- sys/amd64/include/vmm.h | 32 ++ sys/amd64/include/vmm_instruction_emul.h | 4 +- sys/amd64/vmm/vmm.c | 119 +++++++- sys/amd64/vmm/vmm_instruction_emul.c | 356 +++++++++++++++++++---- usr.sbin/bhyve/bhyverun.c | 8 +- usr.sbin/bhyve/inout.c | 12 +- usr.sbin/bhyve/mem.c | 8 +- usr.sbin/bhyve/mem.h | 3 +- usr.sbin/bhyve/task_switch.c | 15 +- 11 files changed, 516 insertions(+), 90 deletions(-) diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index 483aa5199b6..087d0b789f1 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include @@ -326,6 +327,16 @@ vm_get_desc(struct vmctx *ctx, int vcpu, int reg, return (error); } +int +vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *seg_desc) +{ + int error; + + error = vm_get_desc(ctx, vcpu, reg, &seg_desc->base, &seg_desc->limit, + &seg_desc->access); + return (error); +} + int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val) { @@ -988,7 +999,7 @@ gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, #endif int -vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, +vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt) { uint64_t gpa; @@ -1135,3 +1146,30 @@ vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t info1) error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii); return (error); } + +void +vm_inject_ss(struct vmctx *ctx, int vcpu, int errcode) +{ + int error; + + error = vm_inject_exception2(ctx, vcpu, IDT_SS, errcode); + assert(error == 0); +} + +void +vm_inject_ac(struct vmctx *ctx, int vcpu, int errcode) +{ + int error; + + error = vm_inject_exception2(ctx, vcpu, IDT_AC, errcode); + assert(error == 0); +} + +void +vm_inject_gp(struct vmctx *ctx, int vcpu, int errcode) +{ + int error; + + error = vm_inject_exception2(ctx, vcpu, IDT_GP, errcode); + assert(error == 0); +} diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index 2040c91e205..72d75c00267 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -66,6 +66,8 @@ int vm_set_desc(struct vmctx *ctx, int vcpu, int reg, uint64_t base, uint32_t limit, uint32_t access); int vm_get_desc(struct vmctx *ctx, int vcpu, int reg, uint64_t *base, uint32_t *limit, uint32_t *access); +int vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg, + struct seg_desc *seg_desc); int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val); int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval); int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, @@ -124,13 +126,18 @@ int vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities); * The 'iovcnt' should be big enough to accomodate all GPA segments. * Returns 0 on success, 1 on a guest fault condition and -1 otherwise. */ -int vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, +int vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *pg, uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt); void vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *guest_iov, void *host_dst, size_t len); void vm_copyout(struct vmctx *ctx, int vcpu, const void *host_src, struct iovec *guest_iov, size_t len); +/* Helper functions to inject exceptions */ +void vm_inject_ss(struct vmctx *ctx, int vcpu, int errcode); +void vm_inject_ac(struct vmctx *ctx, int vcpu, int errcode); +void vm_inject_gp(struct vmctx *ctx, int vcpu, int errcode); + /* Reset vcpu register state */ int vcpu_reset(struct vmctx *ctx, int vcpu); diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index 6895e64037e..6f476b4afa3 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -114,6 +114,7 @@ struct vioapic; struct vlapic; struct vmspace; struct vm_object; +struct vm_guest_paging; struct pmap; typedef int (*vmm_init_func_t)(int ipinum); @@ -317,10 +318,41 @@ int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2); void vm_inject_gp(struct vm *vm, int vcpuid); /* general protection fault */ void vm_inject_ud(struct vm *vm, int vcpuid); /* undefined instruction fault */ +void vm_inject_ac(struct vm *vm, int vcpuid, int errcode); /* #AC */ +void vm_inject_ss(struct vm *vm, int vcpuid, int errcode); /* #SS */ void vm_inject_pf(struct vm *vm, int vcpuid, int error_code, uint64_t cr2); enum vm_reg_name vm_segment_name(int seg_encoding); +struct vm_copyinfo { + uint64_t gpa; + size_t len; + void *hva; + void *cookie; +}; + +/* + * Set up 'copyinfo[]' to copy to/from guest linear address space starting + * at 'gla' and 'len' bytes long. The 'prot' should be set to PROT_READ for + * a copyin or PROT_WRITE for a copyout. + * + * Returns 0 on success. + * Returns 1 if an exception was injected into the guest. + * Returns -1 otherwise. + * + * The 'copyinfo[]' can be passed to 'vm_copyin()' or 'vm_copyout()' only if + * the return value is 0. The 'copyinfo[]' resources should be freed by calling + * 'vm_copy_teardown()' after the copy is done. + */ +int vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, + uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, + int num_copyinfo); +void vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, + int num_copyinfo); +void vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, + void *kaddr, size_t len); +void vm_copyout(struct vm *vm, int vcpuid, const void *kaddr, + struct vm_copyinfo *copyinfo, size_t len); #endif /* KERNEL */ #define VM_MAXCPU 16 /* maximum virtual cpus */ diff --git a/sys/amd64/include/vmm_instruction_emul.h b/sys/amd64/include/vmm_instruction_emul.h index 05b60fb66dd..bbd3d88d9cf 100644 --- a/sys/amd64/include/vmm_instruction_emul.h +++ b/sys/amd64/include/vmm_instruction_emul.h @@ -52,8 +52,8 @@ typedef int (*mem_region_write_t)(void *vm, int cpuid, uint64_t gpa, * s */ int vmm_emulate_instruction(void *vm, int cpuid, uint64_t gpa, struct vie *vie, - mem_region_read_t mrr, mem_region_write_t mrw, - void *mrarg); + struct vm_guest_paging *paging, mem_region_read_t mrr, + mem_region_write_t mrw, void *mrarg); int vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t val, int size); diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index 25042546b39..b667b482669 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -1235,8 +1235,8 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) return (0); } - error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite, - retu); + error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging, + mread, mwrite, retu); return (error); } @@ -1751,6 +1751,30 @@ vm_inject_ud(struct vm *vm, int vcpuid) vm_inject_fault(vm, vcpuid, &udf); } +void +vm_inject_ac(struct vm *vm, int vcpuid, int error_code) +{ + struct vm_exception acf = { + .vector = IDT_AC, + .error_code_valid = 1, + .error_code = error_code + }; + + vm_inject_fault(vm, vcpuid, &acf); +} + +void +vm_inject_ss(struct vm *vm, int vcpuid, int error_code) +{ + struct vm_exception ssf = { + .vector = IDT_SS, + .error_code_valid = 1, + .error_code = error_code + }; + + vm_inject_fault(vm, vcpuid, &ssf); +} + static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu"); int @@ -2182,6 +2206,97 @@ vm_segment_name(int seg) return (seg_names[seg]); } +void +vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, + int num_copyinfo) +{ + int idx; + + for (idx = 0; idx < num_copyinfo; idx++) { + if (copyinfo[idx].cookie != NULL) + vm_gpa_release(copyinfo[idx].cookie); + } + bzero(copyinfo, num_copyinfo * sizeof(struct vm_copyinfo)); +} + +int +vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, + uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, + int num_copyinfo) +{ + int error, idx, nused; + size_t n, off, remaining; + void *hva, *cookie; + uint64_t gpa; + + bzero(copyinfo, sizeof(struct vm_copyinfo) * num_copyinfo); + + nused = 0; + remaining = len; + while (remaining > 0) { + KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo")); + error = vmm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa); + if (error) + return (error); + off = gpa & PAGE_MASK; + n = min(remaining, PAGE_SIZE - off); + copyinfo[nused].gpa = gpa; + copyinfo[nused].len = n; + remaining -= n; + gla += n; + nused++; + } + + for (idx = 0; idx < nused; idx++) { + hva = vm_gpa_hold(vm, copyinfo[idx].gpa, copyinfo[idx].len, + prot, &cookie); + if (hva == NULL) + break; + copyinfo[idx].hva = hva; + copyinfo[idx].cookie = cookie; + } + + if (idx != nused) { + vm_copy_teardown(vm, vcpuid, copyinfo, num_copyinfo); + return (-1); + } else { + return (0); + } +} + +void +vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr, + size_t len) +{ + char *dst; + int idx; + + dst = kaddr; + idx = 0; + while (len > 0) { + bcopy(copyinfo[idx].hva, dst, copyinfo[idx].len); + len -= copyinfo[idx].len; + dst += copyinfo[idx].len; + idx++; + } +} + +void +vm_copyout(struct vm *vm, int vcpuid, const void *kaddr, + struct vm_copyinfo *copyinfo, size_t len) +{ + const char *src; + int idx; + + src = kaddr; + idx = 0; + while (len > 0) { + bcopy(src, copyinfo[idx].hva, copyinfo[idx].len); + len -= copyinfo[idx].len; + src += copyinfo[idx].len; + idx++; + } +} /* * Return the amount of in-use and wired memory for the VM. Since diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c index 7e09ccbb56c..e8a5f7bcd4e 100644 --- a/sys/amd64/vmm/vmm_instruction_emul.c +++ b/sys/amd64/vmm/vmm_instruction_emul.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #else /* !_KERNEL */ #include #include +#include #include @@ -65,6 +66,8 @@ enum { VIE_OP_TYPE_AND, VIE_OP_TYPE_OR, VIE_OP_TYPE_TWO_BYTE, + VIE_OP_TYPE_PUSH, + VIE_OP_TYPE_CMP, VIE_OP_TYPE_LAST }; @@ -72,6 +75,7 @@ enum { #define VIE_OP_F_IMM (1 << 0) /* 16/32-bit immediate operand */ #define VIE_OP_F_IMM8 (1 << 1) /* 8-bit immediate operand */ #define VIE_OP_F_MOFFSET (1 << 2) /* 16/32/64-bit immediate moffset */ +#define VIE_OP_F_NO_MODRM (1 << 3) static const struct vie_op two_byte_opcodes[256] = { [0xB6] = { @@ -89,6 +93,10 @@ static const struct vie_op one_byte_opcodes[256] = { .op_byte = 0x0F, .op_type = VIE_OP_TYPE_TWO_BYTE }, + [0x3B] = { + .op_byte = 0x3B, + .op_type = VIE_OP_TYPE_CMP, + }, [0x88] = { .op_byte = 0x88, .op_type = VIE_OP_TYPE_MOV, @@ -105,6 +113,16 @@ static const struct vie_op one_byte_opcodes[256] = { .op_byte = 0x8B, .op_type = VIE_OP_TYPE_MOV, }, + [0xA1] = { + .op_byte = 0xA1, + .op_type = VIE_OP_TYPE_MOV, + .op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM, + }, + [0xA3] = { + .op_byte = 0xA3, + .op_type = VIE_OP_TYPE_MOV, + .op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM, + }, [0xC6] = { /* XXX Group 11 extended opcode - not just MOV */ .op_byte = 0xC6, @@ -132,6 +150,11 @@ static const struct vie_op one_byte_opcodes[256] = { .op_type = VIE_OP_TYPE_OR, .op_flags = VIE_OP_F_IMM8, }, + [0xFF] = { + /* XXX Group 5 extended opcode - not just PUSH */ + .op_byte = 0xFF, + .op_type = VIE_OP_TYPE_PUSH, + } }; /* struct vie.mod */ @@ -284,6 +307,53 @@ vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg, return (error); } +/* + * Return the status flags that would result from doing (x - y). + */ +static u_long +getcc16(uint16_t x, uint16_t y) +{ + u_long rflags; + + __asm __volatile("sub %1,%2; pushfq; popq %0" : + "=r" (rflags) : "m" (y), "r" (x)); + return (rflags); +} + +static u_long +getcc32(uint32_t x, uint32_t y) +{ + u_long rflags; + + __asm __volatile("sub %1,%2; pushfq; popq %0" : + "=r" (rflags) : "m" (y), "r" (x)); + return (rflags); +} + +static u_long +getcc64(uint64_t x, uint64_t y) +{ + u_long rflags; + + __asm __volatile("sub %1,%2; pushfq; popq %0" : + "=r" (rflags) : "m" (y), "r" (x)); + return (rflags); +} + +static u_long +getcc(int opsize, uint64_t x, uint64_t y) +{ + KASSERT(opsize == 2 || opsize == 4 || opsize == 8, + ("getcc: invalid operand size %d", opsize)); + + if (opsize == 2) + return (getcc16(x, y)); + else if (opsize == 4) + return (getcc32(x, y)); + else + return (getcc64(x, y)); +} + static int emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) @@ -346,6 +416,32 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, error = vie_update_register(vm, vcpuid, reg, val, size); } break; + case 0xA1: + /* + * MOV from seg:moffset to AX/EAX/RAX + * A1: mov AX, moffs16 + * A1: mov EAX, moffs32 + * REX.W + A1: mov RAX, moffs64 + */ + error = memread(vm, vcpuid, gpa, &val, size, arg); + if (error == 0) { + reg = VM_REG_GUEST_RAX; + error = vie_update_register(vm, vcpuid, reg, val, size); + } + break; + case 0xA3: + /* + * MOV from AX/EAX/RAX to seg:moffset + * A3: mov moffs16, AX + * A3: mov moffs32, EAX + * REX.W + A3: mov moffs64, RAX + */ + error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val); + if (error == 0) { + val &= size2mask[size]; + error = memwrite(vm, vcpuid, gpa, val, size, arg); + } + break; case 0xC6: /* * MOV from imm8 to mem (ModRM:r/m) @@ -553,10 +649,150 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, return (error); } +#define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V) + +static int +emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, mem_region_write_t memwrite, void *arg) +{ + int error, size; + uint64_t op1, op2, rflags, rflags2; + enum vm_reg_name reg; + + size = vie->opsize; + switch (vie->op.op_byte) { + case 0x3B: + /* + * 3B/r CMP r16, r/m16 + * 3B/r CMP r32, r/m32 + * REX.W + 3B/r CMP r64, r/m64 + * + * Compare first operand (reg) with second operand (r/m) and + * set status flags in EFLAGS register. The comparison is + * performed by subtracting the second operand from the first + * operand and then setting the status flags. + */ + + /* Get the first operand */ + reg = gpr_map[vie->reg]; + error = vie_read_register(vm, vcpuid, reg, &op1); + if (error) + return (error); + + /* Get the second operand */ + error = memread(vm, vcpuid, gpa, &op2, size, arg); + if (error) + return (error); + + break; + default: + return (EINVAL); + } + rflags2 = getcc(size, op1, op2); + error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + if (error) + return (error); + rflags &= ~RFLAGS_STATUS_BITS; + rflags |= rflags2 & RFLAGS_STATUS_BITS; + + error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8); + return (error); +} + +static int +emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, + struct vm_guest_paging *paging, mem_region_read_t memread, + mem_region_write_t memwrite, void *arg) +{ +#ifdef _KERNEL + struct vm_copyinfo copyinfo[2]; +#else + struct iovec copyinfo[2]; +#endif + struct seg_desc ss_desc; + uint64_t cr0, rflags, rsp, stack_gla, val; + int error, size, stackaddrsize; + + /* + * Table A-6, "Opcode Extensions", Intel SDM, Vol 2. + * + * PUSH is part of the group 5 extended opcodes and is identified + * by ModRM:reg = b110. + */ + if ((vie->reg & 7) != 6) + return (EINVAL); + + size = vie->opsize; + /* + * From "Address-Size Attributes for Stack Accesses", Intel SDL, Vol 1 + */ + if (paging->cpu_mode == CPU_MODE_REAL) + stackaddrsize = 2; + else if (paging->cpu_mode == CPU_MODE_64BIT) + stackaddrsize = 8; + else { + /* + * In protected or compability mode the 'B' flag in the + * stack-segment descriptor determines the size of the + * stack pointer. + */ + error = vm_get_seg_desc(vm, vcpuid, VM_REG_GUEST_SS, &ss_desc); + KASSERT(error == 0, ("%s: error %d getting SS descriptor", + __func__, error)); + if (SEG_DESC_DEF32(ss_desc.access)) + stackaddrsize = 4; + else + stackaddrsize = 2; + } + + error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0); + KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error)); + + error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); + + error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp); + KASSERT(error == 0, ("%s: error %d getting rsp", __func__, error)); + + rsp -= size; + if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &ss_desc, + rsp, size, stackaddrsize, PROT_WRITE, &stack_gla)) { + vm_inject_ss(vm, vcpuid, 0); + return (0); + } + + if (vie_canonical_check(paging->cpu_mode, stack_gla)) { + vm_inject_ss(vm, vcpuid, 0); + return (0); + } + + if (vie_alignment_check(paging->cpl, size, cr0, rflags, stack_gla)) { + vm_inject_ac(vm, vcpuid, 0); + return (0); + } + + error = vm_copy_setup(vm, vcpuid, paging, stack_gla, size, PROT_WRITE, + copyinfo, nitems(copyinfo)); + if (error) + return (error); + + error = memread(vm, vcpuid, mmio_gpa, &val, size, arg); + if (error == 0) { + vm_copyout(vm, vcpuid, &val, copyinfo, size); + error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSP, rsp, + stackaddrsize); + KASSERT(error == 0, ("error %d updating rsp", error)); + } +#ifdef _KERNEL + vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); +#endif + return (error); +} + int vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - mem_region_read_t memread, mem_region_write_t memwrite, - void *memarg) + struct vm_guest_paging *paging, mem_region_read_t memread, + mem_region_write_t memwrite, void *memarg) { int error; @@ -564,6 +800,14 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, return (EINVAL); switch (vie->op.op_type) { + case VIE_OP_TYPE_PUSH: + error = emulate_push(vm, vcpuid, gpa, vie, paging, memread, + memwrite, memarg); + break; + case VIE_OP_TYPE_CMP: + error = emulate_cmp(vm, vcpuid, gpa, vie, + memread, memwrite, memarg); + break; case VIE_OP_TYPE_MOV: error = emulate_mov(vm, vcpuid, gpa, vie, memread, memwrite, memarg); @@ -970,45 +1214,24 @@ fault: } int -vmm_fetch_instruction(struct vm *vm, int cpuid, struct vm_guest_paging *paging, +vmm_fetch_instruction(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, uint64_t rip, int inst_length, struct vie *vie) { - int n, error, prot; - uint64_t gpa, off; - void *hpa, *cookie; + struct vm_copyinfo copyinfo[2]; + int error, prot; - /* - * XXX cache previously fetched instructions using 'rip' as the tag - */ - - prot = VM_PROT_READ | VM_PROT_EXECUTE; if (inst_length > VIE_INST_SIZE) panic("vmm_fetch_instruction: invalid length %d", inst_length); - /* Copy the instruction into 'vie' */ - while (vie->num_valid < inst_length) { - error = vmm_gla2gpa(vm, cpuid, paging, rip, prot, &gpa); - if (error) - return (error); - - off = gpa & PAGE_MASK; - n = min(inst_length - vie->num_valid, PAGE_SIZE - off); - - if ((hpa = vm_gpa_hold(vm, gpa, n, prot, &cookie)) == NULL) - break; - - bcopy(hpa, &vie->inst[vie->num_valid], n); - - vm_gpa_release(cookie); - - rip += n; - vie->num_valid += n; + prot = PROT_READ | PROT_EXEC; + error = vm_copy_setup(vm, vcpuid, paging, rip, inst_length, prot, + copyinfo, nitems(copyinfo)); + if (error == 0) { + vm_copyin(vm, vcpuid, copyinfo, vie->inst, inst_length); + vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); + vie->num_valid = inst_length; } - - if (vie->num_valid == inst_length) - return (0); - else - return (-1); + return (error); } static int @@ -1138,6 +1361,9 @@ decode_modrm(struct vie *vie, enum vm_cpu_mode cpu_mode) if (cpu_mode == CPU_MODE_REAL) return (-1); + if (vie->op.op_flags & VIE_OP_F_NO_MODRM) + return (0); + if (vie_peek(vie, &x)) return (-1); @@ -1314,24 +1540,14 @@ decode_immediate(struct vie *vie) int i, n; uint8_t x; union { - char buf[8]; + char buf[4]; int8_t signed8; int16_t signed16; int32_t signed32; - int64_t signed64; } u; /* Figure out immediate operand size (if any) */ - if (vie->op.op_flags & VIE_OP_F_MOFFSET) { - /* - * Section 2.2.1.4, "Direct Memory-Offset MOVs", Intel SDM: - * The memory offset size follows the address-size of the - * instruction. Although this is treated as an immediate - * value during instruction decoding it is interpreted as - * a segment offset by the instruction emulation. - */ - vie->imm_bytes = vie->addrsize; - } else if (vie->op.op_flags & VIE_OP_F_IMM) { + if (vie->op.op_flags & VIE_OP_F_IMM) { /* * Section 2.2.1.5 "Immediates", Intel SDM: * In 64-bit mode the typical size of immediate operands @@ -1350,7 +1566,7 @@ decode_immediate(struct vie *vie) if ((n = vie->imm_bytes) == 0) return (0); - KASSERT(n == 1 || n == 2 || n == 4 || n == 8, + KASSERT(n == 1 || n == 2 || n == 4, ("%s: invalid number of immediate bytes: %d", __func__, n)); for (i = 0; i < n; i++) { @@ -1366,20 +1582,41 @@ decode_immediate(struct vie *vie) vie->immediate = u.signed8; else if (n == 2) vie->immediate = u.signed16; - else if (n == 4) - vie->immediate = u.signed32; else - vie->immediate = u.signed64; + vie->immediate = u.signed32; + return (0); +} - if (vie->op.op_flags & VIE_OP_F_MOFFSET) { - /* - * If the immediate value is going to be interpreted as a - * segment offset then undo the sign-extension above. - */ - vie->immediate &= size2mask[n]; +static int +decode_moffset(struct vie *vie) +{ + int i, n; + uint8_t x; + union { + char buf[8]; + uint64_t u64; + } u; + + if ((vie->op.op_flags & VIE_OP_F_MOFFSET) == 0) + return (0); + + /* + * Section 2.2.1.4, "Direct Memory-Offset MOVs", Intel SDM: + * The memory offset size follows the address-size of the instruction. + */ + n = vie->addrsize; + KASSERT(n == 2 || n == 4 || n == 8, ("invalid moffset bytes: %d", n)); + + u.u64 = 0; + for (i = 0; i < n; i++) { + if (vie_peek(vie, &x)) + return (-1); + + u.buf[i] = x; + vie_advance(vie); } - + vie->displacement = u.u64; return (0); } @@ -1470,10 +1707,13 @@ vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, if (decode_displacement(vie)) return (-1); - + if (decode_immediate(vie)) return (-1); + if (decode_moffset(vie)) + return (-1); + if (verify_inst_length(vie)) return (-1); diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index 2b95d9cf880..26c6e537819 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -347,8 +347,7 @@ vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", vme->u.msr.code, *pvcpu); if (strictmsr) { - error = vm_inject_exception2(ctx, *pvcpu, IDT_GP, 0); - assert(error == 0); + vm_inject_gp(ctx, *pvcpu, 0); return (VMEXIT_RESTART); } } @@ -374,8 +373,7 @@ vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", vme->u.msr.code, vme->u.msr.wval, *pvcpu); if (strictmsr) { - error = vm_inject_exception2(ctx, *pvcpu, IDT_GP, 0); - assert(error == 0); + vm_inject_gp(ctx, *pvcpu, 0); return (VMEXIT_RESTART); } } @@ -484,7 +482,7 @@ vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) stats.vmexit_inst_emul++; err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, - &vmexit->u.inst_emul.vie); + &vmexit->u.inst_emul.vie, &vmexit->u.inst_emul.paging); if (err) { if (err == EINVAL) { diff --git a/usr.sbin/bhyve/inout.c b/usr.sbin/bhyve/inout.c index fe9e0d85625..145ac1cbcba 100644 --- a/usr.sbin/bhyve/inout.c +++ b/usr.sbin/bhyve/inout.c @@ -157,15 +157,13 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict) if (vie_calculate_gla(vis->paging.cpu_mode, vis->seg_name, &vis->seg_desc, index, bytes, addrsize, prot, &gla)) { - error = vm_inject_exception2(ctx, vcpu, - IDT_GP, 0); - assert(error == 0); + vm_inject_gp(ctx, vcpu, 0); retval = INOUT_RESTART; break; } - error = vm_gla2gpa(ctx, vcpu, &vis->paging, gla, bytes, - prot, iov, nitems(iov)); + error = vm_copy_setup(ctx, vcpu, &vis->paging, gla, + bytes, prot, iov, nitems(iov)); assert(error == 0 || error == 1 || error == -1); if (error) { retval = (error == 1) ? INOUT_RESTART : @@ -175,9 +173,7 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict) if (vie_alignment_check(vis->paging.cpl, bytes, vis->cr0, vis->rflags, gla)) { - error = vm_inject_exception2(ctx, vcpu, - IDT_AC, 0); - assert(error == 0); + vm_inject_ac(ctx, vcpu, 0); return (INOUT_RESTART); } diff --git a/usr.sbin/bhyve/mem.c b/usr.sbin/bhyve/mem.c index 7ea630f2a58..37cf055f2cc 100644 --- a/usr.sbin/bhyve/mem.c +++ b/usr.sbin/bhyve/mem.c @@ -157,7 +157,9 @@ mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg) } int -emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie) +emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie, + struct vm_guest_paging *paging) + { struct mmio_rb_range *entry; int err; @@ -184,10 +186,10 @@ emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie) } assert(entry != NULL); - err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, + err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, paging, mem_read, mem_write, &entry->mr_param); pthread_rwlock_unlock(&mmio_rwlock); - + return (err); } diff --git a/usr.sbin/bhyve/mem.h b/usr.sbin/bhyve/mem.h index 264bff9e82b..eb648c145df 100644 --- a/usr.sbin/bhyve/mem.h +++ b/usr.sbin/bhyve/mem.h @@ -50,7 +50,8 @@ struct mem_range { #define MEM_F_RW 0x3 void init_mem(void); -int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie); +int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie, + struct vm_guest_paging *paging); int register_mem(struct mem_range *memp); int register_mem_fallback(struct mem_range *memp); diff --git a/usr.sbin/bhyve/task_switch.c b/usr.sbin/bhyve/task_switch.c index e946807aa8e..64339827ea2 100644 --- a/usr.sbin/bhyve/task_switch.c +++ b/usr.sbin/bhyve/task_switch.c @@ -214,7 +214,7 @@ desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, assert(error == 0); assert(limit >= SEL_LIMIT(sel)); - error = vm_gla2gpa(ctx, vcpu, paging, base + SEL_START(sel), + error = vm_copy_setup(ctx, vcpu, paging, base + SEL_START(sel), sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov)); if (error == 0) { if (doread) @@ -508,9 +508,7 @@ tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, */ reserved = ~maxphyaddr | 0x1E6; if (pdpte[i] & reserved) { - error = vm_inject_exception2(ctx, vcpu, - IDT_GP, 0); - assert(error == 0); + vm_inject_gp(ctx, vcpu, 0); return (VMEXIT_RESTART); } } @@ -649,12 +647,11 @@ push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, } if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) { - error = vm_inject_exception2(ctx, vcpu, IDT_AC, 1); - assert(error == 0); + vm_inject_ac(ctx, vcpu, 1); return (VMEXIT_RESTART); } - error = vm_gla2gpa(ctx, vcpu, paging, gla, bytes, PROT_WRITE, + error = vm_copy_setup(ctx, vcpu, paging, gla, bytes, PROT_WRITE, iov, nitems(iov)); assert(error == 0 || error == 1 || error == -1); if (error) { @@ -753,7 +750,7 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) } /* Fetch the new TSS */ - error = vm_gla2gpa(ctx, vcpu, &sup_paging, nt.base, minlimit + 1, + error = vm_copy_setup(ctx, vcpu, &sup_paging, nt.base, minlimit + 1, PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov)); if (error == 1) { /* Restart vcpu execution to handle the page fault */ @@ -793,7 +790,7 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) return (error); /* Get the old TSS */ - error = vm_gla2gpa(ctx, vcpu, &sup_paging, ot_base, minlimit + 1, + error = vm_copy_setup(ctx, vcpu, &sup_paging, ot_base, minlimit + 1, PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov)); if (error == 1) { /* Restart vcpu execution to handle the page fault */