1
0
mirror of https://git.FreeBSD.org/src.git synced 2025-01-11 14:10:34 +00:00

Framework for ARM64 instruction disassembler

Provide an easy to use framework for ARM64 DDB disassembler.
    This commit does not contain full list of instruction opcodes.

Obtained from:         Semihalf
Sponsored by:          Cavium
Approved by:           cognet (mentor)
Reviewed by:           zbb, andrew, cognet
Differential revision: https://reviews.freebsd.org/D5114
This commit is contained in:
Wojciech Macek 2016-01-29 13:06:30 +00:00
parent 4f7da057cb
commit 8a1867f4aa
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=295038
5 changed files with 405 additions and 1 deletions

View File

@ -31,11 +31,40 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <ddb/ddb.h>
#include <ddb/db_access.h>
#include <ddb/db_sym.h>
#include <machine/disassem.h>
static u_int db_disasm_read_word(vm_offset_t);
static void db_disasm_printaddr(vm_offset_t);
/* Glue code to interface db_disasm to the generic ARM disassembler */
static const struct disasm_interface db_disasm_interface = {
db_disasm_read_word,
db_disasm_printaddr,
db_printf
};
static u_int
db_disasm_read_word(vm_offset_t address)
{
return (db_get_value(address, INSN_SIZE, 0));
}
static void
db_disasm_printaddr(vm_offset_t address)
{
db_printsym((db_addr_t)address, DB_STGY_ANY);
}
vm_offset_t
db_disasm(vm_offset_t loc, bool altfmt)
{
return 0;
return (disasm(&db_disasm_interface, loc, altfmt));
}
/* End of db_disasm.c */

330
sys/arm64/arm64/disassem.c Normal file
View File

@ -0,0 +1,330 @@
/*-
* Copyright (c) 2016 Cavium
* All rights reserved.
*
* This software was developed by Semihalf.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <machine/disassem.h>
#include <machine/armreg.h>
#include <ddb/ddb.h>
#define ARM64_MAX_TOKEN_LEN 8
#define ARM64_MAX_TOKEN_CNT 10
static const char *w_reg[] = {
"w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
"w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
"w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
"w24", "w25", "w26", "w27", "w28", "w29", "w30", "wSP",
};
static const char *x_reg[] = {
"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
"x24", "x25", "x26", "x27", "x28", "x29", "LR", "SP",
};
static const char *shift_2[] = {
"LSL", "LSR", "ASR", "RSV"
};
/*
* Structure representing single token (operand) inside instruction.
* name - name of operand
* pos - position within the instruction (in bits)
* len - operand length (in bits)
*/
struct arm64_insn_token {
char name[ARM64_MAX_TOKEN_LEN];
int pos;
int len;
};
/*
* Define generic types for instruction printing.
*/
enum arm64_format_type {
TYPE_01, /* OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64
OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64 */
};
/*
* Structure representing single parsed instruction format.
* name - opcode name
* format - opcode format in a human-readable way
* type - syntax type for printing
* special_ops - special options passed to a printer (if any)
* mask - bitmask for instruction matching
* pattern - pattern to look for
* tokens - array of tokens (operands) inside instruction
*/
struct arm64_insn {
char* name;
char* format;
enum arm64_format_type type;
uint64_t special_ops;
uint32_t mask;
uint32_t pattern;
struct arm64_insn_token tokens[ARM64_MAX_TOKEN_CNT];
};
/*
* Specify instruction opcode format in a human-readable way. Use notation
* obtained from ARM Architecture Reference Manual for ARMv8-A.
*
* Format string description:
* Each group must be separated by "|". Group made of 0/1 is used to
* generate mask and pattern for instruction matching. Groups containing
* an operand token (in format NAME(length_bits)) are used to retrieve any
* operand data from the instruction. Names here must be meaningful
* and match the one described in the Manual.
*
* Token description:
* SF - "0" represents 32-bit access, "1" represents 64-bit access
* SHIFT - type of shift (instruction dependent)
* IMM - immediate value
* Rx - register number
*/
static struct arm64_insn arm64_i[] = {
{ "add", "SF(1)|0001011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)", TYPE_01, 0 },
{ "mov", "SF(1)|001000100000000000000|RN(5)|RD(5)", TYPE_01, 0 },
{ "add", "SF(1)|0010001|SHIFT(2)|IMM(12)|RN(5)|RD(5)", TYPE_01, 0 },
{ NULL, NULL }
};
static void
arm64_disasm_generate_masks(struct arm64_insn *tab)
{
uint32_t mask, val;
int a, i;
int len, ret;
int token = 0;
char *format;
int error;
while (tab->name != NULL) {
mask = 0;
val = 0;
format = tab->format;
token = 0;
error = 0;
/*
* For each entry analyze format strings from the
* left (i.e. from the MSB).
*/
a = (INSN_SIZE * NBBY) - 1;
while (*format != '\0' && (a >= 0)) {
switch(*format) {
case '0':
/* Bit is 0, add to mask and pattern */
mask |= (1 << a);
a--;
format++;
break;
case '1':
/* Bit is 1, add to mask and pattern */
mask |= (1 << a);
val |= (1 << a);
a--;
format++;
break;
case '|':
/* skip */
format++;
break;
default:
/* Token found, copy the name */
memset(tab->tokens[token].name, 0,
sizeof(tab->tokens[token].name));
i = 0;
while (*format != '(') {
tab->tokens[token].name[i] = *format;
i++;
format++;
if (i >= ARM64_MAX_TOKEN_LEN) {
printf("ERROR: token too long in op %s\n",
tab->name);
error = 1;
break;
}
}
if (error != 0)
break;
/* Read the length value */
ret = sscanf(format, "(%d)", &len);
if (ret == 1) {
if (token >= ARM64_MAX_TOKEN_CNT) {
printf("ERROR: to many tokens in op %s\n",
tab->name);
error = 1;
break;
}
a -= len;
tab->tokens[token].pos = a + 1;
tab->tokens[token].len = len;
token++;
}
/* Skip to the end of the token */
while (*format != 0 && *format != '|')
format++;
}
}
/* Write mask and pattern to the instruction array */
tab->mask = mask;
tab->pattern = val;
/*
* If we got here, format string must be parsed and "a"
* should point to -1. If it's not, wrong number of bits
* in format string. Mark this as invalid and prevent
* from being matched.
*/
if (*format != 0 || (a != -1) || (error != 0)) {
tab->mask = 0;
tab->pattern = 0xffffffff;
printf("ERROR: skipping instruction op %s\n",
tab->name);
}
tab++;
}
}
static int
arm64_disasm_read_token(struct arm64_insn *insn, u_int opcode,
const char *token, int *val)
{
int i;
for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) {
if (strcmp(insn->tokens[i].name, token) == 0) {
*val = (opcode >> insn->tokens[i].pos &
((1 << insn->tokens[i].len) - 1));
return (0);
}
}
return (EINVAL);
}
static const char *
arm64_reg(int b64, int num)
{
if (b64 != 0)
return (x_reg[num]);
return (w_reg[num]);
}
vm_offset_t
disasm(const struct disasm_interface *di, vm_offset_t loc, int altfmt)
{
struct arm64_insn *i_ptr = arm64_i;
uint32_t insn;
int matchp;
int ret;
int shift, rm, rd, rn, imm, sf;
int rm_absent;
/* Initialize defaults, all are 0 except SF indicating 64bit access */
shift = rd = rm = rn = imm = 0;
sf = 1;
matchp = 0;
insn = di->di_readword(loc);
while (i_ptr->name) {
/* If mask is 0 then the parser was not initialized yet */
if ((i_ptr->mask != 0) &&
((insn & i_ptr->mask) == i_ptr->pattern)) {
matchp = 1;
break;
}
i_ptr++;
}
if (matchp == 0)
goto undefined;
switch (i_ptr->type) {
case TYPE_01:
/* OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64
OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64 */
/* Mandatory tokens */
ret = arm64_disasm_read_token(i_ptr, insn, "SF", &sf);
ret |= arm64_disasm_read_token(i_ptr, insn, "RD", &rd);
ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn);
if (ret != 0) {
printf("ERROR: Missing mandatory token for op %s type %d\n",
i_ptr->name, i_ptr->type);
goto undefined;
}
/* Optional tokens */
arm64_disasm_read_token(i_ptr, insn, "IMM", &imm);
arm64_disasm_read_token(i_ptr, insn, "SHIFT", &shift);
rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm);
di->di_printf("%s\t%s, %s", i_ptr->name, arm64_reg(sf, rd),
arm64_reg(sf, rn));
/* If RM is present use it, otherwise use immediate notation */
if (rm_absent == 0) {
di->di_printf(", %s", arm64_reg(sf, rm));
if (imm != 0)
di->di_printf(", %s #%d", shift_2[shift], imm);
} else {
if (imm != 0 || shift != 0)
di->di_printf(", #0x%x", imm);
if (shift != 0)
di->di_printf(" LSL #12");
}
break;
default:
goto undefined;
}
di->di_printf("\n");
return(loc + INSN_SIZE);
undefined:
di->di_printf("undefined\t%08x\n", insn);
return(loc + INSN_SIZE);
}
/* Parse format strings at the very beginning */
SYSINIT(arm64_disasm_generate_masks, SI_SUB_DDB_SERVICES,
SI_ORDER_FIRST, arm64_disasm_generate_masks, arm64_i);

View File

@ -33,6 +33,8 @@
#ifndef _MACHINE_ARMREG_H_
#define _MACHINE_ARMREG_H_
#define INSN_SIZE 4
#define READ_SPECIALREG(reg) \
({ uint64_t val; \
__asm __volatile("mrs %0, " __STRING(reg) : "=&r" (val)); \

View File

@ -0,0 +1,42 @@
/*-
* Copyright (c) 2016 Cavium
* All rights reserved.
*
* This software was developed by Semihalf.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef __DISASSEM_H_
#define __DISASSEM_H_
struct disasm_interface {
u_int (*di_readword)(vm_offset_t);
void (*di_printaddr)(vm_offset_t);
int (*di_printf)(const char *, ...) __printflike(1, 2);
};
vm_offset_t disasm(const struct disasm_interface *, vm_offset_t, int);
#endif /* __DISASSEM_H_ */

View File

@ -20,6 +20,7 @@ arm64/arm64/db_disasm.c optional ddb
arm64/arm64/db_interface.c optional ddb
arm64/arm64/db_trace.c optional ddb
arm64/arm64/debug_monitor.c optional kdb
arm64/arm64/disassem.c optional ddb
arm64/arm64/dump_machdep.c standard
arm64/arm64/elf_machdep.c standard
arm64/arm64/exception.S standard