1
0
mirror of https://git.FreeBSD.org/src.git synced 2025-01-21 15:45:02 +00:00

- Add more aggressive BPF JIT optimization. This is in more favor of i386

while the previous commit was more amd64-centric.
- Use calloc(3) instead of malloc(3)/memset(3) in user land[1].

Submitted by:	ed[1]
This commit is contained in:
Jung-uk Kim 2009-11-23 22:23:19 +00:00
parent 65e962fb76
commit 26b8a1c94f
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=199721
4 changed files with 173 additions and 104 deletions

View File

@ -101,29 +101,46 @@ emit_code(bpf_bin_stream *stream, u_int value, u_int len)
static int
bpf_jit_optimize(struct bpf_insn *prog, u_int nins)
{
const struct bpf_insn *p;
int flags;
u_int i;
/* Do we return immediately? */
if (BPF_CLASS(prog[0].code) == BPF_RET)
return (BPF_JIT_FLAG_RET);
return (BPF_JIT_FRET);
for (flags = 0, i = 0; i < nins; i++) {
p = &prog[i];
/* Do we need reference table? */
if ((flags & BPF_JIT_FLAG_JMP) == 0 &&
BPF_CLASS(p->code) == BPF_JMP)
flags |= BPF_JIT_FLAG_JMP;
/* Do we need scratch memory? */
if ((flags & BPF_JIT_FLAG_MEM) == 0 &&
(p->code == BPF_ST || p->code == BPF_STX ||
p->code == (BPF_LD|BPF_MEM) ||
p->code == (BPF_LDX|BPF_MEM)))
flags |= BPF_JIT_FLAG_MEM;
switch (prog[i].code) {
case BPF_LD|BPF_W|BPF_ABS:
case BPF_LD|BPF_H|BPF_ABS:
case BPF_LD|BPF_B|BPF_ABS:
case BPF_LD|BPF_W|BPF_IND:
case BPF_LD|BPF_H|BPF_IND:
case BPF_LD|BPF_B|BPF_IND:
case BPF_LDX|BPF_MSH|BPF_B:
flags |= BPF_JIT_FPKT;
break;
case BPF_LD|BPF_MEM:
case BPF_LDX|BPF_MEM:
case BPF_ST:
case BPF_STX:
flags |= BPF_JIT_FMEM;
break;
case BPF_LD|BPF_W|BPF_LEN:
case BPF_LDX|BPF_W|BPF_LEN:
flags |= BPF_JIT_FLEN;
break;
case BPF_JMP|BPF_JA:
case BPF_JMP|BPF_JGT|BPF_K:
case BPF_JMP|BPF_JGE|BPF_K:
case BPF_JMP|BPF_JEQ|BPF_K:
case BPF_JMP|BPF_JSET|BPF_K:
case BPF_JMP|BPF_JGT|BPF_X:
case BPF_JMP|BPF_JGE|BPF_X:
case BPF_JMP|BPF_JEQ|BPF_X:
case BPF_JMP|BPF_JSET|BPF_X:
flags |= BPF_JIT_FJMP;
break;
}
if (flags == BPF_JIT_FLAG_ALL)
break;
}
@ -139,35 +156,37 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
{
bpf_bin_stream stream;
struct bpf_insn *ins;
int flags, flag_ret, flag_jmp, flag_mem;
int flags, fret, fpkt, fmem, fjmp, flen;
u_int i, pass;
flags = bpf_jit_optimize(prog, nins);
flag_ret = (flags & BPF_JIT_FLAG_RET) != 0;
flag_jmp = (flags & BPF_JIT_FLAG_JMP) != 0;
flag_mem = (flags & BPF_JIT_FLAG_MEM) != 0;
/*
* NOTE: Do not modify the name of this variable, as it's used by
* the macros to emit code.
*/
emit_func emitm;
flags = bpf_jit_optimize(prog, nins);
fret = (flags & BPF_JIT_FRET) != 0;
fpkt = (flags & BPF_JIT_FPKT) != 0;
fmem = (flags & BPF_JIT_FMEM) != 0;
fjmp = (flags & BPF_JIT_FJMP) != 0;
flen = (flags & BPF_JIT_FLEN) != 0;
if (fret)
nins = 1;
memset(&stream, 0, sizeof(stream));
/* Allocate the reference table for the jumps. */
if (flag_jmp) {
if (fjmp) {
#ifdef _KERNEL
stream.refs = malloc((nins + 1) * sizeof(u_int), M_BPFJIT,
M_NOWAIT | M_ZERO);
#else
stream.refs = malloc((nins + 1) * sizeof(u_int));
stream.refs = calloc(nins + 1, sizeof(u_int));
#endif
if (stream.refs == NULL)
return (NULL);
#ifndef _KERNEL
memset(stream.refs, 0, (nins + 1) * sizeof(u_int));
#endif
}
/*
@ -180,14 +199,15 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
ins = prog;
/* Create the procedure header. */
if (flag_mem) {
if (fmem) {
PUSH(RBP);
MOVrq(RSP, RBP);
SUBib(BPF_MEMWORDS * sizeof(uint32_t), RSP);
}
if (!flag_ret) {
MOVrq2(RDI, R8);
if (flen)
MOVrd2(ESI, R9D);
if (fpkt) {
MOVrq2(RDI, R8);
MOVrd(EDX, EDI);
}
@ -204,13 +224,13 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
case BPF_RET|BPF_K:
MOVid(ins->k, EAX);
if (flag_mem)
if (fmem)
LEAVE();
RET();
break;
case BPF_RET|BPF_A:
if (flag_mem)
if (fmem)
LEAVE();
RET();
break;
@ -222,7 +242,7 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
MOVrd(EDI, ECX);
SUBrd(ESI, ECX);
CMPid(sizeof(int32_t), ECX);
if (flag_mem) {
if (fmem) {
JAEb(4);
ZEROrd(EAX);
LEAVE();
@ -244,7 +264,7 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
MOVrd(EDI, ECX);
SUBrd(ESI, ECX);
CMPid(sizeof(int16_t), ECX);
if (flag_mem) {
if (fmem) {
JAEb(2);
LEAVE();
} else
@ -259,7 +279,7 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
ZEROrd(EAX);
MOVid(ins->k, ESI);
CMPrd(EDI, ESI);
if (flag_mem) {
if (fmem) {
JBb(2);
LEAVE();
} else
@ -289,7 +309,7 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
MOVrd(EDI, ECX);
SUBrd(ESI, ECX);
CMPid(sizeof(int32_t), ECX);
if (flag_mem) {
if (fmem) {
JAEb(4);
ZEROrd(EAX);
LEAVE();
@ -316,7 +336,7 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
MOVrd(EDI, ECX);
SUBrd(ESI, ECX);
CMPid(sizeof(int16_t), ECX);
if (flag_mem) {
if (fmem) {
JAEb(2);
LEAVE();
} else
@ -335,7 +355,7 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
MOVrd(EDI, ECX);
SUBrd(EDX, ECX);
CMPrd(ESI, ECX);
if (flag_mem) {
if (fmem) {
JAb(2);
LEAVE();
} else
@ -349,7 +369,7 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
case BPF_LDX|BPF_MSH|BPF_B:
MOVid(ins->k, ESI);
CMPrd(EDI, ESI);
if (flag_mem) {
if (fmem) {
JBb(4);
ZEROrd(EAX);
LEAVE();
@ -475,7 +495,7 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
case BPF_ALU|BPF_DIV|BPF_X:
TESTrd(EDX, EDX);
if (flag_mem) {
if (fmem) {
JNEb(4);
ZEROrd(EAX);
LEAVE();
@ -583,7 +603,7 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
* Modify the reference table to contain the offsets and
* not the lengths of the instructions.
*/
if (flag_jmp)
if (fjmp)
for (i = 1; i < nins + 1; i++)
stream.refs[i] += stream.refs[i - 1];
@ -599,7 +619,7 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
* The reference table is needed only during compilation,
* now we can free it.
*/
if (flag_jmp)
if (fjmp)
#ifdef _KERNEL
free(stream.refs, M_BPFJIT);
#else

View File

@ -86,12 +86,14 @@
#define BL 3
/* Optimization flags */
#define BPF_JIT_FLAG_RET 0x01
#define BPF_JIT_FLAG_JMP 0x02
#define BPF_JIT_FLAG_MEM 0x04
#define BPF_JIT_FRET 0x01
#define BPF_JIT_FPKT 0x02
#define BPF_JIT_FMEM 0x04
#define BPF_JIT_FJMP 0x08
#define BPF_JIT_FLEN 0x10
#define BPF_JIT_FLAG_ALL \
(BPF_JIT_FLAG_JMP | BPF_JIT_FLAG_MEM)
(BPF_JIT_FPKT | BPF_JIT_FMEM | BPF_JIT_FJMP | BPF_JIT_FLEN)
/* A stream of native binary code */
typedef struct bpf_bin_stream {

View File

@ -101,29 +101,45 @@ emit_code(bpf_bin_stream *stream, u_int value, u_int len)
static int
bpf_jit_optimize(struct bpf_insn *prog, u_int nins)
{
const struct bpf_insn *p;
int flags;
u_int i;
/* Do we return immediately? */
if (BPF_CLASS(prog[0].code) == BPF_RET)
return (BPF_JIT_FLAG_RET);
return (BPF_JIT_FRET);
for (flags = 0, i = 0; i < nins; i++) {
p = &prog[i];
/* Do we need reference table? */
if ((flags & BPF_JIT_FLAG_JMP) == 0 &&
BPF_CLASS(p->code) == BPF_JMP)
flags |= BPF_JIT_FLAG_JMP;
/* Do we need scratch memory? */
if ((flags & BPF_JIT_FLAG_MEM) == 0 &&
(p->code == BPF_ST || p->code == BPF_STX ||
p->code == (BPF_LD|BPF_MEM) ||
p->code == (BPF_LDX|BPF_MEM)))
flags |= BPF_JIT_FLAG_MEM;
switch (prog[i].code) {
case BPF_LD|BPF_W|BPF_ABS:
case BPF_LD|BPF_H|BPF_ABS:
case BPF_LD|BPF_B|BPF_ABS:
case BPF_LD|BPF_W|BPF_IND:
case BPF_LD|BPF_H|BPF_IND:
case BPF_LD|BPF_B|BPF_IND:
case BPF_LDX|BPF_MSH|BPF_B:
flags |= BPF_JIT_FPKT;
break;
case BPF_LD|BPF_MEM:
case BPF_LDX|BPF_MEM:
case BPF_ST:
case BPF_STX:
flags |= BPF_JIT_FMEM;
break;
case BPF_JMP|BPF_JA:
case BPF_JMP|BPF_JGT|BPF_K:
case BPF_JMP|BPF_JGE|BPF_K:
case BPF_JMP|BPF_JEQ|BPF_K:
case BPF_JMP|BPF_JSET|BPF_K:
case BPF_JMP|BPF_JGT|BPF_X:
case BPF_JMP|BPF_JGE|BPF_X:
case BPF_JMP|BPF_JEQ|BPF_X:
case BPF_JMP|BPF_JSET|BPF_X:
flags |= BPF_JIT_FJMP;
break;
case BPF_ALU|BPF_DIV|BPF_K:
flags |= BPF_JIT_FADK;
break;
}
if (flags == BPF_JIT_FLAG_ALL)
break;
}
@ -139,35 +155,39 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
{
bpf_bin_stream stream;
struct bpf_insn *ins;
int flags, flag_ret, flag_jmp, flag_mem;
int flags, fret, fpkt, fmem, fjmp, fadk;
int save_esp;
u_int i, pass;
flags = bpf_jit_optimize(prog, nins);
flag_ret = (flags & BPF_JIT_FLAG_RET) != 0;
flag_jmp = (flags & BPF_JIT_FLAG_JMP) != 0;
flag_mem = (flags & BPF_JIT_FLAG_MEM) != 0;
/*
* NOTE: Do not modify the name of this variable, as it's used by
* the macros to emit code.
*/
emit_func emitm;
flags = bpf_jit_optimize(prog, nins);
fret = (flags & BPF_JIT_FRET) != 0;
fpkt = (flags & BPF_JIT_FPKT) != 0;
fmem = (flags & BPF_JIT_FMEM) != 0;
fjmp = (flags & BPF_JIT_FJMP) != 0;
fadk = (flags & BPF_JIT_FADK) != 0;
save_esp = (fpkt || fmem || fadk); /* Stack is used. */
if (fret)
nins = 1;
memset(&stream, 0, sizeof(stream));
/* Allocate the reference table for the jumps. */
if (flag_jmp) {
if (fjmp) {
#ifdef _KERNEL
stream.refs = malloc((nins + 1) * sizeof(u_int), M_BPFJIT,
M_NOWAIT | M_ZERO);
#else
stream.refs = malloc((nins + 1) * sizeof(u_int));
stream.refs = calloc(nins + 1, sizeof(u_int));
#endif
if (stream.refs == NULL)
return (NULL);
#ifndef _KERNEL
memset(stream.refs, 0, (nins + 1) * sizeof(u_int));
#endif
}
/*
@ -180,15 +200,16 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
ins = prog;
/* Create the procedure header. */
if (!flag_ret) {
if (save_esp) {
PUSH(EBP);
MOVrd(ESP, EBP);
}
if (flag_mem)
if (fmem)
SUBib(BPF_MEMWORDS * sizeof(uint32_t), ESP);
if (!flag_ret) {
PUSH(EDI);
if (save_esp)
PUSH(ESI);
if (fpkt) {
PUSH(EDI);
PUSH(EBX);
MOVodd(8, EBP, EBX);
MOVodd(16, EBP, EDI);
@ -207,20 +228,24 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
case BPF_RET|BPF_K:
MOVid(ins->k, EAX);
if (!flag_ret) {
POP(EBX);
if (save_esp) {
if (fpkt) {
POP(EBX);
POP(EDI);
}
POP(ESI);
POP(EDI);
LEAVE();
}
RET();
break;
case BPF_RET|BPF_A:
if (!flag_ret) {
POP(EBX);
if (save_esp) {
if (fpkt) {
POP(EBX);
POP(EDI);
}
POP(ESI);
POP(EDI);
LEAVE();
}
RET();
@ -236,8 +261,8 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
JAEb(7);
ZEROrd(EAX);
POP(EBX);
POP(ESI);
POP(EDI);
POP(ESI);
LEAVE();
RET();
MOVobd(EBX, ESI, EAX);
@ -254,8 +279,8 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
CMPid(sizeof(int16_t), ECX);
JAEb(5);
POP(EBX);
POP(ESI);
POP(EDI);
POP(ESI);
LEAVE();
RET();
MOVobw(EBX, ESI, AX);
@ -268,19 +293,29 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
CMPrd(EDI, ESI);
JBb(5);
POP(EBX);
POP(ESI);
POP(EDI);
POP(ESI);
LEAVE();
RET();
MOVobb(EBX, ESI, AL);
break;
case BPF_LD|BPF_W|BPF_LEN:
MOVodd(12, EBP, EAX);
if (save_esp)
MOVodd(12, EBP, EAX);
else {
MOVrd(ESP, ECX);
MOVodd(12, ECX, EAX);
}
break;
case BPF_LDX|BPF_W|BPF_LEN:
MOVodd(12, EBP, EDX);
if (save_esp)
MOVodd(12, EBP, EDX);
else {
MOVrd(ESP, ECX);
MOVodd(12, ECX, EDX);
}
break;
case BPF_LD|BPF_W|BPF_IND:
@ -298,8 +333,8 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
JAEb(7);
ZEROrd(EAX);
POP(EBX);
POP(ESI);
POP(EDI);
POP(ESI);
LEAVE();
RET();
MOVobd(EBX, ESI, EAX);
@ -321,8 +356,8 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
CMPid(sizeof(int16_t), ECX);
JAEb(5);
POP(EBX);
POP(ESI);
POP(EDI);
POP(ESI);
LEAVE();
RET();
MOVobw(EBX, ESI, AX);
@ -339,8 +374,8 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
CMPrd(ESI, ECX);
JAb(5);
POP(EBX);
POP(ESI);
POP(EDI);
POP(ESI);
LEAVE();
RET();
ADDrd(EDX, ESI);
@ -353,8 +388,8 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
JBb(7);
ZEROrd(EAX);
POP(EBX);
POP(ESI);
POP(EDI);
POP(ESI);
LEAVE();
RET();
ZEROrd(EDX);
@ -481,12 +516,22 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
case BPF_ALU|BPF_DIV|BPF_X:
TESTrd(EDX, EDX);
JNEb(7);
ZEROrd(EAX);
POP(EBX);
POP(ESI);
POP(EDI);
LEAVE();
if (save_esp) {
if (fpkt) {
JNEb(7);
ZEROrd(EAX);
POP(EBX);
POP(EDI);
} else {
JNEb(5);
ZEROrd(EAX);
}
POP(ESI);
LEAVE();
} else {
JNEb(3);
ZEROrd(EAX);
}
RET();
MOVrd(EDX, ECX);
ZEROrd(EDX);
@ -587,7 +632,7 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
* Modify the reference table to contain the offsets and
* not the lengths of the instructions.
*/
if (flag_jmp)
if (fjmp)
for (i = 1; i < nins + 1; i++)
stream.refs[i] += stream.refs[i - 1];
@ -603,7 +648,7 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
* The reference table is needed only during compilation,
* now we can free it.
*/
if (flag_jmp)
if (fjmp)
#ifdef _KERNEL
free(stream.refs, M_BPFJIT);
#else

View File

@ -61,12 +61,14 @@
#define BL 3
/* Optimization flags */
#define BPF_JIT_FLAG_RET 0x01
#define BPF_JIT_FLAG_JMP 0x02
#define BPF_JIT_FLAG_MEM 0x04
#define BPF_JIT_FRET 0x01
#define BPF_JIT_FPKT 0x02
#define BPF_JIT_FMEM 0x04
#define BPF_JIT_FJMP 0x08
#define BPF_JIT_FADK 0x10
#define BPF_JIT_FLAG_ALL \
(BPF_JIT_FLAG_JMP | BPF_JIT_FLAG_MEM)
(BPF_JIT_FPKT | BPF_JIT_FMEM | BPF_JIT_FJMP | BPF_JIT_FADK)
/* A stream of native binary code */
typedef struct bpf_bin_stream {