From owner-svn-src-head@FreeBSD.ORG Sat Nov 21 00:19:09 2009 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id BD23A106566B; Sat, 21 Nov 2009 00:19:09 +0000 (UTC) (envelope-from jkim@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id A9CF28FC21; Sat, 21 Nov 2009 00:19:09 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id nAL0J9oW060530; Sat, 21 Nov 2009 00:19:09 GMT (envelope-from jkim@svn.freebsd.org) Received: (from jkim@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id nAL0J91R060525; Sat, 21 Nov 2009 00:19:09 GMT (envelope-from jkim@svn.freebsd.org) Message-Id: <200911210019.nAL0J91R060525@svn.freebsd.org> From: Jung-uk Kim Date: Sat, 21 Nov 2009 00:19:09 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r199619 - in head/sys: amd64/amd64 i386/i386 X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 21 Nov 2009 00:19:09 -0000 Author: jkim Date: Sat Nov 21 00:19:09 2009 New Revision: 199619 URL: http://svn.freebsd.org/changeset/base/199619 Log: Add an experimental and rudimentary JIT optimizer to reduce unncessary overhead from short BPF filter programs such as "get the first 96 bytes". Modified: head/sys/amd64/amd64/bpf_jit_machdep.c head/sys/amd64/amd64/bpf_jit_machdep.h head/sys/i386/i386/bpf_jit_machdep.c head/sys/i386/i386/bpf_jit_machdep.h Modified: head/sys/amd64/amd64/bpf_jit_machdep.c ============================================================================== --- head/sys/amd64/amd64/bpf_jit_machdep.c Fri Nov 20 23:14:08 2009 (r199618) +++ head/sys/amd64/amd64/bpf_jit_machdep.c Sat Nov 21 00:19:09 2009 (r199619) @@ -57,18 +57,19 @@ __FBSDID("$FreeBSD$"); bpf_filter_func bpf_jit_compile(struct bpf_insn *, u_int, size_t *); /* - * emit routine to update the jump table + * Emit routine to update the jump table. */ static void emit_length(bpf_bin_stream *stream, __unused u_int value, u_int len) { - (stream->refs)[stream->bpf_pc] += len; + if (stream->refs != NULL) + (stream->refs)[stream->bpf_pc] += len; stream->cur_ip += len; } /* - * emit routine to output the actual binary code + * Emit routine to output the actual binary code. */ static void emit_code(bpf_bin_stream *stream, u_int value, u_int len) @@ -95,37 +96,79 @@ emit_code(bpf_bin_stream *stream, u_int } /* - * Function that does the real stuff + * Scan the filter program and find possible optimization. + */ +static int +bpf_jit_optimize(struct bpf_insn *prog, u_int nins) +{ + const struct bpf_insn *p; + int flags; + u_int i; + + /* Do we return immediately? */ + if (BPF_CLASS(prog[0].code) == BPF_RET) + return (BPF_JIT_FLAG_RET); + + for (flags = 0, i = 0; i < nins; i++) { + p = &prog[i]; + + /* Do we need reference table? */ + if ((flags & BPF_JIT_FLAG_JMP) == 0 && + BPF_CLASS(p->code) == BPF_JMP) + flags |= BPF_JIT_FLAG_JMP; + + /* Do we need scratch memory? */ + if ((flags & BPF_JIT_FLAG_MEM) == 0 && + (p->code == BPF_ST || p->code == BPF_STX || + p->code == (BPF_LD|BPF_MEM) || + p->code == (BPF_LDX|BPF_MEM))) + flags |= BPF_JIT_FLAG_MEM; + + if (flags == BPF_JIT_FLAG_ALL) + break; + } + + return (flags); +} + +/* + * Function that does the real stuff. */ bpf_filter_func bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size) { bpf_bin_stream stream; struct bpf_insn *ins; + int flags, flag_ret, flag_jmp, flag_mem; u_int i, pass; + flags = bpf_jit_optimize(prog, nins); + flag_ret = (flags & BPF_JIT_FLAG_RET) != 0; + flag_jmp = (flags & BPF_JIT_FLAG_JMP) != 0; + flag_mem = (flags & BPF_JIT_FLAG_MEM) != 0; + /* * NOTE: Do not modify the name of this variable, as it's used by * the macros to emit code. */ emit_func emitm; + memset(&stream, 0, sizeof(stream)); + /* Allocate the reference table for the jumps. */ + if (flag_jmp) { #ifdef _KERNEL - stream.refs = malloc((nins + 1) * sizeof(u_int), M_BPFJIT, - M_NOWAIT | M_ZERO); + stream.refs = malloc((nins + 1) * sizeof(u_int), M_BPFJIT, + M_NOWAIT | M_ZERO); #else - stream.refs = malloc((nins + 1) * sizeof(u_int)); + stream.refs = malloc((nins + 1) * sizeof(u_int)); #endif - if (stream.refs == NULL) - return (NULL); + if (stream.refs == NULL) + return (NULL); #ifndef _KERNEL - memset(stream.refs, 0, (nins + 1) * sizeof(u_int)); + memset(stream.refs, 0, (nins + 1) * sizeof(u_int)); #endif - - stream.cur_ip = 0; - stream.bpf_pc = 0; - stream.ibuf = NULL; + } /* * The first pass will emit the lengths of the instructions @@ -137,12 +180,16 @@ bpf_jit_compile(struct bpf_insn *prog, u ins = prog; /* Create the procedure header. */ - PUSH(RBP); - MOVrq(RSP, RBP); - SUBib(BPF_MEMWORDS * sizeof(uint32_t), RSP); - MOVrq2(RDI, R8); - MOVrd2(ESI, R9D); - MOVrd(EDX, EDI); + if (flag_mem) { + PUSH(RBP); + MOVrq(RSP, RBP); + SUBib(BPF_MEMWORDS * sizeof(uint32_t), RSP); + } + if (!flag_ret) { + MOVrq2(RDI, R8); + MOVrd2(ESI, R9D); + MOVrd(EDX, EDI); + } for (i = 0; i < nins; i++) { stream.bpf_pc++; @@ -157,11 +204,15 @@ bpf_jit_compile(struct bpf_insn *prog, u case BPF_RET|BPF_K: MOVid(ins->k, EAX); - LEAVE_RET(); + if (flag_mem) + LEAVE(); + RET(); break; case BPF_RET|BPF_A: - LEAVE_RET(); + if (flag_mem) + LEAVE(); + RET(); break; case BPF_LD|BPF_W|BPF_ABS: @@ -171,9 +222,15 @@ bpf_jit_compile(struct bpf_insn *prog, u MOVrd(EDI, ECX); SUBrd(ESI, ECX); CMPid(sizeof(int32_t), ECX); - JAEb(4); - ZEROrd(EAX); - LEAVE_RET(); + if (flag_mem) { + JAEb(4); + ZEROrd(EAX); + LEAVE(); + } else { + JAEb(3); + ZEROrd(EAX); + } + RET(); MOVrq3(R8, RCX); MOVobd(RCX, RSI, EAX); BSWAP(EAX); @@ -187,8 +244,12 @@ bpf_jit_compile(struct bpf_insn *prog, u MOVrd(EDI, ECX); SUBrd(ESI, ECX); CMPid(sizeof(int16_t), ECX); - JAEb(2); - LEAVE_RET(); + if (flag_mem) { + JAEb(2); + LEAVE(); + } else + JAEb(1); + RET(); MOVrq3(R8, RCX); MOVobw(RCX, RSI, AX); SWAP_AX(); @@ -198,8 +259,12 @@ bpf_jit_compile(struct bpf_insn *prog, u ZEROrd(EAX); MOVid(ins->k, ESI); CMPrd(EDI, ESI); - JBb(2); - LEAVE_RET(); + if (flag_mem) { + JBb(2); + LEAVE(); + } else + JBb(1); + RET(); MOVrq3(R8, RCX); MOVobb(RCX, RSI, AL); break; @@ -224,9 +289,15 @@ bpf_jit_compile(struct bpf_insn *prog, u MOVrd(EDI, ECX); SUBrd(ESI, ECX); CMPid(sizeof(int32_t), ECX); - JAEb(4); - ZEROrd(EAX); - LEAVE_RET(); + if (flag_mem) { + JAEb(4); + ZEROrd(EAX); + LEAVE(); + } else { + JAEb(3); + ZEROrd(EAX); + } + RET(); MOVrq3(R8, RCX); MOVobd(RCX, RSI, EAX); BSWAP(EAX); @@ -245,8 +316,12 @@ bpf_jit_compile(struct bpf_insn *prog, u MOVrd(EDI, ECX); SUBrd(ESI, ECX); CMPid(sizeof(int16_t), ECX); - JAEb(2); - LEAVE_RET(); + if (flag_mem) { + JAEb(2); + LEAVE(); + } else + JAEb(1); + RET(); MOVrq3(R8, RCX); MOVobw(RCX, RSI, AX); SWAP_AX(); @@ -260,8 +335,12 @@ bpf_jit_compile(struct bpf_insn *prog, u MOVrd(EDI, ECX); SUBrd(EDX, ECX); CMPrd(ESI, ECX); - JAb(2); - LEAVE_RET(); + if (flag_mem) { + JAb(2); + LEAVE(); + } else + JAb(1); + RET(); MOVrq3(R8, RCX); ADDrd(EDX, ESI); MOVobb(RCX, RSI, AL); @@ -270,9 +349,15 @@ bpf_jit_compile(struct bpf_insn *prog, u case BPF_LDX|BPF_MSH|BPF_B: MOVid(ins->k, ESI); CMPrd(EDI, ESI); - JBb(4); - ZEROrd(EAX); - LEAVE_RET(); + if (flag_mem) { + JBb(4); + ZEROrd(EAX); + LEAVE(); + } else { + JBb(3); + ZEROrd(EAX); + } + RET(); ZEROrd(EDX); MOVrq3(R8, RCX); MOVobb(RCX, RSI, DL); @@ -390,9 +475,15 @@ bpf_jit_compile(struct bpf_insn *prog, u case BPF_ALU|BPF_DIV|BPF_X: TESTrd(EDX, EDX); - JNEb(4); - ZEROrd(EAX); - LEAVE_RET(); + if (flag_mem) { + JNEb(4); + ZEROrd(EAX); + LEAVE(); + } else { + JNEb(3); + ZEROrd(EAX); + } + RET(); MOVrd(EDX, ECX); ZEROrd(EDX); DIVrd(ECX); @@ -492,8 +583,9 @@ bpf_jit_compile(struct bpf_insn *prog, u * Modify the reference table to contain the offsets and * not the lengths of the instructions. */ - for (i = 1; i < nins + 1; i++) - stream.refs[i] += stream.refs[i - 1]; + if (flag_jmp) + for (i = 1; i < nins + 1; i++) + stream.refs[i] += stream.refs[i - 1]; /* Reset the counters. */ stream.cur_ip = 0; @@ -507,10 +599,14 @@ bpf_jit_compile(struct bpf_insn *prog, u * The reference table is needed only during compilation, * now we can free it. */ + if (flag_jmp) #ifdef _KERNEL - free(stream.refs, M_BPFJIT); + free(stream.refs, M_BPFJIT); #else - free(stream.refs); + free(stream.refs); +#endif + +#ifndef _KERNEL if (stream.ibuf != NULL && mprotect(stream.ibuf, *size, PROT_READ | PROT_EXEC) != 0) { munmap(stream.ibuf, *size); Modified: head/sys/amd64/amd64/bpf_jit_machdep.h ============================================================================== --- head/sys/amd64/amd64/bpf_jit_machdep.h Fri Nov 20 23:14:08 2009 (r199618) +++ head/sys/amd64/amd64/bpf_jit_machdep.h Sat Nov 21 00:19:09 2009 (r199619) @@ -85,7 +85,15 @@ #define DL 2 #define BL 3 -/* A stream of native binary code.*/ +/* Optimization flags */ +#define BPF_JIT_FLAG_RET 0x01 +#define BPF_JIT_FLAG_JMP 0x02 +#define BPF_JIT_FLAG_MEM 0x04 + +#define BPF_JIT_FLAG_ALL \ + (BPF_JIT_FLAG_JMP | BPF_JIT_FLAG_MEM) + +/* A stream of native binary code */ typedef struct bpf_bin_stream { /* Current native instruction pointer. */ int cur_ip; @@ -117,7 +125,7 @@ typedef struct bpf_bin_stream { typedef void (*emit_func)(bpf_bin_stream *stream, u_int value, u_int n); /* - * native Instruction Macros + * Native instruction macros */ /* movl i32,r32 */ @@ -220,9 +228,14 @@ typedef void (*emit_func)(bpf_bin_stream emitm(&stream, (5 << 4) | (0 << 3) | (r64 & 0x7), 1); \ } while (0) -/* leave/ret */ -#define LEAVE_RET() do { \ - emitm(&stream, 0xc3c9, 2); \ +/* leaveq */ +#define LEAVE() do { \ + emitm(&stream, 0xc9, 1); \ +} while (0) + +/* retq */ +#define RET() do { \ + emitm(&stream, 0xc3, 1); \ } while (0) /* addl sr32,dr32 */ Modified: head/sys/i386/i386/bpf_jit_machdep.c ============================================================================== --- head/sys/i386/i386/bpf_jit_machdep.c Fri Nov 20 23:14:08 2009 (r199618) +++ head/sys/i386/i386/bpf_jit_machdep.c Sat Nov 21 00:19:09 2009 (r199619) @@ -57,18 +57,19 @@ __FBSDID("$FreeBSD$"); bpf_filter_func bpf_jit_compile(struct bpf_insn *, u_int, size_t *); /* - * emit routine to update the jump table + * Emit routine to update the jump table. */ static void emit_length(bpf_bin_stream *stream, __unused u_int value, u_int len) { - (stream->refs)[stream->bpf_pc] += len; + if (stream->refs != NULL) + (stream->refs)[stream->bpf_pc] += len; stream->cur_ip += len; } /* - * emit routine to output the actual binary code + * Emit routine to output the actual binary code. */ static void emit_code(bpf_bin_stream *stream, u_int value, u_int len) @@ -95,37 +96,79 @@ emit_code(bpf_bin_stream *stream, u_int } /* - * Function that does the real stuff + * Scan the filter program and find possible optimization. + */ +static int +bpf_jit_optimize(struct bpf_insn *prog, u_int nins) +{ + const struct bpf_insn *p; + int flags; + u_int i; + + /* Do we return immediately? */ + if (BPF_CLASS(prog[0].code) == BPF_RET) + return (BPF_JIT_FLAG_RET); + + for (flags = 0, i = 0; i < nins; i++) { + p = &prog[i]; + + /* Do we need reference table? */ + if ((flags & BPF_JIT_FLAG_JMP) == 0 && + BPF_CLASS(p->code) == BPF_JMP) + flags |= BPF_JIT_FLAG_JMP; + + /* Do we need scratch memory? */ + if ((flags & BPF_JIT_FLAG_MEM) == 0 && + (p->code == BPF_ST || p->code == BPF_STX || + p->code == (BPF_LD|BPF_MEM) || + p->code == (BPF_LDX|BPF_MEM))) + flags |= BPF_JIT_FLAG_MEM; + + if (flags == BPF_JIT_FLAG_ALL) + break; + } + + return (flags); +} + +/* + * Function that does the real stuff. */ bpf_filter_func bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size) { bpf_bin_stream stream; struct bpf_insn *ins; + int flags, flag_ret, flag_jmp, flag_mem; u_int i, pass; + flags = bpf_jit_optimize(prog, nins); + flag_ret = (flags & BPF_JIT_FLAG_RET) != 0; + flag_jmp = (flags & BPF_JIT_FLAG_JMP) != 0; + flag_mem = (flags & BPF_JIT_FLAG_MEM) != 0; + /* * NOTE: Do not modify the name of this variable, as it's used by * the macros to emit code. */ emit_func emitm; + memset(&stream, 0, sizeof(stream)); + /* Allocate the reference table for the jumps. */ + if (flag_jmp) { #ifdef _KERNEL - stream.refs = malloc((nins + 1) * sizeof(u_int), M_BPFJIT, - M_NOWAIT | M_ZERO); + stream.refs = malloc((nins + 1) * sizeof(u_int), M_BPFJIT, + M_NOWAIT | M_ZERO); #else - stream.refs = malloc((nins + 1) * sizeof(u_int)); + stream.refs = malloc((nins + 1) * sizeof(u_int)); #endif - if (stream.refs == NULL) - return (NULL); + if (stream.refs == NULL) + return (NULL); #ifndef _KERNEL - memset(stream.refs, 0, (nins + 1) * sizeof(u_int)); + memset(stream.refs, 0, (nins + 1) * sizeof(u_int)); #endif - - stream.cur_ip = 0; - stream.bpf_pc = 0; - stream.ibuf = NULL; + } /* * The first pass will emit the lengths of the instructions @@ -137,14 +180,19 @@ bpf_jit_compile(struct bpf_insn *prog, u ins = prog; /* Create the procedure header. */ - PUSH(EBP); - MOVrd(ESP, EBP); - SUBib(BPF_MEMWORDS * sizeof(uint32_t), ESP); - PUSH(EDI); - PUSH(ESI); - PUSH(EBX); - MOVodd(8, EBP, EBX); - MOVodd(16, EBP, EDI); + if (!flag_ret) { + PUSH(EBP); + MOVrd(ESP, EBP); + } + if (flag_mem) + SUBib(BPF_MEMWORDS * sizeof(uint32_t), ESP); + if (!flag_ret) { + PUSH(EDI); + PUSH(ESI); + PUSH(EBX); + MOVodd(8, EBP, EBX); + MOVodd(16, EBP, EDI); + } for (i = 0; i < nins; i++) { stream.bpf_pc++; @@ -159,17 +207,23 @@ bpf_jit_compile(struct bpf_insn *prog, u case BPF_RET|BPF_K: MOVid(ins->k, EAX); - POP(EBX); - POP(ESI); - POP(EDI); - LEAVE_RET(); + if (!flag_ret) { + POP(EBX); + POP(ESI); + POP(EDI); + LEAVE(); + } + RET(); break; case BPF_RET|BPF_A: - POP(EBX); - POP(ESI); - POP(EDI); - LEAVE_RET(); + if (!flag_ret) { + POP(EBX); + POP(ESI); + POP(EDI); + LEAVE(); + } + RET(); break; case BPF_LD|BPF_W|BPF_ABS: @@ -184,7 +238,8 @@ bpf_jit_compile(struct bpf_insn *prog, u POP(EBX); POP(ESI); POP(EDI); - LEAVE_RET(); + LEAVE(); + RET(); MOVobd(EBX, ESI, EAX); BSWAP(EAX); break; @@ -201,7 +256,8 @@ bpf_jit_compile(struct bpf_insn *prog, u POP(EBX); POP(ESI); POP(EDI); - LEAVE_RET(); + LEAVE(); + RET(); MOVobw(EBX, ESI, AX); SWAP_AX(); break; @@ -214,7 +270,8 @@ bpf_jit_compile(struct bpf_insn *prog, u POP(EBX); POP(ESI); POP(EDI); - LEAVE_RET(); + LEAVE(); + RET(); MOVobb(EBX, ESI, AL); break; @@ -243,7 +300,8 @@ bpf_jit_compile(struct bpf_insn *prog, u POP(EBX); POP(ESI); POP(EDI); - LEAVE_RET(); + LEAVE(); + RET(); MOVobd(EBX, ESI, EAX); BSWAP(EAX); break; @@ -265,7 +323,8 @@ bpf_jit_compile(struct bpf_insn *prog, u POP(EBX); POP(ESI); POP(EDI); - LEAVE_RET(); + LEAVE(); + RET(); MOVobw(EBX, ESI, AX); SWAP_AX(); break; @@ -282,7 +341,8 @@ bpf_jit_compile(struct bpf_insn *prog, u POP(EBX); POP(ESI); POP(EDI); - LEAVE_RET(); + LEAVE(); + RET(); ADDrd(EDX, ESI); MOVobb(EBX, ESI, AL); break; @@ -295,7 +355,8 @@ bpf_jit_compile(struct bpf_insn *prog, u POP(EBX); POP(ESI); POP(EDI); - LEAVE_RET(); + LEAVE(); + RET(); ZEROrd(EDX); MOVobb(EBX, ESI, DL); ANDib(0x0f, DL); @@ -425,7 +486,8 @@ bpf_jit_compile(struct bpf_insn *prog, u POP(EBX); POP(ESI); POP(EDI); - LEAVE_RET(); + LEAVE(); + RET(); MOVrd(EDX, ECX); ZEROrd(EDX); DIVrd(ECX); @@ -525,8 +587,9 @@ bpf_jit_compile(struct bpf_insn *prog, u * Modify the reference table to contain the offsets and * not the lengths of the instructions. */ - for (i = 1; i < nins + 1; i++) - stream.refs[i] += stream.refs[i - 1]; + if (flag_jmp) + for (i = 1; i < nins + 1; i++) + stream.refs[i] += stream.refs[i - 1]; /* Reset the counters. */ stream.cur_ip = 0; @@ -540,10 +603,14 @@ bpf_jit_compile(struct bpf_insn *prog, u * The reference table is needed only during compilation, * now we can free it. */ + if (flag_jmp) #ifdef _KERNEL - free(stream.refs, M_BPFJIT); + free(stream.refs, M_BPFJIT); #else - free(stream.refs); + free(stream.refs); +#endif + +#ifndef _KERNEL if (stream.ibuf != NULL && mprotect(stream.ibuf, *size, PROT_READ | PROT_EXEC) != 0) { munmap(stream.ibuf, *size); Modified: head/sys/i386/i386/bpf_jit_machdep.h ============================================================================== --- head/sys/i386/i386/bpf_jit_machdep.h Fri Nov 20 23:14:08 2009 (r199618) +++ head/sys/i386/i386/bpf_jit_machdep.h Sat Nov 21 00:19:09 2009 (r199619) @@ -60,7 +60,15 @@ #define DL 2 #define BL 3 -/* A stream of native binary code.*/ +/* Optimization flags */ +#define BPF_JIT_FLAG_RET 0x01 +#define BPF_JIT_FLAG_JMP 0x02 +#define BPF_JIT_FLAG_MEM 0x04 + +#define BPF_JIT_FLAG_ALL \ + (BPF_JIT_FLAG_JMP | BPF_JIT_FLAG_MEM) + +/* A stream of native binary code */ typedef struct bpf_bin_stream { /* Current native instruction pointer. */ int cur_ip; @@ -92,7 +100,7 @@ typedef struct bpf_bin_stream { typedef void (*emit_func)(bpf_bin_stream *stream, u_int value, u_int n); /* - * native Instruction Macros + * Native instruction macros */ /* movl i32,r32 */ @@ -165,9 +173,14 @@ typedef void (*emit_func)(bpf_bin_stream emitm(&stream, (5 << 4) | (1 << 3) | (r32 & 0x7), 1); \ } while (0) -/* leave/ret */ -#define LEAVE_RET() do { \ - emitm(&stream, 0xc3c9, 2); \ +/* leave */ +#define LEAVE() do { \ + emitm(&stream, 0xc9, 1); \ +} while (0) + +/* ret */ +#define RET() do { \ + emitm(&stream, 0xc3, 1); \ } while (0) /* addl sr32,dr32 */