From owner-svn-src-all@FreeBSD.ORG Mon Nov 23 22:23:20 2009 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id AACDA106568D; Mon, 23 Nov 2009 22:23:20 +0000 (UTC) (envelope-from jkim@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 986768FC1D; Mon, 23 Nov 2009 22:23:20 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id nANMNKRT077635; Mon, 23 Nov 2009 22:23:20 GMT (envelope-from jkim@svn.freebsd.org) Received: (from jkim@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id nANMNKlR077633; Mon, 23 Nov 2009 22:23:20 GMT (envelope-from jkim@svn.freebsd.org) Message-Id: <200911232223.nANMNKlR077633@svn.freebsd.org> From: Jung-uk Kim Date: Mon, 23 Nov 2009 22:23:20 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r199721 - in head/sys: amd64/amd64 i386/i386 X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 23 Nov 2009 22:23:20 -0000 Author: jkim Date: Mon Nov 23 22:23:19 2009 New Revision: 199721 URL: http://svn.freebsd.org/changeset/base/199721 Log: - Add more aggressive BPF JIT optimization. This change favors i386 more, whereas the previous commit was more amd64-centric. - Use calloc(3) instead of malloc(3)/memset(3) in userland[1]. 
Submitted by: ed[1] Modified: head/sys/amd64/amd64/bpf_jit_machdep.c head/sys/amd64/amd64/bpf_jit_machdep.h head/sys/i386/i386/bpf_jit_machdep.c head/sys/i386/i386/bpf_jit_machdep.h Modified: head/sys/amd64/amd64/bpf_jit_machdep.c ============================================================================== --- head/sys/amd64/amd64/bpf_jit_machdep.c Mon Nov 23 21:17:38 2009 (r199720) +++ head/sys/amd64/amd64/bpf_jit_machdep.c Mon Nov 23 22:23:19 2009 (r199721) @@ -101,29 +101,46 @@ emit_code(bpf_bin_stream *stream, u_int static int bpf_jit_optimize(struct bpf_insn *prog, u_int nins) { - const struct bpf_insn *p; int flags; u_int i; /* Do we return immediately? */ if (BPF_CLASS(prog[0].code) == BPF_RET) - return (BPF_JIT_FLAG_RET); + return (BPF_JIT_FRET); for (flags = 0, i = 0; i < nins; i++) { - p = &prog[i]; - - /* Do we need reference table? */ - if ((flags & BPF_JIT_FLAG_JMP) == 0 && - BPF_CLASS(p->code) == BPF_JMP) - flags |= BPF_JIT_FLAG_JMP; - - /* Do we need scratch memory? */ - if ((flags & BPF_JIT_FLAG_MEM) == 0 && - (p->code == BPF_ST || p->code == BPF_STX || - p->code == (BPF_LD|BPF_MEM) || - p->code == (BPF_LDX|BPF_MEM))) - flags |= BPF_JIT_FLAG_MEM; - + switch (prog[i].code) { + case BPF_LD|BPF_W|BPF_ABS: + case BPF_LD|BPF_H|BPF_ABS: + case BPF_LD|BPF_B|BPF_ABS: + case BPF_LD|BPF_W|BPF_IND: + case BPF_LD|BPF_H|BPF_IND: + case BPF_LD|BPF_B|BPF_IND: + case BPF_LDX|BPF_MSH|BPF_B: + flags |= BPF_JIT_FPKT; + break; + case BPF_LD|BPF_MEM: + case BPF_LDX|BPF_MEM: + case BPF_ST: + case BPF_STX: + flags |= BPF_JIT_FMEM; + break; + case BPF_LD|BPF_W|BPF_LEN: + case BPF_LDX|BPF_W|BPF_LEN: + flags |= BPF_JIT_FLEN; + break; + case BPF_JMP|BPF_JA: + case BPF_JMP|BPF_JGT|BPF_K: + case BPF_JMP|BPF_JGE|BPF_K: + case BPF_JMP|BPF_JEQ|BPF_K: + case BPF_JMP|BPF_JSET|BPF_K: + case BPF_JMP|BPF_JGT|BPF_X: + case BPF_JMP|BPF_JGE|BPF_X: + case BPF_JMP|BPF_JEQ|BPF_X: + case BPF_JMP|BPF_JSET|BPF_X: + flags |= BPF_JIT_FJMP; + break; + } if (flags == BPF_JIT_FLAG_ALL) break; } @@ 
-139,35 +156,37 @@ bpf_jit_compile(struct bpf_insn *prog, u { bpf_bin_stream stream; struct bpf_insn *ins; - int flags, flag_ret, flag_jmp, flag_mem; + int flags, fret, fpkt, fmem, fjmp, flen; u_int i, pass; - flags = bpf_jit_optimize(prog, nins); - flag_ret = (flags & BPF_JIT_FLAG_RET) != 0; - flag_jmp = (flags & BPF_JIT_FLAG_JMP) != 0; - flag_mem = (flags & BPF_JIT_FLAG_MEM) != 0; - /* * NOTE: Do not modify the name of this variable, as it's used by * the macros to emit code. */ emit_func emitm; + flags = bpf_jit_optimize(prog, nins); + fret = (flags & BPF_JIT_FRET) != 0; + fpkt = (flags & BPF_JIT_FPKT) != 0; + fmem = (flags & BPF_JIT_FMEM) != 0; + fjmp = (flags & BPF_JIT_FJMP) != 0; + flen = (flags & BPF_JIT_FLEN) != 0; + + if (fret) + nins = 1; + memset(&stream, 0, sizeof(stream)); /* Allocate the reference table for the jumps. */ - if (flag_jmp) { + if (fjmp) { #ifdef _KERNEL stream.refs = malloc((nins + 1) * sizeof(u_int), M_BPFJIT, M_NOWAIT | M_ZERO); #else - stream.refs = malloc((nins + 1) * sizeof(u_int)); + stream.refs = calloc(nins + 1, sizeof(u_int)); #endif if (stream.refs == NULL) return (NULL); -#ifndef _KERNEL - memset(stream.refs, 0, (nins + 1) * sizeof(u_int)); -#endif } /* @@ -180,14 +199,15 @@ bpf_jit_compile(struct bpf_insn *prog, u ins = prog; /* Create the procedure header. 
*/ - if (flag_mem) { + if (fmem) { PUSH(RBP); MOVrq(RSP, RBP); SUBib(BPF_MEMWORDS * sizeof(uint32_t), RSP); } - if (!flag_ret) { - MOVrq2(RDI, R8); + if (flen) MOVrd2(ESI, R9D); + if (fpkt) { + MOVrq2(RDI, R8); MOVrd(EDX, EDI); } @@ -204,13 +224,13 @@ bpf_jit_compile(struct bpf_insn *prog, u case BPF_RET|BPF_K: MOVid(ins->k, EAX); - if (flag_mem) + if (fmem) LEAVE(); RET(); break; case BPF_RET|BPF_A: - if (flag_mem) + if (fmem) LEAVE(); RET(); break; @@ -222,7 +242,7 @@ bpf_jit_compile(struct bpf_insn *prog, u MOVrd(EDI, ECX); SUBrd(ESI, ECX); CMPid(sizeof(int32_t), ECX); - if (flag_mem) { + if (fmem) { JAEb(4); ZEROrd(EAX); LEAVE(); @@ -244,7 +264,7 @@ bpf_jit_compile(struct bpf_insn *prog, u MOVrd(EDI, ECX); SUBrd(ESI, ECX); CMPid(sizeof(int16_t), ECX); - if (flag_mem) { + if (fmem) { JAEb(2); LEAVE(); } else @@ -259,7 +279,7 @@ bpf_jit_compile(struct bpf_insn *prog, u ZEROrd(EAX); MOVid(ins->k, ESI); CMPrd(EDI, ESI); - if (flag_mem) { + if (fmem) { JBb(2); LEAVE(); } else @@ -289,7 +309,7 @@ bpf_jit_compile(struct bpf_insn *prog, u MOVrd(EDI, ECX); SUBrd(ESI, ECX); CMPid(sizeof(int32_t), ECX); - if (flag_mem) { + if (fmem) { JAEb(4); ZEROrd(EAX); LEAVE(); @@ -316,7 +336,7 @@ bpf_jit_compile(struct bpf_insn *prog, u MOVrd(EDI, ECX); SUBrd(ESI, ECX); CMPid(sizeof(int16_t), ECX); - if (flag_mem) { + if (fmem) { JAEb(2); LEAVE(); } else @@ -335,7 +355,7 @@ bpf_jit_compile(struct bpf_insn *prog, u MOVrd(EDI, ECX); SUBrd(EDX, ECX); CMPrd(ESI, ECX); - if (flag_mem) { + if (fmem) { JAb(2); LEAVE(); } else @@ -349,7 +369,7 @@ bpf_jit_compile(struct bpf_insn *prog, u case BPF_LDX|BPF_MSH|BPF_B: MOVid(ins->k, ESI); CMPrd(EDI, ESI); - if (flag_mem) { + if (fmem) { JBb(4); ZEROrd(EAX); LEAVE(); @@ -475,7 +495,7 @@ bpf_jit_compile(struct bpf_insn *prog, u case BPF_ALU|BPF_DIV|BPF_X: TESTrd(EDX, EDX); - if (flag_mem) { + if (fmem) { JNEb(4); ZEROrd(EAX); LEAVE(); @@ -583,7 +603,7 @@ bpf_jit_compile(struct bpf_insn *prog, u * Modify the reference table to contain the offsets 
and * not the lengths of the instructions. */ - if (flag_jmp) + if (fjmp) for (i = 1; i < nins + 1; i++) stream.refs[i] += stream.refs[i - 1]; @@ -599,7 +619,7 @@ bpf_jit_compile(struct bpf_insn *prog, u * The reference table is needed only during compilation, * now we can free it. */ - if (flag_jmp) + if (fjmp) #ifdef _KERNEL free(stream.refs, M_BPFJIT); #else Modified: head/sys/amd64/amd64/bpf_jit_machdep.h ============================================================================== --- head/sys/amd64/amd64/bpf_jit_machdep.h Mon Nov 23 21:17:38 2009 (r199720) +++ head/sys/amd64/amd64/bpf_jit_machdep.h Mon Nov 23 22:23:19 2009 (r199721) @@ -86,12 +86,14 @@ #define BL 3 /* Optimization flags */ -#define BPF_JIT_FLAG_RET 0x01 -#define BPF_JIT_FLAG_JMP 0x02 -#define BPF_JIT_FLAG_MEM 0x04 +#define BPF_JIT_FRET 0x01 +#define BPF_JIT_FPKT 0x02 +#define BPF_JIT_FMEM 0x04 +#define BPF_JIT_FJMP 0x08 +#define BPF_JIT_FLEN 0x10 #define BPF_JIT_FLAG_ALL \ - (BPF_JIT_FLAG_JMP | BPF_JIT_FLAG_MEM) + (BPF_JIT_FPKT | BPF_JIT_FMEM | BPF_JIT_FJMP | BPF_JIT_FLEN) /* A stream of native binary code */ typedef struct bpf_bin_stream { Modified: head/sys/i386/i386/bpf_jit_machdep.c ============================================================================== --- head/sys/i386/i386/bpf_jit_machdep.c Mon Nov 23 21:17:38 2009 (r199720) +++ head/sys/i386/i386/bpf_jit_machdep.c Mon Nov 23 22:23:19 2009 (r199721) @@ -101,29 +101,45 @@ emit_code(bpf_bin_stream *stream, u_int static int bpf_jit_optimize(struct bpf_insn *prog, u_int nins) { - const struct bpf_insn *p; int flags; u_int i; /* Do we return immediately? */ if (BPF_CLASS(prog[0].code) == BPF_RET) - return (BPF_JIT_FLAG_RET); + return (BPF_JIT_FRET); for (flags = 0, i = 0; i < nins; i++) { - p = &prog[i]; - - /* Do we need reference table? */ - if ((flags & BPF_JIT_FLAG_JMP) == 0 && - BPF_CLASS(p->code) == BPF_JMP) - flags |= BPF_JIT_FLAG_JMP; - - /* Do we need scratch memory? 
*/ - if ((flags & BPF_JIT_FLAG_MEM) == 0 && - (p->code == BPF_ST || p->code == BPF_STX || - p->code == (BPF_LD|BPF_MEM) || - p->code == (BPF_LDX|BPF_MEM))) - flags |= BPF_JIT_FLAG_MEM; - + switch (prog[i].code) { + case BPF_LD|BPF_W|BPF_ABS: + case BPF_LD|BPF_H|BPF_ABS: + case BPF_LD|BPF_B|BPF_ABS: + case BPF_LD|BPF_W|BPF_IND: + case BPF_LD|BPF_H|BPF_IND: + case BPF_LD|BPF_B|BPF_IND: + case BPF_LDX|BPF_MSH|BPF_B: + flags |= BPF_JIT_FPKT; + break; + case BPF_LD|BPF_MEM: + case BPF_LDX|BPF_MEM: + case BPF_ST: + case BPF_STX: + flags |= BPF_JIT_FMEM; + break; + case BPF_JMP|BPF_JA: + case BPF_JMP|BPF_JGT|BPF_K: + case BPF_JMP|BPF_JGE|BPF_K: + case BPF_JMP|BPF_JEQ|BPF_K: + case BPF_JMP|BPF_JSET|BPF_K: + case BPF_JMP|BPF_JGT|BPF_X: + case BPF_JMP|BPF_JGE|BPF_X: + case BPF_JMP|BPF_JEQ|BPF_X: + case BPF_JMP|BPF_JSET|BPF_X: + flags |= BPF_JIT_FJMP; + break; + case BPF_ALU|BPF_DIV|BPF_K: + flags |= BPF_JIT_FADK; + break; + } if (flags == BPF_JIT_FLAG_ALL) break; } @@ -139,35 +155,39 @@ bpf_jit_compile(struct bpf_insn *prog, u { bpf_bin_stream stream; struct bpf_insn *ins; - int flags, flag_ret, flag_jmp, flag_mem; + int flags, fret, fpkt, fmem, fjmp, fadk; + int save_esp; u_int i, pass; - flags = bpf_jit_optimize(prog, nins); - flag_ret = (flags & BPF_JIT_FLAG_RET) != 0; - flag_jmp = (flags & BPF_JIT_FLAG_JMP) != 0; - flag_mem = (flags & BPF_JIT_FLAG_MEM) != 0; - /* * NOTE: Do not modify the name of this variable, as it's used by * the macros to emit code. */ emit_func emitm; + flags = bpf_jit_optimize(prog, nins); + fret = (flags & BPF_JIT_FRET) != 0; + fpkt = (flags & BPF_JIT_FPKT) != 0; + fmem = (flags & BPF_JIT_FMEM) != 0; + fjmp = (flags & BPF_JIT_FJMP) != 0; + fadk = (flags & BPF_JIT_FADK) != 0; + save_esp = (fpkt || fmem || fadk); /* Stack is used. */ + + if (fret) + nins = 1; + memset(&stream, 0, sizeof(stream)); /* Allocate the reference table for the jumps. 
*/ - if (flag_jmp) { + if (fjmp) { #ifdef _KERNEL stream.refs = malloc((nins + 1) * sizeof(u_int), M_BPFJIT, M_NOWAIT | M_ZERO); #else - stream.refs = malloc((nins + 1) * sizeof(u_int)); + stream.refs = calloc(nins + 1, sizeof(u_int)); #endif if (stream.refs == NULL) return (NULL); -#ifndef _KERNEL - memset(stream.refs, 0, (nins + 1) * sizeof(u_int)); -#endif } /* @@ -180,15 +200,16 @@ bpf_jit_compile(struct bpf_insn *prog, u ins = prog; /* Create the procedure header. */ - if (!flag_ret) { + if (save_esp) { PUSH(EBP); MOVrd(ESP, EBP); } - if (flag_mem) + if (fmem) SUBib(BPF_MEMWORDS * sizeof(uint32_t), ESP); - if (!flag_ret) { - PUSH(EDI); + if (save_esp) PUSH(ESI); + if (fpkt) { + PUSH(EDI); PUSH(EBX); MOVodd(8, EBP, EBX); MOVodd(16, EBP, EDI); @@ -207,20 +228,24 @@ bpf_jit_compile(struct bpf_insn *prog, u case BPF_RET|BPF_K: MOVid(ins->k, EAX); - if (!flag_ret) { - POP(EBX); + if (save_esp) { + if (fpkt) { + POP(EBX); + POP(EDI); + } POP(ESI); - POP(EDI); LEAVE(); } RET(); break; case BPF_RET|BPF_A: - if (!flag_ret) { - POP(EBX); + if (save_esp) { + if (fpkt) { + POP(EBX); + POP(EDI); + } POP(ESI); - POP(EDI); LEAVE(); } RET(); @@ -236,8 +261,8 @@ bpf_jit_compile(struct bpf_insn *prog, u JAEb(7); ZEROrd(EAX); POP(EBX); - POP(ESI); POP(EDI); + POP(ESI); LEAVE(); RET(); MOVobd(EBX, ESI, EAX); @@ -254,8 +279,8 @@ bpf_jit_compile(struct bpf_insn *prog, u CMPid(sizeof(int16_t), ECX); JAEb(5); POP(EBX); - POP(ESI); POP(EDI); + POP(ESI); LEAVE(); RET(); MOVobw(EBX, ESI, AX); @@ -268,19 +293,29 @@ bpf_jit_compile(struct bpf_insn *prog, u CMPrd(EDI, ESI); JBb(5); POP(EBX); - POP(ESI); POP(EDI); + POP(ESI); LEAVE(); RET(); MOVobb(EBX, ESI, AL); break; case BPF_LD|BPF_W|BPF_LEN: - MOVodd(12, EBP, EAX); + if (save_esp) + MOVodd(12, EBP, EAX); + else { + MOVrd(ESP, ECX); + MOVodd(12, ECX, EAX); + } break; case BPF_LDX|BPF_W|BPF_LEN: - MOVodd(12, EBP, EDX); + if (save_esp) + MOVodd(12, EBP, EDX); + else { + MOVrd(ESP, ECX); + MOVodd(12, ECX, EDX); + } break; case 
BPF_LD|BPF_W|BPF_IND: @@ -298,8 +333,8 @@ bpf_jit_compile(struct bpf_insn *prog, u JAEb(7); ZEROrd(EAX); POP(EBX); - POP(ESI); POP(EDI); + POP(ESI); LEAVE(); RET(); MOVobd(EBX, ESI, EAX); @@ -321,8 +356,8 @@ bpf_jit_compile(struct bpf_insn *prog, u CMPid(sizeof(int16_t), ECX); JAEb(5); POP(EBX); - POP(ESI); POP(EDI); + POP(ESI); LEAVE(); RET(); MOVobw(EBX, ESI, AX); @@ -339,8 +374,8 @@ bpf_jit_compile(struct bpf_insn *prog, u CMPrd(ESI, ECX); JAb(5); POP(EBX); - POP(ESI); POP(EDI); + POP(ESI); LEAVE(); RET(); ADDrd(EDX, ESI); @@ -353,8 +388,8 @@ bpf_jit_compile(struct bpf_insn *prog, u JBb(7); ZEROrd(EAX); POP(EBX); - POP(ESI); POP(EDI); + POP(ESI); LEAVE(); RET(); ZEROrd(EDX); @@ -481,12 +516,22 @@ bpf_jit_compile(struct bpf_insn *prog, u case BPF_ALU|BPF_DIV|BPF_X: TESTrd(EDX, EDX); - JNEb(7); - ZEROrd(EAX); - POP(EBX); - POP(ESI); - POP(EDI); - LEAVE(); + if (save_esp) { + if (fpkt) { + JNEb(7); + ZEROrd(EAX); + POP(EBX); + POP(EDI); + } else { + JNEb(5); + ZEROrd(EAX); + } + POP(ESI); + LEAVE(); + } else { + JNEb(3); + ZEROrd(EAX); + } RET(); MOVrd(EDX, ECX); ZEROrd(EDX); @@ -587,7 +632,7 @@ bpf_jit_compile(struct bpf_insn *prog, u * Modify the reference table to contain the offsets and * not the lengths of the instructions. */ - if (flag_jmp) + if (fjmp) for (i = 1; i < nins + 1; i++) stream.refs[i] += stream.refs[i - 1]; @@ -603,7 +648,7 @@ bpf_jit_compile(struct bpf_insn *prog, u * The reference table is needed only during compilation, * now we can free it. 
*/ - if (flag_jmp) + if (fjmp) #ifdef _KERNEL free(stream.refs, M_BPFJIT); #else Modified: head/sys/i386/i386/bpf_jit_machdep.h ============================================================================== --- head/sys/i386/i386/bpf_jit_machdep.h Mon Nov 23 21:17:38 2009 (r199720) +++ head/sys/i386/i386/bpf_jit_machdep.h Mon Nov 23 22:23:19 2009 (r199721) @@ -61,12 +61,14 @@ #define BL 3 /* Optimization flags */ -#define BPF_JIT_FLAG_RET 0x01 -#define BPF_JIT_FLAG_JMP 0x02 -#define BPF_JIT_FLAG_MEM 0x04 +#define BPF_JIT_FRET 0x01 +#define BPF_JIT_FPKT 0x02 +#define BPF_JIT_FMEM 0x04 +#define BPF_JIT_FJMP 0x08 +#define BPF_JIT_FADK 0x10 #define BPF_JIT_FLAG_ALL \ - (BPF_JIT_FLAG_JMP | BPF_JIT_FLAG_MEM) + (BPF_JIT_FPKT | BPF_JIT_FMEM | BPF_JIT_FJMP | BPF_JIT_FADK) /* A stream of native binary code */ typedef struct bpf_bin_stream {