From owner-p4-projects Sun Apr 28 11:26: 5 2002 Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id B266437B419; Sun, 28 Apr 2002 11:25:16 -0700 (PDT) Delivered-To: perforce@freebsd.org Received: from freefall.freebsd.org (freefall.FreeBSD.org [216.136.204.21]) by hub.freebsd.org (Postfix) with ESMTP id 80B5637B404 for ; Sun, 28 Apr 2002 11:25:15 -0700 (PDT) Received: (from perforce@localhost) by freefall.freebsd.org (8.11.6/8.11.6) id g3SIPFg16454 for perforce@freebsd.org; Sun, 28 Apr 2002 11:25:15 -0700 (PDT) (envelope-from tmm@freebsd.org) Date: Sun, 28 Apr 2002 11:25:15 -0700 (PDT) Message-Id: <200204281825.g3SIPFg16454@freefall.freebsd.org> X-Authentication-Warning: freefall.freebsd.org: perforce set sender to tmm@freebsd.org using -f From: Thomas Moestl Subject: PERFORCE change 10407 for review To: Perforce Change Reviews Sender: owner-p4-projects@FreeBSD.ORG Precedence: bulk List-ID: List-Archive: (Web Archive) List-Help: (List Instructions) List-Subscribe: List-Unsubscribe: X-Loop: FreeBSD.ORG http://people.freebsd.org/~peter/p4db/chv.cgi?CH=10407 Change 10407 by tmm@tmm_forge on 2002/04/28 11:24:19 Make use of the new INSFPdq_RN macro where appropriate; this is required for correctly handling the "high" fp registers (>= %f32). Fix a number of bugs related to the handling of the high registers which were caused by using __fpu_[gs]etreg() where __fpu_[gs]etreg64() should be used (the former can only access the low, single-precision, registers). Affected files ... ... //depot/projects/sparc64/lib/libc/sparc64/fpu/fpu.c#11 edit ... //depot/projects/sparc64/lib/libc/sparc64/fpu/fpu_explode.c#6 edit ... //depot/projects/sparc64/lib/libc/sparc64/sys/__sparc_utrap_emul.c#2 edit Differences ... ==== //depot/projects/sparc64/lib/libc/sparc64/fpu/fpu.c#11 (text+ko) ==== @@ -186,24 +186,62 @@ } #endif +static int opmask[] = {0, 0, 1, 3}; + +/* Decode 5 bit register field depending on the type. 
*/ +#define RN_DECODE(tp, rn) \ + ((tp == FTYPE_DBL || tp == FTYPE_EXT ? INSFPdq_RN((rn)) : (rn)) & \ + ~opmask[tp]) + +/* Operand size in 32-bit registers. */ +#define OPSZ(tp) ((tp) == FTYPE_LNG ? 2 : (1 << (tp))) + +/* + * Helper for forming the below case statements. Build only the op3 and opf + * field of the instruction, these are the only ones that need to match. + */ +#define FOP(op3, opf) \ + ((op3) << IF_F3_OP3_SHIFT | (opf) << IF_F3_OPF_SHIFT) + +/* + * Implement a move operation for all supported operand types. The additional + * nand and xor parameters will be applied to the upper 32 bit word of the + * source operand. This allows to implement fabs and fneg (for fp operands + * only!) using this functions, too, by passing (1 << 31) for one of the + * parameters, and 0 for the other. + */ static void -__fpu_mov(struct fpemu *fe, int type, int rd, int rs1, int rs2) +__fpu_mov(struct fpemu *fe, int type, int rd, int rs2, u_int32_t nand, + u_int32_t xor) { + u_int64_t tmp64; + u_int32_t *p32; int i; - i = 1 << type; - __fpu_setreg(rd++, rs1); - while (--i) - __fpu_setreg(rd++, __fpu_getreg(++rs2)); + if (type == FTYPE_INT || type == FTYPE_SNG) + __fpu_setreg(rd, (__fpu_getreg(rs2) & ~nand) ^ xor); + else { + /* + * Need to use the double versions to be able to access + * the upper 32 fp registers. + */ + for (i = 0; i < OPSZ(type); i += 2, rd += 2, rs2 += 2) { + tmp64 = __fpu_getreg64(rs2); + if (i == 0) + tmp64 = (tmp64 & ~((u_int64_t)nand << 32)) ^ + ((u_int64_t)xor << 32); + __fpu_setreg64(rd, tmp64); + } + } } static __inline void -__fpu_ccmov(struct fpemu *fe, int type, int rd, int rs1, int rs2, +__fpu_ccmov(struct fpemu *fe, int type, int rd, int rs2, u_int32_t insn, int fcc) { if (IF_F4_COND(insn) == fcc) - __fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2); + __fpu_mov(fe, type, rd, rs2, 0, 0); } static int @@ -230,16 +268,7 @@ return (0); } -static int opmask[] = {0, 0, 1, 3}; - /* - * Helper for forming the below case statements. 
Build only the op3 and opf - * field of the instruction, these are the only that need to match. - */ -#define FOP(op3, opf) \ - ((op3) << IF_F3_OP3_SHIFT | (opf) << IF_F3_OPF_SHIFT) - -/* * Execute an FPU instruction (one that runs entirely in the FPU; not * FBfcc or STF, for instance). On return, fe->fe_fs->fs_fsr will be * modified to reflect the setting the hardware would have left. @@ -254,6 +283,7 @@ int opf, rs1, rs2, rd, type, mask, cx, cond; u_long reg, fsr; u_int space[4]; + int i; /* * `Decode' and execute instruction. Start with no exceptions. @@ -263,13 +293,12 @@ opf = insn & (IF_MASK(IF_F3_OP3_SHIFT, IF_F3_OP3_BITS) | IF_MASK(IF_F3_OPF_SHIFT + 2, IF_F3_OPF_BITS - 2)); type = IF_F3_OPF(insn) & 3; - mask = opmask[type]; - rs1 = IF_F3_RS1(insn) & ~mask; - rs2 = IF_F3_RS2(insn) & ~mask; - rd = IF_F3_RD(insn) & ~mask; + rs1 = RN_DECODE(type, IF_F3_RS1(insn)); + rs2 = RN_DECODE(type, IF_F3_RS2(insn)); + rd = RN_DECODE(type, IF_F3_RD(insn)); cond = 0; #ifdef notdef - if ((rs1 | rs2 | rd) & mask) + if ((rs1 | rs2 | rd) & opmask[type]) return (SIGILL); #endif fsr = fe->fe_fsr; @@ -277,58 +306,54 @@ fe->fe_cx = 0; switch (opf) { case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(0))): - __fpu_ccmov(fe, type, rd, __fpu_getreg(rs2), rs2, insn, - FSR_GET_FCC0(fsr)); + __fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC0(fsr)); return (0); case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(1))): - __fpu_ccmov(fe, type, rd, __fpu_getreg(rs2), rs2, insn, - FSR_GET_FCC1(fsr)); + __fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC1(fsr)); return (0); case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(2))): - __fpu_ccmov(fe, type, rd, __fpu_getreg(rs2), rs2, insn, - FSR_GET_FCC2(fsr)); + __fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC2(fsr)); return (0); case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(3))): - __fpu_ccmov(fe, type, rd, __fpu_getreg(rs2), rs2, insn, - FSR_GET_FCC3(fsr)); + __fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC3(fsr)); return (0); case FOP(INS2_FPop2, 
INSFP2_FMOV_CC(IFCC_ICC)): - __fpu_ccmov(fe, type, rd, __fpu_getreg(rs2), rs2, insn, + __fpu_ccmov(fe, type, rd, rs2, insn, (tstate & TSTATE_ICC_MASK) >> TSTATE_ICC_SHIFT); return (0); case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_XCC)): - __fpu_ccmov(fe, type, rd, __fpu_getreg(rs2), rs2, insn, + __fpu_ccmov(fe, type, rd, rs2, insn, (tstate & TSTATE_XCC_MASK) >> (TSTATE_XCC_SHIFT)); return (0); case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_Z)): reg = __emul_fetch_reg(uf, IF_F4_RS1(insn)); if (reg == 0) - __fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2); + __fpu_mov(fe, type, rd, rs2, 0, 0); return (0); case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_LEZ)): reg = __emul_fetch_reg(uf, IF_F4_RS1(insn)); if (reg <= 0) - __fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2); + __fpu_mov(fe, type, rd, rs2, 0, 0); return (0); case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_LZ)): reg = __emul_fetch_reg(uf, IF_F4_RS1(insn)); if (reg < 0) - __fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2); + __fpu_mov(fe, type, rd, rs2, 0, 0); return (0); case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_NZ)): reg = __emul_fetch_reg(uf, IF_F4_RS1(insn)); if (reg != 0) - __fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2); + __fpu_mov(fe, type, rd, rs2, 0, 0); return (0); case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_GZ)): reg = __emul_fetch_reg(uf, IF_F4_RS1(insn)); if (reg > 0) - __fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2); + __fpu_mov(fe, type, rd, rs2, 0, 0); return (0); case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_GEZ)): reg = __emul_fetch_reg(uf, IF_F4_RS1(insn)); if (reg >= 0) - __fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2); + __fpu_mov(fe, type, rd, rs2, 0, 0); return (0); case FOP(INS2_FPop2, INSFP2_FCMP): __fpu_explode(fe, &fe->fe_f1, type, rs1); @@ -341,13 +366,13 @@ __fpu_compare(fe, 1, IF_F3_CC(insn)); return (__fpu_cmpck(fe)); case FOP(INS2_FPop1, INSFP1_FMOV): /* these should all be pretty obvious */ - __fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2); + __fpu_mov(fe, type, rd, rs2, 0, 0); return (0); case 
FOP(INS2_FPop1, INSFP1_FNEG): - __fpu_mov(fe, type, rd, __fpu_getreg(rs2) ^ (1 << 31), rs2); + __fpu_mov(fe, type, rd, rs2, 0, (1 << 31)); return (0); case FOP(INS2_FPop1, INSFP1_FABS): - __fpu_mov(fe, type, rd, __fpu_getreg(rs2) & ~(1 << 31), rs2); + __fpu_mov(fe, type, rd, rs2, (1 << 31), 0); return (0); case FOP(INS2_FPop1, INSFP1_FSQRT): __fpu_explode(fe, &fe->fe_f1, type, rs2); @@ -384,8 +409,7 @@ * Recalculate rd (the old type applied for the source regs * only, the target one has a different size). */ - mask = opmask[type]; - rd = IF_F3_RD(insn) & ~mask; + rd = RN_DECODE(type, IF_F3_RD(insn)); fp = __fpu_mul(fe); break; case FOP(INS2_FPop1, INSFP1_FxTOs): @@ -395,8 +419,7 @@ __fpu_explode(fe, fp = &fe->fe_f1, type, rs2); /* sneaky; depends on instruction encoding */ type = (IF_F3_OPF(insn) >> 2) & 3; - mask = opmask[type]; - rd = IF_F3_RD(insn) & ~mask; + rd = RN_DECODE(type, IF_F3_RD(insn)); break; case FOP(INS2_FPop1, INSFP1_FTOx): __fpu_explode(fe, fp = &fe->fe_f1, type, rs2); @@ -411,8 +434,7 @@ __fpu_explode(fe, fp = &fe->fe_f1, type, rs2); /* sneaky; depends on instruction encoding */ type = (IF_F3_OPF(insn) >> 2) & 3; - mask = opmask[type]; - rd = IF_F3_RD(insn) & ~mask; + rd = RN_DECODE(type, IF_F3_RD(insn)); break; default: return (SIGILL); @@ -438,12 +460,12 @@ fsr |= (cx << FSR_CEXC_SHIFT) | (cx << FSR_AEXC_SHIFT); } fe->fe_fsr = fsr; - __fpu_setreg(rd, space[0]); - if (type >= FTYPE_DBL || type == FTYPE_LNG) { - __fpu_setreg(rd + 1, space[1]); - if (type > FTYPE_DBL) { - __fpu_setreg(rd + 2, space[2]); - __fpu_setreg(rd + 3, space[3]); + if (type == FTYPE_INT || type == FTYPE_SNG) + __fpu_setreg(rd, space[0]); + else { + for (i = 0; i < OPSZ(type); i += 2) { + __fpu_setreg64(rd + i, ((u_int64_t)space[i] << 32) | + space[i + 1]); } } return (0); /* success */ ==== //depot/projects/sparc64/lib/libc/sparc64/fpu/fpu_explode.c#6 (text+ko) ==== @@ -248,16 +248,21 @@ struct fpn *fp; int type, reg; { - u_int s; - u_int64_t l; + u_int32_t s, *sp; + 
u_int64_t l[2]; - l = __fpu_getreg64(reg & ~1); - s = __fpu_getreg(reg); - fp->fp_sign = s >> 31; + if (type == FTYPE_LNG || type == FTYPE_DBL || type == FTYPE_EXT) { + l[0] = __fpu_getreg64(reg & ~1); + sp = (u_int32_t *)l; + fp->fp_sign = sp[0] >> 31; + } else { + s = __fpu_getreg(reg); + fp->fp_sign = s >> 31; + } fp->fp_sticky = 0; switch (type) { case FTYPE_LNG: - s = __fpu_xtof(fp, l); + s = __fpu_xtof(fp, l[0]); break; case FTYPE_INT: @@ -269,13 +274,12 @@ break; case FTYPE_DBL: - s = __fpu_dtof(fp, s, __fpu_getreg(reg + 1)); + s = __fpu_dtof(fp, sp[0], sp[1]); break; case FTYPE_EXT: - s = __fpu_qtof(fp, s, __fpu_getreg(reg + 1), - __fpu_getreg(reg + 2), - __fpu_getreg(reg + 3)); + l[1] = __fpu_getreg64((reg & ~1) + 2); + s = __fpu_qtof(fp, sp[0], sp[1], sp[2], sp[3]); break; default: ==== //depot/projects/sparc64/lib/libc/sparc64/sys/__sparc_utrap_emul.c#2 (text+ko) ==== @@ -70,15 +70,13 @@ case IOP_LDST: switch (IF_F3_OP3(insn)) { case INS3_LDQF: - rd = IF_F3_RD(insn); - rd = (rd & ~3) | ((rd & 1) << 5); + rd = INSFPdq_RN(IF_F3_RD(insn)); addr = (u_long *)__emul_f3_memop_addr(uf, insn); __fpu_setreg64(rd, addr[0]); __fpu_setreg64(rd + 2, addr[1]); break; case INS3_STQF: - rd = IF_F3_RD(insn); - rd = (rd & ~3) | ((rd & 1) << 5); + rd = INSFPdq_RN(IF_F3_RD(insn)); addr = (u_long *)__emul_f3_memop_addr(uf, insn); addr[0] = __fpu_getreg64(rd); addr[1] = __fpu_getreg64(rd + 2); To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe p4-projects" in the body of the message