Date: Mon, 5 Feb 2018 16:00:30 +0000 (UTC) From: Mark Johnston <markj@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r328886 - in user/markj/netdump: . bin/sh contrib/blacklist/libexec contrib/llvm/include/llvm contrib/llvm/include/llvm/CodeGen contrib/llvm/lib/CodeGen contrib/llvm/lib/CodeGen/Selecti... Message-ID: <201802051600.w15G0UmG015434@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: markj Date: Mon Feb 5 16:00:30 2018 New Revision: 328886 URL: https://svnweb.freebsd.org/changeset/base/328886 Log: MFH at r328885. Added: user/markj/netdump/contrib/llvm/lib/CodeGen/IndirectBrExpandPass.cpp - copied unchanged from r328885, head/contrib/llvm/lib/CodeGen/IndirectBrExpandPass.cpp user/markj/netdump/contrib/llvm/lib/Target/X86/X86RetpolineThunks.cpp - copied unchanged from r328885, head/contrib/llvm/lib/Target/X86/X86RetpolineThunks.cpp user/markj/netdump/sys/powerpc/conf/GENERIC64-NODEBUG - copied unchanged from r328885, head/sys/powerpc/conf/GENERIC64-NODEBUG Deleted: user/markj/netdump/stand/forth/pcibios.4th user/markj/netdump/stand/forth/pnp.4th Modified: user/markj/netdump/ObsoleteFiles.inc user/markj/netdump/bin/sh/jobs.c user/markj/netdump/contrib/blacklist/libexec/blacklistd-helper user/markj/netdump/contrib/llvm/include/llvm/CodeGen/Passes.h user/markj/netdump/contrib/llvm/include/llvm/CodeGen/TargetInstrInfo.h user/markj/netdump/contrib/llvm/include/llvm/CodeGen/TargetLowering.h user/markj/netdump/contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h user/markj/netdump/contrib/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h user/markj/netdump/contrib/llvm/include/llvm/InitializePasses.h user/markj/netdump/contrib/llvm/lib/CodeGen/CodeGen.cpp user/markj/netdump/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp user/markj/netdump/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp user/markj/netdump/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp user/markj/netdump/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp user/markj/netdump/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp user/markj/netdump/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp user/markj/netdump/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h user/markj/netdump/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp user/markj/netdump/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp user/markj/netdump/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp user/markj/netdump/contrib/llvm/lib/Target/X86/X86.h user/markj/netdump/contrib/llvm/lib/Target/X86/X86.td user/markj/netdump/contrib/llvm/lib/Target/X86/X86AsmPrinter.h user/markj/netdump/contrib/llvm/lib/Target/X86/X86FastISel.cpp user/markj/netdump/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp user/markj/netdump/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp user/markj/netdump/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp user/markj/netdump/contrib/llvm/lib/Target/X86/X86ISelLowering.h user/markj/netdump/contrib/llvm/lib/Target/X86/X86InstrCompiler.td user/markj/netdump/contrib/llvm/lib/Target/X86/X86InstrControl.td user/markj/netdump/contrib/llvm/lib/Target/X86/X86InstrInfo.td user/markj/netdump/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp user/markj/netdump/contrib/llvm/lib/Target/X86/X86Subtarget.cpp user/markj/netdump/contrib/llvm/lib/Target/X86/X86Subtarget.h user/markj/netdump/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp user/markj/netdump/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp user/markj/netdump/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp user/markj/netdump/contrib/llvm/tools/clang/include/clang/Driver/Options.td user/markj/netdump/contrib/llvm/tools/clang/lib/Basic/Targets/X86.cpp user/markj/netdump/contrib/llvm/tools/clang/lib/Basic/Targets/X86.h user/markj/netdump/contrib/llvm/tools/lld/ELF/Arch/X86.cpp user/markj/netdump/contrib/llvm/tools/lld/ELF/Arch/X86_64.cpp user/markj/netdump/contrib/llvm/tools/lld/ELF/Config.h user/markj/netdump/contrib/llvm/tools/lld/ELF/Driver.cpp user/markj/netdump/contrib/llvm/tools/opt/opt.cpp user/markj/netdump/lib/clang/freebsd_cc_version.h user/markj/netdump/lib/clang/include/clang/Basic/Version.inc user/markj/netdump/lib/clang/include/lld/Common/Version.inc user/markj/netdump/lib/clang/include/llvm/Support/VCSRevision.h user/markj/netdump/lib/clang/libllvm/Makefile user/markj/netdump/lib/libc/sparc64/sys/__sparc_utrap_setup.c user/markj/netdump/lib/libc/stdlib/strtold.c user/markj/netdump/lib/libufs/sblock.c user/markj/netdump/libexec/getty/extern.h user/markj/netdump/libexec/getty/init.c user/markj/netdump/libexec/getty/main.c user/markj/netdump/libexec/getty/subr.c user/markj/netdump/libexec/rtld-elf/aarch64/reloc.c user/markj/netdump/libexec/rtld-elf/amd64/reloc.c user/markj/netdump/libexec/rtld-elf/arm/reloc.c user/markj/netdump/libexec/rtld-elf/i386/reloc.c user/markj/netdump/libexec/rtld-elf/mips/reloc.c user/markj/netdump/libexec/rtld-elf/powerpc/reloc.c user/markj/netdump/libexec/rtld-elf/powerpc64/reloc.c user/markj/netdump/libexec/rtld-elf/riscv/reloc.c user/markj/netdump/libexec/rtld-elf/rtld.c user/markj/netdump/libexec/rtld-elf/rtld.h user/markj/netdump/libexec/rtld-elf/sparc64/reloc.c user/markj/netdump/sbin/dhclient/dhclient.c user/markj/netdump/sbin/etherswitchcfg/etherswitchcfg.c user/markj/netdump/sbin/geom/class/cache/geom_cache.c user/markj/netdump/sbin/geom/class/concat/geom_concat.c user/markj/netdump/sbin/geom/class/journal/geom_journal.c user/markj/netdump/sbin/geom/class/label/geom_label.c user/markj/netdump/sbin/geom/class/mirror/geom_mirror.c user/markj/netdump/sbin/geom/class/raid3/geom_raid3.c user/markj/netdump/sbin/geom/class/shsec/geom_shsec.c user/markj/netdump/sbin/geom/class/stripe/geom_stripe.c user/markj/netdump/sbin/geom/misc/subr.c user/markj/netdump/sbin/newfs/mkfs.c user/markj/netdump/share/examples/bhyve/vmrun.sh user/markj/netdump/share/man/man3/pthread_join.3 user/markj/netdump/share/mk/bsd.sys.mk user/markj/netdump/stand/arm/uboot/Makefile user/markj/netdump/stand/common/load_elf.c user/markj/netdump/stand/common/pnp.c user/markj/netdump/stand/efi/fdt/efi_fdt.c user/markj/netdump/stand/efi/libefi/Makefile user/markj/netdump/stand/efi/libefi/env.c user/markj/netdump/stand/efi/loader/Makefile user/markj/netdump/stand/ficl.mk user/markj/netdump/stand/forth/Makefile user/markj/netdump/stand/forth/loader.4th user/markj/netdump/stand/i386/libi386/biospci.c user/markj/netdump/stand/i386/loader/Makefile user/markj/netdump/stand/libsa/stand.h user/markj/netdump/stand/loader.mk user/markj/netdump/stand/mips/beri/loader/Makefile user/markj/netdump/stand/mips/uboot/Makefile user/markj/netdump/stand/ofw/common/main.c user/markj/netdump/stand/ofw/libofw/elf_freebsd.c user/markj/netdump/stand/ofw/libofw/libofw.h user/markj/netdump/stand/ofw/libofw/ofw_copy.c user/markj/netdump/stand/ofw/libofw/ofw_memory.c user/markj/netdump/stand/ofw/libofw/ppc64_elf_freebsd.c user/markj/netdump/stand/powerpc/kboot/Makefile user/markj/netdump/stand/powerpc/ofw/Makefile user/markj/netdump/stand/powerpc/ofw/ldscript.powerpc user/markj/netdump/stand/powerpc/uboot/Makefile user/markj/netdump/stand/sparc64/loader/Makefile user/markj/netdump/stand/userboot/userboot/Makefile user/markj/netdump/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c user/markj/netdump/sys/compat/freebsd32/freebsd32_ipc.h user/markj/netdump/sys/conf/files user/markj/netdump/sys/dev/atkbdc/psm.c user/markj/netdump/sys/dev/etherswitch/arswitch/arswitch.c user/markj/netdump/sys/dev/etherswitch/arswitch/arswitch_7240.c user/markj/netdump/sys/dev/etherswitch/arswitch/arswitch_8316.c user/markj/netdump/sys/dev/etherswitch/arswitch/arswitch_8327.c user/markj/netdump/sys/dev/etherswitch/arswitch/arswitch_9340.c user/markj/netdump/sys/dev/etherswitch/arswitch/arswitchreg.h user/markj/netdump/sys/dev/etherswitch/arswitch/arswitchvar.h user/markj/netdump/sys/dev/mpr/mpr.c user/markj/netdump/sys/dev/usb/serial/uslcom.c user/markj/netdump/sys/dev/usb/usbdevs user/markj/netdump/sys/fs/ext2fs/ext2_extents.c user/markj/netdump/sys/fs/ext2fs/ext2_vfsops.c user/markj/netdump/sys/fs/ext2fs/ext2fs.h user/markj/netdump/sys/geom/label/g_label_ufs.c user/markj/netdump/sys/geom/virstor/g_virstor.c user/markj/netdump/sys/kern/sysv_msg.c user/markj/netdump/sys/kern/sysv_sem.c user/markj/netdump/sys/kern/sysv_shm.c user/markj/netdump/sys/mips/conf/DB120 user/markj/netdump/sys/mips/conf/DB120.hints user/markj/netdump/sys/mips/conf/std.AR934X user/markj/netdump/sys/modules/linux/Makefile user/markj/netdump/sys/modules/linux64/Makefile user/markj/netdump/sys/netinet6/frag6.c user/markj/netdump/sys/netinet6/ip6_input.c user/markj/netdump/sys/netinet6/ip6_var.h user/markj/netdump/sys/netinet6/raw_ip6.c user/markj/netdump/sys/powerpc/conf/MPC85XX user/markj/netdump/sys/powerpc/conf/MPC85XXSPE user/markj/netdump/sys/powerpc/mpc85xx/mpc85xx_cache.c user/markj/netdump/sys/vm/vm_object.c user/markj/netdump/tools/boot/install-boot.sh user/markj/netdump/tools/boot/rootgen.sh user/markj/netdump/tools/tools/nanobsd/legacy.sh user/markj/netdump/usr.bin/clang/lld/ld.lld.1 user/markj/netdump/usr.sbin/bsdinstall/scripts/auto user/markj/netdump/usr.sbin/makefs/tests/makefs_cd9660_tests.sh user/markj/netdump/usr.sbin/newsyslog/newsyslog.8 Directory Properties: user/markj/netdump/ (props changed) user/markj/netdump/contrib/blacklist/ (props changed) user/markj/netdump/contrib/compiler-rt/ (props changed) user/markj/netdump/contrib/libc++/ (props changed) user/markj/netdump/contrib/llvm/ (props changed) user/markj/netdump/contrib/llvm/tools/clang/ (props changed) user/markj/netdump/contrib/llvm/tools/lld/ (props changed) user/markj/netdump/contrib/llvm/tools/lldb/ (props changed) user/markj/netdump/sys/cddl/contrib/opensolaris/ (props changed) Modified: user/markj/netdump/ObsoleteFiles.inc ============================================================================== --- user/markj/netdump/ObsoleteFiles.inc Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/ObsoleteFiles.inc Mon Feb 5 16:00:30 2018 (r328886) @@ -38,6 +38,10 @@ # xargs -n1 | sort | uniq -d; # done +# 20180201: Obsolete forth files +OLD_FILES+=boot/efi.4th +OLD_FILES+=boot/pcibios.4th + # 20180114: new clang import which bumps version from 5.0.1 to 6.0.0. OLD_FILES+=usr/lib/clang/5.0.1/include/sanitizer/allocator_interface.h OLD_FILES+=usr/lib/clang/5.0.1/include/sanitizer/asan_interface.h Modified: user/markj/netdump/bin/sh/jobs.c ============================================================================== --- user/markj/netdump/bin/sh/jobs.c Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/bin/sh/jobs.c Mon Feb 5 16:00:30 2018 (r328886) @@ -362,7 +362,7 @@ showjob(struct job *jp, int mode) const char *statestr, *coredump; struct procstat *ps; struct job *j; - int col, curr, i, jobno, prev, procno; + int col, curr, i, jobno, prev, procno, status; char c; procno = (mode == SHOWJOBS_PGIDS) ? 1 : jp->nprocs; @@ -376,11 +376,12 @@ showjob(struct job *jp, int mode) } #endif coredump = ""; - ps = jp->ps + jp->nprocs - 1; + status = jp->ps[jp->nprocs - 1].status; if (jp->state == 0) { statestr = "Running"; #if JOBS } else if (jp->state == JOBSTOPPED) { + ps = jp->ps + jp->nprocs - 1; while (!WIFSTOPPED(ps->status) && ps > jp->ps) ps--; if (WIFSTOPPED(ps->status)) @@ -391,20 +392,20 @@ showjob(struct job *jp, int mode) if (statestr == NULL) statestr = "Suspended"; #endif - } else if (WIFEXITED(ps->status)) { - if (WEXITSTATUS(ps->status) == 0) + } else if (WIFEXITED(status)) { + if (WEXITSTATUS(status) == 0) statestr = "Done"; else { fmtstr(statebuf, sizeof(statebuf), "Done(%d)", - WEXITSTATUS(ps->status)); + WEXITSTATUS(status)); statestr = statebuf; } } else { - i = WTERMSIG(ps->status); + i = WTERMSIG(status); statestr = strsignal(i); if (statestr == NULL) statestr = "Unknown signal"; - if (WCOREDUMP(ps->status)) + if (WCOREDUMP(status)) coredump = " (core dumped)"; } Modified: user/markj/netdump/contrib/blacklist/libexec/blacklistd-helper ============================================================================== --- user/markj/netdump/contrib/blacklist/libexec/blacklistd-helper Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/blacklist/libexec/blacklistd-helper Mon Feb 5 16:00:30 2018 (r328886) @@ -80,8 +80,8 @@ add) echo "block in quick $proto from <port$6> to any $port" | \ /sbin/pfctl -a "$2/$6" -f - # insert $ip/$mask into per-protocol/port anchored table - /sbin/pfctl -a "$2/$6" -t "port$6" -T add "$addr/$mask" && \ - echo OK + /sbin/pfctl -qa "$2/$6" -t "port$6" -T add "$addr/$mask" && \ + /sbin/pfctl -q -k $addr && echo OK ;; esac ;; @@ -101,7 +101,7 @@ rem) /sbin/npfctl rule "$2" rem-id "$7" ;; pf) - /sbin/pfctl -a "$2/$6" -t "port$6" -T delete "$addr/$mask" && \ + /sbin/pfctl -qa "$2/$6" -t "port$6" -T delete "$addr/$mask" && \ echo OK ;; esac @@ -118,7 +118,13 @@ flush) /sbin/npfctl rule "$2" flush ;; pf) - /sbin/pfctl -a "$2/$6" -t "port$6" -T flush && echo OK + # dynamically determine which anchors exist + anchors=$(/sbin/pfctl -a $2 -s Anchors) + for anchor in $anchors; do + /sbin/pfctl -a $anchor -t "port${anchor##*/}" -T flush + /sbin/pfctl -a $anchor -F rules + done + echo OK ;; esac ;; Modified: user/markj/netdump/contrib/llvm/include/llvm/CodeGen/Passes.h ============================================================================== --- user/markj/netdump/contrib/llvm/include/llvm/CodeGen/Passes.h Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/include/llvm/CodeGen/Passes.h Mon Feb 5 16:00:30 2018 (r328886) @@ -417,6 +417,9 @@ namespace llvm { // This pass expands memcmp() to load/stores. FunctionPass *createExpandMemCmpPass(); + // This pass expands indirectbr instructions. + FunctionPass *createIndirectBrExpandPass(); + } // End llvm namespace #endif Modified: user/markj/netdump/contrib/llvm/include/llvm/CodeGen/TargetInstrInfo.h ============================================================================== --- user/markj/netdump/contrib/llvm/include/llvm/CodeGen/TargetInstrInfo.h Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/include/llvm/CodeGen/TargetInstrInfo.h Mon Feb 5 16:00:30 2018 (r328886) @@ -950,6 +950,10 @@ class TargetInstrInfo : public MCInstrInfo { (public) /// Return true when a target supports MachineCombiner. virtual bool useMachineCombiner() const { return false; } + /// Return true if the given SDNode can be copied during scheduling + /// even if it has glue. + virtual bool canCopyGluedNodeDuringSchedule(SDNode *N) const { return false; } + protected: /// Target-dependent implementation for foldMemoryOperand. /// Target-independent code in foldMemoryOperand will Modified: user/markj/netdump/contrib/llvm/include/llvm/CodeGen/TargetLowering.h ============================================================================== --- user/markj/netdump/contrib/llvm/include/llvm/CodeGen/TargetLowering.h Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/include/llvm/CodeGen/TargetLowering.h Mon Feb 5 16:00:30 2018 (r328886) @@ -800,7 +800,7 @@ class TargetLoweringBase { (public) } /// Return true if lowering to a jump table is allowed. - bool areJTsAllowed(const Function *Fn) const { + virtual bool areJTsAllowed(const Function *Fn) const { if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true") return false; Modified: user/markj/netdump/contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h ============================================================================== --- user/markj/netdump/contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h Mon Feb 5 16:00:30 2018 (r328886) @@ -416,6 +416,13 @@ class TargetPassConfig : public ImmutablePass { (prote /// immediately before machine code is emitted. virtual void addPreEmitPass() { } + /// Targets may add passes immediately before machine code is emitted in this + /// callback. This is called even later than `addPreEmitPass`. + // FIXME: Rename `addPreEmitPass` to something more sensible given its actual + // position and remove the `2` suffix here as this callback is what + // `addPreEmitPass` *should* be but in reality isn't. + virtual void addPreEmitPass2() {} + /// Utilities for targets to add passes to the pass manager. /// Modified: user/markj/netdump/contrib/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h ============================================================================== --- user/markj/netdump/contrib/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h Mon Feb 5 16:00:30 2018 (r328886) @@ -174,6 +174,9 @@ class TargetSubtargetInfo : public MCSubtargetInfo { ( /// \brief True if the subtarget should run the atomic expansion pass. virtual bool enableAtomicExpand() const; + /// True if the subtarget should run the indirectbr expansion pass. + virtual bool enableIndirectBrExpand() const; + /// \brief Override generic scheduling policy within a region. /// /// This is a convenient way for targets that don't provide any custom Modified: user/markj/netdump/contrib/llvm/include/llvm/InitializePasses.h ============================================================================== --- user/markj/netdump/contrib/llvm/include/llvm/InitializePasses.h Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/include/llvm/InitializePasses.h Mon Feb 5 16:00:30 2018 (r328886) @@ -161,6 +161,7 @@ void initializeIVUsersWrapperPassPass(PassRegistry&); void initializeIfConverterPass(PassRegistry&); void initializeImplicitNullChecksPass(PassRegistry&); void initializeIndVarSimplifyLegacyPassPass(PassRegistry&); +void initializeIndirectBrExpandPassPass(PassRegistry&); void initializeInductiveRangeCheckEliminationPass(PassRegistry&); void initializeInferAddressSpacesPass(PassRegistry&); void initializeInferFunctionAttrsLegacyPassPass(PassRegistry&); Modified: user/markj/netdump/contrib/llvm/lib/CodeGen/CodeGen.cpp ============================================================================== --- user/markj/netdump/contrib/llvm/lib/CodeGen/CodeGen.cpp Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/lib/CodeGen/CodeGen.cpp Mon Feb 5 16:00:30 2018 (r328886) @@ -38,6 +38,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeGCModuleInfoPass(Registry); initializeIfConverterPass(Registry); initializeImplicitNullChecksPass(Registry); + initializeIndirectBrExpandPassPass(Registry); initializeInterleavedAccessPass(Registry); initializeLiveDebugValuesPass(Registry); initializeLiveDebugVariablesPass(Registry); Copied: user/markj/netdump/contrib/llvm/lib/CodeGen/IndirectBrExpandPass.cpp (from r328885, head/contrib/llvm/lib/CodeGen/IndirectBrExpandPass.cpp) ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/markj/netdump/contrib/llvm/lib/CodeGen/IndirectBrExpandPass.cpp Mon Feb 5 16:00:30 2018 (r328886, copy of r328885, head/contrib/llvm/lib/CodeGen/IndirectBrExpandPass.cpp) @@ -0,0 +1,221 @@ +//===- IndirectBrExpandPass.cpp - Expand indirectbr to switch -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// Implements an expansion pass to turn `indirectbr` instructions in the IR +/// into `switch` instructions. This works by enumerating the basic blocks in +/// a dense range of integers, replacing each `blockaddr` constant with the +/// corresponding integer constant, and then building a switch that maps from +/// the integers to the actual blocks. All of the indirectbr instructions in the +/// function are redirected to this common switch. +/// +/// While this is generically useful if a target is unable to codegen +/// `indirectbr` natively, it is primarily useful when there is some desire to +/// get the builtin non-jump-table lowering of a switch even when the input +/// source contained an explicit indirect branch construct. +/// +/// Note that it doesn't make any sense to enable this pass unless a target also +/// disables jump-table lowering of switches. Doing that is likely to pessimize +/// the code. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +#define DEBUG_TYPE "indirectbr-expand" + +namespace { + +class IndirectBrExpandPass : public FunctionPass { + const TargetLowering *TLI = nullptr; + +public: + static char ID; // Pass identification, replacement for typeid + + IndirectBrExpandPass() : FunctionPass(ID) { + initializeIndirectBrExpandPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; +}; + +} // end anonymous namespace + +char IndirectBrExpandPass::ID = 0; + +INITIALIZE_PASS(IndirectBrExpandPass, DEBUG_TYPE, + "Expand indirectbr instructions", false, false) + +FunctionPass *llvm::createIndirectBrExpandPass() { + return new IndirectBrExpandPass(); +} + +bool IndirectBrExpandPass::runOnFunction(Function &F) { + auto &DL = F.getParent()->getDataLayout(); + auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); + if (!TPC) + return false; + + auto &TM = TPC->getTM<TargetMachine>(); + auto &STI = *TM.getSubtargetImpl(F); + if (!STI.enableIndirectBrExpand()) + return false; + TLI = STI.getTargetLowering(); + + SmallVector<IndirectBrInst *, 1> IndirectBrs; + + // Set of all potential successors for indirectbr instructions. + SmallPtrSet<BasicBlock *, 4> IndirectBrSuccs; + + // Build a list of indirectbrs that we want to rewrite. + for (BasicBlock &BB : F) + if (auto *IBr = dyn_cast<IndirectBrInst>(BB.getTerminator())) { + // Handle the degenerate case of no successors by replacing the indirectbr + // with unreachable as there is no successor available. + if (IBr->getNumSuccessors() == 0) { + (void)new UnreachableInst(F.getContext(), IBr); + IBr->eraseFromParent(); + continue; + } + + IndirectBrs.push_back(IBr); + for (BasicBlock *SuccBB : IBr->successors()) + IndirectBrSuccs.insert(SuccBB); + } + + if (IndirectBrs.empty()) + return false; + + // If we need to replace any indirectbrs we need to establish integer + // constants that will correspond to each of the basic blocks in the function + // whose address escapes. We do that here and rewrite all the blockaddress + // constants to just be those integer constants cast to a pointer type. + SmallVector<BasicBlock *, 4> BBs; + + for (BasicBlock &BB : F) { + // Skip blocks that aren't successors to an indirectbr we're going to + // rewrite. + if (!IndirectBrSuccs.count(&BB)) + continue; + + auto IsBlockAddressUse = [&](const Use &U) { + return isa<BlockAddress>(U.getUser()); + }; + auto BlockAddressUseIt = llvm::find_if(BB.uses(), IsBlockAddressUse); + if (BlockAddressUseIt == BB.use_end()) + continue; + + assert(std::find_if(std::next(BlockAddressUseIt), BB.use_end(), + IsBlockAddressUse) == BB.use_end() && + "There should only ever be a single blockaddress use because it is " + "a constant and should be uniqued."); + + auto *BA = cast<BlockAddress>(BlockAddressUseIt->getUser()); + + // Skip if the constant was formed but ended up not being used (due to DCE + // or whatever). + if (!BA->isConstantUsed()) + continue; + + // Compute the index we want to use for this basic block. We can't use zero + // because null can be compared with block addresses. + int BBIndex = BBs.size() + 1; + BBs.push_back(&BB); + + auto *ITy = cast<IntegerType>(DL.getIntPtrType(BA->getType())); + ConstantInt *BBIndexC = ConstantInt::get(ITy, BBIndex); + + // Now rewrite the blockaddress to an integer constant based on the index. + // FIXME: We could potentially preserve the uses as arguments to inline asm. + // This would allow some uses such as diagnostic information in crashes to + // have higher quality even when this transform is enabled, but would break + // users that round-trip blockaddresses through inline assembly and then + // back into an indirectbr. + BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(BBIndexC, BA->getType())); + } + + if (BBs.empty()) { + // There are no blocks whose address is taken, so any indirectbr instruction + // cannot get a valid input and we can replace all of them with unreachable. + for (auto *IBr : IndirectBrs) { + (void)new UnreachableInst(F.getContext(), IBr); + IBr->eraseFromParent(); + } + return true; + } + + BasicBlock *SwitchBB; + Value *SwitchValue; + + // Compute a common integer type across all the indirectbr instructions. + IntegerType *CommonITy = nullptr; + for (auto *IBr : IndirectBrs) { + auto *ITy = + cast<IntegerType>(DL.getIntPtrType(IBr->getAddress()->getType())); + if (!CommonITy || ITy->getBitWidth() > CommonITy->getBitWidth()) + CommonITy = ITy; + } + + auto GetSwitchValue = [DL, CommonITy](IndirectBrInst *IBr) { + return CastInst::CreatePointerCast( + IBr->getAddress(), CommonITy, + Twine(IBr->getAddress()->getName()) + ".switch_cast", IBr); + }; + + if (IndirectBrs.size() == 1) { + // If we only have one indirectbr, we can just directly replace it within + // its block. + SwitchBB = IndirectBrs[0]->getParent(); + SwitchValue = GetSwitchValue(IndirectBrs[0]); + IndirectBrs[0]->eraseFromParent(); + } else { + // Otherwise we need to create a new block to hold the switch across BBs, + // jump to that block instead of each indirectbr, and phi together the + // values for the switch. + SwitchBB = BasicBlock::Create(F.getContext(), "switch_bb", &F); + auto *SwitchPN = PHINode::Create(CommonITy, IndirectBrs.size(), + "switch_value_phi", SwitchBB); + SwitchValue = SwitchPN; + + // Now replace the indirectbr instructions with direct branches to the + // switch block and fill out the PHI operands. + for (auto *IBr : IndirectBrs) { + SwitchPN->addIncoming(GetSwitchValue(IBr), IBr->getParent()); + BranchInst::Create(SwitchBB, IBr); + IBr->eraseFromParent(); + } + } + + // Now build the switch in the block. The block will have no terminator + // already. + auto *SI = SwitchInst::Create(SwitchValue, BBs[0], BBs.size(), SwitchBB); + + // Add a case for each block. + for (int i : llvm::seq<int>(1, BBs.size())) + SI->addCase(ConstantInt::get(CommonITy, i + 1), BBs[i]); + + return true; +} Modified: user/markj/netdump/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp ============================================================================== --- user/markj/netdump/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Mon Feb 5 16:00:30 2018 (r328886) @@ -1996,14 +1996,15 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Lib Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Node = Op; Entry.Ty = ArgTy; - Entry.IsSExt = isSigned; - Entry.IsZExt = !isSigned; + Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned); + Entry.IsZExt = !TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned); Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy(DAG.getDataLayout())); - Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + EVT RetVT = Node->getValueType(0); + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); // By default, the input chain to this libcall is the entry node of the // function. If the libcall is going to be emitted as a tail call then @@ -2022,13 +2023,14 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Lib InChain = TCChain; TargetLowering::CallLoweringInfo CLI(DAG); + bool signExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, isSigned); CLI.setDebugLoc(SDLoc(Node)) .setChain(InChain) .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setTailCall(isTailCall) - .setSExtResult(isSigned) - .setZExtResult(!isSigned) + .setSExtResult(signExtend) + .setZExtResult(!signExtend) .setIsPostTypeLegalization(true); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); Modified: user/markj/netdump/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp ============================================================================== --- user/markj/netdump/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp Mon Feb 5 16:00:30 2018 (r328886) @@ -1117,22 +1117,34 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit if (!N) return nullptr; - if (SU->getNode()->getGluedNode()) + DEBUG(dbgs() << "Considering duplicating the SU\n"); + DEBUG(SU->dump(this)); + + if (N->getGluedNode() && + !TII->canCopyGluedNodeDuringSchedule(N)) { + DEBUG(dbgs() + << "Giving up because it has incoming glue and the target does not " + "want to copy it\n"); return nullptr; + } SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { MVT VT = N->getSimpleValueType(i); - if (VT == MVT::Glue) + if (VT == MVT::Glue) { + DEBUG(dbgs() << "Giving up because it has outgoing glue\n"); return nullptr; - else if (VT == MVT::Other) + } else if (VT == MVT::Other) TryUnfold = true; } for (const SDValue &Op : N->op_values()) { MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); - if (VT == MVT::Glue) + if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) { + DEBUG(dbgs() << "Giving up because it one of the operands is glue and " + "the target does not want to copy it\n"); return nullptr; + } } // If possible unfold instruction. Modified: user/markj/netdump/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp ============================================================================== --- user/markj/netdump/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp Mon Feb 5 16:00:30 2018 (r328886) @@ -907,6 +907,9 @@ void TargetPassConfig::addMachinePasses() { if (EnableMachineOutliner) PM->add(createMachineOutlinerPass(EnableLinkOnceODROutlining)); + // Add passes that directly emit MI after all other MI passes. + addPreEmitPass2(); + AddingMachinePasses = false; } Modified: user/markj/netdump/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp ============================================================================== --- user/markj/netdump/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp Mon Feb 5 16:00:30 2018 (r328886) @@ -38,6 +38,10 @@ bool TargetSubtargetInfo::enableAtomicExpand() const { return true; } +bool TargetSubtargetInfo::enableIndirectBrExpand() const { + return false; +} + bool TargetSubtargetInfo::enableMachineScheduler() const { return false; } Modified: user/markj/netdump/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp ============================================================================== --- user/markj/netdump/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp Mon Feb 5 16:00:30 2018 (r328886) @@ -3756,36 +3756,45 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) co // FIXME: This isn't safe because the addressing mode doesn't work // correctly if vaddr is negative. // - // FIXME: Handle v_add_u32 and VOP3 form. Also don't rely on immediate - // being in src0. - // // FIXME: Should probably be done somewhere else, maybe SIFoldOperands. // // See if we can extract an immediate offset by recognizing one of these: // V_ADD_I32_e32 dst, imm, src1 // V_ADD_I32_e32 dst, (S_MOV_B32 imm), src1 // V_ADD will be removed by "Remove dead machine instructions". - if (Add && Add->getOpcode() == AMDGPU::V_ADD_I32_e32) { - const MachineOperand *Src = - getNamedOperand(*Add, AMDGPU::OpName::src0); + if (Add && + (Add->getOpcode() == AMDGPU::V_ADD_I32_e32 || + Add->getOpcode() == AMDGPU::V_ADD_U32_e64)) { + static const unsigned SrcNames[2] = { + AMDGPU::OpName::src0, + AMDGPU::OpName::src1, + }; - if (Src->isReg()) { - auto Mov = MRI.getUniqueVRegDef(Src->getReg()); - if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32) - Src = &Mov->getOperand(1); - } + // Find a literal offset in one of source operands. + for (int i = 0; i < 2; i++) { + const MachineOperand *Src = + getNamedOperand(*Add, SrcNames[i]); - if (Src) { - if (Src->isImm()) - Offset = Src->getImm(); - else if (Src->isCImm()) - Offset = Src->getCImm()->getZExtValue(); - } + if (Src->isReg()) { + auto Mov = MRI.getUniqueVRegDef(Src->getReg()); + if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32) + Src = &Mov->getOperand(1); + } - if (Offset && isLegalMUBUFImmOffset(Offset)) - VAddr = getNamedOperand(*Add, AMDGPU::OpName::src1); - else + if (Src) { + if (Src->isImm()) + Offset = Src->getImm(); + else if (Src->isCImm()) + Offset = Src->getCImm()->getZExtValue(); + } + + if (Offset && isLegalMUBUFImmOffset(Offset)) { + VAddr = getNamedOperand(*Add, SrcNames[!i]); + break; + } + Offset = 0; + } } BuildMI(*MBB, Inst, Inst.getDebugLoc(), Modified: user/markj/netdump/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp ============================================================================== --- user/markj/netdump/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp Mon Feb 5 16:00:30 2018 (r328886) @@ -141,3 +141,16 @@ void Thumb1InstrInfo::expandLoadStackGuard( else expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_abs, ARM::tLDRi); } + +bool Thumb1InstrInfo::canCopyGluedNodeDuringSchedule(SDNode *N) const { + // In Thumb1 the scheduler may need to schedule a cross-copy between GPRS and CPSR + // but this is not always possible there, so allow the Scheduler to clone tADCS and tSBCS + // even if they have glue. + // FIXME. Actually implement the cross-copy where it is possible (post v6) + // because these copies entail more spilling. + unsigned Opcode = N->getMachineOpcode(); + if (Opcode == ARM::tADCS || Opcode == ARM::tSBCS) + return true; + + return false; +} Modified: user/markj/netdump/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h ============================================================================== --- user/markj/netdump/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h Mon Feb 5 16:00:30 2018 (r328886) @@ -53,6 +53,7 @@ class Thumb1InstrInfo : public ARMBaseInstrInfo { (pub const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; + bool canCopyGluedNodeDuringSchedule(SDNode *N) const override; private: void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override; }; Modified: user/markj/netdump/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp ============================================================================== --- user/markj/netdump/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp Mon Feb 5 16:00:30 2018 (r328886) @@ -3507,10 +3507,9 @@ MipsTargetLowering::CanLowerReturn(CallingConv::ID Cal bool MipsTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { - if (Subtarget.hasMips3() && Subtarget.useSoftFloat()) { - if (Type == MVT::i32) + if ((ABI.IsN32() || ABI.IsN64()) && Type == MVT::i32) return true; - } + return IsSigned; } Modified: user/markj/netdump/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp ============================================================================== --- user/markj/netdump/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp Mon Feb 5 16:00:30 2018 (r328886) @@ -136,6 +136,13 @@ IsGlobalInSmallSectionImpl(const GlobalObject *GO, return false; Type *Ty = GVA->getValueType(); + + // It is possible that the type of the global is unsized, i.e. a declaration + // of a extern struct. In this case don't presume it is in the small data + // section. This happens e.g. when building the FreeBSD kernel. + if (!Ty->isSized()) + return false; + return IsInSmallSection( GVA->getParent()->getDataLayout().getTypeAllocSize(Ty)); } Modified: user/markj/netdump/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp ============================================================================== --- user/markj/netdump/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp Mon Feb 5 16:00:30 2018 (r328886) @@ -88,10 +88,11 @@ void SparcFrameLowering::emitPrologue(MachineFunction assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineFrameInfo &MFI = MF.getFrameInfo(); + const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>(); const SparcInstrInfo &TII = - *static_cast<const SparcInstrInfo *>(MF.getSubtarget().getInstrInfo()); + *static_cast<const SparcInstrInfo *>(Subtarget.getInstrInfo()); const SparcRegisterInfo &RegInfo = - *static_cast<const SparcRegisterInfo *>(MF.getSubtarget().getRegisterInfo()); + *static_cast<const SparcRegisterInfo *>(Subtarget.getRegisterInfo()); MachineBasicBlock::iterator MBBI = MBB.begin(); // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. @@ -141,7 +142,7 @@ void SparcFrameLowering::emitPrologue(MachineFunction // Adds the SPARC subtarget-specific spill area to the stack // size. Also ensures target-required alignment. - NumBytes = MF.getSubtarget<SparcSubtarget>().getAdjustedFrameSize(NumBytes); + NumBytes = Subtarget.getAdjustedFrameSize(NumBytes); // Finally, ensure that the size is sufficiently aligned for the // data on the stack. @@ -176,9 +177,27 @@ void SparcFrameLowering::emitPrologue(MachineFunction .addCFIIndex(CFIIndex); if (NeedsStackRealignment) { - // andn %o6, MaxAlign-1, %o6 + int64_t Bias = Subtarget.getStackPointerBias(); + unsigned regUnbiased; + if (Bias) { + // This clobbers G1 which we always know is available here. + regUnbiased = SP::G1; + // add %o6, BIAS, %g1 + BuildMI(MBB, MBBI, dl, TII.get(SP::ADDri), regUnbiased) + .addReg(SP::O6).addImm(Bias); + } else + regUnbiased = SP::O6; + + // andn %regUnbiased, MaxAlign-1, %regUnbiased int MaxAlign = MFI.getMaxAlignment(); - BuildMI(MBB, MBBI, dl, TII.get(SP::ANDNri), SP::O6).addReg(SP::O6).addImm(MaxAlign - 1); + BuildMI(MBB, MBBI, dl, TII.get(SP::ANDNri), regUnbiased) + .addReg(regUnbiased).addImm(MaxAlign - 1); + + if (Bias) { + // add %g1, -BIAS, %o6 + BuildMI(MBB, MBBI, dl, TII.get(SP::ADDri), SP::O6) + .addReg(regUnbiased).addImm(-Bias); + } } } Modified: user/markj/netdump/contrib/llvm/lib/Target/X86/X86.h ============================================================================== --- user/markj/netdump/contrib/llvm/lib/Target/X86/X86.h Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/lib/Target/X86/X86.h Mon Feb 5 16:00:30 2018 (r328886) @@ -22,6 +22,7 @@ namespace llvm { class FunctionPass; class ImmutablePass; class InstructionSelector; +class ModulePass; class PassRegistry; class X86RegisterBankInfo; class X86Subtarget; @@ -101,6 +102,9 @@ void initializeFixupBWInstPassPass(PassRegistry &); /// This pass replaces EVEX encoded of AVX-512 instructiosn by VEX /// encoding when possible in order to reduce code size. FunctionPass *createX86EvexToVexInsts(); + +/// This pass creates the thunks for the retpoline feature. +FunctionPass *createX86RetpolineThunksPass(); InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM, X86Subtarget &, Modified: user/markj/netdump/contrib/llvm/lib/Target/X86/X86.td ============================================================================== --- user/markj/netdump/contrib/llvm/lib/Target/X86/X86.td Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/lib/Target/X86/X86.td Mon Feb 5 16:00:30 2018 (r328886) @@ -329,6 +329,27 @@ def FeatureHasFastGather : SubtargetFeature<"fast-gather", "HasFastGather", "true", "Indicates if gather is reasonably fast.">; +// Enable mitigation of some aspects of speculative execution related +// vulnerabilities by removing speculatable indirect branches. This disables +// jump-table formation, rewrites explicit `indirectbr` instructions into +// `switch` instructions, and uses a special construct called a "retpoline" to +// prevent speculation of the remaining indirect branches (indirect calls and +// tail calls). +def FeatureRetpoline + : SubtargetFeature<"retpoline", "UseRetpoline", "true", + "Remove speculation of indirect branches from the " + "generated code, either by avoiding them entirely or " + "lowering them with a speculation blocking construct.">; + +// Rely on external thunks for the emitted retpoline calls. This allows users +// to provide their own custom thunk definitions in highly specialized +// environments such as a kernel that does boot-time hot patching. +def FeatureRetpolineExternalThunk + : SubtargetFeature< + "retpoline-external-thunk", "UseRetpolineExternalThunk", "true", + "Enable retpoline, but with an externally provided thunk.", + [FeatureRetpoline]>; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// Modified: user/markj/netdump/contrib/llvm/lib/Target/X86/X86AsmPrinter.h ============================================================================== --- user/markj/netdump/contrib/llvm/lib/Target/X86/X86AsmPrinter.h Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/lib/Target/X86/X86AsmPrinter.h Mon Feb 5 16:00:30 2018 (r328886) @@ -32,6 +32,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public A FaultMaps FM; std::unique_ptr<MCCodeEmitter> CodeEmitter; bool EmitFPOData = false; + bool NeedsRetpoline = false; // This utility class tracks the length of a stackmap instruction's 'shadow'. // It is used by the X86AsmPrinter to ensure that the stackmap shadow Modified: user/markj/netdump/contrib/llvm/lib/Target/X86/X86FastISel.cpp ============================================================================== --- user/markj/netdump/contrib/llvm/lib/Target/X86/X86FastISel.cpp Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/lib/Target/X86/X86FastISel.cpp Mon Feb 5 16:00:30 2018 (r328886) @@ -3172,6 +3172,10 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers"))) return false; + // Functions using retpoline should use SDISel for calls. + if (Subtarget->useRetpoline()) + return false; + // Handle only C, fastcc, and webkit_js calling conventions for now. switch (CC) { default: return false; Modified: user/markj/netdump/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp ============================================================================== --- user/markj/netdump/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp Mon Feb 5 16:00:30 2018 (r328886) @@ -741,6 +741,11 @@ void X86FrameLowering::emitStackProbeCall(MachineFunct bool InProlog) const { bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large; + // FIXME: Add retpoline support and remove this. + if (Is64Bit && IsLargeCodeModel && STI.useRetpoline()) + report_fatal_error("Emitting stack probe calls on 64-bit with the large " + "code model and retpoline not yet implemented."); + unsigned CallOp; if (Is64Bit) CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32; @@ -2345,6 +2350,10 @@ void X86FrameLowering::adjustForSegmentedStacks( // This solution is not perfect, as it assumes that the .rodata section // is laid out within 2^31 bytes of each function body, but this seems // to be sufficient for JIT. + // FIXME: Add retpoline support and remove the error here.. + if (STI.useRetpoline()) + report_fatal_error("Emitting morestack calls on 64-bit with the large " + "code model and retpoline not yet implemented."); BuildMI(allocMBB, DL, TII.get(X86::CALL64m)) .addReg(X86::RIP) .addImm(0) Modified: user/markj/netdump/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp ============================================================================== --- user/markj/netdump/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Mon Feb 5 16:00:30 2018 (r328886) @@ -629,11 +629,11 @@ void X86DAGToDAGISel::PreprocessISelDAG() { SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. if (OptLevel != CodeGenOpt::None && - // Only does this when target favors doesn't favor register indirect - // call. + // Only do this when the target can fold the load into the call or + // jmp. + !Subtarget->useRetpoline() && ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) || (N->getOpcode() == X86ISD::TC_RETURN && - // Only does this if load can be folded into TC_RETURN. (Subtarget->is64Bit() || !getTargetMachine().isPositionIndependent())))) { /// Also try moving call address load from outside callseq_start to just Modified: user/markj/netdump/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp ============================================================================== --- user/markj/netdump/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp Mon Feb 5 16:00:30 2018 (r328886) @@ -25767,6 +25767,15 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallV return isShuffleMaskLegal(Mask, VT); } +bool X86TargetLowering::areJTsAllowed(const Function *Fn) const { + // If the subtarget is using retpolines, we need to not generate jump tables. + if (Subtarget.useRetpoline()) + return false; + + // Otherwise, fallback on the generic logic. + return TargetLowering::areJTsAllowed(Fn); +} + //===----------------------------------------------------------------------===// // X86 Scheduler Hooks //===----------------------------------------------------------------------===// @@ -27069,7 +27078,116 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI return BB; } +static unsigned getOpcodeForRetpoline(unsigned RPOpc) { + switch (RPOpc) { + case X86::RETPOLINE_CALL32: + return X86::CALLpcrel32; + case X86::RETPOLINE_CALL64: + return X86::CALL64pcrel32; + case X86::RETPOLINE_TCRETURN32: + return X86::TCRETURNdi; + case X86::RETPOLINE_TCRETURN64: + return X86::TCRETURNdi64; + } + llvm_unreachable("not retpoline opcode"); +} + +static const char *getRetpolineSymbol(const X86Subtarget &Subtarget, + unsigned Reg) { + switch (Reg) { + case 0: + assert(!Subtarget.is64Bit() && "R11 should always be available on x64"); + return Subtarget.useRetpolineExternalThunk() + ? "__llvm_external_retpoline_push" + : "__llvm_retpoline_push"; + case X86::EAX: + return Subtarget.useRetpolineExternalThunk() + ? "__llvm_external_retpoline_eax" + : "__llvm_retpoline_eax"; + case X86::ECX: + return Subtarget.useRetpolineExternalThunk() + ? "__llvm_external_retpoline_ecx" + : "__llvm_retpoline_ecx"; + case X86::EDX: + return Subtarget.useRetpolineExternalThunk() + ? "__llvm_external_retpoline_edx" + : "__llvm_retpoline_edx"; + case X86::R11: + return Subtarget.useRetpolineExternalThunk() + ? "__llvm_external_retpoline_r11" + : "__llvm_retpoline_r11"; + } + llvm_unreachable("unexpected reg for retpoline"); +} + MachineBasicBlock * +X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI, + MachineBasicBlock *BB) const { + // Copy the virtual register into the R11 physical register and + // call the retpoline thunk. + DebugLoc DL = MI.getDebugLoc(); + const X86InstrInfo *TII = Subtarget.getInstrInfo(); + unsigned CalleeVReg = MI.getOperand(0).getReg(); + unsigned Opc = getOpcodeForRetpoline(MI.getOpcode()); + + // Find an available scratch register to hold the callee. On 64-bit, we can + // just use R11, but we scan for uses anyway to ensure we don't generate + // incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't + // already a register use operand to the call to hold the callee. If none + // are available, push the callee instead. This is less efficient, but is + // necessary for functions using 3 regparms. Such function calls are + // (currently) not eligible for tail call optimization, because there is no + // scratch register available to hold the address of the callee. + SmallVector<unsigned, 3> AvailableRegs; + if (Subtarget.is64Bit()) + AvailableRegs.push_back(X86::R11); + else + AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX}); + + // Zero out any registers that are already used. + for (const auto &MO : MI.operands()) { + if (MO.isReg() && MO.isUse()) + for (unsigned &Reg : AvailableRegs) + if (Reg == MO.getReg()) + Reg = 0; + } + + // Choose the first remaining non-zero available register. + unsigned AvailableReg = 0; + for (unsigned MaybeReg : AvailableRegs) { + if (MaybeReg) { + AvailableReg = MaybeReg; + break; + } + } + + const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg); + + if (AvailableReg == 0) { + // No register available. Use PUSH. This must not be a tailcall, and this + // must not be x64. + if (Subtarget.is64Bit()) + report_fatal_error( + "Cannot make an indirect call on x86-64 using both retpoline and a " + "calling convention that preservers r11"); + if (Opc != X86::CALLpcrel32) + report_fatal_error("Cannot make an indirect tail call on x86 using " + "retpoline without a preserved register"); + BuildMI(*BB, MI, DL, TII->get(X86::PUSH32r)).addReg(CalleeVReg); + MI.getOperand(0).ChangeToES(Symbol); + MI.setDesc(TII->get(Opc)); + } else { + BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg) + .addReg(CalleeVReg); + MI.getOperand(0).ChangeToES(Symbol); + MI.setDesc(TII->get(Opc)); + MachineInstrBuilder(*BB->getParent(), &MI) + .addReg(AvailableReg, RegState::Implicit | RegState::Kill); + } + return BB; +} + +MachineBasicBlock * X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); @@ -27584,6 +27702,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(Machine case X86::TLS_base_addr32: case X86::TLS_base_addr64: return EmitLoweredTLSAddr(MI, BB); + case X86::RETPOLINE_CALL32: + case X86::RETPOLINE_CALL64: + case X86::RETPOLINE_TCRETURN32: + case X86::RETPOLINE_TCRETURN64: + return EmitLoweredRetpoline(MI, BB); case X86::CATCHRET: return EmitLoweredCatchRet(MI, BB); case X86::CATCHPAD: Modified: user/markj/netdump/contrib/llvm/lib/Target/X86/X86ISelLowering.h ============================================================================== --- user/markj/netdump/contrib/llvm/lib/Target/X86/X86ISelLowering.h Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/lib/Target/X86/X86ISelLowering.h Mon Feb 5 16:00:30 2018 (r328886) @@ -982,6 +982,9 @@ namespace llvm { bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask, EVT VT) const override; + /// Returns true if lowering to a jump table is allowed. + bool areJTsAllowed(const Function *Fn) const override; + /// If true, then instruction selection should /// seek to shrink the FP constant of the specified type to a smaller type /// in order to save space and / or reduce runtime. @@ -1293,6 +1296,9 @@ namespace llvm { MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI, MachineBasicBlock *BB) const; + + MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI, + MachineBasicBlock *BB) const; MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const; Modified: user/markj/netdump/contrib/llvm/lib/Target/X86/X86InstrCompiler.td ============================================================================== --- user/markj/netdump/contrib/llvm/lib/Target/X86/X86InstrCompiler.td Mon Feb 5 15:14:01 2018 (r328885) +++ user/markj/netdump/contrib/llvm/lib/Target/X86/X86InstrCompiler.td Mon Feb 5 16:00:30 2018 (r328886) @@ -1146,14 +1146,14 @@ def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>, - Requires<[Not64BitMode]>; + Requires<[Not64BitMode, NotUseRetpoline]>; // FIXME: This is disabled for 32-bit PIC mode because the global base // register which is part of the address mode may be assigned a // callee-saved register. def : Pat<(X86tcret (load addr:$dst), imm:$off), (TCRETURNmi addr:$dst, imm:$off)>, - Requires<[Not64BitMode, IsNotPIC]>; + Requires<[Not64BitMode, IsNotPIC, NotUseRetpoline]>; def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off), (TCRETURNdi tglobaladdr:$dst, imm:$off)>, @@ -1165,13 +1165,21 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off) *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201802051600.w15G0UmG015434>