Skip site navigation (1) Skip section navigation (2)
Date:      Fri, 8 Jan 2021 17:39:05 GMT
From:      Dimitry Andric <dim@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: 15b3e1e51cb5 - stable/11 - MFC llvm fixes for building ceph on powerpc
Message-ID:  <202101081739.108Hd5L9080528@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch stable/11 has been updated by dim:

URL: https://cgit.FreeBSD.org/src/commit/?id=15b3e1e51cb59f69e3971e6c4ad010d27325a2d9

commit 15b3e1e51cb59f69e3971e6c4ad010d27325a2d9
Author:     Dimitry Andric <dim@FreeBSD.org>
AuthorDate: 2021-01-08 17:37:25 +0000
Commit:     Dimitry Andric <dim@FreeBSD.org>
CommitDate: 2021-01-08 17:38:04 +0000

    MFC llvm fixes for building ceph on powerpc
    
    Merge commit 8f5e3c74b from llvm git (by Teresa Johnson):
    
      [PowerPC] Fix compile time issue in recursive CTR analysis code
    
      Summary:
      Avoid re-examining operands on recursive walk looking for CTR.
      This was causing huge compile time after some earlier optimization
      created a large expression.
    
      The start of the expression (created by IndVarSimplify) looked like:
    
      %469 = lshr i64 trunc (i128 xor (i128 udiv (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zex
 t (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 ptrtoint (i8 @_ZN4absl13hash_internal13CityHashState5kSeedE to i64), i64 120) to i128), i128 8192506886679785011), i128 64), i128 mul (i128 zext (i64 add (i64 ptrtoint (i8 @_ZN4absl13hash_internal13CityHashState5kSeedE to i64), i64 120) to i128), i128 8192506886679785011)) to i64), i64 45) to i128)
 , i128 8192506886679785011), i128 64), i128
 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 ptrtoint (i8 @_ZN4absl13hash_internal13CityHashState5kSeedE to i64), i64 120) to i128), i128 8192506886679785011), i128 64), i128 mul (i128 zext (i64 add (i64 ptrtoint (i8 @_ZN4absl13hash_internal13CityHashState5kSeedE to i64), i64 120) to i128), i128 8192506886679785011)) to i64), i64 45) to i128), ...
    
      with the _ZN4absl13hash_internal13CityHashState5kSeedE referenced many times.
    
      Reviewers: hfinkel
    
      Subscribers: nemanjai, hiraditya, kbarton, jsji, shchenz, llvm-commits
    
      Tags: #llvm
    
      Differential Revision: https://reviews.llvm.org/D75790
    
    Merge commit 4f568fbd2 from llvm git (by Nemanja Ivanovic):
    
      [PowerPC] Do not emit HW loop when TLS var accessed in PHI of loop exit
    
      If any PHI nodes in loop exit blocks have incoming values from the
      loop that are accesses of TLS variables with local dynamic or general
      dynamic TLS model, the address will be computed inside the loop. Since
      this includes a call to __tls_get_addr, this will in turn cause the
      CTR loops verifier to complain.
      Disable CTR loops in such cases.
    
      Fixes: https://bugs.llvm.org/show_bug.cgi?id=48527
    
    This should fix building ceph 12.2.12 on powerpc64, powerpc, powerpcspe
    and powerpc64le.
    
    Requested by:   pkubaj
    
    (cherry picked from commit 543478be758fdfbf050eca5b58b7c74ba51b9175)
---
 .../lib/Target/PowerPC/PPCTargetTransformInfo.cpp  | 72 ++++++++++++++--------
 .../lib/Target/PowerPC/PPCTargetTransformInfo.h    |  3 +-
 2 files changed, 48 insertions(+), 27 deletions(-)

diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index e05699cc95ec..db591fad39e7 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -213,8 +213,31 @@ unsigned PPCTTIImpl::getUserCost(const User *U,
   return BaseT::getUserCost(U, Operands);
 }
 
-bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
-                             TargetLibraryInfo *LibInfo) {
+// Determining the address of a TLS variable results in a function call in
+// certain TLS models.
+static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM,
+                           SmallPtrSetImpl<const Value *> &Visited) {
+  // No need to traverse again if we already checked this operand.
+  if (!Visited.insert(MemAddr).second)
+    return false;
+  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
+  if (!GV) {
+    // Recurse to check for constants that refer to TLS global variables.
+    if (const auto *CV = dyn_cast<Constant>(MemAddr))
+      for (const auto &CO : CV->operands())
+        if (memAddrUsesCTR(CO, TM, Visited))
+          return true;
+    return false;
+  }
+
+  if (!GV->isThreadLocal())
+    return false;
+  TLSModel::Model Model = TM.getTLSModel(GV);
+  return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
+}
+
+bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
+                             SmallPtrSetImpl<const Value *> &Visited) {
   const PPCTargetMachine &TM = ST->getTargetMachine();
 
   // Loop through the inline asm constraints and look for something that
@@ -231,28 +254,6 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
     return false;
   };
 
-  // Determining the address of a TLS variable results in a function call in
-  // certain TLS models.
-  std::function<bool(const Value*)> memAddrUsesCTR =
-    [&memAddrUsesCTR, &TM](const Value *MemAddr) -> bool {
-    const auto *GV = dyn_cast<GlobalValue>(MemAddr);
-    if (!GV) {
-      // Recurse to check for constants that refer to TLS global variables.
-      if (const auto *CV = dyn_cast<Constant>(MemAddr))
-        for (const auto &CO : CV->operands())
-          if (memAddrUsesCTR(CO))
-            return true;
-
-      return false;
-    }
-
-    if (!GV->isThreadLocal())
-      return false;
-    TLSModel::Model Model = TM.getTLSModel(GV);
-    return Model == TLSModel::GeneralDynamic ||
-      Model == TLSModel::LocalDynamic;
-  };
-
   auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) {
     if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
       return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
@@ -468,7 +469,7 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
     }
 
     for (Value *Operand : J->operands())
-      if (memAddrUsesCTR(Operand))
+      if (memAddrUsesCTR(Operand, TM, Visited))
         return true;
   }
 
@@ -498,9 +499,10 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
 
   // We don't want to spill/restore the counter register, and so we don't
   // want to use the counter register if the loop contains calls.
+  SmallPtrSet<const Value *, 4> Visited;
   for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
        I != IE; ++I)
-    if (mightUseCTR(*I, LibInfo))
+    if (mightUseCTR(*I, LibInfo, Visited))
       return false;
 
   SmallVector<BasicBlock*, 4> ExitingBlocks;
@@ -527,6 +529,24 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
     }
   }
 
+  // If an exit block has a PHI that accesses a TLS variable as one of the
+  // incoming values from the loop, we cannot produce a CTR loop because the
+  // address for that value will be computed in the loop.
+  SmallVector<BasicBlock *, 4> ExitBlocks;
+  L->getExitBlocks(ExitBlocks);
+  for (auto &BB : ExitBlocks) {
+    for (auto &PHI : BB->phis()) {
+      for (int Idx = 0, EndIdx = PHI.getNumIncomingValues(); Idx < EndIdx;
+           Idx++) {
+        const BasicBlock *IncomingBB = PHI.getIncomingBlock(Idx);
+        const Value *IncomingValue = PHI.getIncomingValue(Idx);
+        if (L->contains(IncomingBB) &&
+            memAddrUsesCTR(IncomingValue, TM, Visited))
+          return false;
+      }
+    }
+  }
+
   LLVMContext &C = L->getHeader()->getContext();
   HWLoopInfo.CountType = TM.isPPC64() ?
     Type::getInt64Ty(C) : Type::getInt32Ty(C);
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 35388d14f606..a709282bb76c 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -33,7 +33,8 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
 
   const PPCSubtarget *getST() const { return ST; }
   const PPCTargetLowering *getTLI() const { return TLI; }
-  bool mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo);
+  bool mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
+                   SmallPtrSetImpl<const Value *> &Visited);
 
 public:
   explicit PPCTTIImpl(const PPCTargetMachine *TM, const Function &F)



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202101081739.108Hd5L9080528>