LLVM  6.0.0svn
LoopIdiomRecognize.cpp
Go to the documentation of this file.
1 //===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass implements an idiom recognizer that transforms simple loops into a
11 // non-loop form. In cases that this kicks in, it can be a significant
12 // performance win.
13 //
14 // If compiling for code size we avoid idiom recognition if the resulting
15 // code could be larger than the code for the original loop. One way this could
16 // happen is if the loop is not removable after idiom recognition due to the
17 // presence of non-idiom instructions. The initial implementation of the
18 // heuristics applies to idioms in multi-block loops.
19 //
20 //===----------------------------------------------------------------------===//
21 //
22 // TODO List:
23 //
24 // Future loop memory idioms to recognize:
25 // memcmp, memmove, strlen, etc.
26 // Future floating point idioms to recognize in -ffast-math mode:
27 // fpowi
28 // Future integer operation idioms to recognize:
29 // ctpop, ctlz, cttz
30 //
31 // Beware that isel's default lowering for ctpop is highly inefficient for
32 // i64 and larger types when i64 is legal and the value has few bits set. It
33 // would be good to enhance isel to emit a loop for ctpop in this case.
34 //
35 // This could recognize common matrix multiplies and dot product idioms and
36 // replace them with calls to BLAS (if linked in??).
37 //
38 //===----------------------------------------------------------------------===//
39 
41 #include "llvm/ADT/MapVector.h"
42 #include "llvm/ADT/SetVector.h"
43 #include "llvm/ADT/Statistic.h"
48 #include "llvm/Analysis/LoopPass.h"
55 #include "llvm/IR/DataLayout.h"
56 #include "llvm/IR/Dominators.h"
57 #include "llvm/IR/IRBuilder.h"
58 #include "llvm/IR/IntrinsicInst.h"
59 #include "llvm/IR/Module.h"
60 #include "llvm/Support/Debug.h"
62 #include "llvm/Transforms/Scalar.h"
67 using namespace llvm;
68 
69 #define DEBUG_TYPE "loop-idiom"
70 
71 STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
72 STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
73 
75  "use-lir-code-size-heurs",
76  cl::desc("Use loop idiom recognition code size heuristics when compiling"
77  "with -Os/-Oz"),
78  cl::init(true), cl::Hidden);
79 
80 namespace {
81 
82 class LoopIdiomRecognize {
83  Loop *CurLoop;
84  AliasAnalysis *AA;
85  DominatorTree *DT;
86  LoopInfo *LI;
87  ScalarEvolution *SE;
88  TargetLibraryInfo *TLI;
89  const TargetTransformInfo *TTI;
90  const DataLayout *DL;
91  bool ApplyCodeSizeHeuristics;
92 
93 public:
94  explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
95  LoopInfo *LI, ScalarEvolution *SE,
96  TargetLibraryInfo *TLI,
97  const TargetTransformInfo *TTI,
98  const DataLayout *DL)
99  : CurLoop(nullptr), AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI),
100  DL(DL) {}
101 
102  bool runOnLoop(Loop *L);
103 
104 private:
105  typedef SmallVector<StoreInst *, 8> StoreList;
106  typedef MapVector<Value *, StoreList> StoreListMap;
107  StoreListMap StoreRefsForMemset;
108  StoreListMap StoreRefsForMemsetPattern;
109  StoreList StoreRefsForMemcpy;
110  bool HasMemset;
111  bool HasMemsetPattern;
112  bool HasMemcpy;
113  /// Return code for isLegalStore()
114  enum LegalStoreKind {
115  None = 0,
116  Memset,
117  MemsetPattern,
118  Memcpy,
119  UnorderedAtomicMemcpy,
120  DontUse // Dummy retval never to be used. Allows catching errors in retval
121  // handling.
122  };
123 
124  /// \name Countable Loop Idiom Handling
125  /// @{
126 
127  bool runOnCountableLoop();
128  bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
129  SmallVectorImpl<BasicBlock *> &ExitBlocks);
130 
131  void collectStores(BasicBlock *BB);
132  LegalStoreKind isLegalStore(StoreInst *SI);
133  bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
134  bool ForMemset);
135  bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
136 
137  bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
138  unsigned StoreAlignment, Value *StoredVal,
139  Instruction *TheStore,
141  const SCEVAddRecExpr *Ev, const SCEV *BECount,
142  bool NegStride, bool IsLoopMemset = false);
143  bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
144  bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
145  bool IsLoopMemset = false);
146 
147  /// @}
148  /// \name Noncountable Loop Idiom Handling
149  /// @{
150 
151  bool runOnNoncountableLoop();
152 
153  bool recognizePopcount();
154  void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
155  PHINode *CntPhi, Value *Var);
156  bool recognizeAndInsertCTLZ();
157  void transformLoopToCountable(BasicBlock *PreCondBB, Instruction *CntInst,
158  PHINode *CntPhi, Value *Var, const DebugLoc DL,
159  bool ZeroCheck, bool IsCntPhiUsedOutsideLoop);
160 
161  /// @}
162 };
163 
164 class LoopIdiomRecognizeLegacyPass : public LoopPass {
165 public:
166  static char ID;
167  explicit LoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
170  }
171 
172  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
173  if (skipLoop(L))
174  return false;
175 
176  AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
177  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
178  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
179  ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
180  TargetLibraryInfo *TLI =
181  &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
182  const TargetTransformInfo *TTI =
183  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
184  *L->getHeader()->getParent());
185  const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
186 
187  LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL);
188  return LIR.runOnLoop(L);
189  }
190 
191  /// This transformation requires natural loop information & requires that
192  /// loop preheaders be inserted into the CFG.
193  ///
194  void getAnalysisUsage(AnalysisUsage &AU) const override {
198  }
199 };
200 } // End anonymous namespace.
201 
204  LPMUpdater &) {
205  const auto *DL = &L.getHeader()->getModule()->getDataLayout();
206 
207  LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL);
208  if (!LIR.runOnLoop(&L))
209  return PreservedAnalyses::all();
210 
212 }
213 
215 INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom",
216  "Recognize loop idioms", false, false)
220 INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass, "loop-idiom",
221  "Recognize loop idioms", false, false)
222 
223 Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognizeLegacyPass(); }
224 
227  I->eraseFromParent();
228 }
229 
230 //===----------------------------------------------------------------------===//
231 //
232 // Implementation of LoopIdiomRecognize
233 //
234 //===----------------------------------------------------------------------===//
235 
236 bool LoopIdiomRecognize::runOnLoop(Loop *L) {
237  CurLoop = L;
238  // If the loop could not be converted to canonical form, it must have an
239  // indirectbr in it, just give up.
240  if (!L->getLoopPreheader())
241  return false;
242 
243  // Disable loop idiom recognition if the function's name is a common idiom.
245  if (Name == "memset" || Name == "memcpy")
246  return false;
247 
248  // Determine if code size heuristics need to be applied.
249  ApplyCodeSizeHeuristics =
251 
252  HasMemset = TLI->has(LibFunc_memset);
253  HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
254  HasMemcpy = TLI->has(LibFunc_memcpy);
255 
256  if (HasMemset || HasMemsetPattern || HasMemcpy)
258  return runOnCountableLoop();
259 
260  return runOnNoncountableLoop();
261 }
262 
263 bool LoopIdiomRecognize::runOnCountableLoop() {
264  const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
265  assert(!isa<SCEVCouldNotCompute>(BECount) &&
266  "runOnCountableLoop() called on a loop without a predictable"
267  "backedge-taken count");
268 
269  // If this loop executes exactly one time, then it should be peeled, not
270  // optimized by this pass.
271  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
272  if (BECst->getAPInt() == 0)
273  return false;
274 
275  SmallVector<BasicBlock *, 8> ExitBlocks;
276  CurLoop->getUniqueExitBlocks(ExitBlocks);
277 
278  DEBUG(dbgs() << "loop-idiom Scanning: F["
279  << CurLoop->getHeader()->getParent()->getName() << "] Loop %"
280  << CurLoop->getHeader()->getName() << "\n");
281 
282  bool MadeChange = false;
283 
284  // The following transforms hoist stores/memsets into the loop pre-header.
285  // Give up if the loop has instructions may throw.
286  LoopSafetyInfo SafetyInfo;
287  computeLoopSafetyInfo(&SafetyInfo, CurLoop);
288  if (SafetyInfo.MayThrow)
289  return MadeChange;
290 
291  // Scan all the blocks in the loop that are not in subloops.
292  for (auto *BB : CurLoop->getBlocks()) {
293  // Ignore blocks in subloops.
294  if (LI->getLoopFor(BB) != CurLoop)
295  continue;
296 
297  MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
298  }
299  return MadeChange;
300 }
301 
302 static unsigned getStoreSizeInBytes(StoreInst *SI, const DataLayout *DL) {
303  uint64_t SizeInBits = DL->getTypeSizeInBits(SI->getValueOperand()->getType());
304  assert(((SizeInBits & 7) || (SizeInBits >> 32) == 0) &&
305  "Don't overflow unsigned.");
306  return (unsigned)SizeInBits >> 3;
307 }
308 
309 static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
310  const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
311  return ConstStride->getAPInt();
312 }
313 
314 /// getMemSetPatternValue - If a strided store of the specified value is safe to
315 /// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
316 /// be passed in. Otherwise, return null.
317 ///
318 /// Note that we don't ever attempt to use memset_pattern8 or 4, because these
319 /// just replicate their input array and then pass on to memset_pattern16.
321  // If the value isn't a constant, we can't promote it to being in a constant
322  // array. We could theoretically do a store to an alloca or something, but
323  // that doesn't seem worthwhile.
324  Constant *C = dyn_cast<Constant>(V);
325  if (!C)
326  return nullptr;
327 
328  // Only handle simple values that are a power of two bytes in size.
329  uint64_t Size = DL->getTypeSizeInBits(V->getType());
330  if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
331  return nullptr;
332 
333  // Don't care enough about darwin/ppc to implement this.
334  if (DL->isBigEndian())
335  return nullptr;
336 
337  // Convert to size in bytes.
338  Size /= 8;
339 
340  // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
341  // if the top and bottom are the same (e.g. for vectors and large integers).
342  if (Size > 16)
343  return nullptr;
344 
345  // If the constant is exactly 16 bytes, just use it.
346  if (Size == 16)
347  return C;
348 
349  // Otherwise, we'll use an array of the constants.
350  unsigned ArraySize = 16 / Size;
351  ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
352  return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
353 }
354 
355 LoopIdiomRecognize::LegalStoreKind
356 LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
357 
358  // Don't touch volatile stores.
359  if (SI->isVolatile())
360  return LegalStoreKind::None;
361  // We only want simple or unordered-atomic stores.
362  if (!SI->isUnordered())
363  return LegalStoreKind::None;
364 
365  // Don't convert stores of non-integral pointer types to memsets (which stores
366  // integers).
367  if (DL->isNonIntegralPointerType(SI->getValueOperand()->getType()))
368  return LegalStoreKind::None;
369 
370  // Avoid merging nontemporal stores.
372  return LegalStoreKind::None;
373 
374  Value *StoredVal = SI->getValueOperand();
375  Value *StorePtr = SI->getPointerOperand();
376 
377  // Reject stores that are so large that they overflow an unsigned.
378  uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
379  if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
380  return LegalStoreKind::None;
381 
382  // See if the pointer expression is an AddRec like {base,+,1} on the current
383  // loop, which indicates a strided store. If we have something else, it's a
384  // random store we can't handle.
385  const SCEVAddRecExpr *StoreEv =
386  dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
387  if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
388  return LegalStoreKind::None;
389 
390  // Check to see if we have a constant stride.
391  if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
392  return LegalStoreKind::None;
393 
394  // See if the store can be turned into a memset.
395 
396  // If the stored value is a byte-wise value (like i32 -1), then it may be
397  // turned into a memset of i8 -1, assuming that all the consecutive bytes
398  // are stored. A store of i32 0x01020304 can never be turned into a memset,
399  // but it can be turned into memset_pattern if the target supports it.
400  Value *SplatValue = isBytewiseValue(StoredVal);
401  Constant *PatternValue = nullptr;
402 
403  // Note: memset and memset_pattern on unordered-atomic is yet not supported
404  bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple();
405 
406  // If we're allowed to form a memset, and the stored value would be
407  // acceptable for memset, use it.
408  if (!UnorderedAtomic && HasMemset && SplatValue &&
409  // Verify that the stored value is loop invariant. If not, we can't
410  // promote the memset.
411  CurLoop->isLoopInvariant(SplatValue)) {
412  // It looks like we can use SplatValue.
413  return LegalStoreKind::Memset;
414  } else if (!UnorderedAtomic && HasMemsetPattern &&
415  // Don't create memset_pattern16s with address spaces.
416  StorePtr->getType()->getPointerAddressSpace() == 0 &&
417  (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
418  // It looks like we can use PatternValue!
419  return LegalStoreKind::MemsetPattern;
420  }
421 
422  // Otherwise, see if the store can be turned into a memcpy.
423  if (HasMemcpy) {
424  // Check to see if the stride matches the size of the store. If so, then we
425  // know that every byte is touched in the loop.
426  APInt Stride = getStoreStride(StoreEv);
427  unsigned StoreSize = getStoreSizeInBytes(SI, DL);
428  if (StoreSize != Stride && StoreSize != -Stride)
429  return LegalStoreKind::None;
430 
431  // The store must be feeding a non-volatile load.
433 
434  // Only allow non-volatile loads
435  if (!LI || LI->isVolatile())
436  return LegalStoreKind::None;
437  // Only allow simple or unordered-atomic loads
438  if (!LI->isUnordered())
439  return LegalStoreKind::None;
440 
441  // See if the pointer expression is an AddRec like {base,+,1} on the current
442  // loop, which indicates a strided load. If we have something else, it's a
443  // random load we can't handle.
444  const SCEVAddRecExpr *LoadEv =
446  if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
447  return LegalStoreKind::None;
448 
449  // The store and load must share the same stride.
450  if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
451  return LegalStoreKind::None;
452 
453  // Success. This store can be converted into a memcpy.
454  UnorderedAtomic = UnorderedAtomic || LI->isAtomic();
455  return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
456  : LegalStoreKind::Memcpy;
457  }
458  // This store can't be transformed into a memset/memcpy.
459  return LegalStoreKind::None;
460 }
461 
462 void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
463  StoreRefsForMemset.clear();
464  StoreRefsForMemsetPattern.clear();
465  StoreRefsForMemcpy.clear();
466  for (Instruction &I : *BB) {
467  StoreInst *SI = dyn_cast<StoreInst>(&I);
468  if (!SI)
469  continue;
470 
471  // Make sure this is a strided store with a constant stride.
472  switch (isLegalStore(SI)) {
474  // Nothing to do
475  break;
476  case LegalStoreKind::Memset: {
477  // Find the base pointer.
478  Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
479  StoreRefsForMemset[Ptr].push_back(SI);
480  } break;
481  case LegalStoreKind::MemsetPattern: {
482  // Find the base pointer.
483  Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
484  StoreRefsForMemsetPattern[Ptr].push_back(SI);
485  } break;
486  case LegalStoreKind::Memcpy:
487  case LegalStoreKind::UnorderedAtomicMemcpy:
488  StoreRefsForMemcpy.push_back(SI);
489  break;
490  default:
491  assert(false && "unhandled return value");
492  break;
493  }
494  }
495 }
496 
497 /// runOnLoopBlock - Process the specified block, which lives in a counted loop
498 /// with the specified backedge count. This block is known to be in the current
499 /// loop and not in any subloops.
500 bool LoopIdiomRecognize::runOnLoopBlock(
501  BasicBlock *BB, const SCEV *BECount,
502  SmallVectorImpl<BasicBlock *> &ExitBlocks) {
503  // We can only promote stores in this block if they are unconditionally
504  // executed in the loop. For a block to be unconditionally executed, it has
505  // to dominate all the exit blocks of the loop. Verify this now.
506  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
507  if (!DT->dominates(BB, ExitBlocks[i]))
508  return false;
509 
510  bool MadeChange = false;
511  // Look for store instructions, which may be optimized to memset/memcpy.
512  collectStores(BB);
513 
514  // Look for a single store or sets of stores with a common base, which can be
515  // optimized into a memset (memset_pattern). The latter most commonly happens
516  // with structs and handunrolled loops.
517  for (auto &SL : StoreRefsForMemset)
518  MadeChange |= processLoopStores(SL.second, BECount, true);
519 
520  for (auto &SL : StoreRefsForMemsetPattern)
521  MadeChange |= processLoopStores(SL.second, BECount, false);
522 
523  // Optimize the store into a memcpy, if it feeds an similarly strided load.
524  for (auto &SI : StoreRefsForMemcpy)
525  MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
526 
527  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
528  Instruction *Inst = &*I++;
529  // Look for memset instructions, which may be optimized to a larger memset.
530  if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
531  WeakTrackingVH InstPtr(&*I);
532  if (!processLoopMemSet(MSI, BECount))
533  continue;
534  MadeChange = true;
535 
536  // If processing the memset invalidated our iterator, start over from the
537  // top of the block.
538  if (!InstPtr)
539  I = BB->begin();
540  continue;
541  }
542  }
543 
544  return MadeChange;
545 }
546 
547 /// processLoopStores - See if this store(s) can be promoted to a memset.
548 bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
549  const SCEV *BECount,
550  bool ForMemset) {
551  // Try to find consecutive stores that can be transformed into memsets.
552  SetVector<StoreInst *> Heads, Tails;
554 
555  // Do a quadratic search on all of the given stores and find
556  // all of the pairs of stores that follow each other.
557  SmallVector<unsigned, 16> IndexQueue;
558  for (unsigned i = 0, e = SL.size(); i < e; ++i) {
559  assert(SL[i]->isSimple() && "Expected only non-volatile stores.");
560 
561  Value *FirstStoredVal = SL[i]->getValueOperand();
562  Value *FirstStorePtr = SL[i]->getPointerOperand();
563  const SCEVAddRecExpr *FirstStoreEv =
564  cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
565  APInt FirstStride = getStoreStride(FirstStoreEv);
566  unsigned FirstStoreSize = getStoreSizeInBytes(SL[i], DL);
567 
568  // See if we can optimize just this store in isolation.
569  if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
570  Heads.insert(SL[i]);
571  continue;
572  }
573 
574  Value *FirstSplatValue = nullptr;
575  Constant *FirstPatternValue = nullptr;
576 
577  if (ForMemset)
578  FirstSplatValue = isBytewiseValue(FirstStoredVal);
579  else
580  FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
581 
582  assert((FirstSplatValue || FirstPatternValue) &&
583  "Expected either splat value or pattern value.");
584 
585  IndexQueue.clear();
586  // If a store has multiple consecutive store candidates, search Stores
587  // array according to the sequence: from i+1 to e, then from i-1 to 0.
588  // This is because usually pairing with immediate succeeding or preceding
589  // candidate create the best chance to find memset opportunity.
590  unsigned j = 0;
591  for (j = i + 1; j < e; ++j)
592  IndexQueue.push_back(j);
593  for (j = i; j > 0; --j)
594  IndexQueue.push_back(j - 1);
595 
596  for (auto &k : IndexQueue) {
597  assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
598  Value *SecondStorePtr = SL[k]->getPointerOperand();
599  const SCEVAddRecExpr *SecondStoreEv =
600  cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
601  APInt SecondStride = getStoreStride(SecondStoreEv);
602 
603  if (FirstStride != SecondStride)
604  continue;
605 
606  Value *SecondStoredVal = SL[k]->getValueOperand();
607  Value *SecondSplatValue = nullptr;
608  Constant *SecondPatternValue = nullptr;
609 
610  if (ForMemset)
611  SecondSplatValue = isBytewiseValue(SecondStoredVal);
612  else
613  SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
614 
615  assert((SecondSplatValue || SecondPatternValue) &&
616  "Expected either splat value or pattern value.");
617 
618  if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
619  if (ForMemset) {
620  if (FirstSplatValue != SecondSplatValue)
621  continue;
622  } else {
623  if (FirstPatternValue != SecondPatternValue)
624  continue;
625  }
626  Tails.insert(SL[k]);
627  Heads.insert(SL[i]);
628  ConsecutiveChain[SL[i]] = SL[k];
629  break;
630  }
631  }
632  }
633 
634  // We may run into multiple chains that merge into a single chain. We mark the
635  // stores that we transformed so that we don't visit the same store twice.
636  SmallPtrSet<Value *, 16> TransformedStores;
637  bool Changed = false;
638 
639  // For stores that start but don't end a link in the chain:
640  for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
641  it != e; ++it) {
642  if (Tails.count(*it))
643  continue;
644 
645  // We found a store instr that starts a chain. Now follow the chain and try
646  // to transform it.
647  SmallPtrSet<Instruction *, 8> AdjacentStores;
648  StoreInst *I = *it;
649 
650  StoreInst *HeadStore = I;
651  unsigned StoreSize = 0;
652 
653  // Collect the chain into a list.
654  while (Tails.count(I) || Heads.count(I)) {
655  if (TransformedStores.count(I))
656  break;
657  AdjacentStores.insert(I);
658 
659  StoreSize += getStoreSizeInBytes(I, DL);
660  // Move to the next value in the chain.
661  I = ConsecutiveChain[I];
662  }
663 
664  Value *StoredVal = HeadStore->getValueOperand();
665  Value *StorePtr = HeadStore->getPointerOperand();
666  const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
667  APInt Stride = getStoreStride(StoreEv);
668 
669  // Check to see if the stride matches the size of the stores. If so, then
670  // we know that every byte is touched in the loop.
671  if (StoreSize != Stride && StoreSize != -Stride)
672  continue;
673 
674  bool NegStride = StoreSize == -Stride;
675 
676  if (processLoopStridedStore(StorePtr, StoreSize, HeadStore->getAlignment(),
677  StoredVal, HeadStore, AdjacentStores, StoreEv,
678  BECount, NegStride)) {
679  TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
680  Changed = true;
681  }
682  }
683 
684  return Changed;
685 }
686 
687 /// processLoopMemSet - See if this memset can be promoted to a large memset.
688 bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
689  const SCEV *BECount) {
690  // We can only handle non-volatile memsets with a constant size.
691  if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
692  return false;
693 
694  // If we're not allowed to hack on memset, we fail.
695  if (!HasMemset)
696  return false;
697 
698  Value *Pointer = MSI->getDest();
699 
700  // See if the pointer expression is an AddRec like {base,+,1} on the current
701  // loop, which indicates a strided store. If we have something else, it's a
702  // random store we can't handle.
703  const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
704  if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine())
705  return false;
706 
707  // Reject memsets that are so large that they overflow an unsigned.
708  uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
709  if ((SizeInBytes >> 32) != 0)
710  return false;
711 
712  // Check to see if the stride matches the size of the memset. If so, then we
713  // know that every byte is touched in the loop.
714  const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
715  if (!ConstStride)
716  return false;
717 
718  APInt Stride = ConstStride->getAPInt();
719  if (SizeInBytes != Stride && SizeInBytes != -Stride)
720  return false;
721 
722  // Verify that the memset value is loop invariant. If not, we can't promote
723  // the memset.
724  Value *SplatValue = MSI->getValue();
725  if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
726  return false;
727 
729  MSIs.insert(MSI);
730  bool NegStride = SizeInBytes == -Stride;
731  return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
732  MSI->getAlignment(), SplatValue, MSI, MSIs, Ev,
733  BECount, NegStride, /*IsLoopMemset=*/true);
734 }
735 
736 /// mayLoopAccessLocation - Return true if the specified loop might access the
737 /// specified pointer location, which is a loop-strided access. The 'Access'
738 /// argument specifies what the verboten forms of access are (read or write).
739 static bool
741  const SCEV *BECount, unsigned StoreSize,
742  AliasAnalysis &AA,
743  SmallPtrSetImpl<Instruction *> &IgnoredStores) {
744  // Get the location that may be stored across the loop. Since the access is
745  // strided positively through memory, we say that the modified location starts
746  // at the pointer and has infinite size.
747  uint64_t AccessSize = MemoryLocation::UnknownSize;
748 
749  // If the loop iterates a fixed number of times, we can refine the access size
750  // to be exactly the size of the memset, which is (BECount+1)*StoreSize
751  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
752  AccessSize = (BECst->getValue()->getZExtValue() + 1) * StoreSize;
753 
754  // TODO: For this to be really effective, we have to dive into the pointer
755  // operand in the store. Store to &A[i] of 100 will always return may alias
756  // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
757  // which will then no-alias a store to &A[100].
758  MemoryLocation StoreLoc(Ptr, AccessSize);
759 
760  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
761  ++BI)
762  for (Instruction &I : **BI)
763  if (IgnoredStores.count(&I) == 0 &&
764  (AA.getModRefInfo(&I, StoreLoc) & Access))
765  return true;
766 
767  return false;
768 }
769 
770 // If we have a negative stride, Start refers to the end of the memory location
771 // we're trying to memset. Therefore, we need to recompute the base pointer,
772 // which is just Start - BECount*Size.
773 static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
774  Type *IntPtr, unsigned StoreSize,
775  ScalarEvolution *SE) {
776  const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
777  if (StoreSize != 1)
778  Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize),
779  SCEV::FlagNUW);
780  return SE->getMinusSCEV(Start, Index);
781 }
782 
783 /// Compute the number of bytes as a SCEV from the backedge taken count.
784 ///
785 /// This also maps the SCEV into the provided type and tries to handle the
786 /// computation in a way that will fold cleanly.
787 static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
788  unsigned StoreSize, Loop *CurLoop,
789  const DataLayout *DL, ScalarEvolution *SE) {
790  const SCEV *NumBytesS;
791  // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
792  // pointer size if it isn't already.
793  //
794  // If we're going to need to zero extend the BE count, check if we can add
795  // one to it prior to zero extending without overflow. Provided this is safe,
796  // it allows better simplification of the +1.
797  if (DL->getTypeSizeInBits(BECount->getType()) <
798  DL->getTypeSizeInBits(IntPtr) &&
800  CurLoop, ICmpInst::ICMP_NE, BECount,
801  SE->getNegativeSCEV(SE->getOne(BECount->getType())))) {
802  NumBytesS = SE->getZeroExtendExpr(
803  SE->getAddExpr(BECount, SE->getOne(BECount->getType()), SCEV::FlagNUW),
804  IntPtr);
805  } else {
806  NumBytesS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr),
807  SE->getOne(IntPtr), SCEV::FlagNUW);
808  }
809 
810  // And scale it based on the store size.
811  if (StoreSize != 1) {
812  NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
813  SCEV::FlagNUW);
814  }
815  return NumBytesS;
816 }
817 
818 /// processLoopStridedStore - We see a strided store of some value. If we can
819 /// transform this into a memset or memset_pattern in the loop preheader, do so.
820 bool LoopIdiomRecognize::processLoopStridedStore(
821  Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
822  Value *StoredVal, Instruction *TheStore,
824  const SCEV *BECount, bool NegStride, bool IsLoopMemset) {
825  Value *SplatValue = isBytewiseValue(StoredVal);
826  Constant *PatternValue = nullptr;
827 
828  if (!SplatValue)
829  PatternValue = getMemSetPatternValue(StoredVal, DL);
830 
831  assert((SplatValue || PatternValue) &&
832  "Expected either splat value or pattern value.");
833 
834  // The trip count of the loop and the base pointer of the addrec SCEV is
835  // guaranteed to be loop invariant, which means that it should dominate the
836  // header. This allows us to insert code for it in the preheader.
837  unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
838  BasicBlock *Preheader = CurLoop->getLoopPreheader();
839  IRBuilder<> Builder(Preheader->getTerminator());
840  SCEVExpander Expander(*SE, *DL, "loop-idiom");
841 
842  Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
843  Type *IntPtr = Builder.getIntPtrTy(*DL, DestAS);
844 
845  const SCEV *Start = Ev->getStart();
846  // Handle negative strided loops.
847  if (NegStride)
848  Start = getStartForNegStride(Start, BECount, IntPtr, StoreSize, SE);
849 
850  // TODO: ideally we should still be able to generate memset if SCEV expander
851  // is taught to generate the dependencies at the latest point.
852  if (!isSafeToExpand(Start, *SE))
853  return false;
854 
855  // Okay, we have a strided store "p[i]" of a splattable value. We can turn
856  // this into a memset in the loop preheader now if we want. However, this
857  // would be unsafe to do if there is anything else in the loop that may read
858  // or write to the aliased location. Check for any overlap by generating the
859  // base pointer and checking the region.
860  Value *BasePtr =
861  Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
862  if (mayLoopAccessLocation(BasePtr, MRI_ModRef, CurLoop, BECount, StoreSize,
863  *AA, Stores)) {
864  Expander.clear();
865  // If we generated new code for the base pointer, clean up.
867  return false;
868  }
869 
870  if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
871  return false;
872 
873  // Okay, everything looks good, insert the memset.
874 
875  const SCEV *NumBytesS =
876  getNumBytes(BECount, IntPtr, StoreSize, CurLoop, DL, SE);
877 
878  // TODO: ideally we should still be able to generate memset if SCEV expander
879  // is taught to generate the dependencies at the latest point.
880  if (!isSafeToExpand(NumBytesS, *SE))
881  return false;
882 
883  Value *NumBytes =
884  Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
885 
886  CallInst *NewCall;
887  if (SplatValue) {
888  NewCall =
889  Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, StoreAlignment);
890  } else {
891  // Everything is emitted in default address space
892  Type *Int8PtrTy = DestInt8PtrTy;
893 
894  Module *M = TheStore->getModule();
895  Value *MSP =
896  M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(),
897  Int8PtrTy, Int8PtrTy, IntPtr);
898  inferLibFuncAttributes(*M->getFunction("memset_pattern16"), *TLI);
899 
900  // Otherwise we should form a memset_pattern16. PatternValue is known to be
901  // an constant array of 16-bytes. Plop the value into a mergable global.
902  GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
904  PatternValue, ".memset_pattern");
905  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these.
906  GV->setAlignment(16);
907  Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
908  NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
909  }
910 
911  DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
912  << " from store to: " << *Ev << " at: " << *TheStore << "\n");
913  NewCall->setDebugLoc(TheStore->getDebugLoc());
914 
915  // Okay, the memset has been formed. Zap the original store and anything that
916  // feeds into it.
917  for (auto *I : Stores)
919  ++NumMemSet;
920  return true;
921 }
922 
923 /// If the stored value is a strided load in the same loop with the same stride
924 /// this may be transformable into a memcpy. This kicks in for stuff like
925 /// for (i) A[i] = B[i];
926 bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
927  const SCEV *BECount) {
928  assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.");
929 
930  Value *StorePtr = SI->getPointerOperand();
931  const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
932  APInt Stride = getStoreStride(StoreEv);
933  unsigned StoreSize = getStoreSizeInBytes(SI, DL);
934  bool NegStride = StoreSize == -Stride;
935 
936  // The store must be feeding a non-volatile load.
937  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
938  assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.");
939 
940  // See if the pointer expression is an AddRec like {base,+,1} on the current
941  // loop, which indicates a strided load. If we have something else, it's a
942  // random load we can't handle.
943  const SCEVAddRecExpr *LoadEv =
944  cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
945 
946  // The trip count of the loop and the base pointer of the addrec SCEV is
947  // guaranteed to be loop invariant, which means that it should dominate the
948  // header. This allows us to insert code for it in the preheader.
949  BasicBlock *Preheader = CurLoop->getLoopPreheader();
950  IRBuilder<> Builder(Preheader->getTerminator());
951  SCEVExpander Expander(*SE, *DL, "loop-idiom");
952 
953  const SCEV *StrStart = StoreEv->getStart();
954  unsigned StrAS = SI->getPointerAddressSpace();
955  Type *IntPtrTy = Builder.getIntPtrTy(*DL, StrAS);
956 
957  // Handle negative strided loops.
958  if (NegStride)
959  StrStart = getStartForNegStride(StrStart, BECount, IntPtrTy, StoreSize, SE);
960 
961  // Okay, we have a strided store "p[i]" of a loaded value. We can turn
962  // this into a memcpy in the loop preheader now if we want. However, this
963  // would be unsafe to do if there is anything else in the loop that may read
964  // or write the memory region we're storing to. This includes the load that
965  // feeds the stores. Check for an alias by generating the base address and
966  // checking everything.
967  Value *StoreBasePtr = Expander.expandCodeFor(
968  StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
969 
971  Stores.insert(SI);
972  if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
973  StoreSize, *AA, Stores)) {
974  Expander.clear();
975  // If we generated new code for the base pointer, clean up.
977  return false;
978  }
979 
980  const SCEV *LdStart = LoadEv->getStart();
981  unsigned LdAS = LI->getPointerAddressSpace();
982 
983  // Handle negative strided loops.
984  if (NegStride)
985  LdStart = getStartForNegStride(LdStart, BECount, IntPtrTy, StoreSize, SE);
986 
987  // For a memcpy, we have to make sure that the input array is not being
988  // mutated by the loop.
989  Value *LoadBasePtr = Expander.expandCodeFor(
990  LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
991 
992  if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,
993  *AA, Stores)) {
994  Expander.clear();
995  // If we generated new code for the base pointer, clean up.
998  return false;
999  }
1000 
1001  if (avoidLIRForMultiBlockLoop())
1002  return false;
1003 
1004  // Okay, everything is safe, we can transform this!
1005 
1006  const SCEV *NumBytesS =
1007  getNumBytes(BECount, IntPtrTy, StoreSize, CurLoop, DL, SE);
1008 
1009  Value *NumBytes =
1010  Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
1011 
1012  unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
1013  CallInst *NewCall = nullptr;
1014  // Check whether to generate an unordered atomic memcpy:
1015  // If the load or store are atomic, then they must neccessarily be unordered
1016  // by previous checks.
1017  if (!SI->isAtomic() && !LI->isAtomic())
1018  NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align);
1019  else {
1020  // We cannot allow unaligned ops for unordered load/store, so reject
1021  // anything where the alignment isn't at least the element size.
1022  if (Align < StoreSize)
1023  return false;
1024 
1025  // If the element.atomic memcpy is not lowered into explicit
1026  // loads/stores later, then it will be lowered into an element-size
1027  // specific lib call. If the lib call doesn't exist for our store size, then
1028  // we shouldn't generate the memcpy.
1029  if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
1030  return false;
1031 
1032  NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
1033  StoreBasePtr, LoadBasePtr, NumBytes, StoreSize);
1034 
1035  // Propagate alignment info onto the pointer args. Note that unordered
1036  // atomic loads/stores are *required* by the spec to have an alignment
1037  // but non-atomic loads/stores may not.
1038  NewCall->addParamAttr(0, Attribute::getWithAlignment(NewCall->getContext(),
1039  SI->getAlignment()));
1040  NewCall->addParamAttr(1, Attribute::getWithAlignment(NewCall->getContext(),
1041  LI->getAlignment()));
1042  }
1043  NewCall->setDebugLoc(SI->getDebugLoc());
1044 
1045  DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
1046  << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
1047  << " from store ptr=" << *StoreEv << " at: " << *SI << "\n");
1048 
1049  // Okay, the memcpy has been formed. Zap the original store and anything that
1050  // feeds into it.
1052  ++NumMemCpy;
1053  return true;
1054 }
1055 
1056 // When compiling for codesize we avoid idiom recognition for a multi-block loop
1057 // unless it is a loop_memset idiom or a memset/memcpy idiom in a nested loop.
1058 //
1059 bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
1060  bool IsLoopMemset) {
1061  if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {
1062  if (!CurLoop->getParentLoop() && (!IsMemset || !IsLoopMemset)) {
1063  DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName()
1064  << " : LIR " << (IsMemset ? "Memset" : "Memcpy")
1065  << " avoided: multi-block top-level loop\n");
1066  return true;
1067  }
1068  }
1069 
1070  return false;
1071 }
1072 
1073 bool LoopIdiomRecognize::runOnNoncountableLoop() {
1074  return recognizePopcount() || recognizeAndInsertCTLZ();
1075 }
1076 
1077 /// Check if the given conditional branch is based on the comparison between
1078 /// a variable and zero, and if the variable is non-zero, the control yields to
1079 /// the loop entry. If the branch matches the behavior, the variable involved
1080 /// in the comparison is returned. This function will be called to see if the
1081 /// precondition and postcondition of the loop are in desirable form.
1082 static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) {
1083  if (!BI || !BI->isConditional())
1084  return nullptr;
1085 
1086  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1087  if (!Cond)
1088  return nullptr;
1089 
1090  ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
1091  if (!CmpZero || !CmpZero->isZero())
1092  return nullptr;
1093 
1094  ICmpInst::Predicate Pred = Cond->getPredicate();
1095  if ((Pred == ICmpInst::ICMP_NE && BI->getSuccessor(0) == LoopEntry) ||
1096  (Pred == ICmpInst::ICMP_EQ && BI->getSuccessor(1) == LoopEntry))
1097  return Cond->getOperand(0);
1098 
1099  return nullptr;
1100 }
1101 
1102 // Check if the recurrence variable `VarX` is in the right form to create
1103 // the idiom. Returns the value coerced to a PHINode if so.
1105  BasicBlock *LoopEntry) {
1106  auto *PhiX = dyn_cast<PHINode>(VarX);
1107  if (PhiX && PhiX->getParent() == LoopEntry &&
1108  (PhiX->getOperand(0) == DefX || PhiX->getOperand(1) == DefX))
1109  return PhiX;
1110  return nullptr;
1111 }
1112 
1113 /// Return true iff the idiom is detected in the loop.
1114 ///
1115 /// Additionally:
1116 /// 1) \p CntInst is set to the instruction counting the population bit.
1117 /// 2) \p CntPhi is set to the corresponding phi node.
1118 /// 3) \p Var is set to the value whose population bits are being counted.
1119 ///
1120 /// The core idiom we are trying to detect is:
1121 /// \code
1122 /// if (x0 != 0)
1123 /// goto loop-exit // the precondition of the loop
1124 /// cnt0 = init-val;
1125 /// do {
1126 /// x1 = phi (x0, x2);
1127 /// cnt1 = phi(cnt0, cnt2);
1128 ///
1129 /// cnt2 = cnt1 + 1;
1130 /// ...
1131 /// x2 = x1 & (x1 - 1);
1132 /// ...
1133 /// } while(x != 0);
1134 ///
1135 /// loop-exit:
1136 /// \endcode
1137 static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
1138  Instruction *&CntInst, PHINode *&CntPhi,
1139  Value *&Var) {
1140  // step 1: Check to see if the look-back branch match this pattern:
1141  // "if (a!=0) goto loop-entry".
1142  BasicBlock *LoopEntry;
1143  Instruction *DefX2, *CountInst;
1144  Value *VarX1, *VarX0;
1145  PHINode *PhiX, *CountPhi;
1146 
1147  DefX2 = CountInst = nullptr;
1148  VarX1 = VarX0 = nullptr;
1149  PhiX = CountPhi = nullptr;
1150  LoopEntry = *(CurLoop->block_begin());
1151 
1152  // step 1: Check if the loop-back branch is in desirable form.
1153  {
1154  if (Value *T = matchCondition(
1155  dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
1156  DefX2 = dyn_cast<Instruction>(T);
1157  else
1158  return false;
1159  }
1160 
1161  // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
1162  {
1163  if (!DefX2 || DefX2->getOpcode() != Instruction::And)
1164  return false;
1165 
1166  BinaryOperator *SubOneOp;
1167 
1168  if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
1169  VarX1 = DefX2->getOperand(1);
1170  else {
1171  VarX1 = DefX2->getOperand(0);
1172  SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
1173  }
1174  if (!SubOneOp)
1175  return false;
1176 
1177  Instruction *SubInst = cast<Instruction>(SubOneOp);
1178  ConstantInt *Dec = dyn_cast<ConstantInt>(SubInst->getOperand(1));
1179  if (!Dec ||
1180  !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) ||
1181  (SubInst->getOpcode() == Instruction::Add &&
1182  Dec->isMinusOne()))) {
1183  return false;
1184  }
1185  }
1186 
1187  // step 3: Check the recurrence of variable X
1188  PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry);
1189  if (!PhiX)
1190  return false;
1191 
1192  // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
1193  {
1194  CountInst = nullptr;
1195  for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
1196  IterE = LoopEntry->end();
1197  Iter != IterE; Iter++) {
1198  Instruction *Inst = &*Iter;
1199  if (Inst->getOpcode() != Instruction::Add)
1200  continue;
1201 
1202  ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
1203  if (!Inc || !Inc->isOne())
1204  continue;
1205 
1206  PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
1207  if (!Phi)
1208  continue;
1209 
1210  // Check if the result of the instruction is live of the loop.
1211  bool LiveOutLoop = false;
1212  for (User *U : Inst->users()) {
1213  if ((cast<Instruction>(U))->getParent() != LoopEntry) {
1214  LiveOutLoop = true;
1215  break;
1216  }
1217  }
1218 
1219  if (LiveOutLoop) {
1220  CountInst = Inst;
1221  CountPhi = Phi;
1222  break;
1223  }
1224  }
1225 
1226  if (!CountInst)
1227  return false;
1228  }
1229 
1230  // step 5: check if the precondition is in this form:
1231  // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
1232  {
1233  auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1234  Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader());
1235  if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
1236  return false;
1237 
1238  CntInst = CountInst;
1239  CntPhi = CountPhi;
1240  Var = T;
1241  }
1242 
1243  return true;
1244 }
1245 
1246 /// Return true if the idiom is detected in the loop.
1247 ///
1248 /// Additionally:
1249 /// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
1250 /// or nullptr if there is no such.
1251 /// 2) \p CntPhi is set to the corresponding phi node
1252 /// or nullptr if there is no such.
1253 /// 3) \p Var is set to the value whose CTLZ could be used.
1254 /// 4) \p DefX is set to the instruction calculating Loop exit condition.
1255 ///
1256 /// The core idiom we are trying to detect is:
1257 /// \code
1258 /// if (x0 == 0)
1259 /// goto loop-exit // the precondition of the loop
1260 /// cnt0 = init-val;
1261 /// do {
1262 /// x = phi (x0, x.next); //PhiX
1263 /// cnt = phi(cnt0, cnt.next);
1264 ///
1265 /// cnt.next = cnt + 1;
1266 /// ...
1267 /// x.next = x >> 1; // DefX
1268 /// ...
1269 /// } while(x.next != 0);
1270 ///
1271 /// loop-exit:
1272 /// \endcode
1273 static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX,
1274  Instruction *&CntInst, PHINode *&CntPhi,
1275  Instruction *&DefX) {
1276  BasicBlock *LoopEntry;
1277  Value *VarX = nullptr;
1278 
1279  DefX = nullptr;
1280  PhiX = nullptr;
1281  CntInst = nullptr;
1282  CntPhi = nullptr;
1283  LoopEntry = *(CurLoop->block_begin());
1284 
1285  // step 1: Check if the loop-back branch is in desirable form.
1286  if (Value *T = matchCondition(
1287  dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
1288  DefX = dyn_cast<Instruction>(T);
1289  else
1290  return false;
1291 
1292  // step 2: detect instructions corresponding to "x.next = x >> 1"
1293  if (!DefX || DefX->getOpcode() != Instruction::AShr)
1294  return false;
1295  if (ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1)))
1296  if (!Shft || !Shft->isOne())
1297  return false;
1298  VarX = DefX->getOperand(0);
1299 
1300  // step 3: Check the recurrence of variable X
1301  PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
1302  if (!PhiX)
1303  return false;
1304 
1305  // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
1306  // TODO: We can skip the step. If loop trip count is known (CTLZ),
1307  // then all uses of "cnt.next" could be optimized to the trip count
1308  // plus "cnt0". Currently it is not optimized.
1309  // This step could be used to detect POPCNT instruction:
1310  // cnt.next = cnt + (x.next & 1)
1311  for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
1312  IterE = LoopEntry->end();
1313  Iter != IterE; Iter++) {
1314  Instruction *Inst = &*Iter;
1315  if (Inst->getOpcode() != Instruction::Add)
1316  continue;
1317 
1318  ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
1319  if (!Inc || !Inc->isOne())
1320  continue;
1321 
1322  PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
1323  if (!Phi)
1324  continue;
1325 
1326  CntInst = Inst;
1327  CntPhi = Phi;
1328  break;
1329  }
1330  if (!CntInst)
1331  return false;
1332 
1333  return true;
1334 }
1335 
1336 /// Recognize CTLZ idiom in a non-countable loop and convert the loop
1337 /// to countable (with CTLZ trip count).
1338 /// If CTLZ inserted as a new trip count returns true; otherwise, returns false.
1339 bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
1340  // Give up if the loop has multiple blocks or multiple backedges.
1341  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
1342  return false;
1343 
1344  Instruction *CntInst, *DefX;
1345  PHINode *CntPhi, *PhiX;
1346  if (!detectCTLZIdiom(CurLoop, PhiX, CntInst, CntPhi, DefX))
1347  return false;
1348 
1349  bool IsCntPhiUsedOutsideLoop = false;
1350  for (User *U : CntPhi->users())
1351  if (!CurLoop->contains(dyn_cast<Instruction>(U))) {
1352  IsCntPhiUsedOutsideLoop = true;
1353  break;
1354  }
1355  bool IsCntInstUsedOutsideLoop = false;
1356  for (User *U : CntInst->users())
1357  if (!CurLoop->contains(dyn_cast<Instruction>(U))) {
1358  IsCntInstUsedOutsideLoop = true;
1359  break;
1360  }
1361  // If both CntInst and CntPhi are used outside the loop the profitability
1362  // is questionable.
1363  if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
1364  return false;
1365 
1366  // For some CPUs result of CTLZ(X) intrinsic is undefined
1367  // when X is 0. If we can not guarantee X != 0, we need to check this
1368  // when expand.
1369  bool ZeroCheck = false;
1370  // It is safe to assume Preheader exist as it was checked in
1371  // parent function RunOnLoop.
1372  BasicBlock *PH = CurLoop->getLoopPreheader();
1373  Value *InitX = PhiX->getIncomingValueForBlock(PH);
1374  // If we check X != 0 before entering the loop we don't need a zero
1375  // check in CTLZ intrinsic, but only if Cnt Phi is not used outside of the
1376  // loop (if it is used we count CTLZ(X >> 1)).
1377  if (!IsCntPhiUsedOutsideLoop)
1378  if (BasicBlock *PreCondBB = PH->getSinglePredecessor())
1379  if (BranchInst *PreCondBr =
1380  dyn_cast<BranchInst>(PreCondBB->getTerminator())) {
1381  if (matchCondition(PreCondBr, PH) == InitX)
1382  ZeroCheck = true;
1383  }
1384 
1385  // Check if CTLZ intrinsic is profitable. Assume it is always profitable
1386  // if we delete the loop (the loop has only 6 instructions):
1387  // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
1388  // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
1389  // %shr = ashr %n.addr.0, 1
1390  // %tobool = icmp eq %shr, 0
1391  // %inc = add nsw %i.0, 1
1392  // br i1 %tobool
1393 
1394  IRBuilder<> Builder(PH->getTerminator());
1396  {InitX, ZeroCheck ? Builder.getTrue() : Builder.getFalse()};
1398  if (CurLoop->getHeader()->size() != 6 &&
1399  TTI->getIntrinsicCost(Intrinsic::ctlz, InitX->getType(), Args) >
1401  return false;
1402 
1403  const DebugLoc DL = DefX->getDebugLoc();
1404  transformLoopToCountable(PH, CntInst, CntPhi, InitX, DL, ZeroCheck,
1405  IsCntPhiUsedOutsideLoop);
1406  return true;
1407 }
1408 
1409 /// Recognizes a population count idiom in a non-countable loop.
1410 ///
1411 /// If detected, transforms the relevant code to issue the popcount intrinsic
1412 /// function call, and returns true; otherwise, returns false.
1413 bool LoopIdiomRecognize::recognizePopcount() {
1415  return false;
1416 
1417  // Counting population are usually conducted by few arithmetic instructions.
1418  // Such instructions can be easily "absorbed" by vacant slots in a
1419  // non-compact loop. Therefore, recognizing popcount idiom only makes sense
1420  // in a compact loop.
1421 
1422  // Give up if the loop has multiple blocks or multiple backedges.
1423  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
1424  return false;
1425 
1426  BasicBlock *LoopBody = *(CurLoop->block_begin());
1427  if (LoopBody->size() >= 20) {
1428  // The loop is too big, bail out.
1429  return false;
1430  }
1431 
1432  // It should have a preheader containing nothing but an unconditional branch.
1433  BasicBlock *PH = CurLoop->getLoopPreheader();
1434  if (!PH || &PH->front() != PH->getTerminator())
1435  return false;
1436  auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator());
1437  if (!EntryBI || EntryBI->isConditional())
1438  return false;
1439 
1440  // It should have a precondition block where the generated popcount instrinsic
1441  // function can be inserted.
1442  auto *PreCondBB = PH->getSinglePredecessor();
1443  if (!PreCondBB)
1444  return false;
1445  auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1446  if (!PreCondBI || PreCondBI->isUnconditional())
1447  return false;
1448 
1449  Instruction *CntInst;
1450  PHINode *CntPhi;
1451  Value *Val;
1452  if (!detectPopcountIdiom(CurLoop, PreCondBB, CntInst, CntPhi, Val))
1453  return false;
1454 
1455  transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
1456  return true;
1457 }
1458 
1460  const DebugLoc &DL) {
1461  Value *Ops[] = {Val};
1462  Type *Tys[] = {Val->getType()};
1463 
1464  Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
1465  Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
1466  CallInst *CI = IRBuilder.CreateCall(Func, Ops);
1467  CI->setDebugLoc(DL);
1468 
1469  return CI;
1470 }
1471 
1473  const DebugLoc &DL, bool ZeroCheck) {
1474  Value *Ops[] = {Val, ZeroCheck ? IRBuilder.getTrue() : IRBuilder.getFalse()};
1475  Type *Tys[] = {Val->getType()};
1476 
1477  Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
1478  Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctlz, Tys);
1479  CallInst *CI = IRBuilder.CreateCall(Func, Ops);
1480  CI->setDebugLoc(DL);
1481 
1482  return CI;
1483 }
1484 
1485 /// Transform the following loop:
1486 /// loop:
1487 /// CntPhi = PHI [Cnt0, CntInst]
1488 /// PhiX = PHI [InitX, DefX]
1489 /// CntInst = CntPhi + 1
1490 /// DefX = PhiX >> 1
1491 // LOOP_BODY
1492 /// Br: loop if (DefX != 0)
1493 /// Use(CntPhi) or Use(CntInst)
1494 ///
1495 /// Into:
1496 /// If CntPhi used outside the loop:
1497 /// CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1)
1498 /// Count = CountPrev + 1
1499 /// else
1500 /// Count = BitWidth(InitX) - CTLZ(InitX)
1501 /// loop:
1502 /// CntPhi = PHI [Cnt0, CntInst]
1503 /// PhiX = PHI [InitX, DefX]
1504 /// PhiCount = PHI [Count, Dec]
1505 /// CntInst = CntPhi + 1
1506 /// DefX = PhiX >> 1
1507 /// Dec = PhiCount - 1
1508 /// LOOP_BODY
1509 /// Br: loop if (Dec != 0)
1510 /// Use(CountPrev + Cnt0) // Use(CntPhi)
1511 /// or
1512 /// Use(Count + Cnt0) // Use(CntInst)
1513 ///
1514 /// If LOOP_BODY is empty the loop will be deleted.
1515 /// If CntInst and DefX are not used in LOOP_BODY they will be removed.
1516 void LoopIdiomRecognize::transformLoopToCountable(
1517  BasicBlock *Preheader, Instruction *CntInst, PHINode *CntPhi, Value *InitX,
1518  const DebugLoc DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
1519  BranchInst *PreheaderBr = dyn_cast<BranchInst>(Preheader->getTerminator());
1520 
1521  // Step 1: Insert the CTLZ instruction at the end of the preheader block
1522  // Count = BitWidth - CTLZ(InitX);
1523  // If there are uses of CntPhi create:
1524  // CountPrev = BitWidth - CTLZ(InitX >> 1);
1525  IRBuilder<> Builder(PreheaderBr);
1526  Builder.SetCurrentDebugLocation(DL);
1527  Value *CTLZ, *Count, *CountPrev, *NewCount, *InitXNext;
1528 
1529  if (IsCntPhiUsedOutsideLoop)
1530  InitXNext = Builder.CreateAShr(InitX,
1531  ConstantInt::get(InitX->getType(), 1));
1532  else
1533  InitXNext = InitX;
1534  CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck);
1535  Count = Builder.CreateSub(
1536  ConstantInt::get(CTLZ->getType(),
1537  CTLZ->getType()->getIntegerBitWidth()),
1538  CTLZ);
1539  if (IsCntPhiUsedOutsideLoop) {
1540  CountPrev = Count;
1541  Count = Builder.CreateAdd(
1542  CountPrev,
1543  ConstantInt::get(CountPrev->getType(), 1));
1544  }
1545  if (IsCntPhiUsedOutsideLoop)
1546  NewCount = Builder.CreateZExtOrTrunc(CountPrev,
1547  cast<IntegerType>(CntInst->getType()));
1548  else
1549  NewCount = Builder.CreateZExtOrTrunc(Count,
1550  cast<IntegerType>(CntInst->getType()));
1551 
1552  // If the CTLZ counter's initial value is not zero, insert Add Inst.
1553  Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
1554  ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
1555  if (!InitConst || !InitConst->isZero())
1556  NewCount = Builder.CreateAdd(NewCount, CntInitVal);
1557 
1558  // Step 2: Insert new IV and loop condition:
1559  // loop:
1560  // ...
1561  // PhiCount = PHI [Count, Dec]
1562  // ...
1563  // Dec = PhiCount - 1
1564  // ...
1565  // Br: loop if (Dec != 0)
1566  BasicBlock *Body = *(CurLoop->block_begin());
1567  auto *LbBr = dyn_cast<BranchInst>(Body->getTerminator());
1568  ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
1569  Type *Ty = Count->getType();
1570 
1571  PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
1572 
1573  Builder.SetInsertPoint(LbCond);
1574  Instruction *TcDec = cast<Instruction>(
1575  Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
1576  "tcdec", false, true));
1577 
1578  TcPhi->addIncoming(Count, Preheader);
1579  TcPhi->addIncoming(TcDec, Body);
1580 
1581  CmpInst::Predicate Pred =
1582  (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
1583  LbCond->setPredicate(Pred);
1584  LbCond->setOperand(0, TcDec);
1585  LbCond->setOperand(1, ConstantInt::get(Ty, 0));
1586 
1587  // Step 3: All the references to the original counter outside
1588  // the loop are replaced with the NewCount -- the value returned from
1589  // __builtin_ctlz(x).
1590  if (IsCntPhiUsedOutsideLoop)
1591  CntPhi->replaceUsesOutsideBlock(NewCount, Body);
1592  else
1593  CntInst->replaceUsesOutsideBlock(NewCount, Body);
1594 
1595  // step 4: Forget the "non-computable" trip-count SCEV associated with the
1596  // loop. The loop would otherwise not be deleted even if it becomes empty.
1597  SE->forgetLoop(CurLoop);
1598 }
1599 
1600 void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
1601  Instruction *CntInst,
1602  PHINode *CntPhi, Value *Var) {
1603  BasicBlock *PreHead = CurLoop->getLoopPreheader();
1604  auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1605  const DebugLoc DL = CntInst->getDebugLoc();
1606 
1607  // Assuming before transformation, the loop is following:
1608  // if (x) // the precondition
1609  // do { cnt++; x &= x - 1; } while(x);
1610 
1611  // Step 1: Insert the ctpop instruction at the end of the precondition block
1612  IRBuilder<> Builder(PreCondBr);
1613  Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
1614  {
1615  PopCnt = createPopcntIntrinsic(Builder, Var, DL);
1616  NewCount = PopCntZext =
1617  Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
1618 
1619  if (NewCount != PopCnt)
1620  (cast<Instruction>(NewCount))->setDebugLoc(DL);
1621 
1622  // TripCnt is exactly the number of iterations the loop has
1623  TripCnt = NewCount;
1624 
1625  // If the population counter's initial value is not zero, insert Add Inst.
1626  Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
1627  ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
1628  if (!InitConst || !InitConst->isZero()) {
1629  NewCount = Builder.CreateAdd(NewCount, CntInitVal);
1630  (cast<Instruction>(NewCount))->setDebugLoc(DL);
1631  }
1632  }
1633 
1634  // Step 2: Replace the precondition from "if (x == 0) goto loop-exit" to
1635  // "if (NewCount == 0) loop-exit". Without this change, the intrinsic
1636  // function would be partial dead code, and downstream passes will drag
1637  // it back from the precondition block to the preheader.
1638  {
1639  ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
1640 
1641  Value *Opnd0 = PopCntZext;
1642  Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
1643  if (PreCond->getOperand(0) != Var)
1644  std::swap(Opnd0, Opnd1);
1645 
1646  ICmpInst *NewPreCond = cast<ICmpInst>(
1647  Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
1648  PreCondBr->setCondition(NewPreCond);
1649 
1651  }
1652 
1653  // Step 3: Note that the population count is exactly the trip count of the
1654  // loop in question, which enable us to to convert the loop from noncountable
1655  // loop into a countable one. The benefit is twofold:
1656  //
1657  // - If the loop only counts population, the entire loop becomes dead after
1658  // the transformation. It is a lot easier to prove a countable loop dead
1659  // than to prove a noncountable one. (In some C dialects, an infinite loop
1660  // isn't dead even if it computes nothing useful. In general, DCE needs
1661  // to prove a noncountable loop finite before safely delete it.)
1662  //
1663  // - If the loop also performs something else, it remains alive.
1664  // Since it is transformed to countable form, it can be aggressively
1665  // optimized by some optimizations which are in general not applicable
1666  // to a noncountable loop.
1667  //
1668  // After this step, this loop (conceptually) would look like following:
1669  // newcnt = __builtin_ctpop(x);
1670  // t = newcnt;
1671  // if (x)
1672  // do { cnt++; x &= x-1; t--) } while (t > 0);
1673  BasicBlock *Body = *(CurLoop->block_begin());
1674  {
1675  auto *LbBr = dyn_cast<BranchInst>(Body->getTerminator());
1676  ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
1677  Type *Ty = TripCnt->getType();
1678 
1679  PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
1680 
1681  Builder.SetInsertPoint(LbCond);
1682  Instruction *TcDec = cast<Instruction>(
1683  Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
1684  "tcdec", false, true));
1685 
1686  TcPhi->addIncoming(TripCnt, PreHead);
1687  TcPhi->addIncoming(TcDec, Body);
1688 
1689  CmpInst::Predicate Pred =
1690  (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
1691  LbCond->setPredicate(Pred);
1692  LbCond->setOperand(0, TcDec);
1693  LbCond->setOperand(1, ConstantInt::get(Ty, 0));
1694  }
1695 
1696  // Step 4: All the references to the original population counter outside
1697  // the loop are replaced with the NewCount -- the value returned from
1698  // __builtin_ctpop().
1699  CntInst->replaceUsesOutsideBlock(NewCount, Body);
1700 
1701  // step 5: Forget the "non-computable" trip-count SCEV associated with the
1702  // loop. The loop would otherwise not be deleted even if it becomes empty.
1703  SE->forgetLoop(CurLoop);
1704 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
uint64_t CallInst * C
Value * getValueOperand()
Definition: Instructions.h:395
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:69
void push_back(const T &Elt)
Definition: SmallVector.h:212
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1634
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions that feed it, giving the original input.
static CallInst * createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:1391
const SCEV * getConstant(ConstantInt *V)
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
Value * isBytewiseValue(Value *V)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
bool isAtomic() const
Return true if this instruction has an AtomicOrdering of unordered or higher.
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
This is the interface for a simple mod/ref and alias analysis over globals.
Constant * getOrInsertFunction(StringRef Name, FunctionType *T, AttributeList AttributeList)
Look up the specified function in the module symbol table.
Definition: Module.cpp:142
static Attribute getWithAlignment(LLVMContext &Context, uint64_t Align)
Return a uniquified Attribute object that has the specific alignment set.
Definition: Attributes.cpp:125
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
bool isUnordered() const
Definition: Instructions.h:389
The main scalar evolution driver.
static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX, Instruction *&CntInst, PHINode *&CntPhi, Instruction *&DefX)
Return true if the idiom is detected in the loop.
This class represents a function call, abstracting a target machine&#39;s calling convention.
static PHINode * getRecurrenceVar(Value *VarX, Instruction *DefX, BasicBlock *LoopEntry)
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:57
This class wraps the llvm.memset intrinsic.
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:34
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
An instruction for reading from memory.
Definition: Instructions.h:164
Value * getCondition() const
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:93
The access modifies the value stored in memory.
static Constant * get(ArrayType *T, ArrayRef< Constant *> V)
Definition: Constants.cpp:888
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr it the function does no...
Definition: BasicBlock.cpp:116
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Definition: Instructions.h:217
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
This is the interface for a SCEV-based alias analysis.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:361
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:631
static CallInst * createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL, bool ZeroCheck)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:664
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:889
const APInt & getAPInt() const
BlockT * getHeader() const
Definition: LoopInfo.h:107
void computeLoopSafetyInfo(LoopSafetyInfo *, Loop *)
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: LICM.cpp:489
static bool isSimple(Instruction *I)
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:201
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:404
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:182
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
This node represents a polynomial recurrence on the trip count of the specified loop.
static APInt getStoreStride(const SCEVAddRecExpr *StoreEv)
#define T
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:122
Class to represent array types.
Definition: DerivedTypes.h:369
static cl::opt< bool > UseLIRCodeSizeHeurs("use-lir-code-size-heurs", cl::desc("Use loop idiom recognition code size heuristics when compiling" "with -Os/-Oz"), cl::init(true), cl::Hidden)
bool has(LibFunc F) const
Tests whether a library function is available.
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:83
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:194
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:911
void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry &)
An instruction for storing to memory.
Definition: Instructions.h:306
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:428
static void deleteDeadInstruction(Instruction *I)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:140
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:975
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:128
Value * getOperand(unsigned i) const
Definition: User.h:154
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:211
bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS. ...
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1678
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
bool inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI)
Analyze the name and prototype of the given function and set any applicable attributes.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
const SCEV * getOperand(unsigned i) const
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:171
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:281
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:217
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:288
Conditional or Unconditional Branch instruction.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
const Instruction & front() const
Definition: BasicBlock.h:264
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:363
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
size_t size() const
Definition: BasicBlock.h:262
bool isUnordered() const
Definition: Instructions.h:264
Represent the analysis usage information of a pass.
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:530
static Value * matchCondition(BranchInst *BI, BasicBlock *LoopEntry)
Check if the given conditional branch is based on the comparison between a variable and zero...
This instruction compares its operands according to the predicate given to the constructor.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:860
std::vector< BasicBlock *>::const_iterator block_iterator
Definition: LoopInfo.h:160
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
Value * getPointerOperand()
Definition: Instructions.h:270
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:374
self_iterator getIterator()
Definition: ilist_node.h:82
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1320
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:370
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:220
bool isVolatile() const
const SCEV * getMulExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
Representation for a specific memory location.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
typename vector_type::const_iterator iterator
Definition: SetVector.h:49
Iterator for intrusive lists based on ilist_node.
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
Type * getType() const
Return the LLVM type of this SCEV expression.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:254
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:239
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:560
bool isConditional() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static Constant * getMemSetPatternValue(Value *V, const DataLayout *DL)
getMemSetPatternValue - If a strided store of the specified value is safe to turn into a memset_patte...
static const SCEV * getNumBytes(const SCEV *BECount, Type *IntPtr, unsigned StoreSize, Loop *CurLoop, const DataLayout *DL, ScalarEvolution *SE)
Compute the number of bytes as a SCEV from the backedge taken count.
static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, Instruction *&CntInst, PHINode *&CntPhi, Value *&Var)
Return true iff the idiom is detected in the loop.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:172
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
signed less or equal
Definition: InstrTypes.h:890
loop Recognize loop idioms
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:57
Class for arbitrary precision integers.
Definition: APInt.h:69
loop idiom
ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc)
getModRefInfo (for call sites) - Return information about whether a particular call site modifies or ...
iterator_range< user_iterator > users()
Definition: Value.h:395
This class uses information about analyze scalars to rewrite expressions in canonical form...
Pass * createLoopIdiomPass()
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition: IRBuilder.h:293
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:532
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:934
bool isVolatile() const
Return true if this is a store to a volatile memory location.
Definition: Instructions.h:339
Captures loop safety information.
Definition: LoopUtils.h:51
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:284
INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom", "Recognize loop idioms", false, false) INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
void setUnnamedAddr(UnnamedAddr Val)
Definition: GlobalValue.h:203
unsigned getAtomicMemIntrinsicMaxElementSize() const
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:226
This class represents an analyzed expression in the program.
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:404
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:218
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
The access both references and modifies the value stored in memory.
#define I(x, y, z)
Definition: MD5.cpp:58
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:193
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:1023
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:568
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
block_iterator block_end() const
Definition: LoopInfo.h:162
Value * getValue() const
Return the arguments to the instruction.
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:351
unsigned getAlignment() const
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:276
static const SCEV * getStartForNegStride(const SCEV *Start, const SCEV *BECount, Type *IntPtr, unsigned StoreSize, ScalarEvolution *SE)
const SCEV * getBackedgeTakenCount(const Loop *L)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
The cost of a typical &#39;add&#39; instruction.
Value * getLength() const
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Type *> ParamTys) const
Estimate the cost of an intrinsic when lowered.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:545
LLVM Value Representation.
Definition: Value.h:73
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
A vector that has set insertion semantics.
Definition: SetVector.h:41
static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, const SCEV *BECount, unsigned StoreSize, AliasAnalysis &AA, SmallPtrSetImpl< Instruction *> &IgnoredStores)
mayLoopAccessLocation - Return true if the specified loop might access the specified pointer location...
#define DEBUG(X)
Definition: Debug.h:118
static unsigned getStoreSizeInBytes(StoreInst *SI, const DataLayout *DL)
unsigned greater than
Definition: InstrTypes.h:883
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
This is the interface for LLVM&#39;s primary stateless and local alias analysis.
A container for analyses that lazily runs them and caches their results.
void replaceUsesOutsideBlock(Value *V, BasicBlock *BB)
replaceUsesOutsideBlock - Go through the uses list for this definition and make each use point to "V"...
Definition: Value.cpp:438
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
This pass exposes codegen information to IR-level passes.
bool isSimple() const
Definition: Instructions.h:387
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
bool isBigEndian() const
Definition: DataLayout.h:217
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
block_iterator block_begin() const
Definition: LoopInfo.h:161
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count...
Value * getPointerOperand()
Definition: Instructions.h:398
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
This class represents a constant integer value.
CallInst * CreateCall(Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1659
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.