LLVM  10.0.0svn
LoopIdiomRecognize.cpp
Go to the documentation of this file.
1 //===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements an idiom recognizer that transforms simple loops into a
10 // non-loop form. In cases that this kicks in, it can be a significant
11 // performance win.
12 //
13 // If compiling for code size we avoid idiom recognition if the resulting
14 // code could be larger than the code for the original loop. One way this could
15 // happen is if the loop is not removable after idiom recognition due to the
16 // presence of non-idiom instructions. The initial implementation of the
17 // heuristics applies to idioms in multi-block loops.
18 //
19 //===----------------------------------------------------------------------===//
20 //
21 // TODO List:
22 //
23 // Future loop memory idioms to recognize:
24 // memcmp, memmove, strlen, etc.
25 // Future floating point idioms to recognize in -ffast-math mode:
26 // fpowi
27 // Future integer operation idioms to recognize:
28 // ctpop
29 //
30 // Beware that isel's default lowering for ctpop is highly inefficient for
31 // i64 and larger types when i64 is legal and the value has few bits set. It
32 // would be good to enhance isel to emit a loop for ctpop in this case.
33 //
34 // This could recognize common matrix multiplies and dot product idioms and
35 // replace them with calls to BLAS (if linked in??).
36 //
37 //===----------------------------------------------------------------------===//
38 
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>
98 
99 using namespace llvm;
100 
101 #define DEBUG_TYPE "loop-idiom"
102 
103 STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
104 STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
105 
107  "use-lir-code-size-heurs",
108  cl::desc("Use loop idiom recognition code size heuristics when compiling"
109  "with -Os/-Oz"),
110  cl::init(true), cl::Hidden);
111 
112 namespace {
113 
114 class LoopIdiomRecognize {
115  Loop *CurLoop = nullptr;
116  AliasAnalysis *AA;
117  DominatorTree *DT;
118  LoopInfo *LI;
119  ScalarEvolution *SE;
120  TargetLibraryInfo *TLI;
121  const TargetTransformInfo *TTI;
122  const DataLayout *DL;
124  bool ApplyCodeSizeHeuristics;
125 
126 public:
127  explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
128  LoopInfo *LI, ScalarEvolution *SE,
129  TargetLibraryInfo *TLI,
130  const TargetTransformInfo *TTI,
131  const DataLayout *DL,
133  : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {}
134 
135  bool runOnLoop(Loop *L);
136 
137 private:
138  using StoreList = SmallVector<StoreInst *, 8>;
139  using StoreListMap = MapVector<Value *, StoreList>;
140 
141  StoreListMap StoreRefsForMemset;
142  StoreListMap StoreRefsForMemsetPattern;
143  StoreList StoreRefsForMemcpy;
144  bool HasMemset;
145  bool HasMemsetPattern;
146  bool HasMemcpy;
147 
148  /// Return code for isLegalStore()
149  enum LegalStoreKind {
150  None = 0,
151  Memset,
152  MemsetPattern,
153  Memcpy,
154  UnorderedAtomicMemcpy,
155  DontUse // Dummy retval never to be used. Allows catching errors in retval
156  // handling.
157  };
158 
159  /// \name Countable Loop Idiom Handling
160  /// @{
161 
162  bool runOnCountableLoop();
163  bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
164  SmallVectorImpl<BasicBlock *> &ExitBlocks);
165 
166  void collectStores(BasicBlock *BB);
167  LegalStoreKind isLegalStore(StoreInst *SI);
168  enum class ForMemset { No, Yes };
169  bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
170  ForMemset For);
171  bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
172 
173  bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
174  unsigned StoreAlignment, Value *StoredVal,
175  Instruction *TheStore,
177  const SCEVAddRecExpr *Ev, const SCEV *BECount,
178  bool NegStride, bool IsLoopMemset = false);
179  bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
180  bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
181  bool IsLoopMemset = false);
182 
183  /// @}
184  /// \name Noncountable Loop Idiom Handling
185  /// @{
186 
187  bool runOnNoncountableLoop();
188 
189  bool recognizePopcount();
190  void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
191  PHINode *CntPhi, Value *Var);
192  bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz
193  void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
194  Instruction *CntInst, PHINode *CntPhi,
195  Value *Var, Instruction *DefX,
196  const DebugLoc &DL, bool ZeroCheck,
197  bool IsCntPhiUsedOutsideLoop);
198 
199  /// @}
200 };
201 
202 class LoopIdiomRecognizeLegacyPass : public LoopPass {
203 public:
204  static char ID;
205 
206  explicit LoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
209  }
210 
211  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
212  if (skipLoop(L))
213  return false;
214 
215  AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
216  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
217  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
218  ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
219  TargetLibraryInfo *TLI =
220  &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
221  *L->getHeader()->getParent());
222  const TargetTransformInfo *TTI =
223  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
224  *L->getHeader()->getParent());
225  const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
226 
227  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
228  // pass. Function analyses need to be preserved across loop transformations
229  // but ORE cannot be preserved (see comment before the pass definition).
231 
232  LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, ORE);
233  return LIR.runOnLoop(L);
234  }
235 
236  /// This transformation requires natural loop information & requires that
237  /// loop preheaders be inserted into the CFG.
238  void getAnalysisUsage(AnalysisUsage &AU) const override {
242  }
243 };
244 
245 } // end anonymous namespace
246 
248 
251  LPMUpdater &) {
252  const auto *DL = &L.getHeader()->getModule()->getDataLayout();
253 
254  const auto &FAM =
255  AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
256  Function *F = L.getHeader()->getParent();
257 
258  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
259  // FIXME: This should probably be optional rather than required.
260  if (!ORE)
262  "LoopIdiomRecognizePass: OptimizationRemarkEmitterAnalysis not cached "
263  "at a higher level");
264 
265  LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL,
266  *ORE);
267  if (!LIR.runOnLoop(&L))
268  return PreservedAnalyses::all();
269 
271 }
272 
273 INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom",
274  "Recognize loop idioms", false, false)
278 INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass, "loop-idiom",
279  "Recognize loop idioms", false, false)
280 
// Factory for legacy pass-manager clients (declared in Transforms/Scalar.h).
Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognizeLegacyPass(); }
282 
285  I->eraseFromParent();
286 }
287 
288 //===----------------------------------------------------------------------===//
289 //
290 // Implementation of LoopIdiomRecognize
291 //
292 //===----------------------------------------------------------------------===//
293 
294 bool LoopIdiomRecognize::runOnLoop(Loop *L) {
295  CurLoop = L;
296  // If the loop could not be converted to canonical form, it must have an
297  // indirectbr in it, just give up.
298  if (!L->getLoopPreheader())
299  return false;
300 
301  // Disable loop idiom recognition if the function's name is a common idiom.
303  if (Name == "memset" || Name == "memcpy")
304  return false;
305 
306  // Determine if code size heuristics need to be applied.
307  ApplyCodeSizeHeuristics =
309 
310  HasMemset = TLI->has(LibFunc_memset);
311  HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
312  HasMemcpy = TLI->has(LibFunc_memcpy);
313 
314  if (HasMemset || HasMemsetPattern || HasMemcpy)
316  return runOnCountableLoop();
317 
318  return runOnNoncountableLoop();
319 }
320 
321 bool LoopIdiomRecognize::runOnCountableLoop() {
322  const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
323  assert(!isa<SCEVCouldNotCompute>(BECount) &&
324  "runOnCountableLoop() called on a loop without a predictable"
325  "backedge-taken count");
326 
327  // If this loop executes exactly one time, then it should be peeled, not
328  // optimized by this pass.
329  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
330  if (BECst->getAPInt() == 0)
331  return false;
332 
333  SmallVector<BasicBlock *, 8> ExitBlocks;
334  CurLoop->getUniqueExitBlocks(ExitBlocks);
335 
336  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
337  << CurLoop->getHeader()->getParent()->getName()
338  << "] Countable Loop %" << CurLoop->getHeader()->getName()
339  << "\n");
340 
341  bool MadeChange = false;
342 
343  // The following transforms hoist stores/memsets into the loop pre-header.
344  // Give up if the loop has instructions may throw.
345  SimpleLoopSafetyInfo SafetyInfo;
346  SafetyInfo.computeLoopSafetyInfo(CurLoop);
347  if (SafetyInfo.anyBlockMayThrow())
348  return MadeChange;
349 
350  // Scan all the blocks in the loop that are not in subloops.
351  for (auto *BB : CurLoop->getBlocks()) {
352  // Ignore blocks in subloops.
353  if (LI->getLoopFor(BB) != CurLoop)
354  continue;
355 
356  MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
357  }
358  return MadeChange;
359 }
360 
361 static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
362  const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
363  return ConstStride->getAPInt();
364 }
365 
366 /// getMemSetPatternValue - If a strided store of the specified value is safe to
367 /// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
368 /// be passed in. Otherwise, return null.
369 ///
370 /// Note that we don't ever attempt to use memset_pattern8 or 4, because these
371 /// just replicate their input array and then pass on to memset_pattern16.
373  // FIXME: This could check for UndefValue because it can be merged into any
374  // other valid pattern.
375 
376  // If the value isn't a constant, we can't promote it to being in a constant
377  // array. We could theoretically do a store to an alloca or something, but
378  // that doesn't seem worthwhile.
379  Constant *C = dyn_cast<Constant>(V);
380  if (!C)
381  return nullptr;
382 
383  // Only handle simple values that are a power of two bytes in size.
384  uint64_t Size = DL->getTypeSizeInBits(V->getType());
385  if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
386  return nullptr;
387 
388  // Don't care enough about darwin/ppc to implement this.
389  if (DL->isBigEndian())
390  return nullptr;
391 
392  // Convert to size in bytes.
393  Size /= 8;
394 
395  // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
396  // if the top and bottom are the same (e.g. for vectors and large integers).
397  if (Size > 16)
398  return nullptr;
399 
400  // If the constant is exactly 16 bytes, just use it.
401  if (Size == 16)
402  return C;
403 
404  // Otherwise, we'll use an array of the constants.
405  unsigned ArraySize = 16 / Size;
406  ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
407  return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
408 }
409 
410 LoopIdiomRecognize::LegalStoreKind
411 LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
412  // Don't touch volatile stores.
413  if (SI->isVolatile())
414  return LegalStoreKind::None;
415  // We only want simple or unordered-atomic stores.
416  if (!SI->isUnordered())
417  return LegalStoreKind::None;
418 
419  // Don't convert stores of non-integral pointer types to memsets (which stores
420  // integers).
421  if (DL->isNonIntegralPointerType(SI->getValueOperand()->getType()))
422  return LegalStoreKind::None;
423 
424  // Avoid merging nontemporal stores.
425  if (SI->getMetadata(LLVMContext::MD_nontemporal))
426  return LegalStoreKind::None;
427 
428  Value *StoredVal = SI->getValueOperand();
429  Value *StorePtr = SI->getPointerOperand();
430 
431  // Reject stores that are so large that they overflow an unsigned.
432  uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
433  if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
434  return LegalStoreKind::None;
435 
436  // See if the pointer expression is an AddRec like {base,+,1} on the current
437  // loop, which indicates a strided store. If we have something else, it's a
438  // random store we can't handle.
439  const SCEVAddRecExpr *StoreEv =
440  dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
441  if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
442  return LegalStoreKind::None;
443 
444  // Check to see if we have a constant stride.
445  if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
446  return LegalStoreKind::None;
447 
448  // See if the store can be turned into a memset.
449 
450  // If the stored value is a byte-wise value (like i32 -1), then it may be
451  // turned into a memset of i8 -1, assuming that all the consecutive bytes
452  // are stored. A store of i32 0x01020304 can never be turned into a memset,
453  // but it can be turned into memset_pattern if the target supports it.
454  Value *SplatValue = isBytewiseValue(StoredVal, *DL);
455  Constant *PatternValue = nullptr;
456 
457  // Note: memset and memset_pattern on unordered-atomic is yet not supported
458  bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple();
459 
460  // If we're allowed to form a memset, and the stored value would be
461  // acceptable for memset, use it.
462  if (!UnorderedAtomic && HasMemset && SplatValue &&
463  // Verify that the stored value is loop invariant. If not, we can't
464  // promote the memset.
465  CurLoop->isLoopInvariant(SplatValue)) {
466  // It looks like we can use SplatValue.
467  return LegalStoreKind::Memset;
468  } else if (!UnorderedAtomic && HasMemsetPattern &&
469  // Don't create memset_pattern16s with address spaces.
470  StorePtr->getType()->getPointerAddressSpace() == 0 &&
471  (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
472  // It looks like we can use PatternValue!
473  return LegalStoreKind::MemsetPattern;
474  }
475 
476  // Otherwise, see if the store can be turned into a memcpy.
477  if (HasMemcpy) {
478  // Check to see if the stride matches the size of the store. If so, then we
479  // know that every byte is touched in the loop.
480  APInt Stride = getStoreStride(StoreEv);
481  unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
482  if (StoreSize != Stride && StoreSize != -Stride)
483  return LegalStoreKind::None;
484 
485  // The store must be feeding a non-volatile load.
487 
488  // Only allow non-volatile loads
489  if (!LI || LI->isVolatile())
490  return LegalStoreKind::None;
491  // Only allow simple or unordered-atomic loads
492  if (!LI->isUnordered())
493  return LegalStoreKind::None;
494 
495  // See if the pointer expression is an AddRec like {base,+,1} on the current
496  // loop, which indicates a strided load. If we have something else, it's a
497  // random load we can't handle.
498  const SCEVAddRecExpr *LoadEv =
500  if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
501  return LegalStoreKind::None;
502 
503  // The store and load must share the same stride.
504  if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
505  return LegalStoreKind::None;
506 
507  // Success. This store can be converted into a memcpy.
508  UnorderedAtomic = UnorderedAtomic || LI->isAtomic();
509  return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
510  : LegalStoreKind::Memcpy;
511  }
512  // This store can't be transformed into a memset/memcpy.
513  return LegalStoreKind::None;
514 }
515 
516 void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
517  StoreRefsForMemset.clear();
518  StoreRefsForMemsetPattern.clear();
519  StoreRefsForMemcpy.clear();
520  for (Instruction &I : *BB) {
521  StoreInst *SI = dyn_cast<StoreInst>(&I);
522  if (!SI)
523  continue;
524 
525  // Make sure this is a strided store with a constant stride.
526  switch (isLegalStore(SI)) {
528  // Nothing to do
529  break;
530  case LegalStoreKind::Memset: {
531  // Find the base pointer.
532  Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
533  StoreRefsForMemset[Ptr].push_back(SI);
534  } break;
535  case LegalStoreKind::MemsetPattern: {
536  // Find the base pointer.
537  Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
538  StoreRefsForMemsetPattern[Ptr].push_back(SI);
539  } break;
540  case LegalStoreKind::Memcpy:
541  case LegalStoreKind::UnorderedAtomicMemcpy:
542  StoreRefsForMemcpy.push_back(SI);
543  break;
544  default:
545  assert(false && "unhandled return value");
546  break;
547  }
548  }
549 }
550 
551 /// runOnLoopBlock - Process the specified block, which lives in a counted loop
552 /// with the specified backedge count. This block is known to be in the current
553 /// loop and not in any subloops.
554 bool LoopIdiomRecognize::runOnLoopBlock(
555  BasicBlock *BB, const SCEV *BECount,
556  SmallVectorImpl<BasicBlock *> &ExitBlocks) {
557  // We can only promote stores in this block if they are unconditionally
558  // executed in the loop. For a block to be unconditionally executed, it has
559  // to dominate all the exit blocks of the loop. Verify this now.
560  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
561  if (!DT->dominates(BB, ExitBlocks[i]))
562  return false;
563 
564  bool MadeChange = false;
565  // Look for store instructions, which may be optimized to memset/memcpy.
566  collectStores(BB);
567 
568  // Look for a single store or sets of stores with a common base, which can be
569  // optimized into a memset (memset_pattern). The latter most commonly happens
570  // with structs and handunrolled loops.
571  for (auto &SL : StoreRefsForMemset)
572  MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);
573 
574  for (auto &SL : StoreRefsForMemsetPattern)
575  MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);
576 
577  // Optimize the store into a memcpy, if it feeds an similarly strided load.
578  for (auto &SI : StoreRefsForMemcpy)
579  MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
580 
581  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
582  Instruction *Inst = &*I++;
583  // Look for memset instructions, which may be optimized to a larger memset.
584  if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
585  WeakTrackingVH InstPtr(&*I);
586  if (!processLoopMemSet(MSI, BECount))
587  continue;
588  MadeChange = true;
589 
590  // If processing the memset invalidated our iterator, start over from the
591  // top of the block.
592  if (!InstPtr)
593  I = BB->begin();
594  continue;
595  }
596  }
597 
598  return MadeChange;
599 }
600 
601 /// See if this store(s) can be promoted to a memset.
602 bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
603  const SCEV *BECount, ForMemset For) {
604  // Try to find consecutive stores that can be transformed into memsets.
605  SetVector<StoreInst *> Heads, Tails;
607 
608  // Do a quadratic search on all of the given stores and find
609  // all of the pairs of stores that follow each other.
610  SmallVector<unsigned, 16> IndexQueue;
611  for (unsigned i = 0, e = SL.size(); i < e; ++i) {
612  assert(SL[i]->isSimple() && "Expected only non-volatile stores.");
613 
614  Value *FirstStoredVal = SL[i]->getValueOperand();
615  Value *FirstStorePtr = SL[i]->getPointerOperand();
616  const SCEVAddRecExpr *FirstStoreEv =
617  cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
618  APInt FirstStride = getStoreStride(FirstStoreEv);
619  unsigned FirstStoreSize = DL->getTypeStoreSize(SL[i]->getValueOperand()->getType());
620 
621  // See if we can optimize just this store in isolation.
622  if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
623  Heads.insert(SL[i]);
624  continue;
625  }
626 
627  Value *FirstSplatValue = nullptr;
628  Constant *FirstPatternValue = nullptr;
629 
630  if (For == ForMemset::Yes)
631  FirstSplatValue = isBytewiseValue(FirstStoredVal, *DL);
632  else
633  FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
634 
635  assert((FirstSplatValue || FirstPatternValue) &&
636  "Expected either splat value or pattern value.");
637 
638  IndexQueue.clear();
639  // If a store has multiple consecutive store candidates, search Stores
640  // array according to the sequence: from i+1 to e, then from i-1 to 0.
641  // This is because usually pairing with immediate succeeding or preceding
642  // candidate create the best chance to find memset opportunity.
643  unsigned j = 0;
644  for (j = i + 1; j < e; ++j)
645  IndexQueue.push_back(j);
646  for (j = i; j > 0; --j)
647  IndexQueue.push_back(j - 1);
648 
649  for (auto &k : IndexQueue) {
650  assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
651  Value *SecondStorePtr = SL[k]->getPointerOperand();
652  const SCEVAddRecExpr *SecondStoreEv =
653  cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
654  APInt SecondStride = getStoreStride(SecondStoreEv);
655 
656  if (FirstStride != SecondStride)
657  continue;
658 
659  Value *SecondStoredVal = SL[k]->getValueOperand();
660  Value *SecondSplatValue = nullptr;
661  Constant *SecondPatternValue = nullptr;
662 
663  if (For == ForMemset::Yes)
664  SecondSplatValue = isBytewiseValue(SecondStoredVal, *DL);
665  else
666  SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
667 
668  assert((SecondSplatValue || SecondPatternValue) &&
669  "Expected either splat value or pattern value.");
670 
671  if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
672  if (For == ForMemset::Yes) {
673  if (isa<UndefValue>(FirstSplatValue))
674  FirstSplatValue = SecondSplatValue;
675  if (FirstSplatValue != SecondSplatValue)
676  continue;
677  } else {
678  if (isa<UndefValue>(FirstPatternValue))
679  FirstPatternValue = SecondPatternValue;
680  if (FirstPatternValue != SecondPatternValue)
681  continue;
682  }
683  Tails.insert(SL[k]);
684  Heads.insert(SL[i]);
685  ConsecutiveChain[SL[i]] = SL[k];
686  break;
687  }
688  }
689  }
690 
691  // We may run into multiple chains that merge into a single chain. We mark the
692  // stores that we transformed so that we don't visit the same store twice.
693  SmallPtrSet<Value *, 16> TransformedStores;
694  bool Changed = false;
695 
696  // For stores that start but don't end a link in the chain:
697  for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
698  it != e; ++it) {
699  if (Tails.count(*it))
700  continue;
701 
702  // We found a store instr that starts a chain. Now follow the chain and try
703  // to transform it.
704  SmallPtrSet<Instruction *, 8> AdjacentStores;
705  StoreInst *I = *it;
706 
707  StoreInst *HeadStore = I;
708  unsigned StoreSize = 0;
709 
710  // Collect the chain into a list.
711  while (Tails.count(I) || Heads.count(I)) {
712  if (TransformedStores.count(I))
713  break;
714  AdjacentStores.insert(I);
715 
716  StoreSize += DL->getTypeStoreSize(I->getValueOperand()->getType());
717  // Move to the next value in the chain.
718  I = ConsecutiveChain[I];
719  }
720 
721  Value *StoredVal = HeadStore->getValueOperand();
722  Value *StorePtr = HeadStore->getPointerOperand();
723  const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
724  APInt Stride = getStoreStride(StoreEv);
725 
726  // Check to see if the stride matches the size of the stores. If so, then
727  // we know that every byte is touched in the loop.
728  if (StoreSize != Stride && StoreSize != -Stride)
729  continue;
730 
731  bool NegStride = StoreSize == -Stride;
732 
733  if (processLoopStridedStore(StorePtr, StoreSize, HeadStore->getAlignment(),
734  StoredVal, HeadStore, AdjacentStores, StoreEv,
735  BECount, NegStride)) {
736  TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
737  Changed = true;
738  }
739  }
740 
741  return Changed;
742 }
743 
744 /// processLoopMemSet - See if this memset can be promoted to a large memset.
745 bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
746  const SCEV *BECount) {
747  // We can only handle non-volatile memsets with a constant size.
748  if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
749  return false;
750 
751  // If we're not allowed to hack on memset, we fail.
752  if (!HasMemset)
753  return false;
754 
755  Value *Pointer = MSI->getDest();
756 
757  // See if the pointer expression is an AddRec like {base,+,1} on the current
758  // loop, which indicates a strided store. If we have something else, it's a
759  // random store we can't handle.
760  const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
761  if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine())
762  return false;
763 
764  // Reject memsets that are so large that they overflow an unsigned.
765  uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
766  if ((SizeInBytes >> 32) != 0)
767  return false;
768 
769  // Check to see if the stride matches the size of the memset. If so, then we
770  // know that every byte is touched in the loop.
771  const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
772  if (!ConstStride)
773  return false;
774 
775  APInt Stride = ConstStride->getAPInt();
776  if (SizeInBytes != Stride && SizeInBytes != -Stride)
777  return false;
778 
779  // Verify that the memset value is loop invariant. If not, we can't promote
780  // the memset.
781  Value *SplatValue = MSI->getValue();
782  if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
783  return false;
784 
786  MSIs.insert(MSI);
787  bool NegStride = SizeInBytes == -Stride;
788  return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
789  MSI->getDestAlignment(), SplatValue, MSI, MSIs,
790  Ev, BECount, NegStride, /*IsLoopMemset=*/true);
791 }
792 
793 /// mayLoopAccessLocation - Return true if the specified loop might access the
794 /// specified pointer location, which is a loop-strided access. The 'Access'
795 /// argument specifies what the verboten forms of access are (read or write).
796 static bool
798  const SCEV *BECount, unsigned StoreSize,
799  AliasAnalysis &AA,
800  SmallPtrSetImpl<Instruction *> &IgnoredStores) {
801  // Get the location that may be stored across the loop. Since the access is
802  // strided positively through memory, we say that the modified location starts
803  // at the pointer and has infinite size.
804  LocationSize AccessSize = LocationSize::unknown();
805 
806  // If the loop iterates a fixed number of times, we can refine the access size
807  // to be exactly the size of the memset, which is (BECount+1)*StoreSize
808  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
809  AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
810  StoreSize);
811 
812  // TODO: For this to be really effective, we have to dive into the pointer
813  // operand in the store. Store to &A[i] of 100 will always return may alias
814  // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
815  // which will then no-alias a store to &A[100].
816  MemoryLocation StoreLoc(Ptr, AccessSize);
817 
818  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
819  ++BI)
820  for (Instruction &I : **BI)
821  if (IgnoredStores.count(&I) == 0 &&
823  intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
824  return true;
825 
826  return false;
827 }
828 
829 // If we have a negative stride, Start refers to the end of the memory location
830 // we're trying to memset. Therefore, we need to recompute the base pointer,
831 // which is just Start - BECount*Size.
832 static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
833  Type *IntPtr, unsigned StoreSize,
834  ScalarEvolution *SE) {
835  const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
836  if (StoreSize != 1)
837  Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize),
838  SCEV::FlagNUW);
839  return SE->getMinusSCEV(Start, Index);
840 }
841 
842 /// Compute the number of bytes as a SCEV from the backedge taken count.
843 ///
844 /// This also maps the SCEV into the provided type and tries to handle the
845 /// computation in a way that will fold cleanly.
846 static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
847  unsigned StoreSize, Loop *CurLoop,
848  const DataLayout *DL, ScalarEvolution *SE) {
849  const SCEV *NumBytesS;
850  // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
851  // pointer size if it isn't already.
852  //
853  // If we're going to need to zero extend the BE count, check if we can add
854  // one to it prior to zero extending without overflow. Provided this is safe,
855  // it allows better simplification of the +1.
856  if (DL->getTypeSizeInBits(BECount->getType()) <
857  DL->getTypeSizeInBits(IntPtr) &&
859  CurLoop, ICmpInst::ICMP_NE, BECount,
860  SE->getNegativeSCEV(SE->getOne(BECount->getType())))) {
861  NumBytesS = SE->getZeroExtendExpr(
862  SE->getAddExpr(BECount, SE->getOne(BECount->getType()), SCEV::FlagNUW),
863  IntPtr);
864  } else {
865  NumBytesS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr),
866  SE->getOne(IntPtr), SCEV::FlagNUW);
867  }
868 
869  // And scale it based on the store size.
870  if (StoreSize != 1) {
871  NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
872  SCEV::FlagNUW);
873  }
874  return NumBytesS;
875 }
876 
877 /// processLoopStridedStore - We see a strided store of some value. If we can
878 /// transform this into a memset or memset_pattern in the loop preheader, do so.
879 bool LoopIdiomRecognize::processLoopStridedStore(
880  Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
881  Value *StoredVal, Instruction *TheStore,
883  const SCEV *BECount, bool NegStride, bool IsLoopMemset) {
884  Value *SplatValue = isBytewiseValue(StoredVal, *DL);
885  Constant *PatternValue = nullptr;
886 
887  if (!SplatValue)
888  PatternValue = getMemSetPatternValue(StoredVal, DL);
889 
890  assert((SplatValue || PatternValue) &&
891  "Expected either splat value or pattern value.");
892 
893  // The trip count of the loop and the base pointer of the addrec SCEV is
894  // guaranteed to be loop invariant, which means that it should dominate the
895  // header. This allows us to insert code for it in the preheader.
896  unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
897  BasicBlock *Preheader = CurLoop->getLoopPreheader();
898  IRBuilder<> Builder(Preheader->getTerminator());
899  SCEVExpander Expander(*SE, *DL, "loop-idiom");
900 
901  Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
902  Type *IntPtr = Builder.getIntPtrTy(*DL, DestAS);
903 
904  const SCEV *Start = Ev->getStart();
905  // Handle negative strided loops.
906  if (NegStride)
907  Start = getStartForNegStride(Start, BECount, IntPtr, StoreSize, SE);
908 
909  // TODO: ideally we should still be able to generate memset if SCEV expander
910  // is taught to generate the dependencies at the latest point.
911  if (!isSafeToExpand(Start, *SE))
912  return false;
913 
914  // Okay, we have a strided store "p[i]" of a splattable value. We can turn
915  // this into a memset in the loop preheader now if we want. However, this
916  // would be unsafe to do if there is anything else in the loop that may read
917  // or write to the aliased location. Check for any overlap by generating the
918  // base pointer and checking the region.
919  Value *BasePtr =
920  Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
921  if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
922  StoreSize, *AA, Stores)) {
923  Expander.clear();
924  // If we generated new code for the base pointer, clean up.
926  return false;
927  }
928 
929  if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
930  return false;
931 
932  // Okay, everything looks good, insert the memset.
933 
934  const SCEV *NumBytesS =
935  getNumBytes(BECount, IntPtr, StoreSize, CurLoop, DL, SE);
936 
937  // TODO: ideally we should still be able to generate memset if SCEV expander
938  // is taught to generate the dependencies at the latest point.
939  if (!isSafeToExpand(NumBytesS, *SE))
940  return false;
941 
942  Value *NumBytes =
943  Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
944 
945  CallInst *NewCall;
946  if (SplatValue) {
947  NewCall =
948  Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, StoreAlignment);
949  } else {
950  // Everything is emitted in default address space
951  Type *Int8PtrTy = DestInt8PtrTy;
952 
953  Module *M = TheStore->getModule();
954  StringRef FuncName = "memset_pattern16";
955  FunctionCallee MSP = M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
956  Int8PtrTy, Int8PtrTy, IntPtr);
957  inferLibFuncAttributes(M, FuncName, *TLI);
958 
959  // Otherwise we should form a memset_pattern16. PatternValue is known to be
960  // an constant array of 16-bytes. Plop the value into a mergable global.
961  GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
963  PatternValue, ".memset_pattern");
964  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these.
965  GV->setAlignment(16);
966  Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
967  NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
968  }
969 
970  LLVM_DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
971  << " from store to: " << *Ev << " at: " << *TheStore
972  << "\n");
973  NewCall->setDebugLoc(TheStore->getDebugLoc());
974 
975  ORE.emit([&]() {
976  return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStridedStore",
977  NewCall->getDebugLoc(), Preheader)
978  << "Transformed loop-strided store into a call to "
979  << ore::NV("NewFunction", NewCall->getCalledFunction())
980  << "() function";
981  });
982 
983  // Okay, the memset has been formed. Zap the original store and anything that
984  // feeds into it.
985  for (auto *I : Stores)
987  ++NumMemSet;
988  return true;
989 }
990 
991 /// If the stored value is a strided load in the same loop with the same stride
992 /// this may be transformable into a memcpy. This kicks in for stuff like
993 /// for (i) A[i] = B[i];
994 bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
995  const SCEV *BECount) {
996  assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.");
997 
998  Value *StorePtr = SI->getPointerOperand();
999  const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1000  APInt Stride = getStoreStride(StoreEv);
1001  unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
1002  bool NegStride = StoreSize == -Stride;
1003 
1004  // The store must be feeding a non-volatile load.
1005  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
1006  assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.");
1007 
1008  // See if the pointer expression is an AddRec like {base,+,1} on the current
1009  // loop, which indicates a strided load. If we have something else, it's a
1010  // random load we can't handle.
1011  const SCEVAddRecExpr *LoadEv =
1012  cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
1013 
1014  // The trip count of the loop and the base pointer of the addrec SCEV is
1015  // guaranteed to be loop invariant, which means that it should dominate the
1016  // header. This allows us to insert code for it in the preheader.
1017  BasicBlock *Preheader = CurLoop->getLoopPreheader();
1018  IRBuilder<> Builder(Preheader->getTerminator());
1019  SCEVExpander Expander(*SE, *DL, "loop-idiom");
1020 
1021  const SCEV *StrStart = StoreEv->getStart();
1022  unsigned StrAS = SI->getPointerAddressSpace();
1023  Type *IntPtrTy = Builder.getIntPtrTy(*DL, StrAS);
1024 
1025  // Handle negative strided loops.
1026  if (NegStride)
1027  StrStart = getStartForNegStride(StrStart, BECount, IntPtrTy, StoreSize, SE);
1028 
1029  // Okay, we have a strided store "p[i]" of a loaded value. We can turn
1030  // this into a memcpy in the loop preheader now if we want. However, this
1031  // would be unsafe to do if there is anything else in the loop that may read
1032  // or write the memory region we're storing to. This includes the load that
1033  // feeds the stores. Check for an alias by generating the base address and
1034  // checking everything.
1035  Value *StoreBasePtr = Expander.expandCodeFor(
1036  StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
1037 
1039  Stores.insert(SI);
1040  if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
1041  StoreSize, *AA, Stores)) {
1042  Expander.clear();
1043  // If we generated new code for the base pointer, clean up.
1044  RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
1045  return false;
1046  }
1047 
1048  const SCEV *LdStart = LoadEv->getStart();
1049  unsigned LdAS = LI->getPointerAddressSpace();
1050 
1051  // Handle negative strided loops.
1052  if (NegStride)
1053  LdStart = getStartForNegStride(LdStart, BECount, IntPtrTy, StoreSize, SE);
1054 
1055  // For a memcpy, we have to make sure that the input array is not being
1056  // mutated by the loop.
1057  Value *LoadBasePtr = Expander.expandCodeFor(
1058  LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
1059 
1060  if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
1061  StoreSize, *AA, Stores)) {
1062  Expander.clear();
1063  // If we generated new code for the base pointer, clean up.
1065  RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
1066  return false;
1067  }
1068 
1069  if (avoidLIRForMultiBlockLoop())
1070  return false;
1071 
1072  // Okay, everything is safe, we can transform this!
1073 
1074  const SCEV *NumBytesS =
1075  getNumBytes(BECount, IntPtrTy, StoreSize, CurLoop, DL, SE);
1076 
1077  Value *NumBytes =
1078  Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
1079 
1080  CallInst *NewCall = nullptr;
1081  // Check whether to generate an unordered atomic memcpy:
1082  // If the load or store are atomic, then they must necessarily be unordered
1083  // by previous checks.
1084  if (!SI->isAtomic() && !LI->isAtomic())
1085  NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlignment(),
1086  LoadBasePtr, LI->getAlignment(), NumBytes);
1087  else {
1088  // We cannot allow unaligned ops for unordered load/store, so reject
1089  // anything where the alignment isn't at least the element size.
1090  unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
1091  if (Align < StoreSize)
1092  return false;
1093 
1094  // If the element.atomic memcpy is not lowered into explicit
1095  // loads/stores later, then it will be lowered into an element-size
1096  // specific lib call. If the lib call doesn't exist for our store size, then
1097  // we shouldn't generate the memcpy.
1098  if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
1099  return false;
1100 
1101  // Create the call.
1102  // Note that unordered atomic loads/stores are *required* by the spec to
1103  // have an alignment but non-atomic loads/stores may not.
1104  NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
1105  StoreBasePtr, SI->getAlignment(), LoadBasePtr, LI->getAlignment(),
1106  NumBytes, StoreSize);
1107  }
1108  NewCall->setDebugLoc(SI->getDebugLoc());
1109 
1110  LLVM_DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
1111  << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
1112  << " from store ptr=" << *StoreEv << " at: " << *SI
1113  << "\n");
1114 
1115  ORE.emit([&]() {
1116  return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStoreOfLoopLoad",
1117  NewCall->getDebugLoc(), Preheader)
1118  << "Formed a call to "
1119  << ore::NV("NewFunction", NewCall->getCalledFunction())
1120  << "() function";
1121  });
1122 
1123  // Okay, the memcpy has been formed. Zap the original store and anything that
1124  // feeds into it.
1126  ++NumMemCpy;
1127  return true;
1128 }
1129 
1130 // When compiling for codesize we avoid idiom recognition for a multi-block loop
1131 // unless it is a loop_memset idiom or a memset/memcpy idiom in a nested loop.
1132 //
1133 bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
1134  bool IsLoopMemset) {
1135  if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {
1136  if (!CurLoop->getParentLoop() && (!IsMemset || !IsLoopMemset)) {
1137  LLVM_DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName()
1138  << " : LIR " << (IsMemset ? "Memset" : "Memcpy")
1139  << " avoided: multi-block top-level loop\n");
1140  return true;
1141  }
1142  }
1143 
1144  return false;
1145 }
1146 
1147 bool LoopIdiomRecognize::runOnNoncountableLoop() {
1148  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
1149  << CurLoop->getHeader()->getParent()->getName()
1150  << "] Noncountable Loop %"
1151  << CurLoop->getHeader()->getName() << "\n");
1152 
1153  return recognizePopcount() || recognizeAndInsertFFS();
1154 }
1155 
1156 /// Check if the given conditional branch is based on the comparison between
1157 /// a variable and zero, and if the variable is non-zero or zero (JmpOnZero is
1158 /// true), the control yields to the loop entry. If the branch matches the
1159 /// behavior, the variable involved in the comparison is returned. This function
1160 /// will be called to see if the precondition and postcondition of the loop are
1161 /// in desirable form.
1162 static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
1163  bool JmpOnZero = false) {
1164  if (!BI || !BI->isConditional())
1165  return nullptr;
1166 
1167  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1168  if (!Cond)
1169  return nullptr;
1170 
1171  ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
1172  if (!CmpZero || !CmpZero->isZero())
1173  return nullptr;
1174 
1175  BasicBlock *TrueSucc = BI->getSuccessor(0);
1176  BasicBlock *FalseSucc = BI->getSuccessor(1);
1177  if (JmpOnZero)
1178  std::swap(TrueSucc, FalseSucc);
1179 
1180  ICmpInst::Predicate Pred = Cond->getPredicate();
1181  if ((Pred == ICmpInst::ICMP_NE && TrueSucc == LoopEntry) ||
1182  (Pred == ICmpInst::ICMP_EQ && FalseSucc == LoopEntry))
1183  return Cond->getOperand(0);
1184 
1185  return nullptr;
1186 }
1187 
1188 // Check if the recurrence variable `VarX` is in the right form to create
1189 // the idiom. Returns the value coerced to a PHINode if so.
1191  BasicBlock *LoopEntry) {
1192  auto *PhiX = dyn_cast<PHINode>(VarX);
1193  if (PhiX && PhiX->getParent() == LoopEntry &&
1194  (PhiX->getOperand(0) == DefX || PhiX->getOperand(1) == DefX))
1195  return PhiX;
1196  return nullptr;
1197 }
1198 
1199 /// Return true iff the idiom is detected in the loop.
1200 ///
1201 /// Additionally:
1202 /// 1) \p CntInst is set to the instruction counting the population bit.
1203 /// 2) \p CntPhi is set to the corresponding phi node.
1204 /// 3) \p Var is set to the value whose population bits are being counted.
1205 ///
1206 /// The core idiom we are trying to detect is:
1207 /// \code
1208 /// if (x0 != 0)
1209 /// goto loop-exit // the precondition of the loop
1210 /// cnt0 = init-val;
1211 /// do {
1212 /// x1 = phi (x0, x2);
1213 /// cnt1 = phi(cnt0, cnt2);
1214 ///
1215 /// cnt2 = cnt1 + 1;
1216 /// ...
1217 /// x2 = x1 & (x1 - 1);
1218 /// ...
1219 /// } while(x != 0);
1220 ///
1221 /// loop-exit:
1222 /// \endcode
1223 static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
1224  Instruction *&CntInst, PHINode *&CntPhi,
1225  Value *&Var) {
1226  // step 1: Check to see if the look-back branch match this pattern:
1227  // "if (a!=0) goto loop-entry".
1228  BasicBlock *LoopEntry;
1229  Instruction *DefX2, *CountInst;
1230  Value *VarX1, *VarX0;
1231  PHINode *PhiX, *CountPhi;
1232 
1233  DefX2 = CountInst = nullptr;
1234  VarX1 = VarX0 = nullptr;
1235  PhiX = CountPhi = nullptr;
1236  LoopEntry = *(CurLoop->block_begin());
1237 
1238  // step 1: Check if the loop-back branch is in desirable form.
1239  {
1240  if (Value *T = matchCondition(
1241  dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
1242  DefX2 = dyn_cast<Instruction>(T);
1243  else
1244  return false;
1245  }
1246 
1247  // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
1248  {
1249  if (!DefX2 || DefX2->getOpcode() != Instruction::And)
1250  return false;
1251 
1252  BinaryOperator *SubOneOp;
1253 
1254  if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
1255  VarX1 = DefX2->getOperand(1);
1256  else {
1257  VarX1 = DefX2->getOperand(0);
1258  SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
1259  }
1260  if (!SubOneOp || SubOneOp->getOperand(0) != VarX1)
1261  return false;
1262 
1263  ConstantInt *Dec = dyn_cast<ConstantInt>(SubOneOp->getOperand(1));
1264  if (!Dec ||
1265  !((SubOneOp->getOpcode() == Instruction::Sub && Dec->isOne()) ||
1266  (SubOneOp->getOpcode() == Instruction::Add &&
1267  Dec->isMinusOne()))) {
1268  return false;
1269  }
1270  }
1271 
1272  // step 3: Check the recurrence of variable X
1273  PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry);
1274  if (!PhiX)
1275  return false;
1276 
1277  // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
1278  {
1279  CountInst = nullptr;
1280  for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
1281  IterE = LoopEntry->end();
1282  Iter != IterE; Iter++) {
1283  Instruction *Inst = &*Iter;
1284  if (Inst->getOpcode() != Instruction::Add)
1285  continue;
1286 
1287  ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
1288  if (!Inc || !Inc->isOne())
1289  continue;
1290 
1291  PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
1292  if (!Phi)
1293  continue;
1294 
1295  // Check if the result of the instruction is live of the loop.
1296  bool LiveOutLoop = false;
1297  for (User *U : Inst->users()) {
1298  if ((cast<Instruction>(U))->getParent() != LoopEntry) {
1299  LiveOutLoop = true;
1300  break;
1301  }
1302  }
1303 
1304  if (LiveOutLoop) {
1305  CountInst = Inst;
1306  CountPhi = Phi;
1307  break;
1308  }
1309  }
1310 
1311  if (!CountInst)
1312  return false;
1313  }
1314 
1315  // step 5: check if the precondition is in this form:
1316  // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
1317  {
1318  auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1319  Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader());
1320  if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
1321  return false;
1322 
1323  CntInst = CountInst;
1324  CntPhi = CountPhi;
1325  Var = T;
1326  }
1327 
1328  return true;
1329 }
1330 
1331 /// Return true if the idiom is detected in the loop.
1332 ///
1333 /// Additionally:
1334 /// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
1335 /// or nullptr if there is no such.
1336 /// 2) \p CntPhi is set to the corresponding phi node
1337 /// or nullptr if there is no such.
1338 /// 3) \p Var is set to the value whose CTLZ could be used.
1339 /// 4) \p DefX is set to the instruction calculating Loop exit condition.
1340 ///
1341 /// The core idiom we are trying to detect is:
1342 /// \code
1343 /// if (x0 == 0)
1344 /// goto loop-exit // the precondition of the loop
1345 /// cnt0 = init-val;
1346 /// do {
1347 /// x = phi (x0, x.next); //PhiX
1348 /// cnt = phi(cnt0, cnt.next);
1349 ///
1350 /// cnt.next = cnt + 1;
1351 /// ...
1352 /// x.next = x >> 1; // DefX
1353 /// ...
1354 /// } while(x.next != 0);
1355 ///
1356 /// loop-exit:
1357 /// \endcode
1358 static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
1359  Intrinsic::ID &IntrinID, Value *&InitX,
1360  Instruction *&CntInst, PHINode *&CntPhi,
1361  Instruction *&DefX) {
1362  BasicBlock *LoopEntry;
1363  Value *VarX = nullptr;
1364 
1365  DefX = nullptr;
1366  CntInst = nullptr;
1367  CntPhi = nullptr;
1368  LoopEntry = *(CurLoop->block_begin());
1369 
1370  // step 1: Check if the loop-back branch is in desirable form.
1371  if (Value *T = matchCondition(
1372  dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
1373  DefX = dyn_cast<Instruction>(T);
1374  else
1375  return false;
1376 
1377  // step 2: detect instructions corresponding to "x.next = x >> 1 or x << 1"
1378  if (!DefX || !DefX->isShift())
1379  return false;
1380  IntrinID = DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz :
1381  Intrinsic::ctlz;
1382  ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
1383  if (!Shft || !Shft->isOne())
1384  return false;
1385  VarX = DefX->getOperand(0);
1386 
1387  // step 3: Check the recurrence of variable X
1388  PHINode *PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
1389  if (!PhiX)
1390  return false;
1391 
1392  InitX = PhiX->getIncomingValueForBlock(CurLoop->getLoopPreheader());
1393 
1394  // Make sure the initial value can't be negative otherwise the ashr in the
1395  // loop might never reach zero which would make the loop infinite.
1396  if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, DL))
1397  return false;
1398 
1399  // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
1400  // TODO: We can skip the step. If loop trip count is known (CTLZ),
1401  // then all uses of "cnt.next" could be optimized to the trip count
1402  // plus "cnt0". Currently it is not optimized.
1403  // This step could be used to detect POPCNT instruction:
1404  // cnt.next = cnt + (x.next & 1)
1405  for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
1406  IterE = LoopEntry->end();
1407  Iter != IterE; Iter++) {
1408  Instruction *Inst = &*Iter;
1409  if (Inst->getOpcode() != Instruction::Add)
1410  continue;
1411 
1412  ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
1413  if (!Inc || !Inc->isOne())
1414  continue;
1415 
1416  PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
1417  if (!Phi)
1418  continue;
1419 
1420  CntInst = Inst;
1421  CntPhi = Phi;
1422  break;
1423  }
1424  if (!CntInst)
1425  return false;
1426 
1427  return true;
1428 }
1429 
1430 /// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
1431 /// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
1432 /// trip count returns true; otherwise, returns false.
1433 bool LoopIdiomRecognize::recognizeAndInsertFFS() {
1434  // Give up if the loop has multiple blocks or multiple backedges.
1435  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
1436  return false;
1437 
1438  Intrinsic::ID IntrinID;
1439  Value *InitX;
1440  Instruction *DefX = nullptr;
1441  PHINode *CntPhi = nullptr;
1442  Instruction *CntInst = nullptr;
1443  // Help decide if transformation is profitable. For ShiftUntilZero idiom,
1444  // this is always 6.
1445  size_t IdiomCanonicalSize = 6;
1446 
1447  if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
1448  CntInst, CntPhi, DefX))
1449  return false;
1450 
1451  bool IsCntPhiUsedOutsideLoop = false;
1452  for (User *U : CntPhi->users())
1453  if (!CurLoop->contains(cast<Instruction>(U))) {
1454  IsCntPhiUsedOutsideLoop = true;
1455  break;
1456  }
1457  bool IsCntInstUsedOutsideLoop = false;
1458  for (User *U : CntInst->users())
1459  if (!CurLoop->contains(cast<Instruction>(U))) {
1460  IsCntInstUsedOutsideLoop = true;
1461  break;
1462  }
1463  // If both CntInst and CntPhi are used outside the loop the profitability
1464  // is questionable.
1465  if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
1466  return false;
1467 
1468  // For some CPUs result of CTLZ(X) intrinsic is undefined
1469  // when X is 0. If we can not guarantee X != 0, we need to check this
1470  // when expand.
1471  bool ZeroCheck = false;
1472  // It is safe to assume Preheader exist as it was checked in
1473  // parent function RunOnLoop.
1474  BasicBlock *PH = CurLoop->getLoopPreheader();
1475 
1476  // If we are using the count instruction outside the loop, make sure we
1477  // have a zero check as a precondition. Without the check the loop would run
1478  // one iteration for before any check of the input value. This means 0 and 1
1479  // would have identical behavior in the original loop and thus
1480  if (!IsCntPhiUsedOutsideLoop) {
1481  auto *PreCondBB = PH->getSinglePredecessor();
1482  if (!PreCondBB)
1483  return false;
1484  auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1485  if (!PreCondBI)
1486  return false;
1487  if (matchCondition(PreCondBI, PH) != InitX)
1488  return false;
1489  ZeroCheck = true;
1490  }
1491 
1492  // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
1493  // profitable if we delete the loop.
1494 
1495  // the loop has only 6 instructions:
1496  // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
1497  // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
1498  // %shr = ashr %n.addr.0, 1
1499  // %tobool = icmp eq %shr, 0
1500  // %inc = add nsw %i.0, 1
1501  // br i1 %tobool
1502 
1503  const Value *Args[] =
1504  {InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext())
1505  : ConstantInt::getFalse(InitX->getContext())};
1506 
1507  // @llvm.dbg doesn't count as they have no semantic effect.
1508  auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
1510  std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
1511 
1512  if (HeaderSize != IdiomCanonicalSize &&
1513  TTI->getIntrinsicCost(IntrinID, InitX->getType(), Args) >
1515  return false;
1516 
1517  transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
1518  DefX->getDebugLoc(), ZeroCheck,
1519  IsCntPhiUsedOutsideLoop);
1520  return true;
1521 }
1522 
1523 /// Recognizes a population count idiom in a non-countable loop.
1524 ///
1525 /// If detected, transforms the relevant code to issue the popcount intrinsic
1526 /// function call, and returns true; otherwise, returns false.
1527 bool LoopIdiomRecognize::recognizePopcount() {
1528  if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
1529  return false;
1530 
1531  // Counting population are usually conducted by few arithmetic instructions.
1532  // Such instructions can be easily "absorbed" by vacant slots in a
1533  // non-compact loop. Therefore, recognizing popcount idiom only makes sense
1534  // in a compact loop.
1535 
1536  // Give up if the loop has multiple blocks or multiple backedges.
1537  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
1538  return false;
1539 
1540  BasicBlock *LoopBody = *(CurLoop->block_begin());
1541  if (LoopBody->size() >= 20) {
1542  // The loop is too big, bail out.
1543  return false;
1544  }
1545 
1546  // It should have a preheader containing nothing but an unconditional branch.
1547  BasicBlock *PH = CurLoop->getLoopPreheader();
1548  if (!PH || &PH->front() != PH->getTerminator())
1549  return false;
1550  auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator());
1551  if (!EntryBI || EntryBI->isConditional())
1552  return false;
1553 
1554  // It should have a precondition block where the generated popcount intrinsic
1555  // function can be inserted.
1556  auto *PreCondBB = PH->getSinglePredecessor();
1557  if (!PreCondBB)
1558  return false;
1559  auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1560  if (!PreCondBI || PreCondBI->isUnconditional())
1561  return false;
1562 
1563  Instruction *CntInst;
1564  PHINode *CntPhi;
1565  Value *Val;
1566  if (!detectPopcountIdiom(CurLoop, PreCondBB, CntInst, CntPhi, Val))
1567  return false;
1568 
1569  transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
1570  return true;
1571 }
1572 
1574  const DebugLoc &DL) {
1575  Value *Ops[] = {Val};
1576  Type *Tys[] = {Val->getType()};
1577 
1578  Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
1579  Function *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
1580  CallInst *CI = IRBuilder.CreateCall(Func, Ops);
1581  CI->setDebugLoc(DL);
1582 
1583  return CI;
1584 }
1585 
1587  const DebugLoc &DL, bool ZeroCheck,
1588  Intrinsic::ID IID) {
1589  Value *Ops[] = {Val, ZeroCheck ? IRBuilder.getTrue() : IRBuilder.getFalse()};
1590  Type *Tys[] = {Val->getType()};
1591 
1592  Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
1593  Function *Func = Intrinsic::getDeclaration(M, IID, Tys);
1594  CallInst *CI = IRBuilder.CreateCall(Func, Ops);
1595  CI->setDebugLoc(DL);
1596 
1597  return CI;
1598 }
1599 
1600 /// Transform the following loop (Using CTLZ, CTTZ is similar):
1601 /// loop:
1602 /// CntPhi = PHI [Cnt0, CntInst]
1603 /// PhiX = PHI [InitX, DefX]
1604 /// CntInst = CntPhi + 1
1605 /// DefX = PhiX >> 1
1606 /// LOOP_BODY
1607 /// Br: loop if (DefX != 0)
1608 /// Use(CntPhi) or Use(CntInst)
1609 ///
1610 /// Into:
1611 /// If CntPhi used outside the loop:
1612 /// CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1)
1613 /// Count = CountPrev + 1
1614 /// else
1615 /// Count = BitWidth(InitX) - CTLZ(InitX)
1616 /// loop:
1617 /// CntPhi = PHI [Cnt0, CntInst]
1618 /// PhiX = PHI [InitX, DefX]
1619 /// PhiCount = PHI [Count, Dec]
1620 /// CntInst = CntPhi + 1
1621 /// DefX = PhiX >> 1
1622 /// Dec = PhiCount - 1
1623 /// LOOP_BODY
1624 /// Br: loop if (Dec != 0)
1625 /// Use(CountPrev + Cnt0) // Use(CntPhi)
1626 /// or
1627 /// Use(Count + Cnt0) // Use(CntInst)
1628 ///
1629 /// If LOOP_BODY is empty the loop will be deleted.
1630 /// If CntInst and DefX are not used in LOOP_BODY they will be removed.
1631 void LoopIdiomRecognize::transformLoopToCountable(
1632  Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
1633  PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
1634  bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
1635  BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
1636 
1637  // Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
1638  IRBuilder<> Builder(PreheaderBr);
1639  Builder.SetCurrentDebugLocation(DL);
1640  Value *FFS, *Count, *CountPrev, *NewCount, *InitXNext;
1641 
1642  // Count = BitWidth - CTLZ(InitX);
1643  // If there are uses of CntPhi create:
1644  // CountPrev = BitWidth - CTLZ(InitX >> 1);
1645  if (IsCntPhiUsedOutsideLoop) {
1646  if (DefX->getOpcode() == Instruction::AShr)
1647  InitXNext =
1648  Builder.CreateAShr(InitX, ConstantInt::get(InitX->getType(), 1));
1649  else if (DefX->getOpcode() == Instruction::LShr)
1650  InitXNext =
1651  Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1));
1652  else if (DefX->getOpcode() == Instruction::Shl) // cttz
1653  InitXNext =
1654  Builder.CreateShl(InitX, ConstantInt::get(InitX->getType(), 1));
1655  else
1656  llvm_unreachable("Unexpected opcode!");
1657  } else
1658  InitXNext = InitX;
1659  FFS = createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
1660  Count = Builder.CreateSub(
1661  ConstantInt::get(FFS->getType(),
1662  FFS->getType()->getIntegerBitWidth()),
1663  FFS);
1664  if (IsCntPhiUsedOutsideLoop) {
1665  CountPrev = Count;
1666  Count = Builder.CreateAdd(
1667  CountPrev,
1668  ConstantInt::get(CountPrev->getType(), 1));
1669  }
1670 
1671  NewCount = Builder.CreateZExtOrTrunc(
1672  IsCntPhiUsedOutsideLoop ? CountPrev : Count,
1673  cast<IntegerType>(CntInst->getType()));
1674 
1675  // If the counter's initial value is not zero, insert Add Inst.
1676  Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
1677  ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
1678  if (!InitConst || !InitConst->isZero())
1679  NewCount = Builder.CreateAdd(NewCount, CntInitVal);
1680 
1681  // Step 2: Insert new IV and loop condition:
1682  // loop:
1683  // ...
1684  // PhiCount = PHI [Count, Dec]
1685  // ...
1686  // Dec = PhiCount - 1
1687  // ...
1688  // Br: loop if (Dec != 0)
1689  BasicBlock *Body = *(CurLoop->block_begin());
1690  auto *LbBr = cast<BranchInst>(Body->getTerminator());
1691  ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
1692  Type *Ty = Count->getType();
1693 
1694  PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
1695 
1696  Builder.SetInsertPoint(LbCond);
1697  Instruction *TcDec = cast<Instruction>(
1698  Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
1699  "tcdec", false, true));
1700 
1701  TcPhi->addIncoming(Count, Preheader);
1702  TcPhi->addIncoming(TcDec, Body);
1703 
1704  CmpInst::Predicate Pred =
1705  (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
1706  LbCond->setPredicate(Pred);
1707  LbCond->setOperand(0, TcDec);
1708  LbCond->setOperand(1, ConstantInt::get(Ty, 0));
1709 
1710  // Step 3: All the references to the original counter outside
1711  // the loop are replaced with the NewCount
1712  if (IsCntPhiUsedOutsideLoop)
1713  CntPhi->replaceUsesOutsideBlock(NewCount, Body);
1714  else
1715  CntInst->replaceUsesOutsideBlock(NewCount, Body);
1716 
1717  // step 4: Forget the "non-computable" trip-count SCEV associated with the
1718  // loop. The loop would otherwise not be deleted even if it becomes empty.
1719  SE->forgetLoop(CurLoop);
1720 }
1721 
1722 void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
1723  Instruction *CntInst,
1724  PHINode *CntPhi, Value *Var) {
1725  BasicBlock *PreHead = CurLoop->getLoopPreheader();
1726  auto *PreCondBr = cast<BranchInst>(PreCondBB->getTerminator());
1727  const DebugLoc &DL = CntInst->getDebugLoc();
1728 
1729  // Assuming before transformation, the loop is following:
1730  // if (x) // the precondition
1731  // do { cnt++; x &= x - 1; } while(x);
1732 
1733  // Step 1: Insert the ctpop instruction at the end of the precondition block
1734  IRBuilder<> Builder(PreCondBr);
1735  Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
1736  {
1737  PopCnt = createPopcntIntrinsic(Builder, Var, DL);
1738  NewCount = PopCntZext =
1739  Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
1740 
1741  if (NewCount != PopCnt)
1742  (cast<Instruction>(NewCount))->setDebugLoc(DL);
1743 
1744  // TripCnt is exactly the number of iterations the loop has
1745  TripCnt = NewCount;
1746 
1747  // If the population counter's initial value is not zero, insert Add Inst.
1748  Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
1749  ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
1750  if (!InitConst || !InitConst->isZero()) {
1751  NewCount = Builder.CreateAdd(NewCount, CntInitVal);
1752  (cast<Instruction>(NewCount))->setDebugLoc(DL);
1753  }
1754  }
1755 
1756  // Step 2: Replace the precondition from "if (x == 0) goto loop-exit" to
1757  // "if (NewCount == 0) loop-exit". Without this change, the intrinsic
1758  // function would be partial dead code, and downstream passes will drag
1759  // it back from the precondition block to the preheader.
1760  {
1761  ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
1762 
1763  Value *Opnd0 = PopCntZext;
1764  Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
1765  if (PreCond->getOperand(0) != Var)
1766  std::swap(Opnd0, Opnd1);
1767 
1768  ICmpInst *NewPreCond = cast<ICmpInst>(
1769  Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
1770  PreCondBr->setCondition(NewPreCond);
1771 
1773  }
1774 
1775  // Step 3: Note that the population count is exactly the trip count of the
1776  // loop in question, which enable us to convert the loop from noncountable
1777  // loop into a countable one. The benefit is twofold:
1778  //
1779  // - If the loop only counts population, the entire loop becomes dead after
1780  // the transformation. It is a lot easier to prove a countable loop dead
1781  // than to prove a noncountable one. (In some C dialects, an infinite loop
1782  // isn't dead even if it computes nothing useful. In general, DCE needs
1783  // to prove a noncountable loop finite before safely delete it.)
1784  //
1785  // - If the loop also performs something else, it remains alive.
1786  // Since it is transformed to countable form, it can be aggressively
1787  // optimized by some optimizations which are in general not applicable
1788  // to a noncountable loop.
1789  //
1790  // After this step, this loop (conceptually) would look like following:
1791  // newcnt = __builtin_ctpop(x);
1792  // t = newcnt;
1793  // if (x)
1794  // do { cnt++; x &= x-1; t--) } while (t > 0);
1795  BasicBlock *Body = *(CurLoop->block_begin());
1796  {
1797  auto *LbBr = cast<BranchInst>(Body->getTerminator());
1798  ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
1799  Type *Ty = TripCnt->getType();
1800 
1801  PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
1802 
1803  Builder.SetInsertPoint(LbCond);
1804  Instruction *TcDec = cast<Instruction>(
1805  Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
1806  "tcdec", false, true));
1807 
1808  TcPhi->addIncoming(TripCnt, PreHead);
1809  TcPhi->addIncoming(TcDec, Body);
1810 
1811  CmpInst::Predicate Pred =
1812  (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
1813  LbCond->setPredicate(Pred);
1814  LbCond->setOperand(0, TcDec);
1815  LbCond->setOperand(1, ConstantInt::get(Ty, 0));
1816  }
1817 
1818  // Step 4: All the references to the original population counter outside
1819  // the loop are replaced with the NewCount -- the value returned from
1820  // __builtin_ctpop().
1821  CntInst->replaceUsesOutsideBlock(NewCount, Body);
1822 
1823  // step 5: Forget the "non-computable" trip-count SCEV associated with the
1824  // loop. The loop would otherwise not be deleted even if it becomes empty.
1825  SE->forgetLoop(CurLoop);
1826 }
The access may reference and may modify the value stored in memory.
virtual void computeLoopSafetyInfo(const Loop *CurLoop)
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:49
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:80
uint64_t CallInst * C
Value * getValueOperand()
Definition: Instructions.h:409
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:67
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:616
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2212
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static CallInst * createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:1888
const SCEV * getConstant(ConstantInt *V)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:776
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
BinaryOps getOpcode() const
Definition: InstrTypes.h:402
bool isAtomic() const
Return true if this instruction has an AtomicOrdering of unordered or higher.
ArrayRef< BasicBlock *>::const_iterator block_iterator
Definition: LoopInfo.h:158
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:622
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
static constexpr LocationSize unknown()
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:165
bool isUnordered() const
Definition: Instructions.h:403
void push_back(const T &Elt)
Definition: SmallVector.h:211
The main scalar evolution driver.
This class represents a function call, abstracting a target machine&#39;s calling convention.
static PHINode * getRecurrenceVar(Value *VarX, Instruction *DefX, BasicBlock *LoopEntry)
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:160
Value * getValue() const
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:733
static LocationSize precise(uint64_t Value)
This class wraps the llvm.memset intrinsic.
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:33
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
F(f)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:606
An instruction for reading from memory.
Definition: Instructions.h:167
static Value * matchCondition(BranchInst *BI, BasicBlock *LoopEntry, bool JmpOnZero=false)
Check if the given conditional branch is based on the comparison between a variable and zero...
Value * getCondition() const
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:144
void setAlignment(unsigned Align)
FIXME: Remove this setter once the migration to MaybeAlign is over.
Definition: Globals.cpp:116
Value * getLength() const
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:92
static Constant * get(ArrayType *T, ArrayRef< Constant *> V)
Definition: Constants.cpp:1014
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:273
Simple and conservative implementation of LoopSafetyInfo that can give false-positive answers to its ...
Definition: MustExecute.h:104
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:140
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Definition: Instructions.h:231
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:928
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:779
This file contains the simple types necessary to represent the attributes associated with functions a...
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1118
unsigned getDestAlignment() const
This file implements a class to represent arbitrary precision integral constant values and operations...
const APInt & getAPInt() const
BlockT * getHeader() const
Definition: LoopInfo.h:105
static bool isSimple(Instruction *I)
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:200
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:418
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:181
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
This node represents a polynomial recurrence on the trip count of the specified loop.
static APInt getStoreStride(const SCEVAddRecExpr *StoreEv)
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:126
Class to represent array types.
Definition: DerivedTypes.h:403
static cl::opt< bool > UseLIRCodeSizeHeurs("use-lir-code-size-heurs", cl::desc("Use loop idiom recognition code size heuristics when compiling" "with -Os/-Oz"), cl::init(true), cl::Hidden)
bool has(LibFunc F) const
Tests whether a library function is available.
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:244
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1135
void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry &)
An instruction for storing to memory.
Definition: Instructions.h:320
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:208
bool isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Returns true if the give value is known to be non-negative.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
static void deleteDeadInstruction(Instruction *I)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1079
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:132
Value * getOperand(unsigned i) const
Definition: User.h:169
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:210
bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS. ...
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1804
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
bool inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI)
Analyze the name and prototype of the given function and set any applicable attributes.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
const SCEV * getOperand(unsigned i) const
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:196
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:328
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:240
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:323
Conditional or Unconditional Branch instruction.
static CallInst * createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL, bool ZeroCheck, Intrinsic::ID IID)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
Value * getIncomingValueForBlock(const BasicBlock *BB) const
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const Instruction & front() const
Definition: BasicBlock.h:285
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
size_t size() const
Definition: BasicBlock.h:283
Diagnostic information for applied optimization remarks.
bool isUnordered() const
Definition: Instructions.h:278
Represent the analysis usage information of a pass.
This instruction compares its operands according to the predicate given to the constructor.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:732
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL, Intrinsic::ID &IntrinID, Value *&InitX, Instruction *&CntInst, PHINode *&CntPhi, Instruction *&DefX)
Return true if the idiom is detected in the loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
Value * getPointerOperand()
Definition: Instructions.h:284
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
self_iterator getIterator()
Definition: ilist_node.h:81
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:440
Used in the streaming interface as the general argument type.
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1446
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
size_t size() const
Definition: SmallVector.h:52
static wasm::ValType getType(const TargetRegisterClass *RC)
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:219
bool isVolatile() const
const SCEV * getMulExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:40
Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
Representation for a specific memory location.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
typename vector_type::const_iterator iterator
Definition: SetVector.h:48
Iterator for intrusive lists based on ilist_node.
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
Type * getType() const
Return the LLVM type of this SCEV expression.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:275
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1160
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:248
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:653
bool isConditional() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T, AttributeList AttributeList)
Look up the specified function in the module symbol table.
Definition: Module.cpp:143
static Constant * getMemSetPatternValue(Value *V, const DataLayout *DL)
getMemSetPatternValue - If a strided store of the specified value is safe to turn into a memset_patte...
static const SCEV * getNumBytes(const SCEV *BECount, Type *IntPtr, unsigned StoreSize, Loop *CurLoop, const DataLayout *DL, ScalarEvolution *SE)
Compute the number of bytes as a SCEV from the backedge taken count.
static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, Instruction *&CntInst, PHINode *&CntPhi, Value *&Var)
Return true iff the idiom is detected in the loop.
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:609
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
The access may modify the value stored in memory.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
signed less or equal
Definition: InstrTypes.h:762
loop Recognize loop idioms
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:55
Class for arbitrary precision integers.
Definition: APInt.h:69
loop idiom
iterator_range< user_iterator > users()
Definition: Value.h:419
This class uses information about analyze scalars to rewrite expressions in canonical form...
Pass * createLoopIdiomPass()
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition: IRBuilder.h:328
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:598
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:807
bool isVolatile() const
Return true if this is a store to a volatile memory location.
Definition: Instructions.h:353
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:331
INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom", "Recognize loop idioms", false, false) INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
void setUnnamedAddr(UnnamedAddr Val)
Definition: GlobalValue.h:219
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:240
This class represents an analyzed expression in the program.
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
This file provides utility analysis objects describing memory locations.
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1287
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
LLVM_NODISCARD ModRefInfo intersectModRef(const ModRefInfo MRI1, const ModRefInfo MRI2)
#define I(x, y, z)
Definition: MD5.cpp:58
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:192
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:138
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:582
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
block_iterator block_end() const
Definition: LoopInfo.h:160
uint32_t Size
Definition: Profile.cpp:46
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2237
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:365
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:290
static const SCEV * getStartForNegStride(const SCEV *Start, const SCEV *BECount, Type *IntPtr, unsigned StoreSize, ScalarEvolution *SE)
const SCEV * getBackedgeTakenCount(const Loop *L)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
The cost of a typical &#39;add&#39; instruction.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
LLVM Value Representation.
Definition: Value.h:73
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
A vector that has set insertion semantics.
Definition: SetVector.h:40
static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, const SCEV *BECount, unsigned StoreSize, AliasAnalysis &AA, SmallPtrSetImpl< Instruction *> &IgnoredStores)
mayLoopAccessLocation - Return true if the specified loop might access the specified pointer location...
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
unsigned greater than
Definition: InstrTypes.h:755
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
A container for analyses that lazily runs them and caches their results.
void replaceUsesOutsideBlock(Value *V, BasicBlock *BB)
replaceUsesOutsideBlock - Go through the uses list for this definition and make each use point to "V"...
Definition: Value.cpp:439
This pass exposes codegen information to IR-level passes.
LLVM_NODISCARD bool isModOrRefSet(const ModRefInfo MRI)
bool isSimple() const
Definition: Instructions.h:401
This header defines various interfaces for pass management in LLVM.
bool isBigEndian() const
Definition: DataLayout.h:234
#define DEBUG_TYPE
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
getModRefInfo (for call sites) - Return information about whether a particular call site modifies or ...
virtual bool anyBlockMayThrow() const
Returns true iff any block of the loop for which this info is contains an instruction that may throw ...
Definition: MustExecute.cpp:45
#define LLVM_DEBUG(X)
Definition: Debug.h:122
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
block_iterator block_begin() const
Definition: LoopInfo.h:159
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count...
Value * getPointerOperand()
Definition: Instructions.h:412
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
The optimization diagnostic interface.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
This class represents a constant integer value.