LLVM  9.0.0svn
LoopIdiomRecognize.cpp
Go to the documentation of this file.
1 //===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements an idiom recognizer that transforms simple loops into a
10 // non-loop form. In cases that this kicks in, it can be a significant
11 // performance win.
12 //
13 // If compiling for code size we avoid idiom recognition if the resulting
14 // code could be larger than the code for the original loop. One way this could
15 // happen is if the loop is not removable after idiom recognition due to the
16 // presence of non-idiom instructions. The initial implementation of the
17 // heuristics applies to idioms in multi-block loops.
18 //
19 //===----------------------------------------------------------------------===//
20 //
21 // TODO List:
22 //
23 // Future loop memory idioms to recognize:
24 // memcmp, memmove, strlen, etc.
25 // Future floating point idioms to recognize in -ffast-math mode:
26 // fpowi
27 // Future integer operation idioms to recognize:
28 // ctpop
29 //
30 // Beware that isel's default lowering for ctpop is highly inefficient for
31 // i64 and larger types when i64 is legal and the value has few bits set. It
32 // would be good to enhance isel to emit a loop for ctpop in this case.
33 //
34 // This could recognize common matrix multiplies and dot product idioms and
35 // replace them with calls to BLAS (if linked in??).
36 //
37 //===----------------------------------------------------------------------===//
38 
39 #include "llvm/ADT/APInt.h"
40 #include "llvm/ADT/ArrayRef.h"
41 #include "llvm/ADT/DenseMap.h"
42 #include "llvm/ADT/MapVector.h"
43 #include "llvm/ADT/SetVector.h"
44 #include "llvm/ADT/SmallPtrSet.h"
45 #include "llvm/ADT/SmallVector.h"
46 #include "llvm/ADT/Statistic.h"
47 #include "llvm/ADT/StringRef.h"
50 #include "llvm/Analysis/LoopInfo.h"
51 #include "llvm/Analysis/LoopPass.h"
60 #include "llvm/IR/Attributes.h"
61 #include "llvm/IR/BasicBlock.h"
62 #include "llvm/IR/Constant.h"
63 #include "llvm/IR/Constants.h"
64 #include "llvm/IR/DataLayout.h"
65 #include "llvm/IR/DebugLoc.h"
66 #include "llvm/IR/DerivedTypes.h"
67 #include "llvm/IR/Dominators.h"
68 #include "llvm/IR/GlobalValue.h"
69 #include "llvm/IR/GlobalVariable.h"
70 #include "llvm/IR/IRBuilder.h"
71 #include "llvm/IR/InstrTypes.h"
72 #include "llvm/IR/Instruction.h"
73 #include "llvm/IR/Instructions.h"
74 #include "llvm/IR/IntrinsicInst.h"
75 #include "llvm/IR/Intrinsics.h"
76 #include "llvm/IR/LLVMContext.h"
77 #include "llvm/IR/Module.h"
78 #include "llvm/IR/PassManager.h"
79 #include "llvm/IR/Type.h"
80 #include "llvm/IR/User.h"
81 #include "llvm/IR/Value.h"
82 #include "llvm/IR/ValueHandle.h"
83 #include "llvm/Pass.h"
84 #include "llvm/Support/Casting.h"
86 #include "llvm/Support/Debug.h"
88 #include "llvm/Transforms/Scalar.h"
92 #include <algorithm>
93 #include <cassert>
94 #include <cstdint>
95 #include <utility>
96 #include <vector>
97 
98 using namespace llvm;
99 
100 #define DEBUG_TYPE "loop-idiom"
101 
102 STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
103 STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
104 
106  "use-lir-code-size-heurs",
107  cl::desc("Use loop idiom recognition code size heuristics when compiling"
108  "with -Os/-Oz"),
109  cl::init(true), cl::Hidden);
110 
111 namespace {
112 
/// Recognizes memory idioms (memset/memset_pattern16/memcpy) in countable
/// loops and value idioms (popcount, ctlz/cttz) in noncountable loops, and
/// rewrites the loop into the corresponding intrinsic/library call.
class LoopIdiomRecognize {
  Loop *CurLoop = nullptr; // Loop currently being transformed.
  AliasAnalysis *AA;
  DominatorTree *DT;
  LoopInfo *LI;
  ScalarEvolution *SE;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  const DataLayout *DL;
  // Whether -Os/-Oz style size heuristics should gate the transforms
  // (set in runOnLoop before any transformation is attempted).
  bool ApplyCodeSizeHeuristics;

public:
  explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
                              LoopInfo *LI, ScalarEvolution *SE,
                              TargetLibraryInfo *TLI,
                              const TargetTransformInfo *TTI,
                              const DataLayout *DL)
      : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL) {}

  /// Entry point; returns true if \p L was changed.
  bool runOnLoop(Loop *L);

private:
  using StoreList = SmallVector<StoreInst *, 8>;
  using StoreListMap = MapVector<Value *, StoreList>;

  // Candidate stores collected per basic block by collectStores(), bucketed
  // by underlying base pointer so stores to a common base can be merged.
  StoreListMap StoreRefsForMemset;
  StoreListMap StoreRefsForMemsetPattern;
  StoreList StoreRefsForMemcpy;
  // Availability of the corresponding library routines on the target
  // (queried from TLI in runOnLoop).
  bool HasMemset;
  bool HasMemsetPattern;
  bool HasMemcpy;

  /// Return code for isLegalStore()
  enum LegalStoreKind {
    None = 0,
    Memset,
    MemsetPattern,
    Memcpy,
    UnorderedAtomicMemcpy,
    DontUse // Dummy retval never to be used. Allows catching errors in retval
            // handling.
  };

  /// \name Countable Loop Idiom Handling
  /// @{

  bool runOnCountableLoop();
  bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
                      SmallVectorImpl<BasicBlock *> &ExitBlocks);

  void collectStores(BasicBlock *BB);
  LegalStoreKind isLegalStore(StoreInst *SI);
  enum class ForMemset { No, Yes };
  bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
                         ForMemset For);
  bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);

  // NOTE(review): one parameter line of this declaration (between TheStore
  // and Ev) appears to be missing from this extract.
  bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
                               unsigned StoreAlignment, Value *StoredVal,
                               Instruction *TheStore,
                               const SCEVAddRecExpr *Ev, const SCEV *BECount,
                               bool NegStride, bool IsLoopMemset = false);
  bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
  bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
                                 bool IsLoopMemset = false);

  /// @}
  /// \name Noncountable Loop Idiom Handling
  /// @{

  bool runOnNoncountableLoop();

  bool recognizePopcount();
  void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
                               PHINode *CntPhi, Value *Var);
  bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz
  void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
                                Instruction *CntInst, PHINode *CntPhi,
                                Value *Var, Instruction *DefX,
                                const DebugLoc &DL, bool ZeroCheck,
                                bool IsCntPhiUsedOutsideLoop);

  /// @}
};
198 
/// Legacy pass-manager wrapper: gathers the analyses LoopIdiomRecognize
/// consumes and forwards to its runOnLoop.
class LoopIdiomRecognizeLegacyPass : public LoopPass {
public:
  static char ID;

  explicit LoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
    // NOTE(review): the pass-registry initialization call in this
    // constructor appears to be missing from this extract.
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    // Honor optnone / opt-bisect requests to skip this loop.
    if (skipLoop(L))
      return false;

    // Pull every analysis the transform needs from the legacy pass manager.
    AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    TargetLibraryInfo *TLI =
        &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
    const TargetTransformInfo *TTI =
        &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
            *L->getHeader()->getParent());
    const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();

    LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL);
    return LIR.runOnLoop(L);
  }

  /// This transformation requires natural loop information & requires that
  /// loop preheaders be inserted into the CFG.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // NOTE(review): the body of this method (the AU.addRequired /
    // getLoopAnalysisUsage calls) appears to be missing from this extract.
  }
};
235 
236 } // end anonymous namespace
237 
// NOTE(review): the signature of the new-pass-manager entry point
// (LoopIdiomRecognizePass::run taking Loop&, LoopAnalysisManager&,
// LoopStandardAnalysisResults& AR, ...) appears to be truncated in this
// extract; only the trailing parameter is visible.

                                  LPMUpdater &) {
  const auto *DL = &L.getHeader()->getModule()->getDataLayout();

  // Run the transform with analyses taken from the standard loop analysis
  // results bundle.
  LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL);
  if (!LIR.runOnLoop(&L))
    return PreservedAnalyses::all();

  // NOTE(review): the return statement for the "loop changed" path appears
  // to be missing from this extract.
}
251 
// Register the legacy pass with the pass registry.
// NOTE(review): the INITIALIZE_PASS_DEPENDENCY lines that normally sit
// between BEGIN and END appear to be missing from this extract.
INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom",
                      "Recognize loop idioms", false, false)
INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass, "loop-idiom",
                    "Recognize loop idioms", false, false)
259 
260 Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognizeLegacyPass(); }
261 
  // NOTE(review): this is the tail of a small static helper (a
  // delete-dead-instruction routine); its signature and preceding
  // statement(s) appear to be missing from this extract.
  I->eraseFromParent();
}
266 
267 //===----------------------------------------------------------------------===//
268 //
269 // Implementation of LoopIdiomRecognize
270 //
271 //===----------------------------------------------------------------------===//
272 
/// Top-level driver: dispatches the loop to the countable-loop memory idioms
/// or the noncountable-loop value idioms. Returns true on any change.
bool LoopIdiomRecognize::runOnLoop(Loop *L) {
  CurLoop = L;
  // If the loop could not be converted to canonical form, it must have an
  // indirectbr in it, just give up.
  if (!L->getLoopPreheader())
    return false;

  // Disable loop idiom recognition if the function's name is a common idiom.
  // NOTE(review): the line defining 'Name' (presumably the enclosing
  // function's name) appears to be missing from this extract.
  if (Name == "memset" || Name == "memcpy")
    return false;

  // Determine if code size heuristics need to be applied.
  // NOTE(review): the right-hand side of this assignment appears to be
  // missing from this extract.
  ApplyCodeSizeHeuristics =

  // Cache which library routines the target provides.
  HasMemset = TLI->has(LibFunc_memset);
  HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
  HasMemcpy = TLI->has(LibFunc_memcpy);

  // Memory idioms need at least one available routine; value idioms
  // (popcount/ffs) are tried otherwise.
  // NOTE(review): a line continuing this condition (likely a check that the
  // backedge-taken count is computable) appears to be missing.
  if (HasMemset || HasMemsetPattern || HasMemcpy)
    return runOnCountableLoop();

  return runOnNoncountableLoop();
}
299 
300 bool LoopIdiomRecognize::runOnCountableLoop() {
301  const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
302  assert(!isa<SCEVCouldNotCompute>(BECount) &&
303  "runOnCountableLoop() called on a loop without a predictable"
304  "backedge-taken count");
305 
306  // If this loop executes exactly one time, then it should be peeled, not
307  // optimized by this pass.
308  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
309  if (BECst->getAPInt() == 0)
310  return false;
311 
312  SmallVector<BasicBlock *, 8> ExitBlocks;
313  CurLoop->getUniqueExitBlocks(ExitBlocks);
314 
315  LLVM_DEBUG(dbgs() << "loop-idiom Scanning: F["
316  << CurLoop->getHeader()->getParent()->getName()
317  << "] Loop %" << CurLoop->getHeader()->getName() << "\n");
318 
319  bool MadeChange = false;
320 
321  // The following transforms hoist stores/memsets into the loop pre-header.
322  // Give up if the loop has instructions may throw.
323  SimpleLoopSafetyInfo SafetyInfo;
324  SafetyInfo.computeLoopSafetyInfo(CurLoop);
325  if (SafetyInfo.anyBlockMayThrow())
326  return MadeChange;
327 
328  // Scan all the blocks in the loop that are not in subloops.
329  for (auto *BB : CurLoop->getBlocks()) {
330  // Ignore blocks in subloops.
331  if (LI->getLoopFor(BB) != CurLoop)
332  continue;
333 
334  MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
335  }
336  return MadeChange;
337 }
338 
339 static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
340  const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
341  return ConstStride->getAPInt();
342 }
343 
/// getMemSetPatternValue - If a strided store of the specified value is safe to
/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
/// be passed in. Otherwise, return null.
///
/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
/// just replicate their input array and then pass on to memset_pattern16.
// NOTE(review): the function's signature line (taking the stored value 'V'
// and the DataLayout 'DL') appears to be missing from this extract.
  // FIXME: This could check for UndefValue because it can be merged into any
  // other valid pattern.

  // If the value isn't a constant, we can't promote it to being in a constant
  // array. We could theoretically do a store to an alloca or something, but
  // that doesn't seem worthwhile.
  Constant *C = dyn_cast<Constant>(V);
  if (!C)
    return nullptr;

  // Only handle simple values that are a power of two bytes in size.
  uint64_t Size = DL->getTypeSizeInBits(V->getType());
  if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
    return nullptr;

  // Don't care enough about darwin/ppc to implement this.
  if (DL->isBigEndian())
    return nullptr;

  // Convert to size in bytes.
  Size /= 8;

  // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
  // if the top and bottom are the same (e.g. for vectors and large integers).
  if (Size > 16)
    return nullptr;

  // If the constant is exactly 16 bytes, just use it.
  if (Size == 16)
    return C;

  // Otherwise, we'll use an array of the constants.
  unsigned ArraySize = 16 / Size;
  ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
  return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
}
387 
/// Classify \p SI as a memset / memset_pattern16 / memcpy candidate, or
/// None if the store cannot legally be transformed into any of them.
LoopIdiomRecognize::LegalStoreKind
LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
  // Don't touch volatile stores.
  if (SI->isVolatile())
    return LegalStoreKind::None;
  // We only want simple or unordered-atomic stores.
  if (!SI->isUnordered())
    return LegalStoreKind::None;

  // Don't convert stores of non-integral pointer types to memsets (which stores
  // integers).
  if (DL->isNonIntegralPointerType(SI->getValueOperand()->getType()))
    return LegalStoreKind::None;

  // Avoid merging nontemporal stores.
  // NOTE(review): the condition guarding this return (presumably a check for
  // nontemporal metadata on the store) appears to be missing from this
  // extract.
    return LegalStoreKind::None;

  Value *StoredVal = SI->getValueOperand();
  Value *StorePtr = SI->getPointerOperand();

  // Reject stores that are so large that they overflow an unsigned.
  uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
  if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
    return LegalStoreKind::None;

  // See if the pointer expression is an AddRec like {base,+,1} on the current
  // loop, which indicates a strided store. If we have something else, it's a
  // random store we can't handle.
  const SCEVAddRecExpr *StoreEv =
      dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
  if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
    return LegalStoreKind::None;

  // Check to see if we have a constant stride.
  if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
    return LegalStoreKind::None;

  // See if the store can be turned into a memset.

  // If the stored value is a byte-wise value (like i32 -1), then it may be
  // turned into a memset of i8 -1, assuming that all the consecutive bytes
  // are stored. A store of i32 0x01020304 can never be turned into a memset,
  // but it can be turned into memset_pattern if the target supports it.
  Value *SplatValue = isBytewiseValue(StoredVal);
  Constant *PatternValue = nullptr;

  // Note: memset and memset_pattern on unordered-atomic is yet not supported
  bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple();

  // If we're allowed to form a memset, and the stored value would be
  // acceptable for memset, use it.
  if (!UnorderedAtomic && HasMemset && SplatValue &&
      // Verify that the stored value is loop invariant. If not, we can't
      // promote the memset.
      CurLoop->isLoopInvariant(SplatValue)) {
    // It looks like we can use SplatValue.
    return LegalStoreKind::Memset;
  } else if (!UnorderedAtomic && HasMemsetPattern &&
             // Don't create memset_pattern16s with address spaces.
             StorePtr->getType()->getPointerAddressSpace() == 0 &&
             (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
    // It looks like we can use PatternValue!
    return LegalStoreKind::MemsetPattern;
  }

  // Otherwise, see if the store can be turned into a memcpy.
  if (HasMemcpy) {
    // Check to see if the stride matches the size of the store. If so, then we
    // know that every byte is touched in the loop.
    APInt Stride = getStoreStride(StoreEv);
    unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
    if (StoreSize != Stride && StoreSize != -Stride)
      return LegalStoreKind::None;

    // The store must be feeding a non-volatile load.
    // NOTE(review): the line defining 'LI' here (presumably the load
    // producing the stored value) appears to be missing from this extract.

    // Only allow non-volatile loads
    if (!LI || LI->isVolatile())
      return LegalStoreKind::None;
    // Only allow simple or unordered-atomic loads
    if (!LI->isUnordered())
      return LegalStoreKind::None;

    // See if the pointer expression is an AddRec like {base,+,1} on the current
    // loop, which indicates a strided load. If we have something else, it's a
    // random load we can't handle.
    const SCEVAddRecExpr *LoadEv =
    // NOTE(review): the initializer of LoadEv (the SCEV of the load's
    // pointer operand) appears to be missing from this extract.
    if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
      return LegalStoreKind::None;

    // The store and load must share the same stride.
    if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
      return LegalStoreKind::None;

    // Success. This store can be converted into a memcpy.
    UnorderedAtomic = UnorderedAtomic || LI->isAtomic();
    return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
                           : LegalStoreKind::Memcpy;
  }
  // This store can't be transformed into a memset/memcpy.
  return LegalStoreKind::None;
}
493 
/// Reset the per-block candidate lists and bucket every legal store in
/// \p BB by the idiom it could become (memset, memset_pattern, memcpy).
void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
  StoreRefsForMemset.clear();
  StoreRefsForMemsetPattern.clear();
  StoreRefsForMemcpy.clear();
  for (Instruction &I : *BB) {
    StoreInst *SI = dyn_cast<StoreInst>(&I);
    if (!SI)
      continue;

    // Make sure this is a strided store with a constant stride.
    switch (isLegalStore(SI)) {
    // NOTE(review): the 'case LegalStoreKind::None:' label appears to be
    // missing from this extract.
      // Nothing to do
      break;
    case LegalStoreKind::Memset: {
      // Find the base pointer.
      Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
      StoreRefsForMemset[Ptr].push_back(SI);
    } break;
    case LegalStoreKind::MemsetPattern: {
      // Find the base pointer.
      Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
      StoreRefsForMemsetPattern[Ptr].push_back(SI);
    } break;
    case LegalStoreKind::Memcpy:
    case LegalStoreKind::UnorderedAtomicMemcpy:
      StoreRefsForMemcpy.push_back(SI);
      break;
    default:
      // DontUse (or any future kind) must never escape isLegalStore.
      assert(false && "unhandled return value");
      break;
    }
  }
}
528 
/// runOnLoopBlock - Process the specified block, which lives in a counted loop
/// with the specified backedge count. This block is known to be in the current
/// loop and not in any subloops.
bool LoopIdiomRecognize::runOnLoopBlock(
    BasicBlock *BB, const SCEV *BECount,
    SmallVectorImpl<BasicBlock *> &ExitBlocks) {
  // We can only promote stores in this block if they are unconditionally
  // executed in the loop. For a block to be unconditionally executed, it has
  // to dominate all the exit blocks of the loop. Verify this now.
  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
    if (!DT->dominates(BB, ExitBlocks[i]))
      return false;

  bool MadeChange = false;
  // Look for store instructions, which may be optimized to memset/memcpy.
  collectStores(BB);

  // Look for a single store or sets of stores with a common base, which can be
  // optimized into a memset (memset_pattern). The latter most commonly happens
  // with structs and handunrolled loops.
  for (auto &SL : StoreRefsForMemset)
    MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);

  for (auto &SL : StoreRefsForMemsetPattern)
    MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);

  // Optimize the store into a memcpy, if it feeds an similarly strided load.
  for (auto &SI : StoreRefsForMemcpy)
    MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);

  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
    Instruction *Inst = &*I++;
    // Look for memset instructions, which may be optimized to a larger memset.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
      // Track the instruction after MSI with a weak handle: if the transform
      // deletes it, the handle goes null and the scan must restart.
      WeakTrackingVH InstPtr(&*I);
      if (!processLoopMemSet(MSI, BECount))
        continue;
      MadeChange = true;

      // If processing the memset invalidated our iterator, start over from the
      // top of the block.
      if (!InstPtr)
        I = BB->begin();
      continue;
    }
  }

  return MadeChange;
}
578 
/// See if this store(s) can be promoted to a memset.
///
/// \p SL holds stores to a common base pointer; \p For selects whether a
/// plain memset (splat byte) or a memset_pattern16 is being formed.
bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
                                           const SCEV *BECount, ForMemset For) {
  // Try to find consecutive stores that can be transformed into memsets.
  SetVector<StoreInst *> Heads, Tails;
  // NOTE(review): the declaration of 'ConsecutiveChain' (a map from each
  // store to its consecutive successor) appears to be missing from this
  // extract.

  // Do a quadratic search on all of the given stores and find
  // all of the pairs of stores that follow each other.
  SmallVector<unsigned, 16> IndexQueue;
  for (unsigned i = 0, e = SL.size(); i < e; ++i) {
    assert(SL[i]->isSimple() && "Expected only non-volatile stores.");

    Value *FirstStoredVal = SL[i]->getValueOperand();
    Value *FirstStorePtr = SL[i]->getPointerOperand();
    const SCEVAddRecExpr *FirstStoreEv =
        cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
    APInt FirstStride = getStoreStride(FirstStoreEv);
    unsigned FirstStoreSize = DL->getTypeStoreSize(SL[i]->getValueOperand()->getType());

    // See if we can optimize just this store in isolation.
    if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
      Heads.insert(SL[i]);
      continue;
    }

    Value *FirstSplatValue = nullptr;
    Constant *FirstPatternValue = nullptr;

    if (For == ForMemset::Yes)
      FirstSplatValue = isBytewiseValue(FirstStoredVal);
    else
      FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);

    assert((FirstSplatValue || FirstPatternValue) &&
           "Expected either splat value or pattern value.");

    IndexQueue.clear();
    // If a store has multiple consecutive store candidates, search Stores
    // array according to the sequence: from i+1 to e, then from i-1 to 0.
    // This is because usually pairing with immediate succeeding or preceding
    // candidate create the best chance to find memset opportunity.
    unsigned j = 0;
    for (j = i + 1; j < e; ++j)
      IndexQueue.push_back(j);
    for (j = i; j > 0; --j)
      IndexQueue.push_back(j - 1);

    for (auto &k : IndexQueue) {
      assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
      Value *SecondStorePtr = SL[k]->getPointerOperand();
      const SCEVAddRecExpr *SecondStoreEv =
          cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
      APInt SecondStride = getStoreStride(SecondStoreEv);

      // Candidates must advance through memory at the same rate.
      if (FirstStride != SecondStride)
        continue;

      Value *SecondStoredVal = SL[k]->getValueOperand();
      Value *SecondSplatValue = nullptr;
      Constant *SecondPatternValue = nullptr;

      if (For == ForMemset::Yes)
        SecondSplatValue = isBytewiseValue(SecondStoredVal);
      else
        SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);

      assert((SecondSplatValue || SecondPatternValue) &&
             "Expected either splat value or pattern value.");

      if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
        // An undef value may merge with any other value; otherwise both
        // stores must write the same splat/pattern to be chained.
        if (For == ForMemset::Yes) {
          if (isa<UndefValue>(FirstSplatValue))
            FirstSplatValue = SecondSplatValue;
          if (FirstSplatValue != SecondSplatValue)
            continue;
        } else {
          if (isa<UndefValue>(FirstPatternValue))
            FirstPatternValue = SecondPatternValue;
          if (FirstPatternValue != SecondPatternValue)
            continue;
        }
        Tails.insert(SL[k]);
        Heads.insert(SL[i]);
        ConsecutiveChain[SL[i]] = SL[k];
        break;
      }
    }
  }

  // We may run into multiple chains that merge into a single chain. We mark the
  // stores that we transformed so that we don't visit the same store twice.
  SmallPtrSet<Value *, 16> TransformedStores;
  bool Changed = false;

  // For stores that start but don't end a link in the chain:
  for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
       it != e; ++it) {
    if (Tails.count(*it))
      continue;

    // We found a store instr that starts a chain. Now follow the chain and try
    // to transform it.
    SmallPtrSet<Instruction *, 8> AdjacentStores;
    StoreInst *I = *it;

    StoreInst *HeadStore = I;
    unsigned StoreSize = 0;

    // Collect the chain into a list.
    while (Tails.count(I) || Heads.count(I)) {
      if (TransformedStores.count(I))
        break;
      AdjacentStores.insert(I);

      StoreSize += DL->getTypeStoreSize(I->getValueOperand()->getType());
      // Move to the next value in the chain.
      I = ConsecutiveChain[I];
    }

    Value *StoredVal = HeadStore->getValueOperand();
    Value *StorePtr = HeadStore->getPointerOperand();
    const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
    APInt Stride = getStoreStride(StoreEv);

    // Check to see if the stride matches the size of the stores. If so, then
    // we know that every byte is touched in the loop.
    if (StoreSize != Stride && StoreSize != -Stride)
      continue;

    bool NegStride = StoreSize == -Stride;

    if (processLoopStridedStore(StorePtr, StoreSize, HeadStore->getAlignment(),
                                StoredVal, HeadStore, AdjacentStores, StoreEv,
                                BECount, NegStride)) {
      TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
      Changed = true;
    }
  }

  return Changed;
}
721 
/// processLoopMemSet - See if this memset can be promoted to a large memset.
bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
                                           const SCEV *BECount) {
  // We can only handle non-volatile memsets with a constant size.
  if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
    return false;

  // If we're not allowed to hack on memset, we fail.
  if (!HasMemset)
    return false;

  Value *Pointer = MSI->getDest();

  // See if the pointer expression is an AddRec like {base,+,1} on the current
  // loop, which indicates a strided store. If we have something else, it's a
  // random store we can't handle.
  const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
  if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine())
    return false;

  // Reject memsets that are so large that they overflow an unsigned.
  uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
  if ((SizeInBytes >> 32) != 0)
    return false;

  // Check to see if the stride matches the size of the memset. If so, then we
  // know that every byte is touched in the loop.
  const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
  if (!ConstStride)
    return false;

  APInt Stride = ConstStride->getAPInt();
  if (SizeInBytes != Stride && SizeInBytes != -Stride)
    return false;

  // Verify that the memset value is loop invariant. If not, we can't promote
  // the memset.
  Value *SplatValue = MSI->getValue();
  if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
    return false;

  // NOTE(review): the declaration of 'MSIs' (a small instruction set used to
  // exclude this memset from the alias checks) appears to be missing from
  // this extract.
  MSIs.insert(MSI);
  bool NegStride = SizeInBytes == -Stride;
  return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
                                 MSI->getDestAlignment(), SplatValue, MSI, MSIs,
                                 Ev, BECount, NegStride, /*IsLoopMemset=*/true);
}
770 
/// mayLoopAccessLocation - Return true if the specified loop might access the
/// specified pointer location, which is a loop-strided access. The 'Access'
/// argument specifies what the verboten forms of access are (read or write).
static bool
// NOTE(review): the first line of the parameter list (the pointer, the
// ModRefInfo access kind, and the Loop) appears to be missing from this
// extract.
                      const SCEV *BECount, unsigned StoreSize,
                      AliasAnalysis &AA,
                      SmallPtrSetImpl<Instruction *> &IgnoredStores) {
  // Get the location that may be stored across the loop. Since the access is
  // strided positively through memory, we say that the modified location starts
  // at the pointer and has infinite size.
  LocationSize AccessSize = LocationSize::unknown();

  // If the loop iterates a fixed number of times, we can refine the access size
  // to be exactly the size of the memset, which is (BECount+1)*StoreSize
  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
    AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
                                       StoreSize);

  // TODO: For this to be really effective, we have to dive into the pointer
  // operand in the store. Store to &A[i] of 100 will always return may alias
  // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
  // which will then no-alias a store to &A[100].
  MemoryLocation StoreLoc(Ptr, AccessSize);

  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
       ++BI)
    for (Instruction &I : **BI)
      if (IgnoredStores.count(&I) == 0 &&
          // NOTE(review): part of this condition (the mod/ref predicate
          // wrapping the intersect call) appears to be missing from this
          // extract.
          intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
        return true;

  return false;
}
806 
807 // If we have a negative stride, Start refers to the end of the memory location
808 // we're trying to memset. Therefore, we need to recompute the base pointer,
809 // which is just Start - BECount*Size.
810 static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
811  Type *IntPtr, unsigned StoreSize,
812  ScalarEvolution *SE) {
813  const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
814  if (StoreSize != 1)
815  Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize),
816  SCEV::FlagNUW);
817  return SE->getMinusSCEV(Start, Index);
818 }
819 
/// Compute the number of bytes as a SCEV from the backedge taken count.
///
/// This also maps the SCEV into the provided type and tries to handle the
/// computation in a way that will fold cleanly.
static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
                               unsigned StoreSize, Loop *CurLoop,
                               const DataLayout *DL, ScalarEvolution *SE) {
  const SCEV *NumBytesS;
  // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
  // pointer size if it isn't already.
  //
  // If we're going to need to zero extend the BE count, check if we can add
  // one to it prior to zero extending without overflow. Provided this is safe,
  // it allows better simplification of the +1.
  if (DL->getTypeSizeInBits(BECount->getType()) <
          DL->getTypeSizeInBits(IntPtr) &&
      // NOTE(review): a line of this guard (presumably the
      // loop-entry-guarded-by-condition query taking these arguments)
      // appears to be missing from this extract.
          CurLoop, ICmpInst::ICMP_NE, BECount,
          SE->getNegativeSCEV(SE->getOne(BECount->getType())))) {
    NumBytesS = SE->getZeroExtendExpr(
        SE->getAddExpr(BECount, SE->getOne(BECount->getType()), SCEV::FlagNUW),
        IntPtr);
  } else {
    NumBytesS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr),
                               SE->getOne(IntPtr), SCEV::FlagNUW);
  }

  // And scale it based on the store size.
  if (StoreSize != 1) {
    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
                               SCEV::FlagNUW);
  }
  return NumBytesS;
}
854 
855 /// processLoopStridedStore - We see a strided store of some value. If we can
856 /// transform this into a memset or memset_pattern in the loop preheader, do so.
857 bool LoopIdiomRecognize::processLoopStridedStore(
858  Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
859  Value *StoredVal, Instruction *TheStore,
861  const SCEV *BECount, bool NegStride, bool IsLoopMemset) {
862  Value *SplatValue = isBytewiseValue(StoredVal);
863  Constant *PatternValue = nullptr;
864 
865  if (!SplatValue)
866  PatternValue = getMemSetPatternValue(StoredVal, DL);
867 
868  assert((SplatValue || PatternValue) &&
869  "Expected either splat value or pattern value.");
870 
871  // The trip count of the loop and the base pointer of the addrec SCEV is
872  // guaranteed to be loop invariant, which means that it should dominate the
873  // header. This allows us to insert code for it in the preheader.
874  unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
875  BasicBlock *Preheader = CurLoop->getLoopPreheader();
876  IRBuilder<> Builder(Preheader->getTerminator());
877  SCEVExpander Expander(*SE, *DL, "loop-idiom");
878 
879  Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
880  Type *IntPtr = Builder.getIntPtrTy(*DL, DestAS);
881 
882  const SCEV *Start = Ev->getStart();
883  // Handle negative strided loops.
884  if (NegStride)
885  Start = getStartForNegStride(Start, BECount, IntPtr, StoreSize, SE);
886 
887  // TODO: ideally we should still be able to generate memset if SCEV expander
888  // is taught to generate the dependencies at the latest point.
889  if (!isSafeToExpand(Start, *SE))
890  return false;
891 
892  // Okay, we have a strided store "p[i]" of a splattable value. We can turn
893  // this into a memset in the loop preheader now if we want. However, this
894  // would be unsafe to do if there is anything else in the loop that may read
895  // or write to the aliased location. Check for any overlap by generating the
896  // base pointer and checking the region.
897  Value *BasePtr =
898  Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
899  if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
900  StoreSize, *AA, Stores)) {
901  Expander.clear();
902  // If we generated new code for the base pointer, clean up.
904  return false;
905  }
906 
907  if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
908  return false;
909 
910  // Okay, everything looks good, insert the memset.
911 
912  const SCEV *NumBytesS =
913  getNumBytes(BECount, IntPtr, StoreSize, CurLoop, DL, SE);
914 
915  // TODO: ideally we should still be able to generate memset if SCEV expander
916  // is taught to generate the dependencies at the latest point.
917  if (!isSafeToExpand(NumBytesS, *SE))
918  return false;
919 
920  Value *NumBytes =
921  Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
922 
923  CallInst *NewCall;
924  if (SplatValue) {
925  NewCall =
926  Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, StoreAlignment);
927  } else {
928  // Everything is emitted in default address space
929  Type *Int8PtrTy = DestInt8PtrTy;
930 
931  Module *M = TheStore->getModule();
932  StringRef FuncName = "memset_pattern16";
933  FunctionCallee MSP = M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
934  Int8PtrTy, Int8PtrTy, IntPtr);
935  inferLibFuncAttributes(M, FuncName, *TLI);
936 
937  // Otherwise we should form a memset_pattern16. PatternValue is known to be
938  // an constant array of 16-bytes. Plop the value into a mergable global.
939  GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
941  PatternValue, ".memset_pattern");
942  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these.
943  GV->setAlignment(16);
944  Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
945  NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
946  }
947 
948  LLVM_DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
949  << " from store to: " << *Ev << " at: " << *TheStore
950  << "\n");
951  NewCall->setDebugLoc(TheStore->getDebugLoc());
952 
953  // Okay, the memset has been formed. Zap the original store and anything that
954  // feeds into it.
955  for (auto *I : Stores)
957  ++NumMemSet;
958  return true;
959 }
960 
961 /// If the stored value is a strided load in the same loop with the same stride
962 /// this may be transformable into a memcpy. This kicks in for stuff like
963 /// for (i) A[i] = B[i];
964 bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
965  const SCEV *BECount) {
966  assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.");
967 
968  Value *StorePtr = SI->getPointerOperand();
969  const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
970  APInt Stride = getStoreStride(StoreEv);
971  unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
972  bool NegStride = StoreSize == -Stride;
973 
974  // The store must be feeding a non-volatile load.
975  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
976  assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.");
977 
978  // See if the pointer expression is an AddRec like {base,+,1} on the current
979  // loop, which indicates a strided load. If we have something else, it's a
980  // random load we can't handle.
981  const SCEVAddRecExpr *LoadEv =
982  cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
983 
984  // The trip count of the loop and the base pointer of the addrec SCEV is
985  // guaranteed to be loop invariant, which means that it should dominate the
986  // header. This allows us to insert code for it in the preheader.
987  BasicBlock *Preheader = CurLoop->getLoopPreheader();
988  IRBuilder<> Builder(Preheader->getTerminator());
989  SCEVExpander Expander(*SE, *DL, "loop-idiom");
990 
991  const SCEV *StrStart = StoreEv->getStart();
992  unsigned StrAS = SI->getPointerAddressSpace();
993  Type *IntPtrTy = Builder.getIntPtrTy(*DL, StrAS);
994 
995  // Handle negative strided loops.
996  if (NegStride)
997  StrStart = getStartForNegStride(StrStart, BECount, IntPtrTy, StoreSize, SE);
998 
999  // Okay, we have a strided store "p[i]" of a loaded value. We can turn
1000  // this into a memcpy in the loop preheader now if we want. However, this
1001  // would be unsafe to do if there is anything else in the loop that may read
1002  // or write the memory region we're storing to. This includes the load that
1003  // feeds the stores. Check for an alias by generating the base address and
1004  // checking everything.
1005  Value *StoreBasePtr = Expander.expandCodeFor(
1006  StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
1007 
1009  Stores.insert(SI);
1010  if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
1011  StoreSize, *AA, Stores)) {
1012  Expander.clear();
1013  // If we generated new code for the base pointer, clean up.
1014  RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
1015  return false;
1016  }
1017 
1018  const SCEV *LdStart = LoadEv->getStart();
1019  unsigned LdAS = LI->getPointerAddressSpace();
1020 
1021  // Handle negative strided loops.
1022  if (NegStride)
1023  LdStart = getStartForNegStride(LdStart, BECount, IntPtrTy, StoreSize, SE);
1024 
1025  // For a memcpy, we have to make sure that the input array is not being
1026  // mutated by the loop.
1027  Value *LoadBasePtr = Expander.expandCodeFor(
1028  LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
1029 
1030  if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
1031  StoreSize, *AA, Stores)) {
1032  Expander.clear();
1033  // If we generated new code for the base pointer, clean up.
1035  RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
1036  return false;
1037  }
1038 
1039  if (avoidLIRForMultiBlockLoop())
1040  return false;
1041 
1042  // Okay, everything is safe, we can transform this!
1043 
1044  const SCEV *NumBytesS =
1045  getNumBytes(BECount, IntPtrTy, StoreSize, CurLoop, DL, SE);
1046 
1047  Value *NumBytes =
1048  Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
1049 
1050  CallInst *NewCall = nullptr;
1051  // Check whether to generate an unordered atomic memcpy:
1052  // If the load or store are atomic, then they must necessarily be unordered
1053  // by previous checks.
1054  if (!SI->isAtomic() && !LI->isAtomic())
1055  NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlignment(),
1056  LoadBasePtr, LI->getAlignment(), NumBytes);
1057  else {
1058  // We cannot allow unaligned ops for unordered load/store, so reject
1059  // anything where the alignment isn't at least the element size.
1060  unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
1061  if (Align < StoreSize)
1062  return false;
1063 
1064  // If the element.atomic memcpy is not lowered into explicit
1065  // loads/stores later, then it will be lowered into an element-size
1066  // specific lib call. If the lib call doesn't exist for our store size, then
1067  // we shouldn't generate the memcpy.
1068  if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
1069  return false;
1070 
1071  // Create the call.
1072  // Note that unordered atomic loads/stores are *required* by the spec to
1073  // have an alignment but non-atomic loads/stores may not.
1074  NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
1075  StoreBasePtr, SI->getAlignment(), LoadBasePtr, LI->getAlignment(),
1076  NumBytes, StoreSize);
1077  }
1078  NewCall->setDebugLoc(SI->getDebugLoc());
1079 
1080  LLVM_DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
1081  << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
1082  << " from store ptr=" << *StoreEv << " at: " << *SI
1083  << "\n");
1084 
1085  // Okay, the memcpy has been formed. Zap the original store and anything that
1086  // feeds into it.
1088  ++NumMemCpy;
1089  return true;
1090 }
1091 
1092 // When compiling for codesize we avoid idiom recognition for a multi-block loop
1093 // unless it is a loop_memset idiom or a memset/memcpy idiom in a nested loop.
1094 //
1095 bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
1096  bool IsLoopMemset) {
1097  if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {
1098  if (!CurLoop->getParentLoop() && (!IsMemset || !IsLoopMemset)) {
1099  LLVM_DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName()
1100  << " : LIR " << (IsMemset ? "Memset" : "Memcpy")
1101  << " avoided: multi-block top-level loop\n");
1102  return true;
1103  }
1104  }
1105 
1106  return false;
1107 }
1108 
1109 bool LoopIdiomRecognize::runOnNoncountableLoop() {
1110  return recognizePopcount() || recognizeAndInsertFFS();
1111 }
1112 
1113 /// Check if the given conditional branch is based on the comparison between
1114 /// a variable and zero, and if the variable is non-zero or zero (JmpOnZero is
1115 /// true), the control yields to the loop entry. If the branch matches the
1116 /// behavior, the variable involved in the comparison is returned. This function
1117 /// will be called to see if the precondition and postcondition of the loop are
1118 /// in desirable form.
1119 static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
1120  bool JmpOnZero = false) {
1121  if (!BI || !BI->isConditional())
1122  return nullptr;
1123 
1124  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1125  if (!Cond)
1126  return nullptr;
1127 
1128  ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
1129  if (!CmpZero || !CmpZero->isZero())
1130  return nullptr;
1131 
1132  BasicBlock *TrueSucc = BI->getSuccessor(0);
1133  BasicBlock *FalseSucc = BI->getSuccessor(1);
1134  if (JmpOnZero)
1135  std::swap(TrueSucc, FalseSucc);
1136 
1137  ICmpInst::Predicate Pred = Cond->getPredicate();
1138  if ((Pred == ICmpInst::ICMP_NE && TrueSucc == LoopEntry) ||
1139  (Pred == ICmpInst::ICMP_EQ && FalseSucc == LoopEntry))
1140  return Cond->getOperand(0);
1141 
1142  return nullptr;
1143 }
1144 
1145 // Check if the recurrence variable `VarX` is in the right form to create
1146 // the idiom. Returns the value coerced to a PHINode if so.
1148  BasicBlock *LoopEntry) {
1149  auto *PhiX = dyn_cast<PHINode>(VarX);
1150  if (PhiX && PhiX->getParent() == LoopEntry &&
1151  (PhiX->getOperand(0) == DefX || PhiX->getOperand(1) == DefX))
1152  return PhiX;
1153  return nullptr;
1154 }
1155 
/// Return true iff the idiom is detected in the loop.
///
/// Additionally:
/// 1) \p CntInst is set to the instruction counting the population bit.
/// 2) \p CntPhi is set to the corresponding phi node.
/// 3) \p Var is set to the value whose population bits are being counted.
///
/// The core idiom we are trying to detect is:
/// \code
///    if (x0 != 0)
///      goto loop-exit // the precondition of the loop
///    cnt0 = init-val;
///    do {
///       x1 = phi (x0, x2);
///       cnt1 = phi(cnt0, cnt2);
///
///       cnt2 = cnt1 + 1;
///        ...
///       x2 = x1 & (x1 - 1);
///        ...
///    } while(x != 0);
///
/// loop-exit:
/// \endcode
static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
                                Instruction *&CntInst, PHINode *&CntPhi,
                                Value *&Var) {
  // step 1: Check to see if the look-back branch match this pattern:
  //    "if (a!=0) goto loop-entry".
  BasicBlock *LoopEntry;
  Instruction *DefX2, *CountInst;
  Value *VarX1, *VarX0;
  PHINode *PhiX, *CountPhi;

  DefX2 = CountInst = nullptr;
  VarX1 = VarX0 = nullptr;
  PhiX = CountPhi = nullptr;
  // Single-block loop (checked by the caller), so the header is the only
  // block.
  LoopEntry = *(CurLoop->block_begin());

  // step 1: Check if the loop-back branch is in desirable form.
  {
    if (Value *T = matchCondition(
            dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
      DefX2 = dyn_cast<Instruction>(T);
    else
      return false;
  }

  // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
  {
    if (!DefX2 || DefX2->getOpcode() != Instruction::And)
      return false;

    BinaryOperator *SubOneOp;

    // The "x1 - 1" term may appear as either operand of the 'and'.
    if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
      VarX1 = DefX2->getOperand(1);
    else {
      VarX1 = DefX2->getOperand(0);
      SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
    }
    if (!SubOneOp || SubOneOp->getOperand(0) != VarX1)
      return false;

    // Accept both "x1 - 1" and the canonicalized "x1 + (-1)".
    ConstantInt *Dec = dyn_cast<ConstantInt>(SubOneOp->getOperand(1));
    if (!Dec ||
        !((SubOneOp->getOpcode() == Instruction::Sub && Dec->isOne()) ||
          (SubOneOp->getOpcode() == Instruction::Add &&
           Dec->isMinusOne()))) {
      return false;
    }
  }

  // step 3: Check the recurrence of variable X
  PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry);
  if (!PhiX)
    return false;

  // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
  {
    CountInst = nullptr;
    for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
                              IterE = LoopEntry->end();
         Iter != IterE; Iter++) {
      Instruction *Inst = &*Iter;
      if (Inst->getOpcode() != Instruction::Add)
        continue;

      // Must be an increment by the literal constant one.
      ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
      if (!Inc || !Inc->isOne())
        continue;

      PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
      if (!Phi)
        continue;

      // Check if the result of the instruction is live of the loop.
      bool LiveOutLoop = false;
      for (User *U : Inst->users()) {
        if ((cast<Instruction>(U))->getParent() != LoopEntry) {
          LiveOutLoop = true;
          break;
        }
      }

      if (LiveOutLoop) {
        CountInst = Inst;
        CountPhi = Phi;
        break;
      }
    }

    if (!CountInst)
      return false;
  }

  // step 5: check if the precondition is in this form:
  //   "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
  {
    auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
    Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader());
    // The tested value must be one of the phi's incoming values, i.e. the
    // loop counts the population of exactly the value guarded by the
    // precondition.
    if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
      return false;

    CntInst = CountInst;
    CntPhi = CountPhi;
    Var = T;
  }

  return true;
}
1287 
/// Return true if the idiom is detected in the loop.
///
/// Additionally:
/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
///       or nullptr if there is no such.
/// 2) \p CntPhi is set to the corresponding phi node
///       or nullptr if there is no such.
/// 3) \p Var is set to the value whose CTLZ could be used.
/// 4) \p DefX is set to the instruction calculating Loop exit condition.
///
/// The core idiom we are trying to detect is:
/// \code
///    if (x0 == 0)
///      goto loop-exit // the precondition of the loop
///    cnt0 = init-val;
///    do {
///       x = phi (x0, x.next);   //PhiX
///       cnt = phi(cnt0, cnt.next);
///
///       cnt.next = cnt + 1;
///        ...
///       x.next = x >> 1;   // DefX
///        ...
///    } while(x.next != 0);
///
/// loop-exit:
/// \endcode
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
                                      Intrinsic::ID &IntrinID, Value *&InitX,
                                      Instruction *&CntInst, PHINode *&CntPhi,
                                      Instruction *&DefX) {
  BasicBlock *LoopEntry;
  Value *VarX = nullptr;

  DefX = nullptr;
  CntInst = nullptr;
  CntPhi = nullptr;
  // Single-block loop (checked by the caller): the header is the whole body.
  LoopEntry = *(CurLoop->block_begin());

  // step 1: Check if the loop-back branch is in desirable form.
  if (Value *T = matchCondition(
          dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
    DefX = dyn_cast<Instruction>(T);
  else
    return false;

  // step 2: detect instructions corresponding to "x.next = x >> 1 or x << 1"
  if (!DefX || !DefX->isShift())
    return false;
  // A left shift consumes trailing bits (cttz); a right shift consumes
  // leading bits (ctlz).
  IntrinID = DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz :
                                                     Intrinsic::ctlz;
  // Only a shift amount of exactly one matches the idiom.
  ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
  if (!Shft || !Shft->isOne())
    return false;
  VarX = DefX->getOperand(0);

  // step 3: Check the recurrence of variable X
  PHINode *PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
  if (!PhiX)
    return false;

  InitX = PhiX->getIncomingValueForBlock(CurLoop->getLoopPreheader());

  // Make sure the initial value can't be negative otherwise the ashr in the
  // loop might never reach zero which would make the loop infinite.
  if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, DL))
    return false;

  // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
  //  TODO: We can skip the step. If loop trip count is known (CTLZ),
  //        then all uses of "cnt.next" could be optimized to the trip count
  //        plus "cnt0". Currently it is not optimized.
  //        This step could be used to detect POPCNT instruction:
  //        cnt.next = cnt + (x.next & 1)
  for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
                            IterE = LoopEntry->end();
       Iter != IterE; Iter++) {
    Instruction *Inst = &*Iter;
    if (Inst->getOpcode() != Instruction::Add)
      continue;

    // Must be an increment by the literal constant one.
    ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
    if (!Inc || !Inc->isOne())
      continue;

    PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
    if (!Phi)
      continue;

    CntInst = Inst;
    CntPhi = Phi;
    break;
  }
  if (!CntInst)
    return false;

  return true;
}
1386 
1387 /// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
1388 /// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
1389 /// trip count returns true; otherwise, returns false.
1390 bool LoopIdiomRecognize::recognizeAndInsertFFS() {
1391  // Give up if the loop has multiple blocks or multiple backedges.
1392  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
1393  return false;
1394 
1395  Intrinsic::ID IntrinID;
1396  Value *InitX;
1397  Instruction *DefX = nullptr;
1398  PHINode *CntPhi = nullptr;
1399  Instruction *CntInst = nullptr;
1400  // Help decide if transformation is profitable. For ShiftUntilZero idiom,
1401  // this is always 6.
1402  size_t IdiomCanonicalSize = 6;
1403 
1404  if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
1405  CntInst, CntPhi, DefX))
1406  return false;
1407 
1408  bool IsCntPhiUsedOutsideLoop = false;
1409  for (User *U : CntPhi->users())
1410  if (!CurLoop->contains(cast<Instruction>(U))) {
1411  IsCntPhiUsedOutsideLoop = true;
1412  break;
1413  }
1414  bool IsCntInstUsedOutsideLoop = false;
1415  for (User *U : CntInst->users())
1416  if (!CurLoop->contains(cast<Instruction>(U))) {
1417  IsCntInstUsedOutsideLoop = true;
1418  break;
1419  }
1420  // If both CntInst and CntPhi are used outside the loop the profitability
1421  // is questionable.
1422  if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
1423  return false;
1424 
1425  // For some CPUs result of CTLZ(X) intrinsic is undefined
1426  // when X is 0. If we can not guarantee X != 0, we need to check this
1427  // when expand.
1428  bool ZeroCheck = false;
1429  // It is safe to assume Preheader exist as it was checked in
1430  // parent function RunOnLoop.
1431  BasicBlock *PH = CurLoop->getLoopPreheader();
1432 
1433  // If we are using the count instruction outside the loop, make sure we
1434  // have a zero check as a precondition. Without the check the loop would run
1435  // one iteration for before any check of the input value. This means 0 and 1
1436  // would have identical behavior in the original loop and thus
1437  if (!IsCntPhiUsedOutsideLoop) {
1438  auto *PreCondBB = PH->getSinglePredecessor();
1439  if (!PreCondBB)
1440  return false;
1441  auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1442  if (!PreCondBI)
1443  return false;
1444  if (matchCondition(PreCondBI, PH) != InitX)
1445  return false;
1446  ZeroCheck = true;
1447  }
1448 
1449  // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
1450  // profitable if we delete the loop.
1451 
1452  // the loop has only 6 instructions:
1453  // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
1454  // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
1455  // %shr = ashr %n.addr.0, 1
1456  // %tobool = icmp eq %shr, 0
1457  // %inc = add nsw %i.0, 1
1458  // br i1 %tobool
1459 
1460  const Value *Args[] =
1461  {InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext())
1462  : ConstantInt::getFalse(InitX->getContext())};
1463 
1464  // @llvm.dbg doesn't count as they have no semantic effect.
1465  auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
1467  std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
1468 
1469  if (HeaderSize != IdiomCanonicalSize &&
1470  TTI->getIntrinsicCost(IntrinID, InitX->getType(), Args) >
1472  return false;
1473 
1474  transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
1475  DefX->getDebugLoc(), ZeroCheck,
1476  IsCntPhiUsedOutsideLoop);
1477  return true;
1478 }
1479 
1480 /// Recognizes a population count idiom in a non-countable loop.
1481 ///
1482 /// If detected, transforms the relevant code to issue the popcount intrinsic
1483 /// function call, and returns true; otherwise, returns false.
1484 bool LoopIdiomRecognize::recognizePopcount() {
1486  return false;
1487 
1488  // Counting population are usually conducted by few arithmetic instructions.
1489  // Such instructions can be easily "absorbed" by vacant slots in a
1490  // non-compact loop. Therefore, recognizing popcount idiom only makes sense
1491  // in a compact loop.
1492 
1493  // Give up if the loop has multiple blocks or multiple backedges.
1494  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
1495  return false;
1496 
1497  BasicBlock *LoopBody = *(CurLoop->block_begin());
1498  if (LoopBody->size() >= 20) {
1499  // The loop is too big, bail out.
1500  return false;
1501  }
1502 
1503  // It should have a preheader containing nothing but an unconditional branch.
1504  BasicBlock *PH = CurLoop->getLoopPreheader();
1505  if (!PH || &PH->front() != PH->getTerminator())
1506  return false;
1507  auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator());
1508  if (!EntryBI || EntryBI->isConditional())
1509  return false;
1510 
1511  // It should have a precondition block where the generated popcount intrinsic
1512  // function can be inserted.
1513  auto *PreCondBB = PH->getSinglePredecessor();
1514  if (!PreCondBB)
1515  return false;
1516  auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1517  if (!PreCondBI || PreCondBI->isUnconditional())
1518  return false;
1519 
1520  Instruction *CntInst;
1521  PHINode *CntPhi;
1522  Value *Val;
1523  if (!detectPopcountIdiom(CurLoop, PreCondBB, CntInst, CntPhi, Val))
1524  return false;
1525 
1526  transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
1527  return true;
1528 }
1529 
1531  const DebugLoc &DL) {
1532  Value *Ops[] = {Val};
1533  Type *Tys[] = {Val->getType()};
1534 
1535  Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
1536  Function *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
1537  CallInst *CI = IRBuilder.CreateCall(Func, Ops);
1538  CI->setDebugLoc(DL);
1539 
1540  return CI;
1541 }
1542 
1544  const DebugLoc &DL, bool ZeroCheck,
1545  Intrinsic::ID IID) {
1546  Value *Ops[] = {Val, ZeroCheck ? IRBuilder.getTrue() : IRBuilder.getFalse()};
1547  Type *Tys[] = {Val->getType()};
1548 
1549  Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
1550  Function *Func = Intrinsic::getDeclaration(M, IID, Tys);
1551  CallInst *CI = IRBuilder.CreateCall(Func, Ops);
1552  CI->setDebugLoc(DL);
1553 
1554  return CI;
1555 }
1556 
/// Transform the following loop (Using CTLZ, CTTZ is similar):
/// loop:
///   CntPhi = PHI [Cnt0, CntInst]
///   PhiX = PHI [InitX, DefX]
///   CntInst = CntPhi + 1
///   DefX = PhiX >> 1
///   LOOP_BODY
///   Br: loop if (DefX != 0)
/// Use(CntPhi) or Use(CntInst)
///
/// Into:
/// If CntPhi used outside the loop:
///   CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1)
///   Count = CountPrev + 1
/// else
///   Count = BitWidth(InitX) - CTLZ(InitX)
/// loop:
///   CntPhi = PHI [Cnt0, CntInst]
///   PhiX = PHI [InitX, DefX]
///   PhiCount = PHI [Count, Dec]
///   CntInst = CntPhi + 1
///   DefX = PhiX >> 1
///   Dec = PhiCount - 1
///   LOOP_BODY
///   Br: loop if (Dec != 0)
/// Use(CountPrev + Cnt0) // Use(CntPhi)
/// or
/// Use(Count + Cnt0) // Use(CntInst)
///
/// If LOOP_BODY is empty the loop will be deleted.
/// If CntInst and DefX are not used in LOOP_BODY they will be removed.
void LoopIdiomRecognize::transformLoopToCountable(
    Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
    PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
    bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
  BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());

  // Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
  IRBuilder<> Builder(PreheaderBr);
  Builder.SetCurrentDebugLocation(DL);
  Value *FFS, *Count, *CountPrev, *NewCount, *InitXNext;

  //   Count = BitWidth - CTLZ(InitX);
  // If there are uses of CntPhi create:
  //   CountPrev = BitWidth - CTLZ(InitX >> 1);
  if (IsCntPhiUsedOutsideLoop) {
    // Pre-apply one step of the loop's shift to InitX, mirroring DefX's
    // opcode, so the computed count corresponds to the value CntPhi held.
    if (DefX->getOpcode() == Instruction::AShr)
      InitXNext =
          Builder.CreateAShr(InitX, ConstantInt::get(InitX->getType(), 1));
    else if (DefX->getOpcode() == Instruction::LShr)
      InitXNext =
          Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1));
    else if (DefX->getOpcode() == Instruction::Shl) // cttz
      InitXNext =
          Builder.CreateShl(InitX, ConstantInt::get(InitX->getType(), 1));
    else
      llvm_unreachable("Unexpected opcode!");
  } else
    InitXNext = InitX;
  FFS = createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
  Count = Builder.CreateSub(
      ConstantInt::get(FFS->getType(),
                       FFS->getType()->getIntegerBitWidth()),
      FFS);
  if (IsCntPhiUsedOutsideLoop) {
    CountPrev = Count;
    Count = Builder.CreateAdd(
        CountPrev,
        ConstantInt::get(CountPrev->getType(), 1));
  }

  // The replacement value for out-of-loop uses, in the counter's own type.
  NewCount = Builder.CreateZExtOrTrunc(
                      IsCntPhiUsedOutsideLoop ? CountPrev : Count,
                      cast<IntegerType>(CntInst->getType()));

  // If the counter's initial value is not zero, insert Add Inst.
  Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
  ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
  if (!InitConst || !InitConst->isZero())
    NewCount = Builder.CreateAdd(NewCount, CntInitVal);

  // Step 2: Insert new IV and loop condition:
  // loop:
  //   ...
  //   PhiCount = PHI [Count, Dec]
  //   ...
  //   Dec = PhiCount - 1
  //   ...
  //   Br: loop if (Dec != 0)
  BasicBlock *Body = *(CurLoop->block_begin());
  auto *LbBr = cast<BranchInst>(Body->getTerminator());
  ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
  Type *Ty = Count->getType();

  PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());

  Builder.SetInsertPoint(LbCond);
  Instruction *TcDec = cast<Instruction>(
      Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
                        "tcdec", false, true));

  TcPhi->addIncoming(Count, Preheader);
  TcPhi->addIncoming(TcDec, Body);

  // Rewrite the latch compare to test the new countdown IV, keeping the
  // branch's successor orientation.
  CmpInst::Predicate Pred =
      (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
  LbCond->setPredicate(Pred);
  LbCond->setOperand(0, TcDec);
  LbCond->setOperand(1, ConstantInt::get(Ty, 0));

  // Step 3: All the references to the original counter outside
  //  the loop are replaced with the NewCount
  if (IsCntPhiUsedOutsideLoop)
    CntPhi->replaceUsesOutsideBlock(NewCount, Body);
  else
    CntInst->replaceUsesOutsideBlock(NewCount, Body);

  // step 4: Forget the "non-computable" trip-count SCEV associated with the
  //   loop. The loop would otherwise not be deleted even if it becomes empty.
  SE->forgetLoop(CurLoop);
}
1678 
1679 void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
1680  Instruction *CntInst,
1681  PHINode *CntPhi, Value *Var) {
1682  BasicBlock *PreHead = CurLoop->getLoopPreheader();
1683  auto *PreCondBr = cast<BranchInst>(PreCondBB->getTerminator());
1684  const DebugLoc &DL = CntInst->getDebugLoc();
1685 
1686  // Assuming before transformation, the loop is following:
1687  // if (x) // the precondition
1688  // do { cnt++; x &= x - 1; } while(x);
1689 
1690  // Step 1: Insert the ctpop instruction at the end of the precondition block
1691  IRBuilder<> Builder(PreCondBr);
1692  Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
1693  {
1694  PopCnt = createPopcntIntrinsic(Builder, Var, DL);
1695  NewCount = PopCntZext =
1696  Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
1697 
1698  if (NewCount != PopCnt)
1699  (cast<Instruction>(NewCount))->setDebugLoc(DL);
1700 
1701  // TripCnt is exactly the number of iterations the loop has
1702  TripCnt = NewCount;
1703 
1704  // If the population counter's initial value is not zero, insert Add Inst.
1705  Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
1706  ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
1707  if (!InitConst || !InitConst->isZero()) {
1708  NewCount = Builder.CreateAdd(NewCount, CntInitVal);
1709  (cast<Instruction>(NewCount))->setDebugLoc(DL);
1710  }
1711  }
1712 
1713  // Step 2: Replace the precondition from "if (x == 0) goto loop-exit" to
1714  // "if (NewCount == 0) loop-exit". Without this change, the intrinsic
1715  // function would be partial dead code, and downstream passes will drag
1716  // it back from the precondition block to the preheader.
1717  {
1718  ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
1719 
1720  Value *Opnd0 = PopCntZext;
1721  Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
1722  if (PreCond->getOperand(0) != Var)
1723  std::swap(Opnd0, Opnd1);
1724 
1725  ICmpInst *NewPreCond = cast<ICmpInst>(
1726  Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
1727  PreCondBr->setCondition(NewPreCond);
1728 
1730  }
1731 
1732  // Step 3: Note that the population count is exactly the trip count of the
1733  // loop in question, which enable us to convert the loop from noncountable
1734  // loop into a countable one. The benefit is twofold:
1735  //
1736  // - If the loop only counts population, the entire loop becomes dead after
1737  // the transformation. It is a lot easier to prove a countable loop dead
1738  // than to prove a noncountable one. (In some C dialects, an infinite loop
1739  // isn't dead even if it computes nothing useful. In general, DCE needs
1740  // to prove a noncountable loop finite before safely delete it.)
1741  //
1742  // - If the loop also performs something else, it remains alive.
1743  // Since it is transformed to countable form, it can be aggressively
1744  // optimized by some optimizations which are in general not applicable
1745  // to a noncountable loop.
1746  //
1747  // After this step, this loop (conceptually) would look like following:
1748  // newcnt = __builtin_ctpop(x);
1749  // t = newcnt;
1750  // if (x)
1751  // do { cnt++; x &= x-1; t--) } while (t > 0);
1752  BasicBlock *Body = *(CurLoop->block_begin());
1753  {
1754  auto *LbBr = cast<BranchInst>(Body->getTerminator());
1755  ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
1756  Type *Ty = TripCnt->getType();
1757 
1758  PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
1759 
1760  Builder.SetInsertPoint(LbCond);
1761  Instruction *TcDec = cast<Instruction>(
1762  Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
1763  "tcdec", false, true));
1764 
1765  TcPhi->addIncoming(TripCnt, PreHead);
1766  TcPhi->addIncoming(TcDec, Body);
1767 
1768  CmpInst::Predicate Pred =
1769  (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
1770  LbCond->setPredicate(Pred);
1771  LbCond->setOperand(0, TcDec);
1772  LbCond->setOperand(1, ConstantInt::get(Ty, 0));
1773  }
1774 
1775  // Step 4: All the references to the original population counter outside
1776  // the loop are replaced with the NewCount -- the value returned from
1777  // __builtin_ctpop().
1778  CntInst->replaceUsesOutsideBlock(NewCount, Body);
1779 
1780  // step 5: Forget the "non-computable" trip-count SCEV associated with the
1781  // loop. The loop would otherwise not be deleted even if it becomes empty.
1782  SE->forgetLoop(CurLoop);
1783 }
The access may reference and may modify the value stored in memory.
virtual void computeLoopSafetyInfo(const Loop *CurLoop)
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:44
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:80
uint64_t CallInst * C
Value * getValueOperand()
Definition: Instructions.h:409
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:67
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:594
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1984
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static CallInst * createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:1704
const SCEV * getConstant(ConstantInt *V)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
BinaryOps getOpcode() const
Definition: InstrTypes.h:316
Value * isBytewiseValue(Value *V)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
bool isAtomic() const
Return true if this instruction has an AtomicOrdering of unordered or higher.
ArrayRef< BasicBlock *>::const_iterator block_iterator
Definition: LoopInfo.h:152
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
static constexpr LocationSize unknown()
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:164
bool isUnordered() const
Definition: Instructions.h:403
void push_back(const T &Elt)
Definition: SmallVector.h:211
The main scalar evolution driver.
This class represents a function call, abstracting a target machine's calling convention.
static PHINode * getRecurrenceVar(Value *VarX, Instruction *DefX, BasicBlock *LoopEntry)
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:173
Value * getValue() const
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:709
static LocationSize precise(uint64_t Value)
This class wraps the llvm.memset intrinsic.
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:33
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:534
An instruction for reading from memory.
Definition: Instructions.h:167
static Value * matchCondition(BranchInst *BI, BasicBlock *LoopEntry, bool JmpOnZero=false)
Check if the given conditional branch is based on the comparison between a variable and zero...
Value * getCondition() const
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
void setAlignment(unsigned Align)
Definition: Globals.cpp:115
Value * getLength() const
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:92
static Constant * get(ArrayType *T, ArrayRef< Constant *> V)
Definition: Constants.cpp:992
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
Simple and conservative implementation of LoopSafetyInfo that can give false-positive answers to its ...
Definition: MustExecute.h:97
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:133
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Definition: Instructions.h:231
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:369
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:689
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:742
This file contains the simple types necessary to represent the attributes associated with functions a...
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1049
unsigned getDestAlignment() const
This file implements a class to represent arbitrary precision integral constant values and operations...
const APInt & getAPInt() const
BlockT * getHeader() const
Definition: LoopInfo.h:99
static bool isSimple(Instruction *I)
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:200
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:418
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:181
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
This node represents a polynomial recurrence on the trip count of the specified loop.
static APInt getStoreStride(const SCEVAddRecExpr *StoreEv)
#define T
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:120
Class to represent array types.
Definition: DerivedTypes.h:400
static cl::opt< bool > UseLIRCodeSizeHeurs("use-lir-code-size-heurs", cl::desc("Use loop idiom recognition code size heuristics when compiling" "with -Os/-Oz"), cl::init(true), cl::Hidden)
bool has(LibFunc F) const
Tests whether a library function is available.
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:234
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1066
void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry &)
An instruction for storing to memory.
Definition: Instructions.h:320
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:208
bool isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Returns true if the give value is known to be non-negative.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
static void deleteDeadInstruction(Instruction *I)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1018
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:126
Value * getOperand(unsigned i) const
Definition: User.h:169
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:210
bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS. ...
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1782
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
bool inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI)
Analyze the name and prototype of the given function and set any applicable attributes.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
const SCEV * getOperand(unsigned i) const
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:189
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:321
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:233
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:286
Conditional or Unconditional Branch instruction.
static CallInst * createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL, bool ZeroCheck, Intrinsic::ID IID)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
Value * getIncomingValueForBlock(const BasicBlock *BB) const
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const Instruction & front() const
Definition: BasicBlock.h:280
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
size_t size() const
Definition: BasicBlock.h:278
bool isUnordered() const
Definition: Instructions.h:278
Represent the analysis usage information of a pass.
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:597
This instruction compares its operands according to the predicate given to the constructor.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:646
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL, Intrinsic::ID &IntrinID, Value *&InitX, Instruction *&CntInst, PHINode *&CntPhi, Instruction *&DefX)
Return true if the idiom is detected in the loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
Value * getPointerOperand()
Definition: Instructions.h:284
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
self_iterator getIterator()
Definition: ilist_node.h:81
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:430
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1424
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
size_t size() const
Definition: SmallVector.h:52
static wasm::ValType getType(const TargetRegisterClass *RC)
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:219
bool isVolatile() const
const SCEV * getMulExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Representation for a specific memory location.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
typename vector_type::const_iterator iterator
Definition: SetVector.h:48
Iterator for intrusive lists based on ilist_node.
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
Type * getType() const
Return the LLVM type of this SCEV expression.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:270
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:248
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:631
bool isConditional() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T, AttributeList AttributeList)
Look up the specified function in the module symbol table.
Definition: Module.cpp:143
static Constant * getMemSetPatternValue(Value *V, const DataLayout *DL)
getMemSetPatternValue - If a strided store of the specified value is safe to turn into a memset_patte...
static const SCEV * getNumBytes(const SCEV *BECount, Type *IntPtr, unsigned StoreSize, Loop *CurLoop, const DataLayout *DL, ScalarEvolution *SE)
Compute the number of bytes as a SCEV from the backedge taken count.
static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, Instruction *&CntInst, PHINode *&CntPhi, Value *&Var)
Return true iff the idiom is detected in the loop.
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:587
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
The access may modify the value stored in memory.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
signed less or equal
Definition: InstrTypes.h:676
loop Recognize loop idioms
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:55
Class for arbitrary precision integers.
Definition: APInt.h:69
loop idiom
iterator_range< user_iterator > users()
Definition: Value.h:399
This class uses information about analyzed scalars to rewrite expressions in canonical form...
Pass * createLoopIdiomPass()
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Type *> ParamTys, const User *U=nullptr) const
Estimate the cost of an intrinsic when lowered.
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition: IRBuilder.h:291
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:593
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:721
bool isVolatile() const
Return true if this is a store to a volatile memory location.
Definition: Instructions.h:353
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:324
INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom", "Recognize loop idioms", false, false) INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
void setUnnamedAddr(UnnamedAddr Val)
Definition: GlobalValue.h:215
unsigned getAtomicMemIntrinsicMaxElementSize() const
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:240
This class represents an analyzed expression in the program.
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:96
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:464
This file provides utility analysis objects describing memory locations.
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
LLVM_NODISCARD ModRefInfo intersectModRef(const ModRefInfo MRI1, const ModRefInfo MRI2)
#define I(x, y, z)
Definition: MD5.cpp:58
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:192
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:136
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:580
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:322
block_iterator block_end() const
Definition: LoopInfo.h:154
uint32_t Size
Definition: Profile.cpp:46
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2009
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:365
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:290
static const SCEV * getStartForNegStride(const SCEV *Start, const SCEV *BECount, Type *IntPtr, unsigned StoreSize, ScalarEvolution *SE)
const SCEV * getBackedgeTakenCount(const Loop *L)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
The cost of a typical 'add' instruction.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
LLVM Value Representation.
Definition: Value.h:72
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
A vector that has set insertion semantics.
Definition: SetVector.h:40
static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, const SCEV *BECount, unsigned StoreSize, AliasAnalysis &AA, SmallPtrSetImpl< Instruction *> &IgnoredStores)
mayLoopAccessLocation - Return true if the specified loop might access the specified pointer location...
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
unsigned greater than
Definition: InstrTypes.h:669
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
A container for analyses that lazily runs them and caches their results.
void replaceUsesOutsideBlock(Value *V, BasicBlock *BB)
replaceUsesOutsideBlock - Go through the uses list for this definition and make each use point to "V"...
Definition: Value.cpp:439
This pass exposes codegen information to IR-level passes.
LLVM_NODISCARD bool isModOrRefSet(const ModRefInfo MRI)
bool isSimple() const
Definition: Instructions.h:401
This header defines various interfaces for pass management in LLVM.
bool isBigEndian() const
Definition: DataLayout.h:233
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
getModRefInfo (for call sites) - Return information about whether a particular call site modifies or ...
virtual bool anyBlockMayThrow() const
Returns true iff any block of the loop for which this info is contains an instruction that may throw ...
Definition: MustExecute.cpp:40
#define LLVM_DEBUG(X)
Definition: Debug.h:122
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
block_iterator block_begin() const
Definition: LoopInfo.h:153
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count...
Value * getPointerOperand()
Definition: Instructions.h:412
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
This class represents a constant integer value.