LLVM  16.0.0git
LoopFuse.cpp
Go to the documentation of this file.
1 //===- LoopFuse.cpp - Loop Fusion Pass ------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the loop fusion pass.
11 /// The implementation is largely based on the following document:
12 ///
13 /// Code Transformations to Augment the Scope of Loop Fusion in a
14 /// Production Compiler
15 /// Christopher Mark Barton
16 /// MSc Thesis
17 /// https://webdocs.cs.ualberta.ca/~amaral/thesis/ChristopherBartonMSc.pdf
18 ///
19 /// The general approach taken is to collect sets of control flow equivalent
20 /// loops and test whether they can be fused. The necessary conditions for
21 /// fusion are:
22 /// 1. The loops must be adjacent (there cannot be any statements between
23 /// the two loops).
24 /// 2. The loops must be conforming (they must execute the same number of
25 /// iterations).
26 /// 3. The loops must be control flow equivalent (if one loop executes, the
27 /// other is guaranteed to execute).
28 /// 4. There cannot be any negative distance dependencies between the loops.
29 /// If all of these conditions are satisfied, it is safe to fuse the loops.
30 ///
31 /// This implementation creates FusionCandidates that represent the loop and the
32 /// necessary information needed by fusion. It then operates on the fusion
33 /// candidates, first confirming that the candidate is eligible for fusion. The
34 /// candidates are then collected into control flow equivalent sets, sorted in
35 /// dominance order. Each set of control flow equivalent candidates is then
36 /// traversed, attempting to fuse pairs of candidates in the set. If all
37 /// requirements for fusion are met, the two candidates are fused, creating a
38 /// new (fused) candidate which is then added back into the set to consider for
39 /// additional fusion.
40 ///
41 /// This implementation currently does not make any modifications to remove
42 /// conditions for fusion. Code transformations to make loops conform to each of
43 /// the conditions for fusion are discussed in more detail in the document
44 /// above. These can be added to the current implementation in the future.
45 //===----------------------------------------------------------------------===//
46 
48 #include "llvm/ADT/Statistic.h"
52 #include "llvm/Analysis/LoopInfo.h"
58 #include "llvm/IR/Function.h"
59 #include "llvm/IR/Verifier.h"
60 #include "llvm/InitializePasses.h"
61 #include "llvm/Pass.h"
63 #include "llvm/Support/Debug.h"
65 #include "llvm/Transforms/Scalar.h"
66 #include "llvm/Transforms/Utils.h"
71 
72 using namespace llvm;
73 
74 #define DEBUG_TYPE "loop-fusion"
75 
76 STATISTIC(FuseCounter, "Loops fused");
77 STATISTIC(NumFusionCandidates, "Number of candidates for loop fusion");
78 STATISTIC(InvalidPreheader, "Loop has invalid preheader");
79 STATISTIC(InvalidHeader, "Loop has invalid header");
80 STATISTIC(InvalidExitingBlock, "Loop has invalid exiting blocks");
81 STATISTIC(InvalidExitBlock, "Loop has invalid exit block");
82 STATISTIC(InvalidLatch, "Loop has invalid latch");
83 STATISTIC(InvalidLoop, "Loop is invalid");
84 STATISTIC(AddressTakenBB, "Basic block has address taken");
85 STATISTIC(MayThrowException, "Loop may throw an exception");
86 STATISTIC(ContainsVolatileAccess, "Loop contains a volatile access");
87 STATISTIC(NotSimplifiedForm, "Loop is not in simplified form");
88 STATISTIC(InvalidDependencies, "Dependencies prevent fusion");
89 STATISTIC(UnknownTripCount, "Loop has unknown trip count");
90 STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop");
91 STATISTIC(NonEqualTripCount, "Loop trip counts are not the same");
92 STATISTIC(NonAdjacent, "Loops are not adjacent");
93 STATISTIC(
94  NonEmptyPreheader,
95  "Loop has a non-empty preheader with instructions that cannot be moved");
96 STATISTIC(FusionNotBeneficial, "Fusion is not beneficial");
97 STATISTIC(NonIdenticalGuards, "Candidates have different guards");
98 STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block with "
99  "instructions that cannot be moved");
100 STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block with "
101  "instructions that cannot be moved");
102 STATISTIC(NotRotated, "Candidate is not rotated");
103 STATISTIC(OnlySecondCandidateIsGuarded,
104  "The second candidate is guarded while the first one is not");
105 STATISTIC(NumHoistedInsts, "Number of hoisted preheader instructions.");
106 STATISTIC(NumSunkInsts, "Number of hoisted preheader instructions.");
107 
112 };
113 
115  "loop-fusion-dependence-analysis",
116  cl::desc("Which dependence analysis should loop fusion use?"),
118  "Use the scalar evolution interface"),
120  "Use the dependence analysis interface"),
122  "Use all available analyses")),
124 
126  "loop-fusion-peel-max-count", cl::init(0), cl::Hidden,
127  cl::desc("Max number of iterations to be peeled from a loop, such that "
128  "fusion can take place"));
129 
130 #ifndef NDEBUG
131 static cl::opt<bool>
132  VerboseFusionDebugging("loop-fusion-verbose-debug",
133  cl::desc("Enable verbose debugging for Loop Fusion"),
134  cl::Hidden, cl::init(false));
135 #endif
136 
137 namespace {
138 /// This class is used to represent a candidate for loop fusion. When it is
139 /// constructed, it checks the conditions for loop fusion to ensure that it
140 /// represents a valid candidate. It caches several parts of a loop that are
141 /// used throughout loop fusion (e.g., loop preheader, loop header, etc) instead
142 /// of continually querying the underlying Loop to retrieve these values. It is
143 /// assumed these will not change throughout loop fusion.
144 ///
145 /// The invalidate method should be used to indicate that the FusionCandidate is
146 /// no longer a valid candidate for fusion. Similarly, the isValid() method can
147 /// be used to ensure that the FusionCandidate is still valid for fusion.
148 struct FusionCandidate {
149  /// Cache of parts of the loop used throughout loop fusion. These should not
150  /// need to change throughout the analysis and transformation.
151  /// These parts are cached to avoid repeatedly looking up in the Loop class.
152 
153  /// Preheader of the loop this candidate represents
154  BasicBlock *Preheader;
155  /// Header of the loop this candidate represents
156  BasicBlock *Header;
157  /// Blocks in the loop that exit the loop
158  BasicBlock *ExitingBlock;
159  /// The successor block of this loop (where the exiting blocks go to)
160  BasicBlock *ExitBlock;
161  /// Latch of the loop
162  BasicBlock *Latch;
163  /// The loop that this fusion candidate represents
164  Loop *L;
165  /// Vector of instructions in this loop that read from memory
167  /// Vector of instructions in this loop that write to memory
169  /// Are all of the members of this fusion candidate still valid
170  bool Valid;
171  /// Guard branch of the loop, if it exists
172  BranchInst *GuardBranch;
173  /// Peeling Paramaters of the Loop.
175  /// Can you Peel this Loop?
176  bool AbleToPeel;
177  /// Has this loop been Peeled
178  bool Peeled;
179 
180  /// Dominator and PostDominator trees are needed for the
181  /// FusionCandidateCompare function, required by FusionCandidateSet to
182  /// determine where the FusionCandidate should be inserted into the set. These
183  /// are used to establish ordering of the FusionCandidates based on dominance.
184  DominatorTree &DT;
185  const PostDominatorTree *PDT;
186 
188 
189  FusionCandidate(Loop *L, DominatorTree &DT, const PostDominatorTree *PDT,
191  : Preheader(L->getLoopPreheader()), Header(L->getHeader()),
192  ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
193  Latch(L->getLoopLatch()), L(L), Valid(true),
194  GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(canPeel(L)),
195  Peeled(false), DT(DT), PDT(PDT), ORE(ORE) {
196 
197  // Walk over all blocks in the loop and check for conditions that may
198  // prevent fusion. For each block, walk over all instructions and collect
199  // the memory reads and writes If any instructions that prevent fusion are
200  // found, invalidate this object and return.
201  for (BasicBlock *BB : L->blocks()) {
202  if (BB->hasAddressTaken()) {
203  invalidate();
204  reportInvalidCandidate(AddressTakenBB);
205  return;
206  }
207 
208  for (Instruction &I : *BB) {
209  if (I.mayThrow()) {
210  invalidate();
211  reportInvalidCandidate(MayThrowException);
212  return;
213  }
214  if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
215  if (SI->isVolatile()) {
216  invalidate();
217  reportInvalidCandidate(ContainsVolatileAccess);
218  return;
219  }
220  }
221  if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
222  if (LI->isVolatile()) {
223  invalidate();
224  reportInvalidCandidate(ContainsVolatileAccess);
225  return;
226  }
227  }
228  if (I.mayWriteToMemory())
229  MemWrites.push_back(&I);
230  if (I.mayReadFromMemory())
231  MemReads.push_back(&I);
232  }
233  }
234  }
235 
236  /// Check if all members of the class are valid.
237  bool isValid() const {
238  return Preheader && Header && ExitingBlock && ExitBlock && Latch && L &&
239  !L->isInvalid() && Valid;
240  }
241 
242  /// Verify that all members are in sync with the Loop object.
243  void verify() const {
244  assert(isValid() && "Candidate is not valid!!");
245  assert(!L->isInvalid() && "Loop is invalid!");
246  assert(Preheader == L->getLoopPreheader() && "Preheader is out of sync");
247  assert(Header == L->getHeader() && "Header is out of sync");
248  assert(ExitingBlock == L->getExitingBlock() &&
249  "Exiting Blocks is out of sync");
250  assert(ExitBlock == L->getExitBlock() && "Exit block is out of sync");
251  assert(Latch == L->getLoopLatch() && "Latch is out of sync");
252  }
253 
254  /// Get the entry block for this fusion candidate.
255  ///
256  /// If this fusion candidate represents a guarded loop, the entry block is the
257  /// loop guard block. If it represents an unguarded loop, the entry block is
258  /// the preheader of the loop.
259  BasicBlock *getEntryBlock() const {
260  if (GuardBranch)
261  return GuardBranch->getParent();
262  else
263  return Preheader;
264  }
265 
266  /// After Peeling the loop is modified quite a bit, hence all of the Blocks
267  /// need to be updated accordingly.
268  void updateAfterPeeling() {
269  Preheader = L->getLoopPreheader();
270  Header = L->getHeader();
271  ExitingBlock = L->getExitingBlock();
272  ExitBlock = L->getExitBlock();
273  Latch = L->getLoopLatch();
274  verify();
275  }
276 
277  /// Given a guarded loop, get the successor of the guard that is not in the
278  /// loop.
279  ///
280  /// This method returns the successor of the loop guard that is not located
281  /// within the loop (i.e., the successor of the guard that is not the
282  /// preheader).
283  /// This method is only valid for guarded loops.
284  BasicBlock *getNonLoopBlock() const {
285  assert(GuardBranch && "Only valid on guarded loops.");
286  assert(GuardBranch->isConditional() &&
287  "Expecting guard to be a conditional branch.");
288  if (Peeled)
289  return GuardBranch->getSuccessor(1);
290  return (GuardBranch->getSuccessor(0) == Preheader)
291  ? GuardBranch->getSuccessor(1)
292  : GuardBranch->getSuccessor(0);
293  }
294 
295 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
296  LLVM_DUMP_METHOD void dump() const {
297  dbgs() << "\tGuardBranch: ";
298  if (GuardBranch)
299  dbgs() << *GuardBranch;
300  else
301  dbgs() << "nullptr";
302  dbgs() << "\n"
303  << (GuardBranch ? GuardBranch->getName() : "nullptr") << "\n"
304  << "\tPreheader: " << (Preheader ? Preheader->getName() : "nullptr")
305  << "\n"
306  << "\tHeader: " << (Header ? Header->getName() : "nullptr") << "\n"
307  << "\tExitingBB: "
308  << (ExitingBlock ? ExitingBlock->getName() : "nullptr") << "\n"
309  << "\tExitBB: " << (ExitBlock ? ExitBlock->getName() : "nullptr")
310  << "\n"
311  << "\tLatch: " << (Latch ? Latch->getName() : "nullptr") << "\n"
312  << "\tEntryBlock: "
313  << (getEntryBlock() ? getEntryBlock()->getName() : "nullptr")
314  << "\n";
315  }
316 #endif
317 
318  /// Determine if a fusion candidate (representing a loop) is eligible for
319  /// fusion. Note that this only checks whether a single loop can be fused - it
320  /// does not check whether it is *legal* to fuse two loops together.
321  bool isEligibleForFusion(ScalarEvolution &SE) const {
322  if (!isValid()) {
323  LLVM_DEBUG(dbgs() << "FC has invalid CFG requirements!\n");
324  if (!Preheader)
325  ++InvalidPreheader;
326  if (!Header)
327  ++InvalidHeader;
328  if (!ExitingBlock)
329  ++InvalidExitingBlock;
330  if (!ExitBlock)
331  ++InvalidExitBlock;
332  if (!Latch)
333  ++InvalidLatch;
334  if (L->isInvalid())
335  ++InvalidLoop;
336 
337  return false;
338  }
339 
340  // Require ScalarEvolution to be able to determine a trip count.
342  LLVM_DEBUG(dbgs() << "Loop " << L->getName()
343  << " trip count not computable!\n");
344  return reportInvalidCandidate(UnknownTripCount);
345  }
346 
347  if (!L->isLoopSimplifyForm()) {
348  LLVM_DEBUG(dbgs() << "Loop " << L->getName()
349  << " is not in simplified form!\n");
350  return reportInvalidCandidate(NotSimplifiedForm);
351  }
352 
353  if (!L->isRotatedForm()) {
354  LLVM_DEBUG(dbgs() << "Loop " << L->getName() << " is not rotated!\n");
355  return reportInvalidCandidate(NotRotated);
356  }
357 
358  return true;
359  }
360 
361 private:
362  // This is only used internally for now, to clear the MemWrites and MemReads
363  // list and setting Valid to false. I can't envision other uses of this right
364  // now, since once FusionCandidates are put into the FusionCandidateSet they
365  // are immutable. Thus, any time we need to change/update a FusionCandidate,
366  // we must create a new one and insert it into the FusionCandidateSet to
367  // ensure the FusionCandidateSet remains ordered correctly.
368  void invalidate() {
369  MemWrites.clear();
370  MemReads.clear();
371  Valid = false;
372  }
373 
374  bool reportInvalidCandidate(llvm::Statistic &Stat) const {
375  using namespace ore;
376  assert(L && Preheader && "Fusion candidate not initialized properly!");
377 #if LLVM_ENABLE_STATS
378  ++Stat;
379  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, Stat.getName(),
380  L->getStartLoc(), Preheader)
381  << "[" << Preheader->getParent()->getName() << "]: "
382  << "Loop is not a candidate for fusion: " << Stat.getDesc());
383 #endif
384  return false;
385  }
386 };
387 
388 struct FusionCandidateCompare {
389  /// Comparison functor to sort two Control Flow Equivalent fusion candidates
390  /// into dominance order.
391  /// If LHS dominates RHS and RHS post-dominates LHS, return true;
392  /// IF RHS dominates LHS and LHS post-dominates RHS, return false;
393  bool operator()(const FusionCandidate &LHS,
394  const FusionCandidate &RHS) const {
395  const DominatorTree *DT = &(LHS.DT);
396 
397  BasicBlock *LHSEntryBlock = LHS.getEntryBlock();
398  BasicBlock *RHSEntryBlock = RHS.getEntryBlock();
399 
400  // Do not save PDT to local variable as it is only used in asserts and thus
401  // will trigger an unused variable warning if building without asserts.
402  assert(DT && LHS.PDT && "Expecting valid dominator tree");
403 
404  // Do this compare first so if LHS == RHS, function returns false.
405  if (DT->dominates(RHSEntryBlock, LHSEntryBlock)) {
406  // RHS dominates LHS
407  // Verify LHS post-dominates RHS
408  assert(LHS.PDT->dominates(LHSEntryBlock, RHSEntryBlock));
409  return false;
410  }
411 
412  if (DT->dominates(LHSEntryBlock, RHSEntryBlock)) {
413  // Verify RHS Postdominates LHS
414  assert(LHS.PDT->dominates(RHSEntryBlock, LHSEntryBlock));
415  return true;
416  }
417 
418  // If LHS does not dominate RHS and RHS does not dominate LHS then there is
419  // no dominance relationship between the two FusionCandidates. Thus, they
420  // should not be in the same set together.
422  "No dominance relationship between these fusion candidates!");
423  }
424 };
425 
426 using LoopVector = SmallVector<Loop *, 4>;
427 
428 // Set of Control Flow Equivalent (CFE) Fusion Candidates, sorted in dominance
429 // order. Thus, if FC0 comes *before* FC1 in a FusionCandidateSet, then FC0
430 // dominates FC1 and FC1 post-dominates FC0.
431 // std::set was chosen because we want a sorted data structure with stable
432 // iterators. A subsequent patch to loop fusion will enable fusing non-adjacent
433 // loops by moving intervening code around. When this intervening code contains
434 // loops, those loops will be moved also. The corresponding FusionCandidates
435 // will also need to be moved accordingly. As this is done, having stable
436 // iterators will simplify the logic. Similarly, having an efficient insert that
437 // keeps the FusionCandidateSet sorted will also simplify the implementation.
438 using FusionCandidateSet = std::set<FusionCandidate, FusionCandidateCompare>;
439 using FusionCandidateCollection = SmallVector<FusionCandidateSet, 4>;
440 
441 #if !defined(NDEBUG)
443  const FusionCandidate &FC) {
444  if (FC.isValid())
445  OS << FC.Preheader->getName();
446  else
447  OS << "<Invalid>";
448 
449  return OS;
450 }
451 
453  const FusionCandidateSet &CandSet) {
454  for (const FusionCandidate &FC : CandSet)
455  OS << FC << '\n';
456 
457  return OS;
458 }
459 
460 static void
461 printFusionCandidates(const FusionCandidateCollection &FusionCandidates) {
462  dbgs() << "Fusion Candidates: \n";
463  for (const auto &CandidateSet : FusionCandidates) {
464  dbgs() << "*** Fusion Candidate Set ***\n";
465  dbgs() << CandidateSet;
466  dbgs() << "****************************\n";
467  }
468 }
469 #endif
470 
471 /// Collect all loops in function at the same nest level, starting at the
472 /// outermost level.
473 ///
474 /// This data structure collects all loops at the same nest level for a
475 /// given function (specified by the LoopInfo object). It starts at the
476 /// outermost level.
477 struct LoopDepthTree {
478  using LoopsOnLevelTy = SmallVector<LoopVector, 4>;
479  using iterator = LoopsOnLevelTy::iterator;
480  using const_iterator = LoopsOnLevelTy::const_iterator;
481 
482  LoopDepthTree(LoopInfo &LI) : Depth(1) {
483  if (!LI.empty())
484  LoopsOnLevel.emplace_back(LoopVector(LI.rbegin(), LI.rend()));
485  }
486 
487  /// Test whether a given loop has been removed from the function, and thus is
488  /// no longer valid.
489  bool isRemovedLoop(const Loop *L) const { return RemovedLoops.count(L); }
490 
491  /// Record that a given loop has been removed from the function and is no
492  /// longer valid.
493  void removeLoop(const Loop *L) { RemovedLoops.insert(L); }
494 
495  /// Descend the tree to the next (inner) nesting level
496  void descend() {
497  LoopsOnLevelTy LoopsOnNextLevel;
498 
499  for (const LoopVector &LV : *this)
500  for (Loop *L : LV)
501  if (!isRemovedLoop(L) && L->begin() != L->end())
502  LoopsOnNextLevel.emplace_back(LoopVector(L->begin(), L->end()));
503 
504  LoopsOnLevel = LoopsOnNextLevel;
505  RemovedLoops.clear();
506  Depth++;
507  }
508 
509  bool empty() const { return size() == 0; }
510  size_t size() const { return LoopsOnLevel.size() - RemovedLoops.size(); }
511  unsigned getDepth() const { return Depth; }
512 
513  iterator begin() { return LoopsOnLevel.begin(); }
514  iterator end() { return LoopsOnLevel.end(); }
515  const_iterator begin() const { return LoopsOnLevel.begin(); }
516  const_iterator end() const { return LoopsOnLevel.end(); }
517 
518 private:
519  /// Set of loops that have been removed from the function and are no longer
520  /// valid.
521  SmallPtrSet<const Loop *, 8> RemovedLoops;
522 
523  /// Depth of the current level, starting at 1 (outermost loops).
524  unsigned Depth;
525 
526  /// Vector of loops at the current depth level that have the same parent loop
527  LoopsOnLevelTy LoopsOnLevel;
528 };
529 
530 #ifndef NDEBUG
531 static void printLoopVector(const LoopVector &LV) {
532  dbgs() << "****************************\n";
533  for (auto *L : LV)
534  printLoop(*L, dbgs());
535  dbgs() << "****************************\n";
536 }
537 #endif
538 
539 struct LoopFuser {
540 private:
541  // Sets of control flow equivalent fusion candidates for a given nest level.
542  FusionCandidateCollection FusionCandidates;
543 
544  LoopDepthTree LDT;
545  DomTreeUpdater DTU;
546 
547  LoopInfo &LI;
548  DominatorTree &DT;
549  DependenceInfo &DI;
550  ScalarEvolution &SE;
551  PostDominatorTree &PDT;
553  AssumptionCache &AC;
554  const TargetTransformInfo &TTI;
555 
556 public:
557  LoopFuser(LoopInfo &LI, DominatorTree &DT, DependenceInfo &DI,
561  : LDT(LI), DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy), LI(LI),
562  DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE), AC(AC), TTI(TTI) {}
563 
564  /// This is the main entry point for loop fusion. It will traverse the
565  /// specified function and collect candidate loops to fuse, starting at the
566  /// outermost nesting level and working inwards.
567  bool fuseLoops(Function &F) {
568 #ifndef NDEBUG
570  LI.print(dbgs());
571  }
572 #endif
573 
574  LLVM_DEBUG(dbgs() << "Performing Loop Fusion on function " << F.getName()
575  << "\n");
576  bool Changed = false;
577 
578  while (!LDT.empty()) {
579  LLVM_DEBUG(dbgs() << "Got " << LDT.size() << " loop sets for depth "
580  << LDT.getDepth() << "\n";);
581 
582  for (const LoopVector &LV : LDT) {
583  assert(LV.size() > 0 && "Empty loop set was build!");
584 
585  // Skip singleton loop sets as they do not offer fusion opportunities on
586  // this level.
587  if (LV.size() == 1)
588  continue;
589 #ifndef NDEBUG
591  LLVM_DEBUG({
592  dbgs() << " Visit loop set (#" << LV.size() << "):\n";
593  printLoopVector(LV);
594  });
595  }
596 #endif
597 
598  collectFusionCandidates(LV);
599  Changed |= fuseCandidates();
600  }
601 
602  // Finished analyzing candidates at this level.
603  // Descend to the next level and clear all of the candidates currently
604  // collected. Note that it will not be possible to fuse any of the
605  // existing candidates with new candidates because the new candidates will
606  // be at a different nest level and thus not be control flow equivalent
607  // with all of the candidates collected so far.
608  LLVM_DEBUG(dbgs() << "Descend one level!\n");
609  LDT.descend();
610  FusionCandidates.clear();
611  }
612 
613  if (Changed)
614  LLVM_DEBUG(dbgs() << "Function after Loop Fusion: \n"; F.dump(););
615 
616 #ifndef NDEBUG
617  assert(DT.verify());
618  assert(PDT.verify());
619  LI.verify(DT);
620  SE.verify();
621 #endif
622 
623  LLVM_DEBUG(dbgs() << "Loop Fusion complete\n");
624  return Changed;
625  }
626 
627 private:
628  /// Determine if two fusion candidates are control flow equivalent.
629  ///
630  /// Two fusion candidates are control flow equivalent if when one executes,
631  /// the other is guaranteed to execute. This is determined using dominators
632  /// and post-dominators: if A dominates B and B post-dominates A then A and B
633  /// are control-flow equivalent.
634  bool isControlFlowEquivalent(const FusionCandidate &FC0,
635  const FusionCandidate &FC1) const {
636  assert(FC0.Preheader && FC1.Preheader && "Expecting valid preheaders");
637 
638  return ::isControlFlowEquivalent(*FC0.getEntryBlock(), *FC1.getEntryBlock(),
639  DT, PDT);
640  }
641 
642  /// Iterate over all loops in the given loop set and identify the loops that
643  /// are eligible for fusion. Place all eligible fusion candidates into Control
644  /// Flow Equivalent sets, sorted by dominance.
645  void collectFusionCandidates(const LoopVector &LV) {
646  for (Loop *L : LV) {
649  FusionCandidate CurrCand(L, DT, &PDT, ORE, PP);
650  if (!CurrCand.isEligibleForFusion(SE))
651  continue;
652 
653  // Go through each list in FusionCandidates and determine if L is control
654  // flow equivalent with the first loop in that list. If it is, append LV.
655  // If not, go to the next list.
656  // If no suitable list is found, start another list and add it to
657  // FusionCandidates.
658  bool FoundSet = false;
659 
660  for (auto &CurrCandSet : FusionCandidates) {
661  if (isControlFlowEquivalent(*CurrCandSet.begin(), CurrCand)) {
662  CurrCandSet.insert(CurrCand);
663  FoundSet = true;
664 #ifndef NDEBUG
666  LLVM_DEBUG(dbgs() << "Adding " << CurrCand
667  << " to existing candidate set\n");
668 #endif
669  break;
670  }
671  }
672  if (!FoundSet) {
673  // No set was found. Create a new set and add to FusionCandidates
674 #ifndef NDEBUG
676  LLVM_DEBUG(dbgs() << "Adding " << CurrCand << " to new set\n");
677 #endif
678  FusionCandidateSet NewCandSet;
679  NewCandSet.insert(CurrCand);
680  FusionCandidates.push_back(NewCandSet);
681  }
682  NumFusionCandidates++;
683  }
684  }
685 
686  /// Determine if it is beneficial to fuse two loops.
687  ///
688  /// For now, this method simply returns true because we want to fuse as much
689  /// as possible (primarily to test the pass). This method will evolve, over
690  /// time, to add heuristics for profitability of fusion.
691  bool isBeneficialFusion(const FusionCandidate &FC0,
692  const FusionCandidate &FC1) {
693  return true;
694  }
695 
696  /// Determine if two fusion candidates have the same trip count (i.e., they
697  /// execute the same number of iterations).
698  ///
699  /// This function will return a pair of values. The first is a boolean,
700  /// stating whether or not the two candidates are known at compile time to
701  /// have the same TripCount. The second is the difference in the two
702  /// TripCounts. This information can be used later to determine whether or not
703  /// peeling can be performed on either one of the candidates.
704  std::pair<bool, Optional<unsigned>>
705  haveIdenticalTripCounts(const FusionCandidate &FC0,
706  const FusionCandidate &FC1) const {
707  const SCEV *TripCount0 = SE.getBackedgeTakenCount(FC0.L);
708  if (isa<SCEVCouldNotCompute>(TripCount0)) {
709  UncomputableTripCount++;
710  LLVM_DEBUG(dbgs() << "Trip count of first loop could not be computed!");
711  return {false, None};
712  }
713 
714  const SCEV *TripCount1 = SE.getBackedgeTakenCount(FC1.L);
715  if (isa<SCEVCouldNotCompute>(TripCount1)) {
716  UncomputableTripCount++;
717  LLVM_DEBUG(dbgs() << "Trip count of second loop could not be computed!");
718  return {false, None};
719  }
720 
721  LLVM_DEBUG(dbgs() << "\tTrip counts: " << *TripCount0 << " & "
722  << *TripCount1 << " are "
723  << (TripCount0 == TripCount1 ? "identical" : "different")
724  << "\n");
725 
726  if (TripCount0 == TripCount1)
727  return {true, 0};
728 
729  LLVM_DEBUG(dbgs() << "The loops do not have the same tripcount, "
730  "determining the difference between trip counts\n");
731 
732  // Currently only considering loops with a single exit point
733  // and a non-constant trip count.
734  const unsigned TC0 = SE.getSmallConstantTripCount(FC0.L);
735  const unsigned TC1 = SE.getSmallConstantTripCount(FC1.L);
736 
737  // If any of the tripcounts are zero that means that loop(s) do not have
738  // a single exit or a constant tripcount.
739  if (TC0 == 0 || TC1 == 0) {
740  LLVM_DEBUG(dbgs() << "Loop(s) do not have a single exit point or do not "
741  "have a constant number of iterations. Peeling "
742  "is not benefical\n");
743  return {false, None};
744  }
745 
746  Optional<unsigned> Difference;
747  int Diff = TC0 - TC1;
748 
749  if (Diff > 0)
750  Difference = Diff;
751  else {
752  LLVM_DEBUG(
753  dbgs() << "Difference is less than 0. FC1 (second loop) has more "
754  "iterations than the first one. Currently not supported\n");
755  }
756 
757  LLVM_DEBUG(dbgs() << "Difference in loop trip count is: " << Difference
758  << "\n");
759 
760  return {false, Difference};
761  }
762 
763  void peelFusionCandidate(FusionCandidate &FC0, const FusionCandidate &FC1,
764  unsigned PeelCount) {
765  assert(FC0.AbleToPeel && "Should be able to peel loop");
766 
767  LLVM_DEBUG(dbgs() << "Attempting to peel first " << PeelCount
768  << " iterations of the first loop. \n");
769 
770  FC0.Peeled = peelLoop(FC0.L, PeelCount, &LI, &SE, DT, &AC, true);
771  if (FC0.Peeled) {
772  LLVM_DEBUG(dbgs() << "Done Peeling\n");
773 
774 #ifndef NDEBUG
775  auto IdenticalTripCount = haveIdenticalTripCounts(FC0, FC1);
776 
777  assert(IdenticalTripCount.first && *IdenticalTripCount.second == 0 &&
778  "Loops should have identical trip counts after peeling");
779 #endif
780 
781  FC0.PP.PeelCount += PeelCount;
782 
783  // Peeling does not update the PDT
784  PDT.recalculate(*FC0.Preheader->getParent());
785 
786  FC0.updateAfterPeeling();
787 
788  // In this case the iterations of the loop are constant, so the first
789  // loop will execute completely (will not jump from one of
790  // the peeled blocks to the second loop). Here we are updating the
791  // branch conditions of each of the peeled blocks, such that it will
792  // branch to its successor which is not the preheader of the second loop
793  // in the case of unguarded loops, or the succesors of the exit block of
794  // the first loop otherwise. Doing this update will ensure that the entry
795  // block of the first loop dominates the entry block of the second loop.
796  BasicBlock *BB =
797  FC0.GuardBranch ? FC0.ExitBlock->getUniqueSuccessor() : FC1.Preheader;
798  if (BB) {
801  for (BasicBlock *Pred : predecessors(BB)) {
802  if (Pred != FC0.ExitBlock) {
803  WorkList.emplace_back(Pred->getTerminator());
804  TreeUpdates.emplace_back(
806  }
807  }
808  // Cannot modify the predecessors inside the above loop as it will cause
809  // the iterators to be nullptrs, causing memory errors.
810  for (Instruction *CurrentBranch : WorkList) {
811  BasicBlock *Succ = CurrentBranch->getSuccessor(0);
812  if (Succ == BB)
813  Succ = CurrentBranch->getSuccessor(1);
814  ReplaceInstWithInst(CurrentBranch, BranchInst::Create(Succ));
815  }
816 
817  DTU.applyUpdates(TreeUpdates);
818  DTU.flush();
819  }
820  LLVM_DEBUG(
821  dbgs() << "Sucessfully peeled " << FC0.PP.PeelCount
822  << " iterations from the first loop.\n"
823  "Both Loops have the same number of iterations now.\n");
824  }
825  }
826 
827  /// Walk each set of control flow equivalent fusion candidates and attempt to
828  /// fuse them. This does a single linear traversal of all candidates in the
829  /// set. The conditions for legal fusion are checked at this point. If a pair
830  /// of fusion candidates passes all legality checks, they are fused together
831  /// and a new fusion candidate is created and added to the FusionCandidateSet.
832  /// The original fusion candidates are then removed, as they are no longer
833  /// valid.
834  bool fuseCandidates() {
835  bool Fused = false;
836  LLVM_DEBUG(printFusionCandidates(FusionCandidates));
837  for (auto &CandidateSet : FusionCandidates) {
838  if (CandidateSet.size() < 2)
839  continue;
840 
841  LLVM_DEBUG(dbgs() << "Attempting fusion on Candidate Set:\n"
842  << CandidateSet << "\n");
843 
844  for (auto FC0 = CandidateSet.begin(); FC0 != CandidateSet.end(); ++FC0) {
845  assert(!LDT.isRemovedLoop(FC0->L) &&
846  "Should not have removed loops in CandidateSet!");
847  auto FC1 = FC0;
848  for (++FC1; FC1 != CandidateSet.end(); ++FC1) {
849  assert(!LDT.isRemovedLoop(FC1->L) &&
850  "Should not have removed loops in CandidateSet!");
851 
852  LLVM_DEBUG(dbgs() << "Attempting to fuse candidate \n"; FC0->dump();
853  dbgs() << " with\n"; FC1->dump(); dbgs() << "\n");
854 
855  FC0->verify();
856  FC1->verify();
857 
858  // Check if the candidates have identical tripcounts (first value of
859  // pair), and if not check the difference in the tripcounts between
860  // the loops (second value of pair). The difference is not equal to
861  // None iff the loops iterate a constant number of times, and have a
862  // single exit.
863  std::pair<bool, Optional<unsigned>> IdenticalTripCountRes =
864  haveIdenticalTripCounts(*FC0, *FC1);
865  bool SameTripCount = IdenticalTripCountRes.first;
866  Optional<unsigned> TCDifference = IdenticalTripCountRes.second;
867 
868  // Here we are checking that FC0 (the first loop) can be peeled, and
869  // both loops have different tripcounts.
870  if (FC0->AbleToPeel && !SameTripCount && TCDifference) {
871  if (*TCDifference > FusionPeelMaxCount) {
872  LLVM_DEBUG(dbgs()
873  << "Difference in loop trip counts: " << *TCDifference
874  << " is greater than maximum peel count specificed: "
875  << FusionPeelMaxCount << "\n");
876  } else {
877  // Dependent on peeling being performed on the first loop, and
878  // assuming all other conditions for fusion return true.
879  SameTripCount = true;
880  }
881  }
882 
883  if (!SameTripCount) {
884  LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
885  "counts. Not fusing.\n");
886  reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
887  NonEqualTripCount);
888  continue;
889  }
890 
891  if (!isAdjacent(*FC0, *FC1)) {
892  LLVM_DEBUG(dbgs()
893  << "Fusion candidates are not adjacent. Not fusing.\n");
894  reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1, NonAdjacent);
895  continue;
896  }
897 
898  if (!FC0->GuardBranch && FC1->GuardBranch) {
899  LLVM_DEBUG(dbgs() << "The second candidate is guarded while the "
900  "first one is not. Not fusing.\n");
901  reportLoopFusion<OptimizationRemarkMissed>(
902  *FC0, *FC1, OnlySecondCandidateIsGuarded);
903  continue;
904  }
905 
906  // Ensure that FC0 and FC1 have identical guards.
907  // If one (or both) are not guarded, this check is not necessary.
908  if (FC0->GuardBranch && FC1->GuardBranch &&
909  !haveIdenticalGuards(*FC0, *FC1) && !TCDifference) {
910  LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
911  "guards. Not Fusing.\n");
912  reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
913  NonIdenticalGuards);
914  continue;
915  }
916 
917  if (FC0->GuardBranch) {
918  assert(FC1->GuardBranch && "Expecting valid FC1 guard branch");
919 
920  if (!isSafeToMoveBefore(*FC0->ExitBlock,
921  *FC1->ExitBlock->getFirstNonPHIOrDbg(), DT,
922  &PDT, &DI)) {
923  LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
924  "instructions in exit block. Not fusing.\n");
925  reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
926  NonEmptyExitBlock);
927  continue;
928  }
929 
930  if (!isSafeToMoveBefore(
931  *FC1->GuardBranch->getParent(),
932  *FC0->GuardBranch->getParent()->getTerminator(), DT, &PDT,
933  &DI)) {
934  LLVM_DEBUG(dbgs()
935  << "Fusion candidate contains unsafe "
936  "instructions in guard block. Not fusing.\n");
937  reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
938  NonEmptyGuardBlock);
939  continue;
940  }
941  }
942 
943  // Check the dependencies across the loops and do not fuse if it would
944  // violate them.
945  if (!dependencesAllowFusion(*FC0, *FC1)) {
946  LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");
947  reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
948  InvalidDependencies);
949  continue;
950  }
951 
952  // If the second loop has instructions in the pre-header, attempt to
953  // hoist them up to the first loop's pre-header or sink them into the
954  // body of the second loop.
955  SmallVector<Instruction *, 4> SafeToHoist;
957  // At this point, this is the last remaining legality check.
958  // Which means if we can make this pre-header empty, we can fuse
959  // these loops
960  if (!isEmptyPreheader(*FC1)) {
961  LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
962  "preheader.\n");
963 
964  // If it is not safe to hoist/sink all instructions in the
965  // pre-header, we cannot fuse these loops.
966  if (!collectMovablePreheaderInsts(*FC0, *FC1, SafeToHoist,
967  SafeToSink)) {
968  LLVM_DEBUG(dbgs() << "Could not hoist/sink all instructions in "
969  "Fusion Candidate Pre-header.\n"
970  << "Not Fusing.\n");
971  reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
972  NonEmptyPreheader);
973  continue;
974  }
975  }
976 
977  bool BeneficialToFuse = isBeneficialFusion(*FC0, *FC1);
978  LLVM_DEBUG(dbgs()
979  << "\tFusion appears to be "
980  << (BeneficialToFuse ? "" : "un") << "profitable!\n");
981  if (!BeneficialToFuse) {
982  reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
983  FusionNotBeneficial);
984  continue;
985  }
986  // All analysis has completed and has determined that fusion is legal
987  // and profitable. At this point, start transforming the code and
988  // perform fusion.
989 
990  // Execute the hoist/sink operations on preheader instructions
991  movePreheaderInsts(*FC0, *FC1, SafeToHoist, SafeToSink);
992 
993  LLVM_DEBUG(dbgs() << "\tFusion is performed: " << *FC0 << " and "
994  << *FC1 << "\n");
995 
996  FusionCandidate FC0Copy = *FC0;
997  // Peel the loop after determining that fusion is legal. The Loops
998  // will still be safe to fuse after the peeling is performed.
999  bool Peel = TCDifference && *TCDifference > 0;
1000  if (Peel)
1001  peelFusionCandidate(FC0Copy, *FC1, *TCDifference);
1002 
1003  // Report fusion to the Optimization Remarks.
1004  // Note this needs to be done *before* performFusion because
1005  // performFusion will change the original loops, making it not
1006  // possible to identify them after fusion is complete.
1007  reportLoopFusion<OptimizationRemark>((Peel ? FC0Copy : *FC0), *FC1,
1008  FuseCounter);
1009 
1010  FusionCandidate FusedCand(
1011  performFusion((Peel ? FC0Copy : *FC0), *FC1), DT, &PDT, ORE,
1012  FC0Copy.PP);
1013  FusedCand.verify();
1014  assert(FusedCand.isEligibleForFusion(SE) &&
1015  "Fused candidate should be eligible for fusion!");
1016 
1017  // Notify the loop-depth-tree that these loops are not valid objects
1018  LDT.removeLoop(FC1->L);
1019 
1020  CandidateSet.erase(FC0);
1021  CandidateSet.erase(FC1);
1022 
1023  auto InsertPos = CandidateSet.insert(FusedCand);
1024 
1025  assert(InsertPos.second &&
1026  "Unable to insert TargetCandidate in CandidateSet!");
1027 
1028  // Reset FC0 and FC1 the new (fused) candidate. Subsequent iterations
1029  // of the FC1 loop will attempt to fuse the new (fused) loop with the
1030  // remaining candidates in the current candidate set.
1031  FC0 = FC1 = InsertPos.first;
1032 
1033  LLVM_DEBUG(dbgs() << "Candidate Set (after fusion): " << CandidateSet
1034  << "\n");
1035 
1036  Fused = true;
1037  }
1038  }
1039  }
1040  return Fused;
1041  }
1042 
1043  // Returns true if the instruction \p I can be hoisted to the end of the
1044  // preheader of \p FC0. \p SafeToHoist contains the instructions that are
1045  // known to be safe to hoist. The instructions encountered that cannot be
1046  // hoisted are in \p NotHoisting.
1047  // TODO: Move functionality into CodeMoverUtils
1048  bool canHoistInst(Instruction &I,
1049  const SmallVector<Instruction *, 4> &SafeToHoist,
1050  const SmallVector<Instruction *, 4> &NotHoisting,
1051  const FusionCandidate &FC0) const {
1052  const BasicBlock *FC0PreheaderTarget = FC0.Preheader->getSingleSuccessor();
1053  assert(FC0PreheaderTarget &&
1054  "Expected single successor for loop preheader.");
1055 
1056  for (Use &Op : I.operands()) {
1057  if (auto *OpInst = dyn_cast<Instruction>(Op)) {
1058  bool OpHoisted = is_contained(SafeToHoist, OpInst);
1059  // Check if we have already decided to hoist this operand. In this
1060  // case, it does not dominate FC0 *yet*, but will after we hoist it.
1061  if (!(OpHoisted || DT.dominates(OpInst, FC0PreheaderTarget))) {
1062  return false;
1063  }
1064  }
1065  }
1066 
1067  // If this isn't a memory inst, hoisting is safe
1068  if (!I.mayReadOrWriteMemory())
1069  return true;
1070 
1071  LLVM_DEBUG(dbgs() << "Checking if this mem inst can be hoisted.\n");
1072  for (Instruction *NotHoistedInst : NotHoisting) {
1073  if (auto D = DI.depends(&I, NotHoistedInst, true)) {
1074  // Dependency is not read-before-write, write-before-read or
1075  // write-before-write
1076  if (D->isFlow() || D->isAnti() || D->isOutput()) {
1077  LLVM_DEBUG(dbgs() << "Inst depends on an instruction in FC1's "
1078  "preheader that is not being hoisted.\n");
1079  return false;
1080  }
1081  }
1082  }
1083 
1084  for (Instruction *ReadInst : FC0.MemReads) {
1085  if (auto D = DI.depends(ReadInst, &I, true)) {
1086  // Dependency is not read-before-write
1087  if (D->isAnti()) {
1088  LLVM_DEBUG(dbgs() << "Inst depends on a read instruction in FC0.\n");
1089  return false;
1090  }
1091  }
1092  }
1093 
1094  for (Instruction *WriteInst : FC0.MemWrites) {
1095  if (auto D = DI.depends(WriteInst, &I, true)) {
1096  // Dependency is not write-before-read or write-before-write
1097  if (D->isFlow() || D->isOutput()) {
1098  LLVM_DEBUG(dbgs() << "Inst depends on a write instruction in FC0.\n");
1099  return false;
1100  }
1101  }
1102  }
1103  return true;
1104  }
1105 
1106  // Returns true if the instruction \p I can be sunk to the top of the exit
1107  // block of \p FC1.
1108  // TODO: Move functionality into CodeMoverUtils
1109  bool canSinkInst(Instruction &I, const FusionCandidate &FC1) const {
1110  for (User *U : I.users()) {
1111  if (auto *UI{dyn_cast<Instruction>(U)}) {
1112  // Cannot sink if user in loop
1113  // If FC1 has phi users of this value, we cannot sink it into FC1.
1114  if (FC1.L->contains(UI)) {
1115  // Cannot hoist or sink this instruction. No hoisting/sinking
1116  // should take place, loops should not fuse
1117  return false;
1118  }
1119  }
1120  }
1121 
1122  // If this isn't a memory inst, sinking is safe
1123  if (!I.mayReadOrWriteMemory())
1124  return true;
1125 
1126  for (Instruction *ReadInst : FC1.MemReads) {
1127  if (auto D = DI.depends(&I, ReadInst, true)) {
1128  // Dependency is not write-before-read
1129  if (D->isFlow()) {
1130  LLVM_DEBUG(dbgs() << "Inst depends on a read instruction in FC1.\n");
1131  return false;
1132  }
1133  }
1134  }
1135 
1136  for (Instruction *WriteInst : FC1.MemWrites) {
1137  if (auto D = DI.depends(&I, WriteInst, true)) {
1138  // Dependency is not write-before-write or read-before-write
1139  if (D->isOutput() || D->isAnti()) {
1140  LLVM_DEBUG(dbgs() << "Inst depends on a write instruction in FC1.\n");
1141  return false;
1142  }
1143  }
1144  }
1145 
1146  return true;
1147  }
1148 
1149  /// Collect instructions in the \p FC1 Preheader that can be hoisted
1150  /// to the \p FC0 Preheader or sunk into the \p FC1 Body
1151  bool collectMovablePreheaderInsts(
1152  const FusionCandidate &FC0, const FusionCandidate &FC1,
1153  SmallVector<Instruction *, 4> &SafeToHoist,
1154  SmallVector<Instruction *, 4> &SafeToSink) const {
1155  BasicBlock *FC1Preheader = FC1.Preheader;
1156  // Save the instructions that are not being hoisted, so we know not to hoist
1157  // mem insts that they dominate.
1158  SmallVector<Instruction *, 4> NotHoisting;
1159 
1160  for (Instruction &I : *FC1Preheader) {
1161  // Can't move a branch
1162  if (&I == FC1Preheader->getTerminator())
1163  continue;
1164  // If the instruction has side-effects, give up.
1165  // TODO: The case of mayReadFromMemory we can handle but requires
1166  // additional work with a dependence analysis so for now we give
1167  // up on memory reads.
1168  if (I.mayThrow() || !I.willReturn()) {
1169  LLVM_DEBUG(dbgs() << "Inst: " << I << " may throw or won't return.\n");
1170  return false;
1171  }
1172 
1173  LLVM_DEBUG(dbgs() << "Checking Inst: " << I << "\n");
1174 
1175  if (I.isAtomic() || I.isVolatile()) {
1176  LLVM_DEBUG(
1177  dbgs() << "\tInstruction is volatile or atomic. Cannot move it.\n");
1178  return false;
1179  }
1180 
1181  if (canHoistInst(I, SafeToHoist, NotHoisting, FC0)) {
1182  SafeToHoist.push_back(&I);
1183  LLVM_DEBUG(dbgs() << "\tSafe to hoist.\n");
1184  } else {
1185  LLVM_DEBUG(dbgs() << "\tCould not hoist. Trying to sink...\n");
1186  NotHoisting.push_back(&I);
1187 
1188  if (canSinkInst(I, FC1)) {
1189  SafeToSink.push_back(&I);
1190  LLVM_DEBUG(dbgs() << "\tSafe to sink.\n");
1191  } else {
1192  LLVM_DEBUG(dbgs() << "\tCould not sink.\n");
1193  return false;
1194  }
1195  }
1196  }
1197  LLVM_DEBUG(
1198  dbgs() << "All preheader instructions could be sunk or hoisted!\n");
1199  return true;
1200  }
1201 
1202  /// Rewrite all additive recurrences in a SCEV to use a new loop.
1203  class AddRecLoopReplacer : public SCEVRewriteVisitor<AddRecLoopReplacer> {
1204  public:
1205  AddRecLoopReplacer(ScalarEvolution &SE, const Loop &OldL, const Loop &NewL,
1206  bool UseMax = true)
1207  : SCEVRewriteVisitor(SE), Valid(true), UseMax(UseMax), OldL(OldL),
1208  NewL(NewL) {}
1209 
1210  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
1211  const Loop *ExprL = Expr->getLoop();
1213  if (ExprL == &OldL) {
1214  Operands.append(Expr->op_begin(), Expr->op_end());
1215  return SE.getAddRecExpr(Operands, &NewL, Expr->getNoWrapFlags());
1216  }
1217 
1218  if (OldL.contains(ExprL)) {
1219  bool Pos = SE.isKnownPositive(Expr->getStepRecurrence(SE));
1220  if (!UseMax || !Pos || !Expr->isAffine()) {
1221  Valid = false;
1222  return Expr;
1223  }
1224  return visit(Expr->getStart());
1225  }
1226 
1227  for (const SCEV *Op : Expr->operands())
1228  Operands.push_back(visit(Op));
1229  return SE.getAddRecExpr(Operands, ExprL, Expr->getNoWrapFlags());
1230  }
1231 
1232  bool wasValidSCEV() const { return Valid; }
1233 
1234  private:
1235  bool Valid, UseMax;
1236  const Loop &OldL, &NewL;
1237  };
1238 
1239  /// Return false if the access functions of \p I0 and \p I1 could cause
1240  /// a negative dependence.
1241  bool accessDiffIsPositive(const Loop &L0, const Loop &L1, Instruction &I0,
1242  Instruction &I1, bool EqualIsInvalid) {
1243  Value *Ptr0 = getLoadStorePointerOperand(&I0);
1244  Value *Ptr1 = getLoadStorePointerOperand(&I1);
1245  if (!Ptr0 || !Ptr1)
1246  return false;
1247 
1248  const SCEV *SCEVPtr0 = SE.getSCEVAtScope(Ptr0, &L0);
1249  const SCEV *SCEVPtr1 = SE.getSCEVAtScope(Ptr1, &L1);
1250 #ifndef NDEBUG
1252  LLVM_DEBUG(dbgs() << " Access function check: " << *SCEVPtr0 << " vs "
1253  << *SCEVPtr1 << "\n");
1254 #endif
1255  AddRecLoopReplacer Rewriter(SE, L0, L1);
1256  SCEVPtr0 = Rewriter.visit(SCEVPtr0);
1257 #ifndef NDEBUG
1259  LLVM_DEBUG(dbgs() << " Access function after rewrite: " << *SCEVPtr0
1260  << " [Valid: " << Rewriter.wasValidSCEV() << "]\n");
1261 #endif
1262  if (!Rewriter.wasValidSCEV())
1263  return false;
1264 
1265  // TODO: isKnownPredicate doesnt work well when one SCEV is loop carried (by
1266  // L0) and the other is not. We could check if it is monotone and test
1267  // the beginning and end value instead.
1268 
1269  BasicBlock *L0Header = L0.getHeader();
1270  auto HasNonLinearDominanceRelation = [&](const SCEV *S) {
1271  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S);
1272  if (!AddRec)
1273  return false;
1274  return !DT.dominates(L0Header, AddRec->getLoop()->getHeader()) &&
1275  !DT.dominates(AddRec->getLoop()->getHeader(), L0Header);
1276  };
1277  if (SCEVExprContains(SCEVPtr1, HasNonLinearDominanceRelation))
1278  return false;
1279 
1280  ICmpInst::Predicate Pred =
1281  EqualIsInvalid ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_SGE;
1282  bool IsAlwaysGE = SE.isKnownPredicate(Pred, SCEVPtr0, SCEVPtr1);
1283 #ifndef NDEBUG
1285  LLVM_DEBUG(dbgs() << " Relation: " << *SCEVPtr0
1286  << (IsAlwaysGE ? " >= " : " may < ") << *SCEVPtr1
1287  << "\n");
1288 #endif
1289  return IsAlwaysGE;
1290  }
1291 
1292  /// Return true if the dependences between @p I0 (in @p L0) and @p I1 (in
1293  /// @p L1) allow loop fusion of @p L0 and @p L1. The dependence analyses
1294  /// specified by @p DepChoice are used to determine this.
1295  bool dependencesAllowFusion(const FusionCandidate &FC0,
1296  const FusionCandidate &FC1, Instruction &I0,
1297  Instruction &I1, bool AnyDep,
1298  FusionDependenceAnalysisChoice DepChoice) {
1299 #ifndef NDEBUG
1300  if (VerboseFusionDebugging) {
1301  LLVM_DEBUG(dbgs() << "Check dep: " << I0 << " vs " << I1 << " : "
1302  << DepChoice << "\n");
1303  }
1304 #endif
1305  switch (DepChoice) {
1307  return accessDiffIsPositive(*FC0.L, *FC1.L, I0, I1, AnyDep);
1309  auto DepResult = DI.depends(&I0, &I1, true);
1310  if (!DepResult)
1311  return true;
1312 #ifndef NDEBUG
1313  if (VerboseFusionDebugging) {
1314  LLVM_DEBUG(dbgs() << "DA res: "; DepResult->dump(dbgs());
1315  dbgs() << " [#l: " << DepResult->getLevels() << "][Ordered: "
1316  << (DepResult->isOrdered() ? "true" : "false")
1317  << "]\n");
1318  LLVM_DEBUG(dbgs() << "DepResult Levels: " << DepResult->getLevels()
1319  << "\n");
1320  }
1321 #endif
1322 
1323  if (DepResult->getNextPredecessor() || DepResult->getNextSuccessor())
1324  LLVM_DEBUG(
1325  dbgs() << "TODO: Implement pred/succ dependence handling!\n");
1326 
1327  // TODO: Can we actually use the dependence info analysis here?
1328  return false;
1329  }
1330 
1332  return dependencesAllowFusion(FC0, FC1, I0, I1, AnyDep,
1334  dependencesAllowFusion(FC0, FC1, I0, I1, AnyDep,
1336  }
1337 
1338  llvm_unreachable("Unknown fusion dependence analysis choice!");
1339  }
1340 
1341  /// Perform a dependence check and return if @p FC0 and @p FC1 can be fused.
1342  bool dependencesAllowFusion(const FusionCandidate &FC0,
1343  const FusionCandidate &FC1) {
1344  LLVM_DEBUG(dbgs() << "Check if " << FC0 << " can be fused with " << FC1
1345  << "\n");
1346  assert(FC0.L->getLoopDepth() == FC1.L->getLoopDepth());
1347  assert(DT.dominates(FC0.getEntryBlock(), FC1.getEntryBlock()));
1348 
1349  for (Instruction *WriteL0 : FC0.MemWrites) {
1350  for (Instruction *WriteL1 : FC1.MemWrites)
1351  if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *WriteL1,
1352  /* AnyDep */ false,
1354  InvalidDependencies++;
1355  return false;
1356  }
1357  for (Instruction *ReadL1 : FC1.MemReads)
1358  if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *ReadL1,
1359  /* AnyDep */ false,
1361  InvalidDependencies++;
1362  return false;
1363  }
1364  }
1365 
1366  for (Instruction *WriteL1 : FC1.MemWrites) {
1367  for (Instruction *WriteL0 : FC0.MemWrites)
1368  if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *WriteL1,
1369  /* AnyDep */ false,
1371  InvalidDependencies++;
1372  return false;
1373  }
1374  for (Instruction *ReadL0 : FC0.MemReads)
1375  if (!dependencesAllowFusion(FC0, FC1, *ReadL0, *WriteL1,
1376  /* AnyDep */ false,
1378  InvalidDependencies++;
1379  return false;
1380  }
1381  }
1382 
1383  // Walk through all uses in FC1. For each use, find the reaching def. If the
1384  // def is located in FC0 then it is is not safe to fuse.
1385  for (BasicBlock *BB : FC1.L->blocks())
1386  for (Instruction &I : *BB)
1387  for (auto &Op : I.operands())
1388  if (Instruction *Def = dyn_cast<Instruction>(Op))
1389  if (FC0.L->contains(Def->getParent())) {
1390  InvalidDependencies++;
1391  return false;
1392  }
1393 
1394  return true;
1395  }
1396 
1397  /// Determine if two fusion candidates are adjacent in the CFG.
1398  ///
1399  /// This method will determine if there are additional basic blocks in the CFG
1400  /// between the exit of \p FC0 and the entry of \p FC1.
1401  /// If the two candidates are guarded loops, then it checks whether the
1402  /// non-loop successor of the \p FC0 guard branch is the entry block of \p
1403  /// FC1. If not, then the loops are not adjacent. If the two candidates are
1404  /// not guarded loops, then it checks whether the exit block of \p FC0 is the
1405  /// preheader of \p FC1.
1406  bool isAdjacent(const FusionCandidate &FC0,
1407  const FusionCandidate &FC1) const {
1408  // If the successor of the guard branch is FC1, then the loops are adjacent
1409  if (FC0.GuardBranch)
1410  return FC0.getNonLoopBlock() == FC1.getEntryBlock();
1411  else
1412  return FC0.ExitBlock == FC1.getEntryBlock();
1413  }
1414 
1415  bool isEmptyPreheader(const FusionCandidate &FC) const {
1416  return FC.Preheader->size() == 1;
1417  }
1418 
1419  /// Hoist \p FC1 Preheader instructions to \p FC0 Preheader
1420  /// and sink others into the body of \p FC1.
1421  void movePreheaderInsts(const FusionCandidate &FC0,
1422  const FusionCandidate &FC1,
1423  SmallVector<Instruction *, 4> &HoistInsts,
1424  SmallVector<Instruction *, 4> &SinkInsts) const {
1425  // All preheader instructions except the branch must be hoisted or sunk
1426  assert(HoistInsts.size() + SinkInsts.size() == FC1.Preheader->size() - 1 &&
1427  "Attempting to sink and hoist preheader instructions, but not all "
1428  "the preheader instructions are accounted for.");
1429 
1430  NumHoistedInsts += HoistInsts.size();
1431  NumSunkInsts += SinkInsts.size();
1432 
1434  if (!HoistInsts.empty())
1435  dbgs() << "Hoisting: \n";
1436  for (Instruction *I : HoistInsts)
1437  dbgs() << *I << "\n";
1438  if (!SinkInsts.empty())
1439  dbgs() << "Sinking: \n";
1440  for (Instruction *I : SinkInsts)
1441  dbgs() << *I << "\n";
1442  });
1443 
1444  for (Instruction *I : HoistInsts) {
1445  assert(I->getParent() == FC1.Preheader);
1446  I->moveBefore(FC0.Preheader->getTerminator());
1447  }
1448  // insert instructions in reverse order to maintain dominance relationship
1449  for (Instruction *I : reverse(SinkInsts)) {
1450  assert(I->getParent() == FC1.Preheader);
1451  I->moveBefore(&*FC1.ExitBlock->getFirstInsertionPt());
1452  }
1453  }
1454 
1455  /// Determine if two fusion candidates have identical guards
1456  ///
1457  /// This method will determine if two fusion candidates have the same guards.
1458  /// The guards are considered the same if:
1459  /// 1. The instructions to compute the condition used in the compare are
1460  /// identical.
1461  /// 2. The successors of the guard have the same flow into/around the loop.
1462  /// If the compare instructions are identical, then the first successor of the
1463  /// guard must go to the same place (either the preheader of the loop or the
1464  /// NonLoopBlock). In other words, the the first successor of both loops must
1465  /// both go into the loop (i.e., the preheader) or go around the loop (i.e.,
1466  /// the NonLoopBlock). The same must be true for the second successor.
1467  bool haveIdenticalGuards(const FusionCandidate &FC0,
1468  const FusionCandidate &FC1) const {
1469  assert(FC0.GuardBranch && FC1.GuardBranch &&
1470  "Expecting FC0 and FC1 to be guarded loops.");
1471 
1472  if (auto FC0CmpInst =
1473  dyn_cast<Instruction>(FC0.GuardBranch->getCondition()))
1474  if (auto FC1CmpInst =
1475  dyn_cast<Instruction>(FC1.GuardBranch->getCondition()))
1476  if (!FC0CmpInst->isIdenticalTo(FC1CmpInst))
1477  return false;
1478 
1479  // The compare instructions are identical.
1480  // Now make sure the successor of the guards have the same flow into/around
1481  // the loop
1482  if (FC0.GuardBranch->getSuccessor(0) == FC0.Preheader)
1483  return (FC1.GuardBranch->getSuccessor(0) == FC1.Preheader);
1484  else
1485  return (FC1.GuardBranch->getSuccessor(1) == FC1.Preheader);
1486  }
1487 
1488  /// Modify the latch branch of FC to be unconditional since successors of the
1489  /// branch are the same.
1490  void simplifyLatchBranch(const FusionCandidate &FC) const {
1491  BranchInst *FCLatchBranch = dyn_cast<BranchInst>(FC.Latch->getTerminator());
1492  if (FCLatchBranch) {
1493  assert(FCLatchBranch->isConditional() &&
1494  FCLatchBranch->getSuccessor(0) == FCLatchBranch->getSuccessor(1) &&
1495  "Expecting the two successors of FCLatchBranch to be the same");
1496  BranchInst *NewBranch =
1497  BranchInst::Create(FCLatchBranch->getSuccessor(0));
1498  ReplaceInstWithInst(FCLatchBranch, NewBranch);
1499  }
1500  }
1501 
1502  /// Move instructions from FC0.Latch to FC1.Latch. If FC0.Latch has an unique
1503  /// successor, then merge FC0.Latch with its unique successor.
1504  void mergeLatch(const FusionCandidate &FC0, const FusionCandidate &FC1) {
1505  moveInstructionsToTheBeginning(*FC0.Latch, *FC1.Latch, DT, PDT, DI);
1506  if (BasicBlock *Succ = FC0.Latch->getUniqueSuccessor()) {
1507  MergeBlockIntoPredecessor(Succ, &DTU, &LI);
1508  DTU.flush();
1509  }
1510  }
1511 
1512  /// Fuse two fusion candidates, creating a new fused loop.
1513  ///
1514  /// This method contains the mechanics of fusing two loops, represented by \p
1515  /// FC0 and \p FC1. It is assumed that \p FC0 dominates \p FC1 and \p FC1
1516  /// postdominates \p FC0 (making them control flow equivalent). It also
1517  /// assumes that the other conditions for fusion have been met: adjacent,
1518  /// identical trip counts, and no negative distance dependencies exist that
1519  /// would prevent fusion. Thus, there is no checking for these conditions in
1520  /// this method.
1521  ///
1522  /// Fusion is performed by rewiring the CFG to update successor blocks of the
1523  /// components of tho loop. Specifically, the following changes are done:
1524  ///
1525  /// 1. The preheader of \p FC1 is removed as it is no longer necessary
1526  /// (because it is currently only a single statement block).
1527  /// 2. The latch of \p FC0 is modified to jump to the header of \p FC1.
1528  /// 3. The latch of \p FC1 i modified to jump to the header of \p FC0.
1529  /// 4. All blocks from \p FC1 are removed from FC1 and added to FC0.
1530  ///
1531  /// All of these modifications are done with dominator tree updates, thus
1532  /// keeping the dominator (and post dominator) information up-to-date.
1533  ///
1534  /// This can be improved in the future by actually merging blocks during
1535  /// fusion. For example, the preheader of \p FC1 can be merged with the
1536  /// preheader of \p FC0. This would allow loops with more than a single
1537  /// statement in the preheader to be fused. Similarly, the latch blocks of the
1538  /// two loops could also be fused into a single block. This will require
1539  /// analysis to prove it is safe to move the contents of the block past
1540  /// existing code, which currently has not been implemented.
1541  Loop *performFusion(const FusionCandidate &FC0, const FusionCandidate &FC1) {
1542  assert(FC0.isValid() && FC1.isValid() &&
1543  "Expecting valid fusion candidates");
1544 
1545  LLVM_DEBUG(dbgs() << "Fusion Candidate 0: \n"; FC0.dump();
1546  dbgs() << "Fusion Candidate 1: \n"; FC1.dump(););
1547 
1548  // Move instructions from the preheader of FC1 to the end of the preheader
1549  // of FC0.
1550  moveInstructionsToTheEnd(*FC1.Preheader, *FC0.Preheader, DT, PDT, DI);
1551 
1552  // Fusing guarded loops is handled slightly differently than non-guarded
1553  // loops and has been broken out into a separate method instead of trying to
1554  // intersperse the logic within a single method.
1555  if (FC0.GuardBranch)
1556  return fuseGuardedLoops(FC0, FC1);
1557 
1558  assert(FC1.Preheader ==
1559  (FC0.Peeled ? FC0.ExitBlock->getUniqueSuccessor() : FC0.ExitBlock));
1560  assert(FC1.Preheader->size() == 1 &&
1561  FC1.Preheader->getSingleSuccessor() == FC1.Header);
1562 
1563  // Remember the phi nodes originally in the header of FC0 in order to rewire
1564  // them later. However, this is only necessary if the new loop carried
1565  // values might not dominate the exiting branch. While we do not generally
1566  // test if this is the case but simply insert intermediate phi nodes, we
1567  // need to make sure these intermediate phi nodes have different
1568  // predecessors. To this end, we filter the special case where the exiting
1569  // block is the latch block of the first loop. Nothing needs to be done
1570  // anyway as all loop carried values dominate the latch and thereby also the
1571  // exiting branch.
1572  SmallVector<PHINode *, 8> OriginalFC0PHIs;
1573  if (FC0.ExitingBlock != FC0.Latch)
1574  for (PHINode &PHI : FC0.Header->phis())
1575  OriginalFC0PHIs.push_back(&PHI);
1576 
1577  // Replace incoming blocks for header PHIs first.
1578  FC1.Preheader->replaceSuccessorsPhiUsesWith(FC0.Preheader);
1579  FC0.Latch->replaceSuccessorsPhiUsesWith(FC1.Latch);
1580 
1581  // Then modify the control flow and update DT and PDT.
1583 
1584  // The old exiting block of the first loop (FC0) has to jump to the header
1585  // of the second as we need to execute the code in the second header block
1586  // regardless of the trip count. That is, if the trip count is 0, so the
1587  // back edge is never taken, we still have to execute both loop headers,
1588  // especially (but not only!) if the second is a do-while style loop.
1589  // However, doing so might invalidate the phi nodes of the first loop as
1590  // the new values do only need to dominate their latch and not the exiting
1591  // predicate. To remedy this potential problem we always introduce phi
1592  // nodes in the header of the second loop later that select the loop carried
1593  // value, if the second header was reached through an old latch of the
1594  // first, or undef otherwise. This is sound as exiting the first implies the
1595  // second will exit too, __without__ taking the back-edge. [Their
1596  // trip-counts are equal after all.
1597  // KB: Would this sequence be simpler to just just make FC0.ExitingBlock go
1598  // to FC1.Header? I think this is basically what the three sequences are
1599  // trying to accomplish; however, doing this directly in the CFG may mean
1600  // the DT/PDT becomes invalid
1601  if (!FC0.Peeled) {
1602  FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC1.Preheader,
1603  FC1.Header);
1605  DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader));
1607  DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
1608  } else {
1610  DominatorTree::Delete, FC0.ExitBlock, FC1.Preheader));
1611 
1612  // Remove the ExitBlock of the first Loop (also not needed)
1613  FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC0.ExitBlock,
1614  FC1.Header);
1616  DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock));
1617  FC0.ExitBlock->getTerminator()->eraseFromParent();
1619  DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
1620  new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock);
1621  }
1622 
1623  // The pre-header of L1 is not necessary anymore.
1624  assert(pred_empty(FC1.Preheader));
1625  FC1.Preheader->getTerminator()->eraseFromParent();
1626  new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader);
1628  DominatorTree::Delete, FC1.Preheader, FC1.Header));
1629 
1630  // Moves the phi nodes from the second to the first loops header block.
1631  while (PHINode *PHI = dyn_cast<PHINode>(&FC1.Header->front())) {
1632  if (SE.isSCEVable(PHI->getType()))
1633  SE.forgetValue(PHI);
1634  if (PHI->hasNUsesOrMore(1))
1635  PHI->moveBefore(&*FC0.Header->getFirstInsertionPt());
1636  else
1637  PHI->eraseFromParent();
1638  }
1639 
1640  // Introduce new phi nodes in the second loop header to ensure
1641  // exiting the first and jumping to the header of the second does not break
1642  // the SSA property of the phis originally in the first loop. See also the
1643  // comment above.
1644  Instruction *L1HeaderIP = &FC1.Header->front();
1645  for (PHINode *LCPHI : OriginalFC0PHIs) {
1646  int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch);
1647  assert(L1LatchBBIdx >= 0 &&
1648  "Expected loop carried value to be rewired at this point!");
1649 
1650  Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx);
1651 
1652  PHINode *L1HeaderPHI = PHINode::Create(
1653  LCV->getType(), 2, LCPHI->getName() + ".afterFC0", L1HeaderIP);
1654  L1HeaderPHI->addIncoming(LCV, FC0.Latch);
1655  L1HeaderPHI->addIncoming(UndefValue::get(LCV->getType()),
1656  FC0.ExitingBlock);
1657 
1658  LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI);
1659  }
1660 
1661  // Replace latch terminator destinations.
1662  FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
1663  FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
1664 
1665  // Modify the latch branch of FC0 to be unconditional as both successors of
1666  // the branch are the same.
1667  simplifyLatchBranch(FC0);
1668 
1669  // If FC0.Latch and FC0.ExitingBlock are the same then we have already
1670  // performed the updates above.
1671  if (FC0.Latch != FC0.ExitingBlock)
1673  DominatorTree::Insert, FC0.Latch, FC1.Header));
1674 
1676  FC0.Latch, FC0.Header));
1678  FC1.Latch, FC0.Header));
1680  FC1.Latch, FC1.Header));
1681 
1682  // Update DT/PDT
1683  DTU.applyUpdates(TreeUpdates);
1684 
1685  LI.removeBlock(FC1.Preheader);
1686  DTU.deleteBB(FC1.Preheader);
1687  if (FC0.Peeled) {
1688  LI.removeBlock(FC0.ExitBlock);
1689  DTU.deleteBB(FC0.ExitBlock);
1690  }
1691 
1692  DTU.flush();
1693 
1694  // Is there a way to keep SE up-to-date so we don't need to forget the loops
1695  // and rebuild the information in subsequent passes of fusion?
1696  // Note: Need to forget the loops before merging the loop latches, as
1697  // mergeLatch may remove the only block in FC1.
1698  SE.forgetLoop(FC1.L);
1699  SE.forgetLoop(FC0.L);
1701 
1702  // Move instructions from FC0.Latch to FC1.Latch.
1703  // Note: mergeLatch requires an updated DT.
1704  mergeLatch(FC0, FC1);
1705 
1706  // Merge the loops.
1707  SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks());
1708  for (BasicBlock *BB : Blocks) {
1709  FC0.L->addBlockEntry(BB);
1710  FC1.L->removeBlockFromLoop(BB);
1711  if (LI.getLoopFor(BB) != FC1.L)
1712  continue;
1713  LI.changeLoopFor(BB, FC0.L);
1714  }
1715  while (!FC1.L->isInnermost()) {
1716  const auto &ChildLoopIt = FC1.L->begin();
1717  Loop *ChildLoop = *ChildLoopIt;
1718  FC1.L->removeChildLoop(ChildLoopIt);
1719  FC0.L->addChildLoop(ChildLoop);
1720  }
1721 
1722  // Delete the now empty loop L1.
1723  LI.erase(FC1.L);
1724 
1725 #ifndef NDEBUG
1726  assert(!verifyFunction(*FC0.Header->getParent(), &errs()));
1728  assert(PDT.verify());
1729  LI.verify(DT);
1730  SE.verify();
1731 #endif
1732 
1733  LLVM_DEBUG(dbgs() << "Fusion done:\n");
1734 
1735  return FC0.L;
1736  }
1737 
1738  /// Report details on loop fusion opportunities.
1739  ///
1740  /// This template function can be used to report both successful and missed
1741  /// loop fusion opportunities, based on the RemarkKind. The RemarkKind should
1742  /// be one of:
1743  /// - OptimizationRemarkMissed to report when loop fusion is unsuccessful
1744  /// given two valid fusion candidates.
1745  /// - OptimizationRemark to report successful fusion of two fusion
1746  /// candidates.
1747  /// The remarks will be printed using the form:
1748  /// <path/filename>:<line number>:<column number>: [<function name>]:
1749  /// <Cand1 Preheader> and <Cand2 Preheader>: <Stat Description>
1750  template <typename RemarkKind>
1751  void reportLoopFusion(const FusionCandidate &FC0, const FusionCandidate &FC1,
1752  llvm::Statistic &Stat) {
1753  assert(FC0.Preheader && FC1.Preheader &&
1754  "Expecting valid fusion candidates");
1755  using namespace ore;
1756 #if LLVM_ENABLE_STATS
1757  ++Stat;
1758  ORE.emit(RemarkKind(DEBUG_TYPE, Stat.getName(), FC0.L->getStartLoc(),
1759  FC0.Preheader)
1760  << "[" << FC0.Preheader->getParent()->getName()
1761  << "]: " << NV("Cand1", StringRef(FC0.Preheader->getName()))
1762  << " and " << NV("Cand2", StringRef(FC1.Preheader->getName()))
1763  << ": " << Stat.getDesc());
1764 #endif
1765  }
1766 
1767  /// Fuse two guarded fusion candidates, creating a new fused loop.
1768  ///
1769  /// Fusing guarded loops is handled much the same way as fusing non-guarded
1770  /// loops. The rewiring of the CFG is slightly different though, because of
1771  /// the presence of the guards around the loops and the exit blocks after the
1772  /// loop body. As such, the new loop is rewired as follows:
1773  /// 1. Keep the guard branch from FC0 and use the non-loop block target
1774  /// from the FC1 guard branch.
1775  /// 2. Remove the exit block from FC0 (this exit block should be empty
1776  /// right now).
1777  /// 3. Remove the guard branch for FC1
1778  /// 4. Remove the preheader for FC1.
1779  /// The exit block successor for the latch of FC0 is updated to be the header
1780  /// of FC1 and the non-exit block successor of the latch of FC1 is updated to
1781  /// be the header of FC0, thus creating the fused loop.
1782  Loop *fuseGuardedLoops(const FusionCandidate &FC0,
1783  const FusionCandidate &FC1) {
1784  assert(FC0.GuardBranch && FC1.GuardBranch && "Expecting guarded loops");
1785 
1786  BasicBlock *FC0GuardBlock = FC0.GuardBranch->getParent();
1787  BasicBlock *FC1GuardBlock = FC1.GuardBranch->getParent();
1788  BasicBlock *FC0NonLoopBlock = FC0.getNonLoopBlock();
1789  BasicBlock *FC1NonLoopBlock = FC1.getNonLoopBlock();
1790  BasicBlock *FC0ExitBlockSuccessor = FC0.ExitBlock->getUniqueSuccessor();
1791 
1792  // Move instructions from the exit block of FC0 to the beginning of the exit
1793  // block of FC1, in the case that the FC0 loop has not been peeled. In the
1794  // case that FC0 loop is peeled, then move the instructions of the successor
1795  // of the FC0 Exit block to the beginning of the exit block of FC1.
1797  (FC0.Peeled ? *FC0ExitBlockSuccessor : *FC0.ExitBlock), *FC1.ExitBlock,
1798  DT, PDT, DI);
1799 
1800  // Move instructions from the guard block of FC1 to the end of the guard
1801  // block of FC0.
1802  moveInstructionsToTheEnd(*FC1GuardBlock, *FC0GuardBlock, DT, PDT, DI);
1803 
1804  assert(FC0NonLoopBlock == FC1GuardBlock && "Loops are not adjacent");
1805 
1807 
1808  ////////////////////////////////////////////////////////////////////////////
1809  // Update the Loop Guard
1810  ////////////////////////////////////////////////////////////////////////////
1811  // The guard for FC0 is updated to guard both FC0 and FC1. This is done by
1812  // changing the NonLoopGuardBlock for FC0 to the NonLoopGuardBlock for FC1.
1813  // Thus, one path from the guard goes to the preheader for FC0 (and thus
1814  // executes the new fused loop) and the other path goes to the NonLoopBlock
1815  // for FC1 (where FC1 guard would have gone if FC1 was not executed).
1816  FC1NonLoopBlock->replacePhiUsesWith(FC1GuardBlock, FC0GuardBlock);
1817  FC0.GuardBranch->replaceUsesOfWith(FC0NonLoopBlock, FC1NonLoopBlock);
1818 
1819  BasicBlock *BBToUpdate = FC0.Peeled ? FC0ExitBlockSuccessor : FC0.ExitBlock;
1820  BBToUpdate->getTerminator()->replaceUsesOfWith(FC1GuardBlock, FC1.Header);
1821 
1822  // The guard of FC1 is not necessary anymore.
1823  FC1.GuardBranch->eraseFromParent();
1824  new UnreachableInst(FC1GuardBlock->getContext(), FC1GuardBlock);
1825 
1827  DominatorTree::Delete, FC1GuardBlock, FC1.Preheader));
1829  DominatorTree::Delete, FC1GuardBlock, FC1NonLoopBlock));
1831  DominatorTree::Delete, FC0GuardBlock, FC1GuardBlock));
1833  DominatorTree::Insert, FC0GuardBlock, FC1NonLoopBlock));
1834 
1835  if (FC0.Peeled) {
1836  // Remove the Block after the ExitBlock of FC0
1838  DominatorTree::Delete, FC0ExitBlockSuccessor, FC1GuardBlock));
1839  FC0ExitBlockSuccessor->getTerminator()->eraseFromParent();
1840  new UnreachableInst(FC0ExitBlockSuccessor->getContext(),
1841  FC0ExitBlockSuccessor);
1842  }
1843 
1844  assert(pred_empty(FC1GuardBlock) &&
1845  "Expecting guard block to have no predecessors");
1846  assert(succ_empty(FC1GuardBlock) &&
1847  "Expecting guard block to have no successors");
1848 
1849  // Remember the phi nodes originally in the header of FC0 in order to rewire
1850  // them later. However, this is only necessary if the new loop carried
1851  // values might not dominate the exiting branch. While we do not generally
1852  // test if this is the case but simply insert intermediate phi nodes, we
1853  // need to make sure these intermediate phi nodes have different
1854  // predecessors. To this end, we filter the special case where the exiting
1855  // block is the latch block of the first loop. Nothing needs to be done
1856  // anyway as all loop carried values dominate the latch and thereby also the
1857  // exiting branch.
1858  // KB: This is no longer necessary because FC0.ExitingBlock == FC0.Latch
1859  // (because the loops are rotated. Thus, nothing will ever be added to
1860  // OriginalFC0PHIs.
1861  SmallVector<PHINode *, 8> OriginalFC0PHIs;
1862  if (FC0.ExitingBlock != FC0.Latch)
1863  for (PHINode &PHI : FC0.Header->phis())
1864  OriginalFC0PHIs.push_back(&PHI);
1865 
1866  assert(OriginalFC0PHIs.empty() && "Expecting OriginalFC0PHIs to be empty!");
1867 
1868  // Replace incoming blocks for header PHIs first.
1869  FC1.Preheader->replaceSuccessorsPhiUsesWith(FC0.Preheader);
1870  FC0.Latch->replaceSuccessorsPhiUsesWith(FC1.Latch);
1871 
1872  // The old exiting block of the first loop (FC0) has to jump to the header
1873  // of the second as we need to execute the code in the second header block
1874  // regardless of the trip count. That is, if the trip count is 0, so the
1875  // back edge is never taken, we still have to execute both loop headers,
1876  // especially (but not only!) if the second is a do-while style loop.
1877  // However, doing so might invalidate the phi nodes of the first loop as
1878  // the new values do only need to dominate their latch and not the exiting
1879  // predicate. To remedy this potential problem we always introduce phi
1880  // nodes in the header of the second loop later that select the loop carried
1881  // value, if the second header was reached through an old latch of the
1882  // first, or undef otherwise. This is sound as exiting the first implies the
1883  // second will exit too, __without__ taking the back-edge (their
1884  // trip-counts are equal after all).
1885  FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC0.ExitBlock,
1886  FC1.Header);
1887 
1889  DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock));
1891  DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
1892 
1893  // Remove FC0 Exit Block
1894  // The exit block for FC0 is no longer needed since control will flow
1895  // directly to the header of FC1. Since it is an empty block, it can be
1896  // removed at this point.
1897  // TODO: In the future, we can handle non-empty exit blocks my merging any
1898  // instructions from FC0 exit block into FC1 exit block prior to removing
1899  // the block.
1900  assert(pred_empty(FC0.ExitBlock) && "Expecting exit block to be empty");
1901  FC0.ExitBlock->getTerminator()->eraseFromParent();
1902  new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock);
1903 
1904  // Remove FC1 Preheader
1905  // The pre-header of L1 is not necessary anymore.
1906  assert(pred_empty(FC1.Preheader));
1907  FC1.Preheader->getTerminator()->eraseFromParent();
1908  new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader);
1910  DominatorTree::Delete, FC1.Preheader, FC1.Header));
1911 
1912  // Moves the phi nodes from the second to the first loops header block.
1913  while (PHINode *PHI = dyn_cast<PHINode>(&FC1.Header->front())) {
1914  if (SE.isSCEVable(PHI->getType()))
1915  SE.forgetValue(PHI);
1916  if (PHI->hasNUsesOrMore(1))
1917  PHI->moveBefore(&*FC0.Header->getFirstInsertionPt());
1918  else
1919  PHI->eraseFromParent();
1920  }
1921 
1922  // Introduce new phi nodes in the second loop header to ensure
1923  // exiting the first and jumping to the header of the second does not break
1924  // the SSA property of the phis originally in the first loop. See also the
1925  // comment above.
1926  Instruction *L1HeaderIP = &FC1.Header->front();
1927  for (PHINode *LCPHI : OriginalFC0PHIs) {
1928  int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch);
1929  assert(L1LatchBBIdx >= 0 &&
1930  "Expected loop carried value to be rewired at this point!");
1931 
1932  Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx);
1933 
1934  PHINode *L1HeaderPHI = PHINode::Create(
1935  LCV->getType(), 2, LCPHI->getName() + ".afterFC0", L1HeaderIP);
1936  L1HeaderPHI->addIncoming(LCV, FC0.Latch);
1937  L1HeaderPHI->addIncoming(UndefValue::get(LCV->getType()),
1938  FC0.ExitingBlock);
1939 
1940  LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI);
1941  }
1942 
1943  // Update the latches
1944 
1945  // Replace latch terminator destinations.
1946  FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
1947  FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
1948 
1949  // Modify the latch branch of FC0 to be unconditional as both successors of
1950  // the branch are the same.
1951  simplifyLatchBranch(FC0);
1952 
1953  // If FC0.Latch and FC0.ExitingBlock are the same then we have already
1954  // performed the updates above.
1955  if (FC0.Latch != FC0.ExitingBlock)
1957  DominatorTree::Insert, FC0.Latch, FC1.Header));
1958 
1960  FC0.Latch, FC0.Header));
1962  FC1.Latch, FC0.Header));
1964  FC1.Latch, FC1.Header));
1965 
1966  // All done
1967  // Apply the updates to the Dominator Tree and cleanup.
1968 
1969  assert(succ_empty(FC1GuardBlock) && "FC1GuardBlock has successors!!");
1970  assert(pred_empty(FC1GuardBlock) && "FC1GuardBlock has predecessors!!");
1971 
1972  // Update DT/PDT
1973  DTU.applyUpdates(TreeUpdates);
1974 
1975  LI.removeBlock(FC1GuardBlock);
1976  LI.removeBlock(FC1.Preheader);
1977  LI.removeBlock(FC0.ExitBlock);
1978  if (FC0.Peeled) {
1979  LI.removeBlock(FC0ExitBlockSuccessor);
1980  DTU.deleteBB(FC0ExitBlockSuccessor);
1981  }
1982  DTU.deleteBB(FC1GuardBlock);
1983  DTU.deleteBB(FC1.Preheader);
1984  DTU.deleteBB(FC0.ExitBlock);
1985  DTU.flush();
1986 
1987  // Is there a way to keep SE up-to-date so we don't need to forget the loops
1988  // and rebuild the information in subsequent passes of fusion?
1989  // Note: Need to forget the loops before merging the loop latches, as
1990  // mergeLatch may remove the only block in FC1.
1991  SE.forgetLoop(FC1.L);
1992  SE.forgetLoop(FC0.L);
1994 
1995  // Move instructions from FC0.Latch to FC1.Latch.
1996  // Note: mergeLatch requires an updated DT.
1997  mergeLatch(FC0, FC1);
1998 
1999  // Merge the loops.
2000  SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks());
2001  for (BasicBlock *BB : Blocks) {
2002  FC0.L->addBlockEntry(BB);
2003  FC1.L->removeBlockFromLoop(BB);
2004  if (LI.getLoopFor(BB) != FC1.L)
2005  continue;
2006  LI.changeLoopFor(BB, FC0.L);
2007  }
2008  while (!FC1.L->isInnermost()) {
2009  const auto &ChildLoopIt = FC1.L->begin();
2010  Loop *ChildLoop = *ChildLoopIt;
2011  FC1.L->removeChildLoop(ChildLoopIt);
2012  FC0.L->addChildLoop(ChildLoop);
2013  }
2014 
2015  // Delete the now empty loop L1.
2016  LI.erase(FC1.L);
2017 
2018 #ifndef NDEBUG
2019  assert(!verifyFunction(*FC0.Header->getParent(), &errs()));
2021  assert(PDT.verify());
2022  LI.verify(DT);
2023  SE.verify();
2024 #endif
2025 
2026  LLVM_DEBUG(dbgs() << "Fusion done:\n");
2027 
2028  return FC0.L;
2029  }
2030 };
2031 
2032 struct LoopFuseLegacy : public FunctionPass {
2033 
2034  static char ID;
2035 
2036  LoopFuseLegacy() : FunctionPass(ID) {
2038  }
2039 
2040  void getAnalysisUsage(AnalysisUsage &AU) const override {
2050 
2055  }
2056 
2057  bool runOnFunction(Function &F) override {
2058  if (skipFunction(F))
2059  return false;
2060 
2061  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2062  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2063  auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
2064  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2065  auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
2066  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
2067  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
2068  const TargetTransformInfo &TTI =
2069  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2070  const DataLayout &DL = F.getParent()->getDataLayout();
2071 
2072  LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI);
2073  return LF.fuseLoops(F);
2074  }
2075 };
2076 } // namespace
2077 
2079  auto &LI = AM.getResult<LoopAnalysis>(F);
2080  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
2081  auto &DI = AM.getResult<DependenceAnalysis>(F);
2082  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
2083  auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
2085  auto &AC = AM.getResult<AssumptionAnalysis>(F);
2087  const DataLayout &DL = F.getParent()->getDataLayout();
2088 
2089  // Ensure loops are in simplifed form which is a pre-requisite for loop fusion
2090  // pass. Added only for new PM since the legacy PM has already added
2091  // LoopSimplify pass as a dependency.
2092  bool Changed = false;
2093  for (auto &L : LI) {
2094  Changed |=
2095  simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
2096  }
2097  if (Changed)
2098  PDT.recalculate(F);
2099 
2100  LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI);
2101  Changed |= LF.fuseLoops(F);
2102  if (!Changed)
2103  return PreservedAnalyses::all();
2104 
2105  PreservedAnalyses PA;
2109  PA.preserve<LoopAnalysis>();
2110  return PA;
2111 }
2112 
2113 char LoopFuseLegacy::ID = 0;
2114 
2115 INITIALIZE_PASS_BEGIN(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false,
2116  false)
2125 INITIALIZE_PASS_END(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false, false)
2126 
2127 FunctionPass *llvm::createLoopFusePass() { return new LoopFuseLegacy(); }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
AssumptionCache.h
llvm::ScalarEvolution::hasLoopInvariantBackedgeTakenCount
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
Definition: ScalarEvolution.cpp:13345
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2586
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2158
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:492
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
LoopSimplify.h
llvm::sys::path::const_iterator::end
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::DomTreeUpdater::deleteBB
void deleteBB(BasicBlock *DelBB)
Delete DelBB.
Definition: DomTreeUpdater.cpp:177
llvm::LoopBase::getExitBlock
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:107
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:101
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:370
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
Scalar.h
llvm::DomTreeUpdater::applyUpdates
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
Definition: DomTreeUpdater.cpp:230
llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition: ScalarEvolutionExpressions.h:353
llvm::DependenceAnalysisWrapperPass
Legacy pass manager pass to access dependence information.
Definition: DependenceAnalysis.h:1001
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
Pass.h
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::LoopInfoBase::verify
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:699
Statistic.h
llvm::ScalarEvolution::getAddRecExpr
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
Definition: ScalarEvolution.cpp:3611
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:173
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:631
DomTreeUpdater.h
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
R600_InstFlag::FC
@ FC
Definition: R600Defines.h:32
llvm::LoopInfoBase::changeLoopFor
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
Definition: LoopInfo.h:1030
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::LoopInfoBase::print
void print(raw_ostream &OS) const
Definition: LoopInfoImpl.h:630
llvm::verifyFunction
bool verifyFunction(const Function &F, raw_ostream *OS=nullptr)
Check a function for errors, useful for use when debugging a pass.
Definition: Verifier.cpp:6319
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
llvm::CmpInst::ICMP_SGT
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:747
true
basic Basic Alias true
Definition: BasicAliasAnalysis.cpp:1793
ScalarEvolution.h
llvm::LoopInfoBase::removeBlock
void removeBlock(BlockT *BB)
This method completely removes BB from all data structures, including all of the Loop objects it is n...
Definition: LoopInfo.h:1057
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:529
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1293
llvm::DominatorTreeBase< BasicBlock, false >::Insert
static constexpr UpdateKind Insert
Definition: GenericDomTree.h:242
llvm::Optional< unsigned >
llvm::LoopBase::begin
iterator begin() const
Definition: LoopInfo.h:171
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:891
VerboseFusionDebugging
static cl::opt< bool > VerboseFusionDebugging("loop-fusion-verbose-debug", cl::desc("Enable verbose debugging for Loop Fusion"), cl::Hidden, cl::init(false))
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:877
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::canPeel
bool canPeel(const Loop *L)
Definition: LoopPeel.cpp:84
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:104
llvm::succ_empty
bool succ_empty(const Instruction *I)
Definition: CFG.h:255
llvm::isControlFlowEquivalent
bool isControlFlowEquivalent(const Instruction &I0, const Instruction &I1, const DominatorTree &DT, const PostDominatorTree &PDT)
Return true if I0 and I1 are control flow equivalent.
Definition: CodeMoverUtils.cpp:229
llvm::SCEVNAryExpr::operands
op_range operands() const
Definition: ScalarEvolutionExpressions.h:211
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
CommandLine.h
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
FusionPeelMaxCount
static cl::opt< unsigned > FusionPeelMaxCount("loop-fusion-peel-max-count", cl::init(0), cl::Hidden, cl::desc("Max number of iterations to be peeled from a loop, such that " "fusion can take place"))
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
Rewriter
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
PostDominators.h
llvm::User
Definition: User.h:44
llvm::PostDominatorTreeWrapperPass
Definition: PostDominators.h:73
llvm::DominatorTreeBase< BasicBlock, false >::UpdateType
cfg::Update< NodePtr > UpdateType
Definition: GenericDomTree.h:240
SI
@ SI
Definition: SIInstrInfo.cpp:7882
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::ReplaceInstWithInst
void ReplaceInstWithInst(BasicBlock::InstListType &BIL, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
Definition: BasicBlockUtils.cpp:559
llvm::LoopBase::end
iterator end() const
Definition: LoopInfo.h:172
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:195
false
Definition: StackSlotColoring.cpp:141
llvm::printLoop
void printLoop(Loop &L, raw_ostream &OS, const std::string &Banner="")
Function to print a loop's contents as LLVM's text IR assembly.
Definition: LoopInfo.cpp:977
llvm::ScalarEvolution::isKnownPredicate
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
Definition: ScalarEvolution.cpp:10757
llvm::Instruction
Definition: Instruction.h:42
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::SCEVExprContains
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
Definition: ScalarEvolutionExpressions.h:730
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:30
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:291
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1713
llvm::DomTreeUpdater
Definition: DomTreeUpdater.h:28
FUSION_DEPENDENCE_ANALYSIS_SCEV
@ FUSION_DEPENDENCE_ANALYSIS_SCEV
Definition: LoopFuse.cpp:109
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2188
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopFuse.cpp:74
llvm::LoopBase::getExitingBlock
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:48
Utils.h
llvm::Loop::getName
StringRef getName() const
Definition: LoopInfo.h:891
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:74
LoopFuse.h
llvm::ScalarEvolution::getSmallConstantTripCount
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
Definition: ScalarEvolution.cpp:8016
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:479
llvm::cl::opt
Definition: CommandLine.h:1412
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::DependenceInfo
DependenceInfo - This class is the main dependence-analysis driver.
Definition: DependenceAnalysis.h:291
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:298
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:705
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:81
llvm::moveInstructionsToTheEnd
void moveInstructionsToTheEnd(BasicBlock &FromBB, BasicBlock &ToBB, DominatorTree &DT, const PostDominatorTree &PDT, DependenceInfo &DI)
Move instructions, in an order-preserving manner, from FromBB to the end of ToBB when proven safe.
Definition: CodeMoverUtils.cpp:422
llvm::ScalarEvolution::verify
void verify() const
Definition: ScalarEvolution.cpp:13893
llvm::createLoopFusePass
FunctionPass * createLoopFusePass()
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2642
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
llvm::SCEVNAryExpr::op_end
op_iterator op_end() const
Definition: ScalarEvolutionExpressions.h:210
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2849
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3190
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:992
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:183
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1843
llvm::SCEVRewriteVisitor
This visitor recursively visits a SCEV expression and re-writes it.
Definition: ScalarEvolutionExpressions.h:757
llvm::ScalarEvolution::forgetValue
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
Definition: ScalarEvolution.cpp:8432
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:232
llvm::initializeLoopFuseLegacyPass
void initializeLoopFuseLegacyPass(PassRegistry &)
llvm::sys::path::const_iterator::begin
friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:226
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:168
llvm::LoopInfo::erase
void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
Definition: LoopInfo.cpp:876
llvm::peelLoop
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA)
Peel off the first PeelCount iterations of loop L.
Definition: LoopPeel.cpp:839
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1690
FUSION_DEPENDENCE_ANALYSIS_ALL
@ FUSION_DEPENDENCE_ANALYSIS_ALL
Definition: LoopFuse.cpp:111
llvm::ScalarEvolution::forgetLoopDispositions
void forgetLoopDispositions()
Called when the client has changed the disposition of values in this loop.
Definition: ScalarEvolution.cpp:8459
llvm::DominatorTreeBase::recalculate
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Definition: GenericDomTree.h:778
llvm::SCEVNAryExpr::getNoWrapFlags
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
Definition: ScalarEvolutionExpressions.h:213
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::LoopInfo
Definition: LoopInfo.h:1108
llvm::MergeBlockIntoPredecessor
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false)
Attempts to merge a block into its predecessor, if possible.
Definition: BasicBlockUtils.cpp:179
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::PostDominatorTree
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Definition: PostDominators.h:28
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::DependenceAnalysis
AnalysisPass to compute dependence information in a function.
Definition: DependenceAnalysis.h:976
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:680
llvm::moveInstructionsToTheBeginning
void moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB, DominatorTree &DT, const PostDominatorTree &PDT, DependenceInfo &DI)
Move instructions, in an order-preserving manner, from FromBB to the beginning of ToBB when proven sa...
Definition: CodeMoverUtils.cpp:409
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
CodeMoverUtils.h
llvm::pred_empty
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
reportInvalidCandidate
static bool reportInvalidCandidate(const Instruction &I, llvm::Statistic &Stat)
Definition: CodeMoverUtils.cpp:267
llvm::TrackingStatistic
Definition: Statistic.h:50
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:780
llvm::DomTreeUpdater::flush
void flush()
Apply all pending updates to available trees and flush all BasicBlocks awaiting deletion.
Definition: DomTreeUpdater.cpp:73
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::User::replaceUsesOfWith
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
llvm::ScalarEvolution::isKnownPositive
bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
Definition: ScalarEvolution.cpp:10678
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:85
llvm::LoopFusePass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: LoopFuse.cpp:2078
llvm::BasicBlock::replacePhiUsesWith
void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
Definition: BasicBlock.cpp:471
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::LoopSimplifyID
char & LoopSimplifyID
Definition: LoopSimplify.cpp:789
Verifier.h
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:8372
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false, false) FunctionPass *llvm
Definition: LoopFuse.cpp:2115
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:354
llvm::None
constexpr std::nullopt_t None
Definition: None.h:27
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2741
llvm::LoopBase::isInvalid
bool isInvalid() const
Return true if this loop is no longer valid.
Definition: LoopInfo.h:232
llvm::SCEVNAryExpr::op_begin
op_iterator op_begin() const
Definition: ScalarEvolutionExpressions.h:209
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:342
llvm::LoopInfoBase::rbegin
reverse_iterator rbegin() const
Definition: LoopInfo.h:969
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:105
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition: ScalarEvolution.cpp:4328
verify
ppc ctr loops verify
Definition: PPCCTRLoopsVerify.cpp:76
FusionDependenceAnalysis
static cl::opt< FusionDependenceAnalysisChoice > FusionDependenceAnalysis("loop-fusion-dependence-analysis", cl::desc("Which dependence analysis should loop fusion use?"), cl::values(clEnumValN(FUSION_DEPENDENCE_ANALYSIS_SCEV, "scev", "Use the scalar evolution interface"), clEnumValN(FUSION_DEPENDENCE_ANALYSIS_DA, "da", "Use the dependence analysis interface"), clEnumValN(FUSION_DEPENDENCE_ANALYSIS_ALL, "all", "Use all available analyses")), cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_ALL))
llvm::simplifyLoop
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
Definition: LoopSimplify.cpp:708
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:614
llvm::CmpInst::ICMP_SGE
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:748
ScalarEvolutionExpressions.h
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
llvm::DependenceInfo::depends
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
Definition: DependenceAnalysis.cpp:3590
llvm::LoopInfoBase::rend
reverse_iterator rend() const
Definition: LoopInfo.h:970
isValid
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Definition: RustDemangle.cpp:184
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
TargetTransformInfo.h
FusionDependenceAnalysisChoice
FusionDependenceAnalysisChoice
Definition: LoopFuse.cpp:108
llvm::PHINode
Definition: Instructions.h:2699
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::LoopInfoBase::empty
bool empty() const
Definition: LoopInfo.h:971
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:485
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition: Instructions.h:5361
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::AnalysisUsage::addRequiredID
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:279
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::UnreachableInst
This function has undefined behavior.
Definition: Instructions.h:4771
llvm::PostDominatorTreeAnalysis
Analysis pass which computes a PostDominatorTree.
Definition: PostDominators.h:47
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
DependenceAnalysis.h
llvm::isSafeToMoveBefore
bool isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint, DominatorTree &DT, const PostDominatorTree *PDT=nullptr, DependenceInfo *DI=nullptr, bool CheckForEntireBlock=false)
Return true if I can be safely moved before InsertPoint.
Definition: CodeMoverUtils.cpp:310
llvm::Loop::isRotatedForm
bool isRotatedForm() const
Return true if the loop is in rotated form.
Definition: LoopInfo.h:810
llvm::cl::desc
Definition: CommandLine.h:413
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3134
raw_ostream.h
llvm::ScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition: ScalarEvolution.cpp:8242
FUSION_DEPENDENCE_ANALYSIS_DA
@ FUSION_DEPENDENCE_ANALYSIS_DA
Definition: LoopFuse.cpp:110
LoopPeel.h
BasicBlockUtils.h
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::ScalarEvolution::getSCEVAtScope
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
Definition: ScalarEvolution.cpp:9621
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::gatherPeelingPreferences
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, Optional< bool > UserAllowPeeling, Optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:796
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3213
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3227
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:360
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1268
llvm::DominatorTreeBase< BasicBlock, false >::Delete
static constexpr UpdateKind Delete
Definition: GenericDomTree.h:243
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941