LLVM  4.0.0
LoopRotation.cpp
Go to the documentation of this file.
1 //===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements Loop Rotation Pass.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/LoopPass.h"
27 #include "llvm/IR/CFG.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/IntrinsicInst.h"
31 #include "llvm/IR/Module.h"
33 #include "llvm/Support/Debug.h"
35 #include "llvm/Transforms/Scalar.h"
42 using namespace llvm;
43 
44 #define DEBUG_TYPE "loop-rotate"
45 
// Hidden command-line option "rotation-max-header-size" with initial value 16.
// NOTE(review): the line that opens this declaration is not present in this
// listing; presumably it declares DefaultRotationThreshold, which the pass
// constructors further down read - confirm against the full source.
47  "rotation-max-header-size", cl::init(16), cl::Hidden,
48  cl::desc("The default maximum header size for automatic loop rotation"));
49 
50 STATISTIC(NumRotated, "Number of loops rotated");
51 
52 namespace {
53 /// A simple loop rotation transformation.
///
/// Bundles the analyses used while rotating a single loop. processLoop() is the
/// only public entry point; it first tries simplifyLoopLatch() and then
/// rotateLoop().
54 class LoopRotate {
// Headers whose instruction count exceeds this value are not duplicated (see
// the CodeMetrics check in rotateLoop).
55  const unsigned MaxHeaderSize;
// Used without a null check by rotateLoop / simplifyLoopLatch.
56  LoopInfo *LI;
57  const TargetTransformInfo *TTI;
// Dereferenced without a null check when a cloned llvm.assume is registered.
58  AssumptionCache *AC;
// May be null: every update of the dominator tree is guarded by `if (DT)`.
59  DominatorTree *DT;
// May be null: rotateLoop only calls forgetLoop() when SE is non-null.
60  ScalarEvolution *SE;
61 
62 public:
// Stores the threshold and the analysis pointers; performs no other work.
63  LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
64  const TargetTransformInfo *TTI, AssumptionCache *AC,
66  : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE) {
67  }
// Simplify the latch of L, then attempt to rotate L.
68  bool processLoop(Loop *L);
69 
70 private:
// Duplicate the loop header into the preheader so the latch becomes exiting.
71  bool rotateLoop(Loop *L, bool SimplifiedLatch);
// Fold a trivially speculatable latch block into its exiting predecessor.
72  bool simplifyLoopLatch(Loop *L);
73 };
74 } // end anonymous namespace
75 
76 /// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
77 /// old header into the preheader. If there were uses of the values produced by
78 /// these instructions that were outside of the loop, we have to insert PHI nodes
79 /// to merge the two values. Do this now.
///
/// For every value defined in OrigHeader, ValueMap is consulted for the
/// corresponding value available in OrigPreheader; uses are then rewritten
/// either directly (users inside OrigHeader / OrigPreheader) or through the
/// SSAUpdater (everything else). Debug-intrinsic uses reached through
/// MetadataAsValue are handled separately and never cause new PHI nodes.
81  BasicBlock *OrigPreheader,
83  // Remove PHI node entries that are no longer live.
// Only the incoming entry that belongs to OrigPreheader is dropped from each
// PHI at the top of OrigHeader.
84  BasicBlock::iterator I, E = OrigHeader->end();
85  for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
86  PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
87 
88  // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
89  // as necessary.
91  for (I = OrigHeader->begin(); I != E; ++I) {
92  Value *OrigHeaderVal = &*I;
93 
94  // If there are no uses of the value (e.g. because it returns void), there
95  // is nothing to rewrite.
96  if (OrigHeaderVal->use_empty())
97  continue;
98 
// lookup() yields a default-constructed (null) value when the key is absent.
99  Value *OrigPreHeaderVal = ValueMap.lookup(OrigHeaderVal);
100 
101  // The value now exists in two versions: the initial value in the preheader
102  // and the loop "next" value in the original header.
103  SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
104  SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
105  SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
106 
107  // Visit each use of the OrigHeader instruction.
108  for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
109  UE = OrigHeaderVal->use_end();
110  UI != UE;) {
111  // Grab the use before incrementing the iterator.
112  Use &U = *UI;
113 
114  // Increment the iterator before removing the use from the list.
115  ++UI;
116 
117  // SSAUpdater can't handle a non-PHI use in the same block as an
118  // earlier def. We can easily handle those cases manually.
119  Instruction *UserInst = cast<Instruction>(U.getUser());
120  if (!isa<PHINode>(UserInst)) {
121  BasicBlock *UserBB = UserInst->getParent();
122 
123  // The original users in the OrigHeader are already using the
124  // original definitions.
125  if (UserBB == OrigHeader)
126  continue;
127 
128  // Users in the OrigPreHeader need to use the value to which the
129  // original definitions are mapped.
130  if (UserBB == OrigPreheader) {
131  U = OrigPreHeaderVal;
132  continue;
133  }
134  }
135 
136  // Anything else can be handled by SSAUpdater.
137  SSA.RewriteUse(U);
138  }
139 
140  // Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug
141  // intrinsics.
// Both getIfExists() calls only look up existing wrappers, so nothing is
// done when the value was never referenced through metadata.
142  LLVMContext &C = OrigHeader->getContext();
143  if (auto *VAM = ValueAsMetadata::getIfExists(OrigHeaderVal)) {
144  if (auto *MAV = MetadataAsValue::getIfExists(C, VAM)) {
145  for (auto UI = MAV->use_begin(), E = MAV->use_end(); UI != E;) {
146  // Grab the use before incrementing the iterator. Otherwise, altering
147  // the Use will invalidate the iterator.
148  Use &U = *UI++;
150  if (!UserInst)
151  continue;
152 
153  // The original users in the OrigHeader are already using the original
154  // definitions.
155  BasicBlock *UserBB = UserInst->getParent();
156  if (UserBB == OrigHeader)
157  continue;
158 
159  // Users in the OrigPreHeader need to use the value to which the
160  // original definitions are mapped and anything else can be handled by
161  // the SSAUpdater. To avoid adding PHINodes, check if the value is
162  // available in UserBB, if not substitute undef.
163  Value *NewVal;
164  if (UserBB == OrigPreheader)
165  NewVal = OrigPreHeaderVal;
166  else if (SSA.HasValueForBlock(UserBB))
167  NewVal = SSA.GetValueInMiddleOfBlock(UserBB);
168  else
169  NewVal = UndefValue::get(OrigHeaderVal->getType());
171  }
172  }
173  }
174  }
175 }
176 
177 /// Rotate loop \p L. Return true if the loop is rotated.
178 ///
179 /// \param SimplifiedLatch is true if the latch was just folded into the final
180 /// loop exit. In this case we may want to rotate even though the new latch is
181 /// now an exiting branch. This rotation would have happened had the latch not
182 /// been simplified. However, if SimplifiedLatch is false, then we avoid
183 /// rotating loops in which the latch exits to avoid excessive or endless
184 /// rotation. LoopRotate should be repeatable and converge to a canonical
185 /// form. This property is satisfied because simplifying the loop latch can only
186 /// happen once across multiple invocations of the LoopRotate pass.
///
/// Overview of the steps below: (1) reject loops that are unsuitable or whose
/// header is too large, (2) hoist or clone every header instruction into the
/// preheader, (3) patch PHI nodes and out-of-block uses, (4) make the old
/// header's in-loop successor the new header, (5) repair the dominator tree
/// and split edges (or fold the cloned branch when it is constant), and
/// (6) try to merge the old header into its predecessor.
187 bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
188  // If the loop has only one block then there is not much to rotate.
189  if (L->getBlocks().size() == 1)
190  return false;
191 
192  BasicBlock *OrigHeader = L->getHeader();
193  BasicBlock *OrigLatch = L->getLoopLatch();
194 
// Only headers that end in a conditional branch are handled.
195  BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
196  if (!BI || BI->isUnconditional())
197  return false;
198 
199  // If the loop header is not one of the loop exiting blocks then
200  // either this loop is already rotated or it is not
201  // suitable for loop rotation transformations.
202  if (!L->isLoopExiting(OrigHeader))
203  return false;
204 
205  // Rotation needs a single latch block to work with; give up when
206  // getLoopLatch() found none.
207  if (!OrigLatch)
208  return false;
209 
210  // Rotate if either the loop latch does *not* exit the loop, or if the loop
211  // latch was just simplified.
212  if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch)
213  return false;
214 
215  // Check size of original header and reject loop if it is very big or we can't
216  // duplicate blocks inside it.
217  {
219  CodeMetrics::collectEphemeralValues(L, AC, EphValues);
220 
222  Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues);
223  if (Metrics.notDuplicatable) {
224  DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
225  << " instructions: ";
226  L->dump());
227  return false;
228  }
229  if (Metrics.convergent) {
230  DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent "
231  "instructions: ";
232  L->dump());
233  return false;
234  }
235  if (Metrics.NumInsts > MaxHeaderSize)
236  return false;
237  }
238 
239  // Now, this loop is suitable for rotation.
240  BasicBlock *OrigPreheader = L->getLoopPreheader();
241 
242  // If the loop could not be converted to canonical form, it must have an
243  // indirectbr in it, just give up.
244  if (!OrigPreheader)
245  return false;
246 
247  // Anything ScalarEvolution may know about this loop or the PHI nodes
248  // in its header will soon be invalidated.
249  if (SE)
250  SE->forgetLoop(L);
251 
252  DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
253 
254  // Find new Loop header. NewHeader is a Header's one and only successor
255  // that is inside loop. Header's other successor is outside the
256  // loop. Otherwise loop is not suitable for rotation.
257  BasicBlock *Exit = BI->getSuccessor(0);
258  BasicBlock *NewHeader = BI->getSuccessor(1);
// The initial assignment is a guess; swap when successor 0 is the in-loop one.
259  if (L->contains(Exit))
260  std::swap(Exit, NewHeader);
261  assert(NewHeader && "Unable to determine new loop header");
262  assert(L->contains(NewHeader) && !L->contains(Exit) &&
263  "Unable to determine loop header and exit blocks");
264 
265  // This code assumes that the new header has exactly one predecessor.
266  // Remove any single-entry PHI nodes in it.
267  assert(NewHeader->getSinglePredecessor() &&
268  "New header doesn't have one pred!");
269  FoldSingleEntryPHINodes(NewHeader);
270 
271  // Begin by walking OrigHeader and populating ValueMap with an entry for
272  // each Instruction.
273  BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
275 
276  // For PHI nodes, the value available in OrigPreheader is just the
277  // incoming value from OrigPreheader.
278  for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
279  ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
280 
281  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
282 
283  // For the rest of the instructions, either hoist to the OrigPreheader if
284  // possible or create a clone in the OrigPreheader if not.
285  TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
286  while (I != E) {
// Advance before touching Inst: it may be moved out of this block below.
287  Instruction *Inst = &*I++;
288 
289  // If the instruction's operands are invariant and it doesn't read or write
290  // memory, then it is safe to hoist. Doing this doesn't change the order of
291  // execution in the preheader, but does prevent the instruction from
292  // executing in each iteration of the loop. This means it is safe to hoist
293  // something that might trap, but isn't safe to hoist something that reads
294  // memory (without proving that the loop doesn't write).
295  if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() &&
296  !Inst->mayWriteToMemory() && !isa<TerminatorInst>(Inst) &&
297  !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
298  Inst->moveBefore(LoopEntryBranch);
299  continue;
300  }
301 
302  // Otherwise, create a duplicate of the instruction.
303  Instruction *C = Inst->clone();
304 
305  // Eagerly remap the operands of the instruction.
306  RemapInstruction(C, ValueMap,
308 
309  // With the operands remapped, see if the instruction constant folds or is
310  // otherwise simplifiable. This commonly occurs because the entry from PHI
311  // nodes allows icmps and other instructions to fold.
312  // FIXME: Provide TLI, DT, AC to SimplifyInstruction.
313  Value *V = SimplifyInstruction(C, DL);
314  if (V && LI->replacementPreservesLCSSAForm(C, V)) {
315  // If so, then delete the temporary instruction and stick the folded value
316  // in the map.
// A clone with side effects is still inserted below even though its result
// was replaced by the simplified value.
317  ValueMap[Inst] = V;
318  if (!C->mayHaveSideEffects()) {
319  delete C;
320  C = nullptr;
321  }
322  } else {
323  ValueMap[Inst] = C;
324  }
325  if (C) {
326  // Otherwise, stick the new instruction into the new block!
327  C->setName(Inst->getName());
328  C->insertBefore(LoopEntryBranch);
329 
// Keep the assumption cache aware of cloned llvm.assume calls.
330  if (auto *II = dyn_cast<IntrinsicInst>(C))
331  if (II->getIntrinsicID() == Intrinsic::assume)
332  AC->registerAssumption(II);
333  }
334  }
335 
336  // Along with all the other instructions, we just cloned OrigHeader's
337  // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
338  // successors by duplicating their incoming values for OrigHeader.
339  TerminatorInst *TI = OrigHeader->getTerminator();
340  for (BasicBlock *SuccBB : TI->successors())
341  for (BasicBlock::iterator BI = SuccBB->begin();
342  PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
343  PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
344 
345  // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
346  // OrigPreHeader's old terminator (the original branch into the loop), and
347  // remove the corresponding incoming values from the PHI nodes in OrigHeader.
348  LoopEntryBranch->eraseFromParent();
349 
350  // If there were any uses of instructions in the duplicated block outside the
351  // loop, update them, inserting PHI nodes as required
352  RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap);
353 
354  // NewHeader is now the header of the loop.
355  L->moveToHeader(NewHeader);
356  assert(L->getHeader() == NewHeader && "Latch block is our new header");
357 
358  // At this point, we've finished our major CFG changes. As part of cloning
359  // the loop into the preheader we've simplified instructions and the
360  // duplicated conditional branch may now be branching on a constant. If it is
361  // branching on a constant and if that constant means that we enter the loop,
362  // then we fold away the cond branch to an uncond branch. This simplifies the
363  // loop in cases important for nested loops, and it also means we don't have
364  // to split as many edges.
365  BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
366  assert(PHBI->isConditional() && "Should be clone of BI condbr!");
// getSuccessor(isZero()) picks the taken successor: index 1 for a zero
// (false) condition, index 0 otherwise.
367  if (!isa<ConstantInt>(PHBI->getCondition()) ||
368  PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) !=
369  NewHeader) {
370  // The conditional branch can't be folded, handle the general case.
371  // Update DominatorTree to reflect the CFG change we just made. Then split
372  // edges as necessary to preserve LoopSimplify form.
373  if (DT) {
374  // Everything that was dominated by the old loop header is now dominated
375  // by the original loop preheader. Conceptually the header was merged
376  // into the preheader, even though we reuse the actual block as a new
377  // loop latch.
// The children are copied first because re-parenting mutates the list.
378  DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
379  SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
380  OrigHeaderNode->end());
381  DomTreeNode *OrigPreheaderNode = DT->getNode(OrigPreheader);
382  for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I)
383  DT->changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode);
384 
385  assert(DT->getNode(Exit)->getIDom() == OrigPreheaderNode);
386  assert(DT->getNode(NewHeader)->getIDom() == OrigPreheaderNode);
387 
388  // Make OrigLatch the immediate dominator of OrigHeader.
389  DT->changeImmediateDominator(OrigHeader, OrigLatch);
390  }
391 
392  // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
393  // thus is not a preheader anymore.
394  // Split the edge to form a real preheader.
395  BasicBlock *NewPH = SplitCriticalEdge(
396  OrigPreheader, NewHeader,
397  CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
398  NewPH->setName(NewHeader->getName() + ".lr.ph");
399 
400  // Preserve canonical loop form, which means that 'Exit' should have only
401  // one predecessor. Note that Exit could be an exit block for multiple
402  // nested loops, causing both of the edges to now be critical and need to
403  // be split.
// Predecessors are snapshotted because splitting edges changes Exit's
// predecessor list.
404  SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit));
405  bool SplitLatchEdge = false;
406  for (BasicBlock *ExitPred : ExitPreds) {
407  // We only need to split loop exit edges.
408  Loop *PredLoop = LI->getLoopFor(ExitPred);
409  if (!PredLoop || PredLoop->contains(Exit))
410  continue;
// Predecessors that end in an indirectbr are skipped and left unsplit.
411  if (isa<IndirectBrInst>(ExitPred->getTerminator()))
412  continue;
413  SplitLatchEdge |= L->getLoopLatch() == ExitPred;
414  BasicBlock *ExitSplit = SplitCriticalEdge(
415  ExitPred, Exit,
416  CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
417  ExitSplit->moveBefore(Exit);
418  }
419  assert(SplitLatchEdge &&
420  "Despite splitting all preds, failed to split latch exit?");
421  } else {
422  // We can fold the conditional branch in the preheader, this makes things
423  // simpler. The first step is to remove the extra edge to the Exit block.
424  Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
425  BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
426  NewBI->setDebugLoc(PHBI->getDebugLoc());
427  PHBI->eraseFromParent();
428 
429  // With our CFG finalized, update DomTree if it is available.
430  if (DT) {
431  // Re-parent NewHeader under OrigPreheader and OrigHeader under OrigLatch.
432  DT->changeImmediateDominator(NewHeader, OrigPreheader);
433  DT->changeImmediateDominator(OrigHeader, OrigLatch);
434 
435  // Brute force incremental dominator tree update. Call
436  // findNearestCommonDominator on all CFG predecessors of each child of the
437  // original header.
438  DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
439  SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
440  OrigHeaderNode->end());
441  bool Changed;
442  do {
443  Changed = false;
444  for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) {
445  DomTreeNode *Node = HeaderChildren[I];
446  BasicBlock *BB = Node->getBlock();
447 
// Seed with the first predecessor, then fold in every predecessor.
448  pred_iterator PI = pred_begin(BB);
449  BasicBlock *NearestDom = *PI;
450  for (pred_iterator PE = pred_end(BB); PI != PE; ++PI)
451  NearestDom = DT->findNearestCommonDominator(NearestDom, *PI);
452 
453  // Remember if this changes the DomTree.
454  if (Node->getIDom()->getBlock() != NearestDom) {
455  DT->changeImmediateDominator(BB, NearestDom);
456  Changed = true;
457  }
458  }
459 
460  // If the dominator changed, this may have an effect on other
461  // predecessors, continue until we reach a fixpoint.
462  } while (Changed);
463  }
464  }
465 
466  assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
467  assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
468 
469  // Now that the CFG and DomTree are in a consistent state again, try to merge
470  // the OrigHeader block into OrigLatch. This will succeed if they are
471  // connected by an unconditional branch. This is just a cleanup so the
472  // emitted code isn't too gross in this common case.
473  MergeBlockIntoPredecessor(OrigHeader, DT, LI);
474 
475  DEBUG(dbgs() << "LoopRotation: into "; L->dump());
476 
477  ++NumRotated;
478  return true;
479 }
480 
481 /// Determine whether the instructions in this range may be safely and cheaply
482 /// speculated. This is not an important enough situation to develop complex
483 /// heuristics. We handle a single arithmetic instruction along with any type
484 /// conversions.
///
/// The range [Begin, End) is accepted only if it contains at most one
/// add/sub/bitwise/shift instruction (or a GEP with all-constant indices,
/// which counts as that one instruction), plus any number of trunc/zext/sext
/// and debug intrinsics. Any other opcode rejects the range.
487  bool seenIncrement = false;
488  bool MultiExitLoop = false;
489 
// getExitingBlock() returns null unless the loop has exactly one exiting
// block.
490  if (!L->getExitingBlock())
491  MultiExitLoop = true;
492 
493  for (BasicBlock::iterator I = Begin; I != End; ++I) {
494 
496  return false;
497 
498  if (isa<DbgInfoIntrinsic>(I))
499  continue;
500 
501  switch (I->getOpcode()) {
502  default:
503  return false;
504  case Instruction::GetElementPtr:
505  // GEPs are cheap if all indices are constant.
506  if (!cast<GEPOperator>(I)->hasAllConstantIndices())
507  return false;
508  // fall through to the increment case
510  case Instruction::Add:
511  case Instruction::Sub:
512  case Instruction::And:
513  case Instruction::Or:
514  case Instruction::Xor:
515  case Instruction::Shl:
516  case Instruction::LShr:
517  case Instruction::AShr: {
// Pick the first non-constant operand among operands 0 and 1; an
// instruction whose first two operands are both constants is rejected.
518  Value *IVOpnd =
519  !isa<Constant>(I->getOperand(0))
520  ? I->getOperand(0)
521  : !isa<Constant>(I->getOperand(1)) ? I->getOperand(1) : nullptr;
522  if (!IVOpnd)
523  return false;
524 
525  // If increment operand is used outside of the loop, this speculation
526  // could cause extra live range interference.
527  if (MultiExitLoop) {
528  for (User *UseI : IVOpnd->users()) {
529  auto *UserInst = cast<Instruction>(UseI);
530  if (!L->contains(UserInst))
531  return false;
532  }
533  }
534 
// Only one such instruction is tolerated per range.
535  if (seenIncrement)
536  return false;
537  seenIncrement = true;
538  break;
539  }
540  case Instruction::Trunc:
541  case Instruction::ZExt:
542  case Instruction::SExt:
543  // ignore type conversions
544  break;
545  }
546  }
547  return true;
548 }
549 
550 /// Fold the loop tail into the loop exit by speculating the loop tail
551 /// instructions. Typically, this is a single post-increment. In the case of a
552 /// simple 2-block loop, hoisting the increment can be much better than
553 /// duplicating the entire loop header. In the case of loops with early exits,
554 /// rotation will not work anyway, but simplifyLoopLatch will put the loop in
555 /// canonical form so downstream passes can handle it.
556 ///
557 /// I don't believe this invalidates SCEV.
558 bool LoopRotate::simplifyLoopLatch(Loop *L) {
559  BasicBlock *Latch = L->getLoopLatch();
560  if (!Latch || Latch->hasAddressTaken())
561  return false;
562 
563  BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
564  if (!Jmp || !Jmp->isUnconditional())
565  return false;
566 
567  BasicBlock *LastExit = Latch->getSinglePredecessor();
568  if (!LastExit || !L->isLoopExiting(LastExit))
569  return false;
570 
571  BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
572  if (!BI)
573  return false;
574 
575  if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L))
576  return false;
577 
578  DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
579  << LastExit->getName() << "\n");
580 
581  // Hoist the instructions from Latch into LastExit.
582  LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(),
583  Latch->begin(), Jmp->getIterator());
584 
585  unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
586  BasicBlock *Header = Jmp->getSuccessor(0);
587  assert(Header == L->getHeader() && "expected a backward branch");
588 
589  // Remove Latch from the CFG so that LastExit becomes the new Latch.
590  BI->setSuccessor(FallThruPath, Header);
591  Latch->replaceSuccessorsPhiUsesWith(LastExit);
592  Jmp->eraseFromParent();
593 
594  // Nuke the Latch block.
595  assert(Latch->empty() && "unable to evacuate Latch");
596  LI->removeBlock(Latch);
597  if (DT)
598  DT->eraseNode(Latch);
599  Latch->eraseFromParent();
600  return true;
601 }
602 
603 /// Rotate \c L, and return true if any modification was made.
604 bool LoopRotate::processLoop(Loop *L) {
605  // Save the loop metadata.
606  MDNode *LoopMD = L->getLoopID();
607 
608  // Simplify the loop latch before attempting to rotate the header
609  // upward. Rotation may not be needed if the loop tail can be folded into the
610  // loop exit.
611  bool SimplifiedLatch = simplifyLoopLatch(L);
612 
613  bool MadeChange = rotateLoop(L, SimplifiedLatch);
614  assert((!MadeChange || L->isLoopExiting(L->getLoopLatch())) &&
615  "Loop latch should be exiting after loop-rotate.");
616 
617  // Restore the loop metadata.
618  // NB! We presume LoopRotation DOESN'T ADD its own metadata.
619  if ((MadeChange || SimplifiedLatch) && LoopMD)
620  L->setLoopID(LoopMD);
621 
622  return MadeChange;
623 }
624 
625 LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication)
626  : EnableHeaderDuplication(EnableHeaderDuplication) {}
627 
// New-pass-manager entry point: builds a LoopRotate helper from the standard
// loop analysis results in AR and runs it on L. The last parameter (the
// LPMUpdater) is unnamed and unused.
630  LPMUpdater &) {
// With header duplication disabled the threshold is 0; rotateLoop rejects any
// header whose instruction count exceeds the threshold.
631  int Threshold = EnableHeaderDuplication ? DefaultRotationThreshold : 0;
632  LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE);
633 
// When processLoop reports no change, every analysis is declared preserved.
634  bool Changed = LR.processLoop(&L);
635  if (!Changed)
636  return PreservedAnalyses::all();
638 }
639 
640 namespace {
641 
// Legacy pass manager wrapper: gathers the analyses for one loop and delegates
// the actual work to LoopRotate::processLoop.
642 class LoopRotateLegacyPass : public LoopPass {
// Header-size threshold handed to LoopRotate for every loop.
643  unsigned MaxHeaderSize;
644 
645 public:
646  static char ID; // Pass ID, replacement for typeid
// -1 selects DefaultRotationThreshold; any other value is converted to
// unsigned and used as given.
647  LoopRotateLegacyPass(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
649  if (SpecifiedMaxHeaderSize == -1)
650  MaxHeaderSize = DefaultRotationThreshold;
651  else
652  MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize);
653  }
654 
655  // LCSSA form makes instruction renaming easier.
656  void getAnalysisUsage(AnalysisUsage &AU) const override {
660  }
661 
// Rotates L unless skipLoop() says the loop should be left alone; returns
// whatever processLoop reports.
662  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
663  if (skipLoop(L))
664  return false;
665  Function &F = *L->getHeader()->getParent();
666 
667  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
668  const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
669  auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
// The dominator tree and ScalarEvolution are optional: they are used only
// if already available, otherwise null is passed on.
670  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
671  auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
672  auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
673  auto *SE = SEWP ? &SEWP->getSE() : nullptr;
674  LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE);
675  return LR.processLoop(L);
676  }
677 };
678 }
679 
// Definition of the static pass ID declared inside LoopRotateLegacyPass.
680 char LoopRotateLegacyPass::ID = 0;
// Pass registration with the strings "loop-rotate" and "Rotate Loops".
// NOTE(review): the dependency lines that normally sit between BEGIN and END
// are not present in this listing.
681 INITIALIZE_PASS_BEGIN(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops",
682  false, false)
686 INITIALIZE_PASS_END(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops", false,
687  false)
688 
689 Pass *llvm::createLoopRotatePass(int MaxHeaderSize) {
690  return new LoopRotateLegacyPass(MaxHeaderSize);
691 }
MachineLoop * L
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:81
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:76
use_iterator use_end()
Definition: Value.h:318
use_iterator_impl< Use > use_iterator
Definition: Value.h:304
A parsed version of the target data layout string, and methods for querying it. ...
Definition: DataLayout.h:102
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:38
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
STATISTIC(NumFunctions,"Total number of functions")
loop Rotate false
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
This is the interface for a simple mod/ref and alias analysis over globals.
bool convergent
True if this function contains a call to a convergent function.
Definition: CodeMetrics.h:57
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Definition: SSAUpdater.cpp:45
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: ValueMap.h:167
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value...
Definition: SSAUpdater.cpp:58
The main scalar evolution driver.
An immutable pass that tracks lazily created AssumptionCache objects.
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:450
A cache of .assume calls within a function.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop...
Definition: LoopInfo.h:160
Metadata node.
Definition: Metadata.h:830
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:61
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
bool notDuplicatable
True if this function cannot be duplicated.
Definition: CodeMetrics.h:54
const std::vector< BlockT * > & getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:139
BlockT * getHeader() const
Definition: LoopInfo.h:102
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:191
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:157
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:228
DomTreeNodeBase< NodeT > * getIDom() const
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:53
bool isUnconditional() const
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
This is the interface for a SCEV-based alias analysis.
Option class for critical edge splitting.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches, switches, etc.
Definition: BasicBlock.h:308
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:257
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following: ...
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
#define F(x, y, z)
Definition: MD5.cpp:51
bool mayReadFromMemory() const
Return true if this instruction may read memory.
bool empty() const
Definition: BasicBlock.h:239
BasicBlock * getSuccessor(unsigned i) const
Memory SSA
Definition: MemorySSA.cpp:55
Base class for the actual dominator tree node.
static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, BasicBlock::iterator End, Loop *L)
Determine whether the instructions in this range may be safely and cheaply speculated.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:96
NodeT * getBlock() const
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:62
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:74
void replaceSuccessorsPhiUsesWith(BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of to it...
Definition: BasicBlock.cpp:414
BasicBlock * SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions())
If this edge is a critical edge, insert a new node to split the critical edge.
succ_range successors()
Definition: InstrTypes.h:280
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block...
Definition: SSAUpdater.cpp:86
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:52
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:107
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:256
void dump() const
Definition: LoopInfo.cpp:408
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:109
static ValueAsMetadata * getIfExists(Value *V)
Definition: Metadata.cpp:328
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction...
Definition: Instruction.cpp:82
static MetadataAsValue * getIfExists(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:82
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:48
static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, BasicBlock *OrigPreheader, ValueToValueMapTy &ValueMap)
RewriteUsesOfClonedInstructions - We just cloned the instructions from the old header into the preheader.
Conditional or Unconditional Branch instruction.
APInt Or(const APInt &LHS, const APInt &RHS)
Bitwise OR function for APInt.
Definition: APInt.h:1947
APInt Xor(const APInt &LHS, const APInt &RHS)
Bitwise XOR function for APInt.
Definition: APInt.h:1952
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:116
INITIALIZE_PASS_BEGIN(LoopRotateLegacyPass,"loop-rotate","Rotate Loops", false, false) INITIALIZE_PASS_END(LoopRotateLegacyPass
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:212
Represent the analysis usage information of a pass.
void splice(iterator where, iplist_impl &L2)
Definition: ilist.h:342
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
Definition: LoopInfo.h:109
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:249
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
static const unsigned End
User * getUser() const
Returns the User that contains this Use.
Definition: Use.cpp:41
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:52
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:119
self_iterator getIterator()
Definition: ilist_node.h:81
bool HasValueForBlock(BasicBlock *BB) const
Return true if the SSAUpdater already has a value for the specified block.
Definition: SSAUpdater.cpp:54
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1337
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:113
void initializeLoopRotateLegacyPassPass(PassRegistry &)
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:246
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Iterator for intrusive lists based on ilist_node.
machine trace Machine Trace Metrics
loop Rotate Loops
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements...
Definition: SmallPtrSet.h:425
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:230
loop rotate
static ValueAsMetadata * get(Value *V)
Definition: Metadata.cpp:309
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
Module.h This file contains the declarations for the Module class.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:230
This is the common base class for debug info intrinsics.
Definition: IntrinsicInst.h:67
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:42
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
Value * getIncomingValueForBlock(const BasicBlock *BB) const
iterator_range< user_iterator > users()
Definition: Value.h:370
BasicBlock * getSinglePredecessor()
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:226
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM...
Definition: ValueMapper.h:243
void FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
If this flag is set, the remapper ignores missing function-local entries (Argument, Instruction, BasicBlock) that are not in the value map.
Definition: ValueMapper.h:80
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
Definition: APInt.h:1942
use_iterator use_begin()
Definition: Value.h:310
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:528
bool MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemoryDependenceResults *MemDep=nullptr)
Attempts to merge a block into its predecessor, if possible.
void moveToHeader(BlockT *BB)
This method is used to move BB (which must be part of this loop) to be the loop header of the loop (t...
Definition: LoopInfo.h:316
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:368
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:97
#define I(x, y, z)
Definition: MD5.cpp:54
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Definition: BasicBlock.cpp:124
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:938
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
LoopRotatePass(bool EnableHeaderDuplication=true)
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues)
Add information about a block to the current state.
static int const Threshold
TODO: Write a new FunctionPass AliasAnalysis so that it can keep a cache.
bool use_empty() const
Definition: Value.h:299
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:33
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM Value Representation.
Definition: Value.h:71
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:239
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:95
#define DEBUG(X)
Definition: Debug.h:100
This is the interface for LLVM's primary stateless and local alias analysis.
A container for analyses that lazily runs them and caches their results.
Value * SimplifyInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr)
See if we can compute a simplified version of this instruction.
This pass exposes codegen information to IR-level passes.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:73
unsigned NumInsts
Number of instructions in the analyzed blocks.
Definition: CodeMetrics.h:63
void RewriteUse(Use &U)
Rewrite a use of the symbolic value.
Definition: SSAUpdater.cpp:178
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:103
const BasicBlock * getParent() const
Definition: Instruction.h:62
Pass * createLoopRotatePass(int MaxHeaderSize=-1)
static cl::opt< unsigned > DefaultRotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden, cl::desc("The default maximum header size for automatic loop rotation"))