LLVM  6.0.0svn
MachineLICM.cpp
Go to the documentation of this file.
1 //===- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass performs loop invariant code motion on machine instructions. We
11 // attempt to remove as much code from the body of a loop as possible.
12 //
13 // This pass is not intended to be a replacement or a complete alternative
14 // for the LLVM-IR-level LICM pass. It is only designed to hoist simple
15 // constructs that are not exposed before lowering and instruction selection.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/BitVector.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/Statistic.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/MC/MCInstrDesc.h"
44 #include "llvm/MC/MCRegisterInfo.h"
45 #include "llvm/Pass.h"
46 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/Debug.h"
50 #include <algorithm>
51 #include <cassert>
52 #include <limits>
53 #include <vector>
54 
55 using namespace llvm;
56 
57 #define DEBUG_TYPE "machinelicm"
58 
59 static cl::opt<bool>
60 AvoidSpeculation("avoid-speculation",
61  cl::desc("MachineLICM should avoid speculation"),
62  cl::init(true), cl::Hidden);
63 
64 static cl::opt<bool>
65 HoistCheapInsts("hoist-cheap-insts",
66  cl::desc("MachineLICM should hoist even cheap instructions"),
67  cl::init(false), cl::Hidden);
68 
69 static cl::opt<bool>
70 SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
71  cl::desc("MachineLICM should sink instructions into "
72  "loops to avoid register spills"),
73  cl::init(false), cl::Hidden);
74 
75 STATISTIC(NumHoisted,
76  "Number of machine instructions hoisted out of loops");
77 STATISTIC(NumLowRP,
78  "Number of instructions hoisted in low reg pressure situation");
79 STATISTIC(NumHighLatency,
80  "Number of high latency instructions hoisted");
81 STATISTIC(NumCSEed,
82  "Number of hoisted machine instructions CSEed");
83 STATISTIC(NumPostRAHoisted,
84  "Number of machine instructions hoisted out of loops post regalloc");
85 
86 namespace {
87 
88  class MachineLICM : public MachineFunctionPass {
89  const TargetInstrInfo *TII;
90  const TargetLoweringBase *TLI;
91  const TargetRegisterInfo *TRI;
92  const MachineFrameInfo *MFI;
94  TargetSchedModel SchedModel;
95  bool PreRegAlloc = true;
96 
97  // Various analyses that we use...
98  AliasAnalysis *AA; // Alias analysis info.
99  MachineLoopInfo *MLI; // Current MachineLoopInfo
100  MachineDominatorTree *DT; // Machine dominator tree for the cur loop
101 
102  // State that is updated as we process loops
103  bool Changed; // True if a loop is changed.
104  bool FirstInLoop; // True if it's the first LICM in the loop.
105  MachineLoop *CurLoop; // The current loop we are working on.
106  MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
107 
108  // Exit blocks for CurLoop.
110 
111  bool isExitBlock(const MachineBasicBlock *MBB) const {
112  return is_contained(ExitBlocks, MBB);
113  }
114 
115  // Track 'estimated' register pressure.
116  SmallSet<unsigned, 32> RegSeen;
118 
119  // Register pressure "limit" per register pressure set. If the pressure
120  // is higher than the limit, then it's considered high.
121  SmallVector<unsigned, 8> RegLimit;
122 
123  // Register pressure on path leading from loop preheader to current BB.
125 
126  // For each opcode, keep a list of potential CSE instructions.
128 
129  enum {
130  SpeculateFalse = 0,
131  SpeculateTrue = 1,
132  SpeculateUnknown = 2
133  };
134 
135  // If a MBB does not dominate loop exiting blocks then it may not safe
136  // to hoist loads from this block.
137  // Tri-state: 0 - false, 1 - true, 2 - unknown
138  unsigned SpeculationState;
139 
140  public:
141  static char ID; // Pass identification, replacement for typeid
142 
143  MachineLICM() : MachineFunctionPass(ID) {
145  }
146 
147  explicit MachineLICM(bool PreRA)
148  : MachineFunctionPass(ID), PreRegAlloc(PreRA) {
150  }
151 
152  bool runOnMachineFunction(MachineFunction &MF) override;
153 
154  void getAnalysisUsage(AnalysisUsage &AU) const override {
161  }
162 
163  void releaseMemory() override {
164  RegSeen.clear();
165  RegPressure.clear();
166  RegLimit.clear();
167  BackTrace.clear();
168  CSEMap.clear();
169  }
170 
171  private:
172  /// Keep track of information about hoisting candidates.
173  struct CandidateInfo {
174  MachineInstr *MI;
175  unsigned Def;
176  int FI;
177 
178  CandidateInfo(MachineInstr *mi, unsigned def, int fi)
179  : MI(mi), Def(def), FI(fi) {}
180  };
181 
182  void HoistRegionPostRA();
183 
184  void HoistPostRA(MachineInstr *MI, unsigned Def);
185 
186  void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
187  BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs,
188  SmallVectorImpl<CandidateInfo> &Candidates);
189 
190  void AddToLiveIns(unsigned Reg);
191 
192  bool IsLICMCandidate(MachineInstr &I);
193 
194  bool IsLoopInvariantInst(MachineInstr &I);
195 
196  bool HasLoopPHIUse(const MachineInstr *MI) const;
197 
198  bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
199  unsigned Reg) const;
200 
201  bool IsCheapInstruction(MachineInstr &MI) const;
202 
203  bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost,
204  bool Cheap);
205 
206  void UpdateBackTraceRegPressure(const MachineInstr *MI);
207 
208  bool IsProfitableToHoist(MachineInstr &MI);
209 
210  bool IsGuaranteedToExecute(MachineBasicBlock *BB);
211 
212  void EnterScope(MachineBasicBlock *MBB);
213 
214  void ExitScope(MachineBasicBlock *MBB);
215 
216  void ExitScopeIfDone(
217  MachineDomTreeNode *Node,
220 
221  void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
222 
223  void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
224 
225  void SinkIntoLoop();
226 
227  void InitRegPressure(MachineBasicBlock *BB);
228 
229  DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
230  bool ConsiderSeen,
231  bool ConsiderUnseenAsDef);
232 
233  void UpdateRegPressure(const MachineInstr *MI,
234  bool ConsiderUnseenAsDef = false);
235 
236  MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
237 
238  const MachineInstr *
239  LookForDuplicate(const MachineInstr *MI,
240  std::vector<const MachineInstr *> &PrevMIs);
241 
242  bool EliminateCSE(
243  MachineInstr *MI,
244  DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI);
245 
246  bool MayCSE(MachineInstr *MI);
247 
248  bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
249 
250  void InitCSEMap(MachineBasicBlock *BB);
251 
252  MachineBasicBlock *getCurPreheader();
253  };
254 
255 } // end anonymous namespace
256 
257 char MachineLICM::ID = 0;
258 
260 
261 INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE,
262  "Machine Loop Invariant Code Motion", false, false)
267  "Machine Loop Invariant Code Motion", false, false)
268 
269 /// Test if the given loop is the outer-most loop that has a unique predecessor.
271  // Check whether this loop even has a unique predecessor.
272  if (!CurLoop->getLoopPredecessor())
273  return false;
274  // Ok, now check to see if any of its outer loops do.
275  for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
276  if (L->getLoopPredecessor())
277  return false;
278  // None of them did, so this is the outermost with a unique predecessor.
279  return true;
280 }
281 
282 bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
283  if (skipFunction(*MF.getFunction()))
284  return false;
285 
286  Changed = FirstInLoop = false;
287  const TargetSubtargetInfo &ST = MF.getSubtarget();
288  TII = ST.getInstrInfo();
289  TLI = ST.getTargetLowering();
290  TRI = ST.getRegisterInfo();
291  MFI = &MF.getFrameInfo();
292  MRI = &MF.getRegInfo();
293  SchedModel.init(ST.getSchedModel(), &ST, TII);
294 
295  PreRegAlloc = MRI->isSSA();
296 
297  if (PreRegAlloc)
298  DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
299  else
300  DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
301  DEBUG(dbgs() << MF.getName() << " ********\n");
302 
303  if (PreRegAlloc) {
304  // Estimate register pressure during pre-regalloc pass.
305  unsigned NumRPS = TRI->getNumRegPressureSets();
306  RegPressure.resize(NumRPS);
307  std::fill(RegPressure.begin(), RegPressure.end(), 0);
308  RegLimit.resize(NumRPS);
309  for (unsigned i = 0, e = NumRPS; i != e; ++i)
310  RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
311  }
312 
313  // Get our Loop information...
314  MLI = &getAnalysis<MachineLoopInfo>();
315  DT = &getAnalysis<MachineDominatorTree>();
316  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
317 
318  SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
319  while (!Worklist.empty()) {
320  CurLoop = Worklist.pop_back_val();
321  CurPreheader = nullptr;
322  ExitBlocks.clear();
323 
324  // If this is done before regalloc, only visit outer-most preheader-sporting
325  // loops.
326  if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
327  Worklist.append(CurLoop->begin(), CurLoop->end());
328  continue;
329  }
330 
331  CurLoop->getExitBlocks(ExitBlocks);
332 
333  if (!PreRegAlloc)
334  HoistRegionPostRA();
335  else {
336  // CSEMap is initialized for loop header when the first instruction is
337  // being hoisted.
338  MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
339  FirstInLoop = true;
340  HoistOutOfLoop(N);
341  CSEMap.clear();
342 
344  SinkIntoLoop();
345  }
346  }
347 
348  return Changed;
349 }
350 
351 /// Return true if instruction stores to the specified frame.
352 static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
353  // If we lost memory operands, conservatively assume that the instruction
354  // writes to all slots.
355  if (MI->memoperands_empty())
356  return true;
357  for (const MachineMemOperand *MemOp : MI->memoperands()) {
358  if (!MemOp->isStore() || !MemOp->getPseudoValue())
359  continue;
361  dyn_cast<FixedStackPseudoSourceValue>(MemOp->getPseudoValue())) {
362  if (Value->getFrameIndex() == FI)
363  return true;
364  }
365  }
366  return false;
367 }
368 
369 /// Examine the instruction for potentai LICM candidate. Also
370 /// gather register def and frame object update information.
371 void MachineLICM::ProcessMI(MachineInstr *MI,
372  BitVector &PhysRegDefs,
373  BitVector &PhysRegClobbers,
374  SmallSet<int, 32> &StoredFIs,
375  SmallVectorImpl<CandidateInfo> &Candidates) {
376  bool RuledOut = false;
377  bool HasNonInvariantUse = false;
378  unsigned Def = 0;
379  for (const MachineOperand &MO : MI->operands()) {
380  if (MO.isFI()) {
381  // Remember if the instruction stores to the frame index.
382  int FI = MO.getIndex();
383  if (!StoredFIs.count(FI) &&
384  MFI->isSpillSlotObjectIndex(FI) &&
385  InstructionStoresToFI(MI, FI))
386  StoredFIs.insert(FI);
387  HasNonInvariantUse = true;
388  continue;
389  }
390 
391  // We can't hoist an instruction defining a physreg that is clobbered in
392  // the loop.
393  if (MO.isRegMask()) {
394  PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
395  continue;
396  }
397 
398  if (!MO.isReg())
399  continue;
400  unsigned Reg = MO.getReg();
401  if (!Reg)
402  continue;
404  "Not expecting virtual register!");
405 
406  if (!MO.isDef()) {
407  if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
408  // If it's using a non-loop-invariant register, then it's obviously not
409  // safe to hoist.
410  HasNonInvariantUse = true;
411  continue;
412  }
413 
414  if (MO.isImplicit()) {
415  for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
416  PhysRegClobbers.set(*AI);
417  if (!MO.isDead())
418  // Non-dead implicit def? This cannot be hoisted.
419  RuledOut = true;
420  // No need to check if a dead implicit def is also defined by
421  // another instruction.
422  continue;
423  }
424 
425  // FIXME: For now, avoid instructions with multiple defs, unless
426  // it's a dead implicit def.
427  if (Def)
428  RuledOut = true;
429  else
430  Def = Reg;
431 
432  // If we have already seen another instruction that defines the same
433  // register, then this is not safe. Two defs is indicated by setting a
434  // PhysRegClobbers bit.
435  for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
436  if (PhysRegDefs.test(*AS))
437  PhysRegClobbers.set(*AS);
438  PhysRegDefs.set(*AS);
439  }
440  if (PhysRegClobbers.test(Reg))
441  // MI defined register is seen defined by another instruction in
442  // the loop, it cannot be a LICM candidate.
443  RuledOut = true;
444  }
445 
446  // Only consider reloads for now and remats which do not have register
447  // operands. FIXME: Consider unfold load folding instructions.
448  if (Def && !RuledOut) {
449  int FI = std::numeric_limits<int>::min();
450  if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
451  (TII->isLoadFromStackSlot(*MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
452  Candidates.push_back(CandidateInfo(MI, Def, FI));
453  }
454 }
455 
456 /// Walk the specified region of the CFG and hoist loop invariants out to the
457 /// preheader.
458 void MachineLICM::HoistRegionPostRA() {
459  MachineBasicBlock *Preheader = getCurPreheader();
460  if (!Preheader)
461  return;
462 
463  unsigned NumRegs = TRI->getNumRegs();
464  BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
465  BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
466 
468  SmallSet<int, 32> StoredFIs;
469 
470  // Walk the entire region, count number of defs for each register, and
471  // collect potential LICM candidates.
472  const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
473  for (MachineBasicBlock *BB : Blocks) {
474  // If the header of the loop containing this basic block is a landing pad,
475  // then don't try to hoist instructions out of this loop.
476  const MachineLoop *ML = MLI->getLoopFor(BB);
477  if (ML && ML->getHeader()->isEHPad()) continue;
478 
479  // Conservatively treat live-in's as an external def.
480  // FIXME: That means a reload that're reused in successor block(s) will not
481  // be LICM'ed.
482  for (const auto &LI : BB->liveins()) {
483  for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI)
484  PhysRegDefs.set(*AI);
485  }
486 
487  SpeculationState = SpeculateUnknown;
488  for (MachineInstr &MI : *BB)
489  ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
490  }
491 
492  // Gather the registers read / clobbered by the terminator.
493  BitVector TermRegs(NumRegs);
495  if (TI != Preheader->end()) {
496  for (const MachineOperand &MO : TI->operands()) {
497  if (!MO.isReg())
498  continue;
499  unsigned Reg = MO.getReg();
500  if (!Reg)
501  continue;
502  for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
503  TermRegs.set(*AI);
504  }
505  }
506 
507  // Now evaluate whether the potential candidates qualify.
508  // 1. Check if the candidate defined register is defined by another
509  // instruction in the loop.
510  // 2. If the candidate is a load from stack slot (always true for now),
511  // check if the slot is stored anywhere in the loop.
512  // 3. Make sure candidate def should not clobber
513  // registers read by the terminator. Similarly its def should not be
514  // clobbered by the terminator.
515  for (CandidateInfo &Candidate : Candidates) {
516  if (Candidate.FI != std::numeric_limits<int>::min() &&
517  StoredFIs.count(Candidate.FI))
518  continue;
519 
520  unsigned Def = Candidate.Def;
521  if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
522  bool Safe = true;
523  MachineInstr *MI = Candidate.MI;
524  for (const MachineOperand &MO : MI->operands()) {
525  if (!MO.isReg() || MO.isDef() || !MO.getReg())
526  continue;
527  unsigned Reg = MO.getReg();
528  if (PhysRegDefs.test(Reg) ||
529  PhysRegClobbers.test(Reg)) {
530  // If it's using a non-loop-invariant register, then it's obviously
531  // not safe to hoist.
532  Safe = false;
533  break;
534  }
535  }
536  if (Safe)
537  HoistPostRA(MI, Candidate.Def);
538  }
539  }
540 }
541 
542 /// Add register 'Reg' to the livein sets of BBs in the current loop, and make
543 /// sure it is not killed by any instructions in the loop.
544 void MachineLICM::AddToLiveIns(unsigned Reg) {
545  const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
546  for (MachineBasicBlock *BB : Blocks) {
547  if (!BB->isLiveIn(Reg))
548  BB->addLiveIn(Reg);
549  for (MachineInstr &MI : *BB) {
550  for (MachineOperand &MO : MI.operands()) {
551  if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
552  if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
553  MO.setIsKill(false);
554  }
555  }
556  }
557 }
558 
559 /// When an instruction is found to only use loop invariant operands that is
560 /// safe to hoist, this instruction is called to do the dirty work.
561 void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
562  MachineBasicBlock *Preheader = getCurPreheader();
563 
564  // Now move the instructions to the predecessor, inserting it before any
565  // terminator instructions.
566  DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#"
567  << MI->getParent()->getNumber() << ": " << *MI);
568 
569  // Splice the instruction to the preheader.
570  MachineBasicBlock *MBB = MI->getParent();
571  Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
572 
573  // Add register to livein list to all the BBs in the current loop since a
574  // loop invariant must be kept live throughout the whole loop. This is
575  // important to ensure later passes do not scavenge the def register.
576  AddToLiveIns(Def);
577 
578  ++NumPostRAHoisted;
579  Changed = true;
580 }
581 
582 /// Check if this mbb is guaranteed to execute. If not then a load from this mbb
583 /// may not be safe to hoist.
584 bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
585  if (SpeculationState != SpeculateUnknown)
586  return SpeculationState == SpeculateFalse;
587 
588  if (BB != CurLoop->getHeader()) {
589  // Check loop exiting blocks.
590  SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
591  CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
592  for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
593  if (!DT->dominates(BB, CurrentLoopExitingBlock)) {
594  SpeculationState = SpeculateTrue;
595  return false;
596  }
597  }
598 
599  SpeculationState = SpeculateFalse;
600  return true;
601 }
602 
603 void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
604  DEBUG(dbgs() << "Entering BB#" << MBB->getNumber() << '\n');
605 
606  // Remember livein register pressure.
607  BackTrace.push_back(RegPressure);
608 }
609 
610 void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
611  DEBUG(dbgs() << "Exiting BB#" << MBB->getNumber() << '\n');
612  BackTrace.pop_back();
613 }
614 
615 /// Destroy scope for the MBB that corresponds to the given dominator tree node
616 /// if its a leaf or all of its children are done. Walk up the dominator tree to
617 /// destroy ancestors which are now done.
618 void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
621  if (OpenChildren[Node])
622  return;
623 
624  // Pop scope.
625  ExitScope(Node->getBlock());
626 
627  // Now traverse upwards to pop ancestors whose offsprings are all done.
628  while (MachineDomTreeNode *Parent = ParentMap[Node]) {
629  unsigned Left = --OpenChildren[Parent];
630  if (Left != 0)
631  break;
632  ExitScope(Parent->getBlock());
633  Node = Parent;
634  }
635 }
636 
637 /// Walk the specified loop in the CFG (defined by all blocks dominated by the
638 /// specified header block, and that are in the current loop) in depth first
639 /// order w.r.t the DominatorTree. This allows us to visit definitions before
640 /// uses, allowing us to hoist a loop body in one pass without iteration.
641 void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
642  MachineBasicBlock *Preheader = getCurPreheader();
643  if (!Preheader)
644  return;
645 
650 
651  // Perform a DFS walk to determine the order of visit.
652  WorkList.push_back(HeaderN);
653  while (!WorkList.empty()) {
654  MachineDomTreeNode *Node = WorkList.pop_back_val();
655  assert(Node && "Null dominator tree node?");
656  MachineBasicBlock *BB = Node->getBlock();
657 
658  // If the header of the loop containing this basic block is a landing pad,
659  // then don't try to hoist instructions out of this loop.
660  const MachineLoop *ML = MLI->getLoopFor(BB);
661  if (ML && ML->getHeader()->isEHPad())
662  continue;
663 
664  // If this subregion is not in the top level loop at all, exit.
665  if (!CurLoop->contains(BB))
666  continue;
667 
668  Scopes.push_back(Node);
669  const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
670  unsigned NumChildren = Children.size();
671 
672  // Don't hoist things out of a large switch statement. This often causes
673  // code to be hoisted that wasn't going to be executed, and increases
674  // register pressure in a situation where it's likely to matter.
675  if (BB->succ_size() >= 25)
676  NumChildren = 0;
677 
678  OpenChildren[Node] = NumChildren;
679  // Add children in reverse order as then the next popped worklist node is
680  // the first child of this node. This means we ultimately traverse the
681  // DOM tree in exactly the same order as if we'd recursed.
682  for (int i = (int)NumChildren-1; i >= 0; --i) {
683  MachineDomTreeNode *Child = Children[i];
684  ParentMap[Child] = Node;
685  WorkList.push_back(Child);
686  }
687  }
688 
689  if (Scopes.size() == 0)
690  return;
691 
692  // Compute registers which are livein into the loop headers.
693  RegSeen.clear();
694  BackTrace.clear();
695  InitRegPressure(Preheader);
696 
697  // Now perform LICM.
698  for (MachineDomTreeNode *Node : Scopes) {
699  MachineBasicBlock *MBB = Node->getBlock();
700 
701  EnterScope(MBB);
702 
703  // Process the block
704  SpeculationState = SpeculateUnknown;
706  MII = MBB->begin(), E = MBB->end(); MII != E; ) {
707  MachineBasicBlock::iterator NextMII = MII; ++NextMII;
708  MachineInstr *MI = &*MII;
709  if (!Hoist(MI, Preheader))
710  UpdateRegPressure(MI);
711  MII = NextMII;
712  }
713 
714  // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
715  ExitScopeIfDone(Node, OpenChildren, ParentMap);
716  }
717 }
718 
719 /// Sink instructions into loops if profitable. This especially tries to prevent
720 /// register spills caused by register pressure if there is little to no
721 /// overhead moving instructions into loops.
722 void MachineLICM::SinkIntoLoop() {
723  MachineBasicBlock *Preheader = getCurPreheader();
724  if (!Preheader)
725  return;
726 
728  for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
729  I != Preheader->instr_end(); ++I) {
730  // We need to ensure that we can safely move this instruction into the loop.
731  // As such, it must not have side-effects, e.g. such as a call has.
732  if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I))
733  Candidates.push_back(&*I);
734  }
735 
736  for (MachineInstr *I : Candidates) {
737  const MachineOperand &MO = I->getOperand(0);
738  if (!MO.isDef() || !MO.isReg() || !MO.getReg())
739  continue;
740  if (!MRI->hasOneDef(MO.getReg()))
741  continue;
742  bool CanSink = true;
743  MachineBasicBlock *B = nullptr;
744  for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
745  // FIXME: Come up with a proper cost model that estimates whether sinking
746  // the instruction (and thus possibly executing it on every loop
747  // iteration) is more expensive than a register.
748  // For now assumes that copies are cheap and thus almost always worth it.
749  if (!MI.isCopy()) {
750  CanSink = false;
751  break;
752  }
753  if (!B) {
754  B = MI.getParent();
755  continue;
756  }
757  B = DT->findNearestCommonDominator(B, MI.getParent());
758  if (!B) {
759  CanSink = false;
760  break;
761  }
762  }
763  if (!CanSink || !B || B == Preheader)
764  continue;
765  B->splice(B->getFirstNonPHI(), Preheader, I);
766  }
767 }
768 
769 static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
770  return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
771 }
772 
773 /// Find all virtual register references that are liveout of the preheader to
774 /// initialize the starting "register pressure". Note this does not count live
775 /// through (livein but not used) registers.
776 void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
777  std::fill(RegPressure.begin(), RegPressure.end(), 0);
778 
779  // If the preheader has only a single predecessor and it ends with a
780  // fallthrough or an unconditional branch, then scan its predecessor for live
781  // defs as well. This happens whenever the preheader is created by splitting
782  // the critical edge from the loop predecessor to the loop header.
783  if (BB->pred_size() == 1) {
784  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
786  if (!TII->analyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
787  InitRegPressure(*BB->pred_begin());
788  }
789 
790  for (const MachineInstr &MI : *BB)
791  UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true);
792 }
793 
794 /// Update estimate of register pressure after the specified instruction.
795 void MachineLICM::UpdateRegPressure(const MachineInstr *MI,
796  bool ConsiderUnseenAsDef) {
797  auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
798  for (const auto &RPIdAndCost : Cost) {
799  unsigned Class = RPIdAndCost.first;
800  if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
801  RegPressure[Class] = 0;
802  else
803  RegPressure[Class] += RPIdAndCost.second;
804  }
805 }
806 
807 /// Calculate the additional register pressure that the registers used in MI
808 /// cause.
809 ///
810 /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
811 /// figure out which usages are live-ins.
812 /// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
814 MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
815  bool ConsiderUnseenAsDef) {
817  if (MI->isImplicitDef())
818  return Cost;
819  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
820  const MachineOperand &MO = MI->getOperand(i);
821  if (!MO.isReg() || MO.isImplicit())
822  continue;
823  unsigned Reg = MO.getReg();
825  continue;
826 
827  // FIXME: It seems bad to use RegSeen only for some of these calculations.
828  bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;
829  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
830 
832  int RCCost = 0;
833  if (MO.isDef())
834  RCCost = W.RegWeight;
835  else {
836  bool isKill = isOperandKill(MO, MRI);
837  if (isNew && !isKill && ConsiderUnseenAsDef)
838  // Haven't seen this, it must be a livein.
839  RCCost = W.RegWeight;
840  else if (!isNew && isKill)
841  RCCost = -W.RegWeight;
842  }
843  if (RCCost == 0)
844  continue;
845  const int *PS = TRI->getRegClassPressureSets(RC);
846  for (; *PS != -1; ++PS) {
847  if (Cost.find(*PS) == Cost.end())
848  Cost[*PS] = RCCost;
849  else
850  Cost[*PS] += RCCost;
851  }
852  }
853  return Cost;
854 }
855 
856 /// Return true if this machine instruction loads from global offset table or
857 /// constant pool.
859  assert(MI.mayLoad() && "Expected MI that loads!");
860 
861  // If we lost memory operands, conservatively assume that the instruction
862  // reads from everything..
863  if (MI.memoperands_empty())
864  return true;
865 
866  for (MachineMemOperand *MemOp : MI.memoperands())
867  if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
868  if (PSV->isGOT() || PSV->isConstantPool())
869  return true;
870 
871  return false;
872 }
873 
874 /// Returns true if the instruction may be a suitable candidate for LICM.
875 /// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
876 bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
877  // Check if it's safe to move the instruction.
878  bool DontMoveAcrossStore = true;
879  if (!I.isSafeToMove(AA, DontMoveAcrossStore))
880  return false;
881 
882  // If it is load then check if it is guaranteed to execute by making sure that
883  // it dominates all exiting blocks. If it doesn't, then there is a path out of
884  // the loop which does not execute this load, so we can't hoist it. Loads
885  // from constant memory are not safe to speculate all the time, for example
886  // indexed load from a jump table.
887  // Stores and side effects are already checked by isSafeToMove.
888  if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) &&
889  !IsGuaranteedToExecute(I.getParent()))
890  return false;
891 
892  return true;
893 }
894 
895 /// Returns true if the instruction is loop invariant.
896 /// I.e., all virtual register operands are defined outside of the loop,
897 /// physical registers aren't accessed explicitly, and there are no side
898 /// effects that aren't captured by the operands or other flags.
899 bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
900  if (!IsLICMCandidate(I))
901  return false;
902 
903  // The instruction is loop invariant if all of its operands are.
904  for (const MachineOperand &MO : I.operands()) {
905  if (!MO.isReg())
906  continue;
907 
908  unsigned Reg = MO.getReg();
909  if (Reg == 0) continue;
910 
911  // Don't hoist an instruction that uses or defines a physical register.
913  if (MO.isUse()) {
914  // If the physreg has no defs anywhere, it's just an ambient register
915  // and we can freely move its uses. Alternatively, if it's allocatable,
916  // it could get allocated to something with a def during allocation.
917  // However, if the physreg is known to always be caller saved/restored
918  // then this use is safe to hoist.
919  if (!MRI->isConstantPhysReg(Reg) &&
920  !(TRI->isCallerPreservedPhysReg(Reg, *I.getMF())))
921  return false;
922  // Otherwise it's safe to move.
923  continue;
924  } else if (!MO.isDead()) {
925  // A def that isn't dead. We can't move it.
926  return false;
927  } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
928  // If the reg is live into the loop, we can't hoist an instruction
929  // which would clobber it.
930  return false;
931  }
932  }
933 
934  if (!MO.isUse())
935  continue;
936 
937  assert(MRI->getVRegDef(Reg) &&
938  "Machine instr not mapped for this vreg?!");
939 
940  // If the loop contains the definition of an operand, then the instruction
941  // isn't loop invariant.
942  if (CurLoop->contains(MRI->getVRegDef(Reg)))
943  return false;
944  }
945 
946  // If we got this far, the instruction is loop invariant!
947  return true;
948 }
949 
950 /// Return true if the specified instruction is used by a phi node and hoisting
951 /// it could cause a copy to be inserted.
952 bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
954  do {
955  MI = Work.pop_back_val();
956  for (const MachineOperand &MO : MI->operands()) {
957  if (!MO.isReg() || !MO.isDef())
958  continue;
959  unsigned Reg = MO.getReg();
961  continue;
962  for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
963  // A PHI may cause a copy to be inserted.
964  if (UseMI.isPHI()) {
965  // A PHI inside the loop causes a copy because the live range of Reg is
966  // extended across the PHI.
967  if (CurLoop->contains(&UseMI))
968  return true;
969  // A PHI in an exit block can cause a copy to be inserted if the PHI
970  // has multiple predecessors in the loop with different values.
971  // For now, approximate by rejecting all exit blocks.
972  if (isExitBlock(UseMI.getParent()))
973  return true;
974  continue;
975  }
976  // Look past copies as well.
977  if (UseMI.isCopy() && CurLoop->contains(&UseMI))
978  Work.push_back(&UseMI);
979  }
980  }
981  } while (!Work.empty());
982  return false;
983 }
984 
985 /// Compute operand latency between a def of 'Reg' and an use in the current
986 /// loop, return true if the target considered it high.
987 bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
988  unsigned DefIdx, unsigned Reg) const {
989  if (MRI->use_nodbg_empty(Reg))
990  return false;
991 
992  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
993  if (UseMI.isCopyLike())
994  continue;
995  if (!CurLoop->contains(UseMI.getParent()))
996  continue;
997  for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) {
998  const MachineOperand &MO = UseMI.getOperand(i);
999  if (!MO.isReg() || !MO.isUse())
1000  continue;
1001  unsigned MOReg = MO.getReg();
1002  if (MOReg != Reg)
1003  continue;
1004 
1005  if (TII->hasHighOperandLatency(SchedModel, MRI, MI, DefIdx, UseMI, i))
1006  return true;
1007  }
1008 
1009  // Only look at the first in loop use.
1010  break;
1011  }
1012 
1013  return false;
1014 }
1015 
1016 /// Return true if the instruction is marked "cheap" or the operand latency
1017 /// between its def and a use is one or less.
1018 bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
1019  if (TII->isAsCheapAsAMove(MI) || MI.isCopyLike())
1020  return true;
1021 
1022  bool isCheap = false;
1023  unsigned NumDefs = MI.getDesc().getNumDefs();
1024  for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
1025  MachineOperand &DefMO = MI.getOperand(i);
1026  if (!DefMO.isReg() || !DefMO.isDef())
1027  continue;
1028  --NumDefs;
1029  unsigned Reg = DefMO.getReg();
1031  continue;
1032 
1033  if (!TII->hasLowDefLatency(SchedModel, MI, i))
1034  return false;
1035  isCheap = true;
1036  }
1037 
1038  return isCheap;
1039 }
1040 
1041 /// Visit BBs from header to current BB, check if hoisting an instruction of the
1042 /// given cost matrix can cause high register pressure.
1043 bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
1044  bool CheapInstr) {
1045  for (const auto &RPIdAndCost : Cost) {
1046  if (RPIdAndCost.second <= 0)
1047  continue;
1048 
1049  unsigned Class = RPIdAndCost.first;
1050  int Limit = RegLimit[Class];
1051 
1052  // Don't hoist cheap instructions if they would increase register pressure,
1053  // even if we're under the limit.
1054  if (CheapInstr && !HoistCheapInsts)
1055  return true;
1056 
1057  for (const auto &RP : BackTrace)
1058  if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
1059  return true;
1060  }
1061 
1062  return false;
1063 }
1064 
1065 /// Traverse the back trace from header to the current block and update their
1066 /// register pressures to reflect the effect of hoisting MI from the current
1067 /// block to the preheader.
1068 void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
1069  // First compute the 'cost' of the instruction, i.e. its contribution
1070  // to register pressure.
1071  auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
1072  /*ConsiderUnseenAsDef=*/false);
1073 
1074  // Update register pressure of blocks from loop header to current block.
1075  for (auto &RP : BackTrace)
1076  for (const auto &RPIdAndCost : Cost)
1077  RP[RPIdAndCost.first] += RPIdAndCost.second;
1078 }
1079 
1080 /// Return true if it is potentially profitable to hoist the given loop
1081 /// invariant.
1082 bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
1083  if (MI.isImplicitDef())
1084  return true;
1085 
1086  // Besides removing computation from the loop, hoisting an instruction has
1087  // these effects:
1088  //
1089  // - The value defined by the instruction becomes live across the entire
1090  // loop. This increases register pressure in the loop.
1091  //
1092  // - If the value is used by a PHI in the loop, a copy will be required for
1093  // lowering the PHI after extending the live range.
1094  //
1095  // - When hoisting the last use of a value in the loop, that value no longer
1096  // needs to be live in the loop. This lowers register pressure in the loop.
1097 
1098  bool CheapInstr = IsCheapInstruction(MI);
1099  bool CreatesCopy = HasLoopPHIUse(&MI);
1100 
1101  // Don't hoist a cheap instruction if it would create a copy in the loop.
1102  if (CheapInstr && CreatesCopy) {
1103  DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
1104  return false;
1105  }
1106 
1107  // Rematerializable instructions should always be hoisted since the register
1108  // allocator can just pull them down again when needed.
1109  if (TII->isTriviallyReMaterializable(MI, AA))
1110  return true;
1111 
1112  // FIXME: If there are long latency loop-invariant instructions inside the
1113  // loop at this point, why didn't the optimizer's LICM hoist them?
1114  for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
1115  const MachineOperand &MO = MI.getOperand(i);
1116  if (!MO.isReg() || MO.isImplicit())
1117  continue;
1118  unsigned Reg = MO.getReg();
1120  continue;
1121  if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
1122  DEBUG(dbgs() << "Hoist High Latency: " << MI);
1123  ++NumHighLatency;
1124  return true;
1125  }
1126  }
1127 
1128  // Estimate register pressure to determine whether to LICM the instruction.
1129  // In low register pressure situation, we can be more aggressive about
1130  // hoisting. Also, favors hoisting long latency instructions even in
1131  // moderately high pressure situation.
1132  // Cheap instructions will only be hoisted if they don't increase register
1133  // pressure at all.
1134  auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false,
1135  /*ConsiderUnseenAsDef=*/false);
1136 
1137  // Visit BBs from header to current BB, if hoisting this doesn't cause
1138  // high register pressure, then it's safe to proceed.
1139  if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
1140  DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
1141  ++NumLowRP;
1142  return true;
1143  }
1144 
1145  // Don't risk increasing register pressure if it would create copies.
1146  if (CreatesCopy) {
1147  DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
1148  return false;
1149  }
1150 
1151  // Do not "speculate" in high register pressure situation. If an
1152  // instruction is not guaranteed to be executed in the loop, it's best to be
1153  // conservative.
1154  if (AvoidSpeculation &&
1155  (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
1156  DEBUG(dbgs() << "Won't speculate: " << MI);
1157  return false;
1158  }
1159 
1160  // High register pressure situation, only hoist if the instruction is going
1161  // to be remat'ed.
1162  if (!TII->isTriviallyReMaterializable(MI, AA) &&
1164  DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
1165  return false;
1166  }
1167 
1168  return true;
1169 }
1170 
1171 /// Unfold a load from the given machineinstr if the load itself could be
1172 /// hoisted. Return the unfolded and hoistable load, or null if the load
1173 /// couldn't be unfolded or if it wouldn't be hoistable.
1174 MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
1175  // Don't unfold simple loads.
1176  if (MI->canFoldAsLoad())
1177  return nullptr;
1178 
1179  // If not, we may be able to unfold a load and hoist that.
1180  // First test whether the instruction is loading from an amenable
1181  // memory location.
1182  if (!MI->isDereferenceableInvariantLoad(AA))
1183  return nullptr;
1184 
1185  // Next determine the register class for a temporary register.
1186  unsigned LoadRegIndex;
1187  unsigned NewOpc =
1189  /*UnfoldLoad=*/true,
1190  /*UnfoldStore=*/false,
1191  &LoadRegIndex);
1192  if (NewOpc == 0) return nullptr;
1193  const MCInstrDesc &MID = TII->get(NewOpc);
1194  MachineFunction &MF = *MI->getMF();
1195  const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
1196  // Ok, we're unfolding. Create a temporary register and do the unfold.
1197  unsigned Reg = MRI->createVirtualRegister(RC);
1198 
1200  bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg,
1201  /*UnfoldLoad=*/true,
1202  /*UnfoldStore=*/false, NewMIs);
1203  (void)Success;
1204  assert(Success &&
1205  "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
1206  "succeeded!");
1207  assert(NewMIs.size() == 2 &&
1208  "Unfolded a load into multiple instructions!");
1209  MachineBasicBlock *MBB = MI->getParent();
1211  MBB->insert(Pos, NewMIs[0]);
1212  MBB->insert(Pos, NewMIs[1]);
1213  // If unfolding produced a load that wasn't loop-invariant or profitable to
1214  // hoist, discard the new instructions and bail.
1215  if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
1216  NewMIs[0]->eraseFromParent();
1217  NewMIs[1]->eraseFromParent();
1218  return nullptr;
1219  }
1220 
1221  // Update register pressure for the unfolded instruction.
1222  UpdateRegPressure(NewMIs[1]);
1223 
1224  // Otherwise we successfully unfolded a load that we can hoist.
1225  MI->eraseFromParent();
1226  return NewMIs[0];
1227 }
1228 
1229 /// Initialize the CSE map with instructions that are in the current loop
1230 /// preheader that may become duplicates of instructions that are hoisted
1231 /// out of the loop.
1232 void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
1233  for (MachineInstr &MI : *BB)
1234  CSEMap[MI.getOpcode()].push_back(&MI);
1235 }
1236 
1237 /// Find an instruction amount PrevMIs that is a duplicate of MI.
1238 /// Return this instruction if it's found.
1239 const MachineInstr*
1240 MachineLICM::LookForDuplicate(const MachineInstr *MI,
1241  std::vector<const MachineInstr*> &PrevMIs) {
1242  for (const MachineInstr *PrevMI : PrevMIs)
1243  if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr)))
1244  return PrevMI;
1245 
1246  return nullptr;
1247 }
1248 
1249 /// Given a LICM'ed instruction, look for an instruction on the preheader that
1250 /// computes the same value. If it's found, do a RAU on with the definition of
1251 /// the existing instruction rather than hoisting the instruction to the
1252 /// preheader.
1253 bool MachineLICM::EliminateCSE(MachineInstr *MI,
1254  DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI) {
1255  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1256  // the undef property onto uses.
1257  if (CI == CSEMap.end() || MI->isImplicitDef())
1258  return false;
1259 
1260  if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
1261  DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
1262 
1263  // Replace virtual registers defined by MI by their counterparts defined
1264  // by Dup.
1266  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1267  const MachineOperand &MO = MI->getOperand(i);
1268 
1269  // Physical registers may not differ here.
1270  assert((!MO.isReg() || MO.getReg() == 0 ||
1272  MO.getReg() == Dup->getOperand(i).getReg()) &&
1273  "Instructions with different phys regs are not identical!");
1274 
1275  if (MO.isReg() && MO.isDef() &&
1277  Defs.push_back(i);
1278  }
1279 
1281  for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
1282  unsigned Idx = Defs[i];
1283  unsigned Reg = MI->getOperand(Idx).getReg();
1284  unsigned DupReg = Dup->getOperand(Idx).getReg();
1285  OrigRCs.push_back(MRI->getRegClass(DupReg));
1286 
1287  if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
1288  // Restore old RCs if more than one defs.
1289  for (unsigned j = 0; j != i; ++j)
1290  MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
1291  return false;
1292  }
1293  }
1294 
1295  for (unsigned Idx : Defs) {
1296  unsigned Reg = MI->getOperand(Idx).getReg();
1297  unsigned DupReg = Dup->getOperand(Idx).getReg();
1298  MRI->replaceRegWith(Reg, DupReg);
1299  MRI->clearKillFlags(DupReg);
1300  }
1301 
1302  MI->eraseFromParent();
1303  ++NumCSEed;
1304  return true;
1305  }
1306  return false;
1307 }
1308 
1309 /// Return true if the given instruction will be CSE'd if it's hoisted out of
1310 /// the loop.
1311 bool MachineLICM::MayCSE(MachineInstr *MI) {
1312  unsigned Opcode = MI->getOpcode();
1314  CI = CSEMap.find(Opcode);
1315  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1316  // the undef property onto uses.
1317  if (CI == CSEMap.end() || MI->isImplicitDef())
1318  return false;
1319 
1320  return LookForDuplicate(MI, CI->second) != nullptr;
1321 }
1322 
1323 /// When an instruction is found to use only loop invariant operands
1324 /// that are safe to hoist, this instruction is called to do the dirty work.
1325 /// It returns true if the instruction is hoisted.
1326 bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
1327  // First check whether we should hoist this instruction.
1328  if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
1329  // If not, try unfolding a hoistable load.
1330  MI = ExtractHoistableLoad(MI);
1331  if (!MI) return false;
1332  }
1333 
1334  // Now move the instructions to the predecessor, inserting it before any
1335  // terminator instructions.
1336  DEBUG({
1337  dbgs() << "Hoisting " << *MI;
1338  if (MI->getParent()->getBasicBlock())
1339  dbgs() << " from BB#" << MI->getParent()->getNumber();
1340  if (Preheader->getBasicBlock())
1341  dbgs() << " to BB#" << Preheader->getNumber();
1342  dbgs() << "\n";
1343  });
1344 
1345  // If this is the first instruction being hoisted to the preheader,
1346  // initialize the CSE map with potential common expressions.
1347  if (FirstInLoop) {
1348  InitCSEMap(Preheader);
1349  FirstInLoop = false;
1350  }
1351 
1352  // Look for opportunity to CSE the hoisted instruction.
1353  unsigned Opcode = MI->getOpcode();
1355  CI = CSEMap.find(Opcode);
1356  if (!EliminateCSE(MI, CI)) {
1357  // Otherwise, splice the instruction to the preheader.
1358  Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
1359 
1360  // Since we are moving the instruction out of its basic block, we do not
1361  // retain its debug location. Doing so would degrade the debugging
1362  // experience and adversely affect the accuracy of profiling information.
1363  MI->setDebugLoc(DebugLoc());
1364 
1365  // Update register pressure for BBs from header to this block.
1366  UpdateBackTraceRegPressure(MI);
1367 
1368  // Clear the kill flags of any register this instruction defines,
1369  // since they may need to be live throughout the entire loop
1370  // rather than just live for part of it.
1371  for (MachineOperand &MO : MI->operands())
1372  if (MO.isReg() && MO.isDef() && !MO.isDead())
1373  MRI->clearKillFlags(MO.getReg());
1374 
1375  // Add to the CSE map.
1376  if (CI != CSEMap.end())
1377  CI->second.push_back(MI);
1378  else
1379  CSEMap[Opcode].push_back(MI);
1380  }
1381 
1382  ++NumHoisted;
1383  Changed = true;
1384 
1385  return true;
1386 }
1387 
1388 /// Get the preheader for the current loop, splitting a critical edge if needed.
1389 MachineBasicBlock *MachineLICM::getCurPreheader() {
1390  // Determine the block to which to hoist instructions. If we can't find a
1391  // suitable loop predecessor, we can't do any hoisting.
1392 
1393  // If we've tried to get a preheader and failed, don't try again.
1394  if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
1395  return nullptr;
1396 
1397  if (!CurPreheader) {
1398  CurPreheader = CurLoop->getLoopPreheader();
1399  if (!CurPreheader) {
1400  MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
1401  if (!Pred) {
1402  CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1403  return nullptr;
1404  }
1405 
1406  CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), *this);
1407  if (!CurPreheader) {
1408  CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1409  return nullptr;
1410  }
1411  }
1412  }
1413  return CurPreheader;
1414 }
BitVector & set()
Definition: BitVector.h:398
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
instr_iterator instr_begin()
bool use_nodbg_empty(unsigned RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register...
virtual unsigned getNumRegPressureSets() const =0
Get the number of dimensions of register pressure.
instr_iterator instr_end()
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
char & MachineLICMID
MachineLICM - This pass performs LICM on machine instructions.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
void init(const MCSchedModel &sm, const TargetSubtargetInfo *sti, const TargetInstrInfo *tii)
Initialize the machine model for instruction scheduling.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
virtual const TargetLowering * getTargetLowering() const
bool test(unsigned Idx) const
Definition: BitVector.h:502
void setBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
setBitsNotInMask - Add a bit to this vector for every &#39;0&#39; bit in Mask.
Definition: BitVector.h:771
virtual const int * getRegClassPressureSets(const TargetRegisterClass *RC) const =0
Get the dimensions of register pressure impacted by this register class.
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:34
bool hasOneDef(unsigned RegNo) const
Return true if there is exactly one operand defining the specified register.
static bool isExitBlock(BasicBlock *BB, const SmallVectorImpl< BasicBlock *> &ExitBlocks)
Return true if the specified block is in the list.
Definition: LCSSA.cpp:64
iterator_range< mmo_iterator > memoperands()
Definition: MachineInstr.h:396
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:332
bool isCopyLike() const
Return true if the instruction behaves like a copy.
Definition: MachineInstr.h:871
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI)
Return true if this machine instruction loads from global offset table or constant pool...
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
A description of a memory reference used in the backend.
Each TargetRegisterClass has a per register weight, and weight limit which must be less than the limi...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:210
Provide an instruction scheduling machine model to CodeGen passes.
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:293
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI)
virtual bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad, bool UnfoldStore, SmallVectorImpl< MachineInstr *> &NewMIs) const
unfoldMemoryOperand - Separate a single instruction which folded a load or a store or a load and a st...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
Reg
All possible values of the reg field in the ModR/M byte.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
virtual bool hasLowDefLatency(const TargetSchedModel &SchedModel, const MachineInstr &DefMI, unsigned DefIdx) const
Compute operand latency of a def of &#39;Reg&#39;.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:290
static cl::opt< bool > AvoidSpeculation("avoid-speculation", cl::desc("MachineLICM should avoid speculation"), cl::init(true), cl::Hidden)
BlockT * getHeader() const
Definition: LoopInfo.h:100
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
void getExitBlocks(SmallVectorImpl< BlockT *> &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:63
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:287
void clear()
Definition: SmallSet.h:119
void clearKillFlags(unsigned Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const
Given a machine instruction descriptor, returns the register class constraint for OpNum...
#define DEBUG_TYPE
Definition: MachineLICM.cpp:57
Base class for the actual dominator tree node.
const std::vector< DomTreeNodeBase * > & getChildren() const
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:363
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they&#39;re not in a MachineFuncti...
virtual const TargetInstrInfo * getInstrInfo() const
MachineBasicBlock * findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B)
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B...
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn&#39;t ...
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
TargetInstrInfo - Interface to description of machine instruction set.
iterator begin() const
bool isSuperRegister(unsigned RegA, unsigned RegB) const
Returns true if RegB is a super-register of RegA.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:146
NodeT * getBlock() const
unsigned getNumRegs() const
Return the number of registers this target has (useful for sizing arrays holding per register informa...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineInstrBuilder & UseMI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
static cl::opt< bool > HoistCheapInsts("hoist-cheap-insts", cl::desc("MachineLICM should hoist even cheap instructions"), cl::init(false), cl::Hidden)
MCRegAliasIterator enumerates all registers aliasing Reg.
Represent the analysis usage information of a pass.
static bool InstructionStoresToFI(const MachineInstr *MI, int FI)
Return true if instruction stores to the specified frame.
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:81
void getExitingBlocks(SmallVectorImpl< BlockT *> &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:35
bool isCopy() const
Definition: MachineInstr.h:857
bool isImplicitDef() const
Definition: MachineInstr.h:831
const AMDGPUAS & AS
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
bool isConstantPhysReg(unsigned PhysReg) const
Returns true if PhysReg is unallocatable and constant throughout the function.
void initializeMachineLICMPass(PassRegistry &)
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
virtual bool isAsCheapAsAMove(const MachineInstr &MI) const
Return true if the instruction is as cheap as a move instruction.
BlockT * getLoopPredecessor() const
If the given loop&#39;s header has exactly one unique predecessor outside the loop, return it...
Definition: LoopInfoImpl.h:134
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
Iterator for intrusive lists based on ilist_node.
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
virtual const RegClassWeight & getRegClassWeight(const TargetRegisterClass *RC) const =0
Get the weight in units of pressure for this register class.
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
virtual bool hasHighOperandLatency(const TargetSchedModel &SchedModel, const MachineRegisterInfo *MRI, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const
Compute operand latency between a def of &#39;Reg&#39; and a use in the current loop.
iterator begin() const
Definition: LoopInfo.h:142
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:385
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:225
unsigned pred_size() const
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
void setDebugLoc(DebugLoc dl)
Replace current source information with new such.
Special value supplied for machine level alias analysis.
Machine Loop Invariant Code static false bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop)
Test if the given loop is the outer-most loop that has a unique predecessor.
iterator end() const
void replaceRegWith(unsigned FromReg, unsigned ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
unsigned succ_size() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:139
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
#define Success
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
Representation of each machine instruction.
Definition: MachineInstr.h:59
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB &#39;Other&#39; at the position From, and insert it into this MBB right before &#39;...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
bool isEHPad() const
Returns true if the block is a landing pad.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
virtual unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const
If the specified machine instruction is a direct load from a stack slot, return the virtual or physic...
virtual unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const =0
Get the register unit pressure limit for this dimension.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:439
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:149
bool canFoldAsLoad(QueryType Type=IgnoreBundle) const
Return true for instructions that can be folded as memory operands in other instructions.
Definition: MachineInstr.h:572
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
virtual bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo *MRI=nullptr) const
Return true if two machine instructions would produce identical values.
iterator end()
Definition: DenseMap.h:79
INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE, "Machine Loop Invariant Code Motion", false, false) INITIALIZE_PASS_END(MachineLICM
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator end() const
Definition: LoopInfo.h:143
virtual bool isCallerPreservedPhysReg(unsigned PhysReg, const MachineFunction &MF) const
Physical registers that may be modified within a function but are guaranteed to be restored before an...
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
bool hasOneNonDBGUse(unsigned RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug instruction using the specified regis...
bool isReg() const
isReg - Tests if this is a MO_Register operand.
iterator_range< use_instr_iterator > use_instructions(unsigned Reg) const
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
Machine Loop Invariant Code Motion
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:626
bool memoperands_empty() const
Return true if we don&#39;t have any memory operands which described the the memory access done by this i...
Definition: MachineInstr.h:394
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
MachineBasicBlock * SplitCriticalEdge(MachineBasicBlock *Succ, Pass &P)
Split the critical edge from this block to the given successor block, and return the newly created bl...
LLVM Value Representation.
Definition: Value.h:73
static cl::opt< bool > SinkInstsToAvoidSpills("sink-insts-to-avoid-spills", cl::desc("MachineLICM should sink instructions into " "loops to avoid register spills"), cl::init(false), cl::Hidden)
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
#define DEBUG(X)
Definition: Debug.h:118
IRTranslator LLVM IR MI
void setRegClass(unsigned Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
A specialized PseudoSourceValue for holding FixedStack values, which must include a frame index...
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:295
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(unsigned Reg) const
bool isTriviallyReMaterializable(const MachineInstr &MI, AliasAnalysis *AA=nullptr) const
Return true if the instruction is trivially rematerializable, meaning it has no side effects and requ...
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, unsigned *LoadRegIndex=nullptr) const
Returns the opcode of the would be new instruction after load / store are unfolded from an instructio...
bool isSafeToMove(AliasAnalysis *AA, bool &SawStore) const
Return true if it is safe to move this instruction.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget&#39;s CPU.
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
This file describes how to lower LLVM code to machine code.
bool isImplicit() const
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:65
void resize(size_type N)
Definition: SmallVector.h:355
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:821