LLVM  4.0.0
MachineLICM.cpp
Go to the documentation of this file.
1 //===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass performs loop invariant code motion on machine instructions. We
11 // attempt to remove as much code from the body of a loop as possible.
12 //
13 // This pass is not intended to be a replacement or a complete alternative
14 // for the LLVM-IR-level LICM pass. It is only designed to hoist simple
15 // constructs that are not exposed before lowering and instruction selection.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/CodeGen/Passes.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Support/Debug.h"
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "machine-licm"
42 
// Hidden boolean command-line flags for this pass. The cl::init(...) argument
// of each flag is its default value.
//
// -avoid-speculation: defaults to true.
43 static cl::opt<bool>
44 AvoidSpeculation("avoid-speculation",
45  cl::desc("MachineLICM should avoid speculation"),
46  cl::init(true), cl::Hidden);
47 
// -hoist-cheap-insts: defaults to false.
48 static cl::opt<bool>
49 HoistCheapInsts("hoist-cheap-insts",
50  cl::desc("MachineLICM should hoist even cheap instructions"),
51  cl::init(false), cl::Hidden);
52 
// -sink-insts-to-avoid-spills: defaults to false.
53 static cl::opt<bool>
54 SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
55  cl::desc("MachineLICM should sink instructions into "
56  "loops to avoid register spills"),
57  cl::init(false), cl::Hidden);
58 
// Statistic counters declared for this pass (reported under DEBUG_TYPE
// "machine-licm"). Within the visible listing only NumPostRAHoisted is
// incremented (in HoistPostRA); the others are presumably bumped in code
// outside this extract.
59 STATISTIC(NumHoisted,
60  "Number of machine instructions hoisted out of loops");
61 STATISTIC(NumLowRP,
62  "Number of instructions hoisted in low reg pressure situation");
63 STATISTIC(NumHighLatency,
64  "Number of high latency instructions hoisted");
65 STATISTIC(NumCSEed,
66  "Number of hoisted machine instructions CSEed");
67 STATISTIC(NumPostRAHoisted,
68  "Number of machine instructions hoisted out of loops post regalloc");
69 
70 namespace {
 /// Machine-level loop invariant code motion pass. The same class handles
 /// both the pre-register-allocation and post-register-allocation variants;
 /// the PreRegAlloc flag selects between them at run time.
 ///
 /// NOTE(review): this listing has gaps in its line numbering (76, 92, 100,
 /// 107, 110, 127, 132, 138-143, 200-201). Members that the code uses but
 /// that are not declared in the visible text (MRI, ExitBlocks, RegPressure,
 /// BackTrace, CSEMap) are presumably declared on those missing lines;
 /// confirm against the complete source.
71  class MachineLICM : public MachineFunctionPass {
72  const TargetInstrInfo *TII;
73  const TargetLoweringBase *TLI;
74  const TargetRegisterInfo *TRI;
75  const MachineFrameInfo *MFI;
 // NOTE(review): listing line 76 is missing here (presumably the MRI member).
77  TargetSchedModel SchedModel;
78  bool PreRegAlloc;
79 
80  // Various analyses that we use...
81  AliasAnalysis *AA; // Alias analysis info.
82  MachineLoopInfo *MLI; // Current MachineLoopInfo
83  MachineDominatorTree *DT; // Machine dominator tree for the cur loop
84 
85  // State that is updated as we process loops
86  bool Changed; // True if a loop is changed.
87  bool FirstInLoop; // True if it's the first LICM in the loop.
88  MachineLoop *CurLoop; // The current loop we are working on.
89  MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
90 
91  // Exit blocks for CurLoop.
 // NOTE(review): listing line 92 (the ExitBlocks declaration) is missing.
93 
 // Return true if MBB is one of the recorded exit blocks of CurLoop.
94  bool isExitBlock(const MachineBasicBlock *MBB) const {
95  return is_contained(ExitBlocks, MBB);
96  }
97 
98  // Track 'estimated' register pressure.
99  SmallSet<unsigned, 32> RegSeen;
 // NOTE(review): listing line 100 is missing here (presumably RegPressure).
101 
102  // Register pressure "limit" per register pressure set. If the pressure
103  // is higher than the limit, then it's considered high.
104  SmallVector<unsigned, 8> RegLimit;
105 
106  // Register pressure on path leading from loop preheader to current BB.
 // NOTE(review): listing line 107 is missing here (presumably BackTrace).
108 
109  // For each opcode, keep a list of potential CSE instructions.
 // NOTE(review): listing line 110 is missing here (presumably CSEMap).
111 
 // Values stored in SpeculationState below.
112  enum {
113  SpeculateFalse = 0,
114  SpeculateTrue = 1,
115  SpeculateUnknown = 2
116  };
117 
118  // If a MBB does not dominate loop exiting blocks then it may not be safe
119  // to hoist loads from this block.
120  // Tri-state: 0 - false, 1 - true, 2 - unknown
121  unsigned SpeculationState;
122 
123  public:
124  static char ID; // Pass identification, replacement for typeid
 // Default construction selects the pre-register-allocation mode.
125  MachineLICM() :
126  MachineFunctionPass(ID), PreRegAlloc(true) {
 // NOTE(review): listing line 127 (constructor body) is missing.
128  }
129 
 // Construct with an explicit pre-/post-RA choice. Note that
 // runOnMachineFunction overwrites PreRegAlloc from MRI->isSSA().
130  explicit MachineLICM(bool PreRA) :
131  MachineFunctionPass(ID), PreRegAlloc(PreRA) {
 // NOTE(review): listing line 132 (constructor body) is missing.
133  }
134 
135  bool runOnMachineFunction(MachineFunction &MF) override;
136 
137  void getAnalysisUsage(AnalysisUsage &AU) const override {
 // NOTE(review): listing lines 138-143 (the body) are missing.
144  }
145 
 // Drop all per-function state held in the pass's containers.
146  void releaseMemory() override {
147  RegSeen.clear();
148  RegPressure.clear();
149  RegLimit.clear();
150  BackTrace.clear();
151  CSEMap.clear();
152  }
153 
154  private:
155  /// Keep track of information about hoisting candidates.
 /// MI is the candidate instruction, Def the register it defines, and FI a
 /// frame index (ProcessMI passes INT_MIN when no stack slot was matched).
156  struct CandidateInfo {
157  MachineInstr *MI;
158  unsigned Def;
159  int FI;
160  CandidateInfo(MachineInstr *mi, unsigned def, int fi)
161  : MI(mi), Def(def), FI(fi) {}
162  };
163 
 // --- Post-register-allocation hoisting ---
164  void HoistRegionPostRA();
165 
166  void HoistPostRA(MachineInstr *MI, unsigned Def);
167 
168  void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
169  BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs,
170  SmallVectorImpl<CandidateInfo> &Candidates);
171 
172  void AddToLiveIns(unsigned Reg);
173 
 // --- Candidate and profitability queries ---
174  bool IsLICMCandidate(MachineInstr &I);
175 
176  bool IsLoopInvariantInst(MachineInstr &I);
177 
178  bool HasLoopPHIUse(const MachineInstr *MI) const;
179 
180  bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
181  unsigned Reg) const;
182 
183  bool IsCheapInstruction(MachineInstr &MI) const;
184 
185  bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost,
186  bool Cheap);
187 
188  void UpdateBackTraceRegPressure(const MachineInstr *MI);
189 
190  bool IsProfitableToHoist(MachineInstr &MI);
191 
192  bool IsGuaranteedToExecute(MachineBasicBlock *BB);
193 
 // --- Dominator-tree scope bookkeeping used by HoistOutOfLoop ---
194  void EnterScope(MachineBasicBlock *MBB);
195 
196  void ExitScope(MachineBasicBlock *MBB);
197 
198  void ExitScopeIfDone(
199  MachineDomTreeNode *Node,
 // NOTE(review): listing lines 200-201 (remaining parameters) are missing.
202 
203  void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
204 
205  void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
206 
207  void SinkIntoLoop();
208 
 // --- Register pressure estimation ---
209  void InitRegPressure(MachineBasicBlock *BB);
210 
211  DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
212  bool ConsiderSeen,
213  bool ConsiderUnseenAsDef);
214 
215  void UpdateRegPressure(const MachineInstr *MI,
216  bool ConsiderUnseenAsDef = false);
217 
 // --- Hoisting and CSE of hoisted instructions ---
218  MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
219 
220  const MachineInstr *
221  LookForDuplicate(const MachineInstr *MI,
222  std::vector<const MachineInstr *> &PrevMIs);
223 
224  bool EliminateCSE(
225  MachineInstr *MI,
226  DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI);
227 
228  bool MayCSE(MachineInstr *MI);
229 
230  bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
231 
232  void InitCSEMap(MachineBasicBlock *BB);
233 
234  MachineBasicBlock *getCurPreheader();
235  };
236 } // end anonymous namespace
237 
// Storage for the pass identifier declared in the class.
238 char MachineLICM::ID = 0;
// Pass registration under the command-line name "machinelicm".
// NOTE(review): listing lines 239 and 242-245 are missing, so the
// registration below is incomplete as shown (the second description string
// presumably belongs to a closing INITIALIZE_PASS_END); confirm against the
// complete source.
240 INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
241  "Machine Loop Invariant Code Motion", false, false)
246  "Machine Loop Invariant Code Motion", false, false)
247 
248 /// Test if the given loop is the outer-most loop that has a unique predecessor.
/// Returns false if CurLoop has no loop predecessor, or if any enclosing
/// parent loop has one; returns true otherwise.
// NOTE(review): listing line 249 (the function signature) is missing. The call
// site in runOnMachineFunction is LoopIsOuterMostWithPredecessor(CurLoop).
250  // Check whether this loop even has a unique predecessor.
251  if (!CurLoop->getLoopPredecessor())
252  return false;
253  // Ok, now check to see if any of its outer loops do.
254  for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
255  if (L->getLoopPredecessor())
256  return false;
257  // None of them did, so this is the outermost with a unique predecessor.
258  return true;
259 }
260 
/// Pass entry point. Caches the subtarget hooks, chooses pre- or post-RA mode,
/// sets up the register pressure tables (pre-RA only), and then processes the
/// function's loops from a worklist.
/// \returns the Changed flag, which is reset to false at the start of the run.
261 bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
262  if (skipFunction(*MF.getFunction()))
263  return false;
264 
265  Changed = FirstInLoop = false;
266  const TargetSubtargetInfo &ST = MF.getSubtarget();
267  TII = ST.getInstrInfo();
268  TLI = ST.getTargetLowering();
269  TRI = ST.getRegisterInfo();
270  MFI = &MF.getFrameInfo();
271  MRI = &MF.getRegInfo();
272  SchedModel.init(ST.getSchedModel(), &ST, TII);
273 
 // The mode is taken from the function's SSA state; this overwrites whatever
 // value the constructor stored in PreRegAlloc.
274  PreRegAlloc = MRI->isSSA();
275 
276  if (PreRegAlloc)
277  DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
278  else
279  DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
280  DEBUG(dbgs() << MF.getName() << " ********\n");
281 
282  if (PreRegAlloc) {
283  // Estimate register pressure during pre-regalloc pass.
 // One pressure counter (zeroed) and one limit per register pressure set.
284  unsigned NumRPS = TRI->getNumRegPressureSets();
285  RegPressure.resize(NumRPS);
286  std::fill(RegPressure.begin(), RegPressure.end(), 0);
287  RegLimit.resize(NumRPS);
288  for (unsigned i = 0, e = NumRPS; i != e; ++i)
289  RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
290  }
291 
292  // Get our Loop information...
293  MLI = &getAnalysis<MachineLoopInfo>();
294  DT = &getAnalysis<MachineDominatorTree>();
295  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
296 
 // Seed the worklist with the top-level loops; sub-loops are appended below
 // when a loop is not processed directly.
297  SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
298  while (!Worklist.empty()) {
299  CurLoop = Worklist.pop_back_val();
300  CurPreheader = nullptr;
301  ExitBlocks.clear();
302 
303  // If this is done before regalloc, only visit outer-most preheader-sporting
304  // loops.
305  if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
306  Worklist.append(CurLoop->begin(), CurLoop->end());
307  continue;
308  }
309 
310  CurLoop->getExitBlocks(ExitBlocks);
311 
312  if (!PreRegAlloc)
313  HoistRegionPostRA();
314  else {
315  // CSEMap is initialized for loop header when the first instruction is
316  // being hoisted.
317  MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
318  FirstInLoop = true;
319  HoistOutOfLoop(N);
320  CSEMap.clear();
321 
 // NOTE(review): listing line 322 is missing. As shown, SinkIntoLoop() runs
 // unconditionally; the missing line may be a guard (a
 // SinkInstsToAvoidSpills option is declared in this file). Confirm against
 // the complete source.
323  SinkIntoLoop();
324  }
325  }
326 
327  return Changed;
328 }
329 
330 /// Return true if instruction stores to the specified frame.
/// \param MI instruction whose memory operands are inspected.
/// \param FI frame index to look for.
/// Conservatively returns true when MI has no memory operands at all.
331 static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
332  // If we lost memory operands, conservatively assume that the instruction
333  // writes to all slots.
334  if (MI->memoperands_empty())
335  return true;
336  for (const MachineMemOperand *MemOp : MI->memoperands()) {
 // Only store operands that carry a pseudo source value are of interest.
337  if (!MemOp->isStore() || !MemOp->getPseudoValue())
338  continue;
 // NOTE(review): listing line 339 is missing; it presumably opens an
 // `if` that declares `Value` from the dyn_cast below. Confirm against the
 // complete source.
340  dyn_cast<FixedStackPseudoSourceValue>(MemOp->getPseudoValue())) {
341  if (Value->getFrameIndex() == FI)
342  return true;
343  }
344  }
345  return false;
346 }
347 
348 /// Examine the instruction for potential LICM candidate. Also
349 /// gather register def and frame object update information.
/// \param MI               instruction to examine.
/// \param PhysRegDefs      in/out: registers (and aliases) seen defined.
/// \param PhysRegClobbers  in/out: registers defined more than once, covered
///                         by a regmask, or implicitly defined.
/// \param StoredFIs        in/out: spill-slot frame indices stored to.
/// \param Candidates       out: MI is appended if it qualifies.
350 void MachineLICM::ProcessMI(MachineInstr *MI,
351  BitVector &PhysRegDefs,
352  BitVector &PhysRegClobbers,
353  SmallSet<int, 32> &StoredFIs,
354  SmallVectorImpl<CandidateInfo> &Candidates) {
355  bool RuledOut = false;
356  bool HasNonInvariantUse = false;
357  unsigned Def = 0;
358  for (const MachineOperand &MO : MI->operands()) {
359  if (MO.isFI()) {
360  // Remember if the instruction stores to the frame index.
361  int FI = MO.getIndex();
362  if (!StoredFIs.count(FI) &&
363  MFI->isSpillSlotObjectIndex(FI) &&
364  InstructionStoresToFI(MI, FI))
365  StoredFIs.insert(FI);
 // Any frame-index operand counts as a non-invariant use.
366  HasNonInvariantUse = true;
367  continue;
368  }
369 
370  // We can't hoist an instruction defining a physreg that is clobbered in
371  // the loop.
372  if (MO.isRegMask()) {
373  PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
374  continue;
375  }
376 
377  if (!MO.isReg())
378  continue;
379  unsigned Reg = MO.getReg();
380  if (!Reg)
381  continue;
 // NOTE(review): listing line 382 is missing; it presumably opens the assert
 // whose message is on the next line. Confirm against the complete source.
383  "Not expecting virtual register!");
384 
385  if (!MO.isDef()) {
386  if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
387  // If it's using a non-loop-invariant register, then it's obviously not
388  // safe to hoist.
389  HasNonInvariantUse = true;
390  continue;
391  }
392 
393  if (MO.isImplicit()) {
 // An implicit def clobbers the register and everything aliasing it.
394  for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
395  PhysRegClobbers.set(*AI);
396  if (!MO.isDead())
397  // Non-dead implicit def? This cannot be hoisted.
398  RuledOut = true;
399  // No need to check if a dead implicit def is also defined by
400  // another instruction.
401  continue;
402  }
403 
404  // FIXME: For now, avoid instructions with multiple defs, unless
405  // it's a dead implicit def.
406  if (Def)
407  RuledOut = true;
408  else
409  Def = Reg;
410 
411  // If we have already seen another instruction that defines the same
412  // register, then this is not safe. Two defs is indicated by setting a
413  // PhysRegClobbers bit.
414  for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
415  if (PhysRegDefs.test(*AS))
416  PhysRegClobbers.set(*AS);
417  PhysRegDefs.set(*AS);
418  }
419  if (PhysRegClobbers.test(Reg))
420  // MI defined register is seen defined by another instruction in
421  // the loop, it cannot be a LICM candidate.
422  RuledOut = true;
423  }
424 
425  // Only consider reloads for now and remats which do not have register
426  // operands. FIXME: Consider unfold load folding instructions.
427  if (Def && !RuledOut) {
 // FI keeps the INT_MIN sentinel unless isLoadFromStackSlot is evaluated and
 // writes to it; HoistRegionPostRA compares Candidate.FI against INT_MIN.
428  int FI = INT_MIN;
429  if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
430  (TII->isLoadFromStackSlot(*MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
431  Candidates.push_back(CandidateInfo(MI, Def, FI));
432  }
433 }
434 
435 /// Walk the specified region of the CFG and hoist loop invariants out to the
436 /// preheader.
/// Post-register-allocation variant: operates on CurLoop and does nothing if
/// getCurPreheader() returns null. It first scans every block of the loop with
/// ProcessMI, then filters the collected candidates and hoists the safe ones
/// via HoistPostRA.
437 void MachineLICM::HoistRegionPostRA() {
438  MachineBasicBlock *Preheader = getCurPreheader();
439  if (!Preheader)
440  return;
441 
442  unsigned NumRegs = TRI->getNumRegs();
443  BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
444  BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
 // NOTE(review): listing lines 445-446 are missing; `Candidates`, used below,
 // is presumably declared there. Confirm against the complete source.
445 
447  SmallSet<int, 32> StoredFIs;
448 
449  // Walk the entire region, count number of defs for each register, and
450  // collect potential LICM candidates.
451  const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
452  for (MachineBasicBlock *BB : Blocks) {
453  // If the header of the loop containing this basic block is a landing pad,
454  // then don't try to hoist instructions out of this loop.
455  const MachineLoop *ML = MLI->getLoopFor(BB);
456  if (ML && ML->getHeader()->isEHPad()) continue;
457 
458  // Conservatively treat live-in's as an external def.
459  // FIXME: That means a reload that is reused in successor block(s) will not
460  // be LICM'ed.
461  for (const auto &LI : BB->liveins()) {
462  for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI)
463  PhysRegDefs.set(*AI);
464  }
465 
 // Reset the cached IsGuaranteedToExecute answer for each block.
466  SpeculationState = SpeculateUnknown;
467  for (MachineInstr &MI : *BB)
468  ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
469  }
470 
471  // Gather the registers read / clobbered by the terminator.
472  BitVector TermRegs(NumRegs);
 // NOTE(review): listing line 473 is missing; it presumably declares `TI` as
 // an iterator to the preheader's terminator. Confirm against the complete
 // source.
474  if (TI != Preheader->end()) {
475  for (const MachineOperand &MO : TI->operands()) {
476  if (!MO.isReg())
477  continue;
478  unsigned Reg = MO.getReg();
479  if (!Reg)
480  continue;
481  for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
482  TermRegs.set(*AI);
483  }
484  }
485 
486  // Now evaluate whether the potential candidates qualify.
487  // 1. Check if the candidate defined register is defined by another
488  // instruction in the loop.
489  // 2. If the candidate is a load from stack slot (always true for now),
490  // check if the slot is stored anywhere in the loop.
491  // 3. Make sure candidate def should not clobber
492  // registers read by the terminator. Similarly its def should not be
493  // clobbered by the terminator.
494  for (CandidateInfo &Candidate : Candidates) {
 // INT_MIN is the "no frame index" sentinel set by ProcessMI.
495  if (Candidate.FI != INT_MIN &&
496  StoredFIs.count(Candidate.FI))
497  continue;
498 
499  unsigned Def = Candidate.Def;
500  if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
501  bool Safe = true;
502  MachineInstr *MI = Candidate.MI;
 // Re-check the uses now that the def/clobber sets cover the whole loop.
503  for (const MachineOperand &MO : MI->operands()) {
504  if (!MO.isReg() || MO.isDef() || !MO.getReg())
505  continue;
506  unsigned Reg = MO.getReg();
507  if (PhysRegDefs.test(Reg) ||
508  PhysRegClobbers.test(Reg)) {
509  // If it's using a non-loop-invariant register, then it's obviously
510  // not safe to hoist.
511  Safe = false;
512  break;
513  }
514  }
515  if (Safe)
516  HoistPostRA(MI, Candidate.Def);
517  }
518  }
519 }
520 
521 /// Add register 'Reg' to the livein sets of BBs in the current loop, and make
522 /// sure it is not killed by any instructions in the loop.
/// \param Reg register to mark live-in for every block of CurLoop.
523 void MachineLICM::AddToLiveIns(unsigned Reg) {
524  const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
525  for (MachineBasicBlock *BB : Blocks) {
526  if (!BB->isLiveIn(Reg))
527  BB->addLiveIn(Reg);
528  for (MachineInstr &MI : *BB) {
529  for (MachineOperand &MO : MI.operands()) {
 // Only register uses can carry a kill flag worth clearing.
530  if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
 // Clear the kill flag on uses of Reg itself and on operands for which
 // TRI->isSuperRegister(Reg, operand) holds.
531  if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
532  MO.setIsKill(false);
533  }
534  }
535  }
536 }
537 
538 /// When an instruction is found to only use loop invariant operands that is
539 /// safe to hoist, this instruction is called to do the dirty work.
/// \param MI  instruction to move in front of the preheader's first terminator.
/// \param Def register defined by MI; it is made live-in to every loop block.
/// Increments NumPostRAHoisted and sets Changed.
540 void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
 // NOTE(review): the result is dereferenced below without a null check;
 // the visible caller (HoistRegionPostRA) returns early when there is no
 // preheader.
541  MachineBasicBlock *Preheader = getCurPreheader();
542 
543  // Now move the instructions to the predecessor, inserting it before any
544  // terminator instructions.
545  DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#"
546  << MI->getParent()->getNumber() << ": " << *MI);
547 
548  // Splice the instruction to the preheader.
 // NOTE(review): listing line 549 is missing; it presumably declares `MBB` as
 // MI's parent block. Confirm against the complete source.
550  Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
551 
552  // Add register to livein list to all the BBs in the current loop since a
553  // loop invariant must be kept live throughout the whole loop. This is
554  // important to ensure later passes do not scavenge the def register.
555  AddToLiveIns(Def);
556 
557  ++NumPostRAHoisted;
558  Changed = true;
559 }
560 
561 /// Check if this mbb is guaranteed to execute. If not then a load from this mbb
562 /// may not be safe to hoist.
/// \param BB block to test against CurLoop.
/// \returns true if BB is the loop header or dominates every exiting block of
/// CurLoop. The answer is cached in SpeculationState until a caller resets it
/// to SpeculateUnknown, so later calls ignore BB while the cache is set.
563 bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
 // Cached answer: SpeculateFalse means "guaranteed to execute".
564  if (SpeculationState != SpeculateUnknown)
565  return SpeculationState == SpeculateFalse;
566 
567  if (BB != CurLoop->getHeader()) {
568  // Check loop exiting blocks.
569  SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
570  CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
571  for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
572  if (!DT->dominates(BB, CurrentLoopExitingBlock)) {
573  SpeculationState = SpeculateTrue;
574  return false;
575  }
576  }
577 
578  SpeculationState = SpeculateFalse;
579  return true;
580 }
581 
/// Open a scope for MBB: push a copy of the current RegPressure onto BackTrace.
/// MBB itself is only used for the debug message.
582 void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
583  DEBUG(dbgs() << "Entering BB#" << MBB->getNumber() << '\n');
584 
585  // Remember livein register pressure.
586  BackTrace.push_back(RegPressure);
587 }
588 
/// Close the most recently opened scope by popping the top BackTrace entry.
/// MBB itself is only used for the debug message.
589 void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
590  DEBUG(dbgs() << "Exiting BB#" << MBB->getNumber() << '\n');
591  BackTrace.pop_back();
592 }
593 
594 /// Destroy scope for the MBB that corresponds to the given dominator tree node
595 /// if it's a leaf or all of its children are done. Walk up the dominator tree to
596 /// destroy ancestors which are now done.
597 void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
 // NOTE(review): listing lines 598-599 are missing; the remaining parameters
 // (presumably the OpenChildren and ParentMap maps used below) are declared
 // there. Confirm against the complete source.
 // Node still has unfinished children: nothing to pop yet.
600  if (OpenChildren[Node])
601  return;
602 
603  // Pop scope.
604  ExitScope(Node->getBlock());
605 
606  // Now traverse upwards to pop ancestors whose offsprings are all done.
607  while (MachineDomTreeNode *Parent = ParentMap[Node]) {
 // One more child of Parent has finished; stop at the first ancestor that
 // still has open children.
608  unsigned Left = --OpenChildren[Parent];
609  if (Left != 0)
610  break;
611  ExitScope(Parent->getBlock());
612  Node = Parent;
613  }
614 }
615 
616 /// Walk the specified loop in the CFG (defined by all blocks dominated by the
617 /// specified header block, and that are in the current loop) in depth first
618 /// order w.r.t the DominatorTree. This allows us to visit definitions before
619 /// uses, allowing us to hoist a loop body in one pass without iteration.
620 ///
/// \param HeaderN dominator tree node of the loop header.
/// Works in two phases: first it records the visiting order in Scopes, then it
/// walks Scopes and tries to Hoist each instruction, updating the register
/// pressure estimate for those that stay.
621 void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
622  MachineBasicBlock *Preheader = getCurPreheader();
623  if (!Preheader)
624  return;
625 
 // NOTE(review): listing lines 626-629 are missing; the locals WorkList,
 // Scopes, OpenChildren and ParentMap used below are presumably declared
 // there. Confirm against the complete source.
630 
631  // Perform a DFS walk to determine the order of visit.
632  WorkList.push_back(HeaderN);
633  while (!WorkList.empty()) {
634  MachineDomTreeNode *Node = WorkList.pop_back_val();
635  assert(Node && "Null dominator tree node?");
636  MachineBasicBlock *BB = Node->getBlock();
637 
638  // If the header of the loop containing this basic block is a landing pad,
639  // then don't try to hoist instructions out of this loop.
640  const MachineLoop *ML = MLI->getLoopFor(BB);
641  if (ML && ML->getHeader()->isEHPad())
642  continue;
643 
644  // If this subregion is not in the top level loop at all, exit.
645  if (!CurLoop->contains(BB))
646  continue;
647 
648  Scopes.push_back(Node);
649  const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
650  unsigned NumChildren = Children.size();
651 
652  // Don't hoist things out of a large switch statement. This often causes
653  // code to be hoisted that wasn't going to be executed, and increases
654  // register pressure in a situation where it's likely to matter.
 // Treating the node as childless keeps its dominated blocks out of Scopes.
655  if (BB->succ_size() >= 25)
656  NumChildren = 0;
657 
658  OpenChildren[Node] = NumChildren;
659  // Add children in reverse order as then the next popped worklist node is
660  // the first child of this node. This means we ultimately traverse the
661  // DOM tree in exactly the same order as if we'd recursed.
662  for (int i = (int)NumChildren-1; i >= 0; --i) {
663  MachineDomTreeNode *Child = Children[i];
664  ParentMap[Child] = Node;
665  WorkList.push_back(Child);
666  }
667  }
668 
669  if (Scopes.size() == 0)
670  return;
671 
672  // Compute registers which are livein into the loop headers.
673  RegSeen.clear();
674  BackTrace.clear();
675  InitRegPressure(Preheader);
676 
677  // Now perform LICM.
678  for (MachineDomTreeNode *Node : Scopes) {
679  MachineBasicBlock *MBB = Node->getBlock();
680 
681  EnterScope(MBB);
682 
683  // Process the block
684  SpeculationState = SpeculateUnknown;
 // NOTE(review): listing line 685 is missing; it presumably opens the `for`
 // statement and names the iterator type of MII. Confirm against the
 // complete source.
686  MII = MBB->begin(), E = MBB->end(); MII != E; ) {
 // Step to the next instruction before Hoist() is called on MI.
687  MachineBasicBlock::iterator NextMII = MII; ++NextMII;
688  MachineInstr *MI = &*MII;
689  if (!Hoist(MI, Preheader))
690  UpdateRegPressure(MI);
691  MII = NextMII;
692  }
693 
694  // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
695  ExitScopeIfDone(Node, OpenChildren, ParentMap);
696  }
697 }
698 
699 /// Sink instructions into loops if profitable. This especially tries to prevent
700 /// register spills caused by register pressure if there is little to no
701 /// overhead moving instructions into loops.
/// Candidates are preheader instructions that are loop invariant and have no
/// loop PHI use. A candidate is sunk only when its operand 0 is a register def
/// with a single definition and every user of that register is a COPY.
702 void MachineLICM::SinkIntoLoop() {
703  MachineBasicBlock *Preheader = getCurPreheader();
704  if (!Preheader)
705  return;
706 
 // NOTE(review): listing line 707 is missing; it presumably declares the
 // `Candidates` container of MachineInstr pointers used below. Confirm
 // against the complete source.
708  for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
709  I != Preheader->instr_end(); ++I) {
710  // We need to ensure that we can safely move this instruction into the loop.
711  // As such, it must not have side-effects, e.g. such as a call has.
712  if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I))
713  Candidates.push_back(&*I);
714  }
715 
716  for (MachineInstr *I : Candidates) {
717  const MachineOperand &MO = I->getOperand(0);
718  if (!MO.isDef() || !MO.isReg() || !MO.getReg())
719  continue;
720  if (!MRI->hasOneDef(MO.getReg()))
721  continue;
722  bool CanSink = true;
 // B accumulates the nearest common dominator of all user blocks.
723  MachineBasicBlock *B = nullptr;
724  for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
725  // FIXME: Come up with a proper cost model that estimates whether sinking
726  // the instruction (and thus possibly executing it on every loop
727  // iteration) is more expensive than a register.
728  // For now assumes that copies are cheap and thus almost always worth it.
729  if (!MI.isCopy()) {
730  CanSink = false;
731  break;
732  }
733  if (!B) {
734  B = MI.getParent();
735  continue;
736  }
737  B = DT->findNearestCommonDominator(B, MI.getParent());
738  if (!B) {
739  CanSink = false;
740  break;
741  }
742  }
 // Skip when sinking is disallowed, there were no users, or the common
 // dominator is the preheader itself.
743  if (!CanSink || !B || B == Preheader)
744  continue;
745  B->splice(B->getFirstNonPHI(), Preheader, I);
746  }
747 }
748 
// NOTE(review): listing line 749 (the signature of this helper) is missing.
// The call site in calcRegisterCost is isOperandKill(MO, MRI). The body treats
// an operand as a kill if it carries the kill flag or if its register has
// exactly one non-debug use.
750  return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
751 }
752 
753 /// Find all virtual register references that are liveout of the preheader to
754 /// initialize the starting "register pressure". Note this does not count live
755 /// through (livein but not used) registers.
/// \param BB block to scan (HoistOutOfLoop passes the preheader). May recurse
/// into BB's single predecessor first; each call re-zeroes RegPressure before
/// it scans.
756 void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
757  std::fill(RegPressure.begin(), RegPressure.end(), 0);
758 
759  // If the preheader has only a single predecessor and it ends with a
760  // fallthrough or an unconditional branch, then scan its predecessor for live
761  // defs as well. This happens whenever the preheader is created by splitting
762  // the critical edge from the loop predecessor to the loop header.
763  if (BB->pred_size() == 1) {
764  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
 // NOTE(review): listing line 765 is missing; it presumably declares the
 // `Cond` operand vector passed to analyzeBranch. Confirm against the
 // complete source.
766  if (!TII->analyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
767  InitRegPressure(*BB->pred_begin());
768  }
769 
770  for (const MachineInstr &MI : *BB)
771  UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true);
772 }
773 
774 /// Update estimate of register pressure after the specified instruction.
/// \param MI                  instruction whose register cost is applied.
/// \param ConsiderUnseenAsDef forwarded to calcRegisterCost.
/// Each entry of the returned cost map is added to RegPressure for its
/// pressure set.
775 void MachineLICM::UpdateRegPressure(const MachineInstr *MI,
776  bool ConsiderUnseenAsDef) {
777  auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
778  for (const auto &RPIdAndCost : Cost) {
779  unsigned Class = RPIdAndCost.first;
 // RegPressure is unsigned: clamp to zero instead of wrapping around when a
 // negative cost is larger than the current pressure.
780  if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
781  RegPressure[Class] = 0;
782  else
783  RegPressure[Class] += RPIdAndCost.second;
784  }
785 }
786 
786 /// Calculate the additional register pressure that the registers used in MI
787 /// cause.
788 ///
789 /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
790 /// figure out which usages are live-ins.
791 /// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
/// \returns a map from register pressure set id to the signed pressure change.
/// The map is returned unchanged (presumably empty) for an IMPLICIT_DEF.
// NOTE(review): listing line 793 (the return type; the in-class declaration
// says DenseMap<unsigned, int>) is missing.
793 MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
794  bool ConsiderUnseenAsDef) {
 // NOTE(review): listing line 796 is missing; it presumably declares `Cost`.
797  if (MI->isImplicitDef())
798  return Cost;
 // Only the operands described by the instruction descriptor are visited.
799  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
800  const MachineOperand &MO = MI->getOperand(i);
801  if (!MO.isReg() || MO.isImplicit())
802  continue;
803  unsigned Reg = MO.getReg();
 // NOTE(review): listing line 804 is missing; it presumably holds the
 // condition guarding the `continue` below (MRI->getRegClass(Reg) further
 // down suggests only virtual registers are meant to reach it). Confirm
 // against the complete source.
805  continue;
806 
807  // FIXME: It seems bad to use RegSeen only for some of these calculations.
808  bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;
809  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
810 
811  RegClassWeight W = TRI->getRegClassWeight(RC);
812  int RCCost = 0;
 // A def adds the class weight. A killing use of an already-seen register
 // subtracts it. A first-seen, non-killed use adds it only when
 // ConsiderUnseenAsDef is set.
813  if (MO.isDef())
814  RCCost = W.RegWeight;
815  else {
816  bool isKill = isOperandKill(MO, MRI);
817  if (isNew && !isKill && ConsiderUnseenAsDef)
818  // Haven't seen this, it must be a livein.
819  RCCost = W.RegWeight;
820  else if (!isNew && isKill)
821  RCCost = -W.RegWeight;
822  }
823  if (RCCost == 0)
824  continue;
 // The pressure set list is terminated by -1.
825  const int *PS = TRI->getRegClassPressureSets(RC);
826  for (; *PS != -1; ++PS) {
827  if (Cost.find(*PS) == Cost.end())
828  Cost[*PS] = RCCost;
829  else
830  Cost[*PS] += RCCost;
831  }
832  }
833  return Cost;
834 }
835 
836 /// Return true if this machine instruction loads from global offset table or
837 /// constant pool.
/// Also returns true, conservatively, when MI has no memory operands.
// NOTE(review): listing line 838 (the function signature) is missing. The
// call site in IsLICMCandidate is mayLoadFromGOTOrConstantPool(I) with a
// MachineInstr reference.
839  assert (MI.mayLoad() && "Expected MI that loads!");
840 
841  // If we lost memory operands, conservatively assume that the instruction
842  // reads from everything..
843  if (MI.memoperands_empty())
844  return true;
845 
846  for (MachineMemOperand *MemOp : MI.memoperands())
847  if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
848  if (PSV->isGOT() || PSV->isConstantPool())
849  return true;
850 
851  return false;
852 }
853 
854 /// Returns true if the instruction may be a suitable candidate for LICM.
855 /// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
/// \param I instruction to test.
/// Two checks are made: I must pass isSafeToMove(), and a load that does not
/// read the GOT or constant pool must sit in a block that is guaranteed to
/// execute.
856 bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
857  // Check if it's safe to move the instruction.
858  bool DontMoveAcrossStore = true;
859  if (!I.isSafeToMove(AA, DontMoveAcrossStore))
860  return false;
861 
862  // If it is load then check if it is guaranteed to execute by making sure that
863  // it dominates all exiting blocks. If it doesn't, then there is a path out of
864  // the loop which does not execute this load, so we can't hoist it. Loads
865  // from constant memory are not safe to speculate all the time, for example
866  // indexed load from a jump table.
867  // Stores and side effects are already checked by isSafeToMove.
868  if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) &&
869  !IsGuaranteedToExecute(I.getParent()))
870  return false;
871 
872  return true;
873 }
874 
875 /// Returns true if the instruction is loop invariant.
876 /// I.e., all virtual register operands are defined outside of the loop,
877 /// physical registers aren't accessed explicitly, and there are no side
878 /// effects that aren't captured by the operands or other flags.
879 ///
/// \param I instruction to test against CurLoop; it must also pass
/// IsLICMCandidate().
880 bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
881  if (!IsLICMCandidate(I))
882  return false;
883 
884  // The instruction is loop invariant if all of its operands are.
885  for (const MachineOperand &MO : I.operands()) {
886  if (!MO.isReg())
887  continue;
888 
889  unsigned Reg = MO.getReg();
890  if (Reg == 0) continue;
891 
892  // Don't hoist an instruction that uses or defines a physical register.
893  if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
894  if (MO.isUse()) {
895  // If the physreg has no defs anywhere, it's just an ambient register
896  // and we can freely move its uses. Alternatively, if it's allocatable,
897  // it could get allocated to something with a def during allocation.
898  if (!MRI->isConstantPhysReg(Reg))
899  return false;
900  // Otherwise it's safe to move.
901  continue;
902  } else if (!MO.isDead()) {
903  // A def that isn't dead. We can't move it.
904  return false;
905  } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
906  // If the reg is live into the loop, we can't hoist an instruction
907  // which would clobber it.
908  return false;
909  }
 // A dead physreg def that is not live into the header falls through and
 // is skipped by the !isUse() test below.
910  }
911 
912  if (!MO.isUse())
913  continue;
914 
 // From here on Reg is a virtual register use.
915  assert(MRI->getVRegDef(Reg) &&
916  "Machine instr not mapped for this vreg?!");
917 
918  // If the loop contains the definition of an operand, then the instruction
919  // isn't loop invariant.
920  if (CurLoop->contains(MRI->getVRegDef(Reg)))
921  return false;
922  }
923 
924  // If we got this far, the instruction is loop invariant!
925  return true;
926 }
927 
928 
929 /// Return true if the specified instruction is used by a phi node and hoisting
930 /// it could cause a copy to be inserted.
/// \param MI instruction whose virtual register defs are followed, looking
/// through COPY instructions that are inside CurLoop.
931 bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
 // NOTE(review): listing line 932 is missing; it presumably declares the
 // `Work` list seeded with MI (the loop below pops from it before any push).
 // Confirm against the complete source.
933  do {
934  MI = Work.pop_back_val();
935  for (const MachineOperand &MO : MI->operands()) {
936  if (!MO.isReg() || !MO.isDef())
937  continue;
938  unsigned Reg = MO.getReg();
939  if (!TargetRegisterInfo::isVirtualRegister(Reg))
940  continue;
941  for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
942  // A PHI may cause a copy to be inserted.
943  if (UseMI.isPHI()) {
944  // A PHI inside the loop causes a copy because the live range of Reg is
945  // extended across the PHI.
946  if (CurLoop->contains(&UseMI))
947  return true;
948  // A PHI in an exit block can cause a copy to be inserted if the PHI
949  // has multiple predecessors in the loop with different values.
950  // For now, approximate by rejecting all exit blocks.
951  if (isExitBlock(UseMI.getParent()))
952  return true;
953  continue;
954  }
955  // Look past copies as well.
956  if (UseMI.isCopy() && CurLoop->contains(&UseMI))
957  Work.push_back(&UseMI);
958  }
959  }
960  } while (!Work.empty());
961  return false;
962 }
963 
964 /// Compute operand latency between a def of 'Reg' and an use in the current
965 /// loop, return true if the target considered it high.
/// \param MI     defining instruction.
/// \param DefIdx operand index of the def within MI.
/// \param Reg    register defined by that operand.
/// Only the first non-copy-like, non-debug user inside CurLoop is examined.
966 bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
967  unsigned DefIdx, unsigned Reg) const {
968  if (MRI->use_nodbg_empty(Reg))
969  return false;
970 
971  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
972  if (UseMI.isCopyLike())
973  continue;
974  if (!CurLoop->contains(UseMI.getParent()))
975  continue;
 // Ask the target about every operand of this user that reads Reg.
976  for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) {
977  const MachineOperand &MO = UseMI.getOperand(i);
978  if (!MO.isReg() || !MO.isUse())
979  continue;
980  unsigned MOReg = MO.getReg();
981  if (MOReg != Reg)
982  continue;
983 
984  if (TII->hasHighOperandLatency(SchedModel, MRI, MI, DefIdx, UseMI, i))
985  return true;
986  }
987 
988  // Only look at the first in loop use.
989  break;
990  }
991 
992  return false;
993 }
994 
995 /// Return true if the instruction is marked "cheap" or the operand latency
996 /// between its def and a use is one or less.
997 bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
998  if (TII->isAsCheapAsAMove(MI) || MI.isCopyLike())
999  return true;
1000 
1001  bool isCheap = false;
1002  unsigned NumDefs = MI.getDesc().getNumDefs();
1003  for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
1004  MachineOperand &DefMO = MI.getOperand(i);
1005  if (!DefMO.isReg() || !DefMO.isDef())
1006  continue;
1007  --NumDefs;
1008  unsigned Reg = DefMO.getReg();
1009  if (TargetRegisterInfo::isPhysicalRegister(Reg))
1010  continue;
1011 
1012  if (!TII->hasLowDefLatency(SchedModel, MI, i))
1013  return false;
1014  isCheap = true;
1015  }
1016 
1017  return isCheap;
1018 }
1019 
1020 /// Visit BBs from header to current BB, check if hoisting an instruction of the
1021 /// given cost matrix can cause high register pressure.
1022 bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
1023  bool CheapInstr) {
1024  for (const auto &RPIdAndCost : Cost) {
1025  if (RPIdAndCost.second <= 0)
1026  continue;
1027 
1028  unsigned Class = RPIdAndCost.first;
1029  int Limit = RegLimit[Class];
1030 
1031  // Don't hoist cheap instructions if they would increase register pressure,
1032  // even if we're under the limit.
1033  if (CheapInstr && !HoistCheapInsts)
1034  return true;
1035 
1036  for (const auto &RP : BackTrace)
1037  if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
1038  return true;
1039  }
1040 
1041  return false;
1042 }
1043 
1044 /// Traverse the back trace from header to the current block and update their
1045 /// register pressures to reflect the effect of hoisting MI from the current
1046 /// block to the preheader.
1047 void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
1048  // First compute the 'cost' of the instruction, i.e. its contribution
1049  // to register pressure.
1050  auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
1051  /*ConsiderUnseenAsDef=*/false);
1052 
1053  // Update register pressure of blocks from loop header to current block.
1054  for (auto &RP : BackTrace)
1055  for (const auto &RPIdAndCost : Cost)
1056  RP[RPIdAndCost.first] += RPIdAndCost.second;
1057 }
1058 
1059 /// Return true if it is potentially profitable to hoist the given loop
1060 /// invariant.
1061 bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
1062  if (MI.isImplicitDef())
1063  return true;
1064 
1065  // Besides removing computation from the loop, hoisting an instruction has
1066  // these effects:
1067  //
1068  // - The value defined by the instruction becomes live across the entire
1069  // loop. This increases register pressure in the loop.
1070  //
1071  // - If the value is used by a PHI in the loop, a copy will be required for
1072  // lowering the PHI after extending the live range.
1073  //
1074  // - When hoisting the last use of a value in the loop, that value no longer
1075  // needs to be live in the loop. This lowers register pressure in the loop.
1076 
1077  bool CheapInstr = IsCheapInstruction(MI);
1078  bool CreatesCopy = HasLoopPHIUse(&MI);
1079 
1080  // Don't hoist a cheap instruction if it would create a copy in the loop.
1081  if (CheapInstr && CreatesCopy) {
1082  DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
1083  return false;
1084  }
1085 
1086  // Rematerializable instructions should always be hoisted since the register
1087  // allocator can just pull them down again when needed.
1088  if (TII->isTriviallyReMaterializable(MI, AA))
1089  return true;
1090 
1091  // FIXME: If there are long latency loop-invariant instructions inside the
1092  // loop at this point, why didn't the optimizer's LICM hoist them?
1093  for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
1094  const MachineOperand &MO = MI.getOperand(i);
1095  if (!MO.isReg() || MO.isImplicit())
1096  continue;
1097  unsigned Reg = MO.getReg();
1098  if (!TargetRegisterInfo::isVirtualRegister(Reg))
1099  continue;
1100  if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
1101  DEBUG(dbgs() << "Hoist High Latency: " << MI);
1102  ++NumHighLatency;
1103  return true;
1104  }
1105  }
1106 
1107  // Estimate register pressure to determine whether to LICM the instruction.
1108  // In low register pressure situation, we can be more aggressive about
1109  // hoisting. Also, favors hoisting long latency instructions even in
1110  // moderately high pressure situation.
1111  // Cheap instructions will only be hoisted if they don't increase register
1112  // pressure at all.
1113  auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false,
1114  /*ConsiderUnseenAsDef=*/false);
1115 
1116  // Visit BBs from header to current BB, if hoisting this doesn't cause
1117  // high register pressure, then it's safe to proceed.
1118  if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
1119  DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
1120  ++NumLowRP;
1121  return true;
1122  }
1123 
1124  // Don't risk increasing register pressure if it would create copies.
1125  if (CreatesCopy) {
1126  DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
1127  return false;
1128  }
1129 
1130  // Do not "speculate" in high register pressure situation. If an
1131  // instruction is not guaranteed to be executed in the loop, it's best to be
1132  // conservative.
1133  if (AvoidSpeculation &&
1134  (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
1135  DEBUG(dbgs() << "Won't speculate: " << MI);
1136  return false;
1137  }
1138 
1139  // High register pressure situation, only hoist if the instruction is going
1140  // to be remat'ed.
1141  if (!TII->isTriviallyReMaterializable(MI, AA) &&
1143  DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
1144  return false;
1145  }
1146 
1147  return true;
1148 }
1149 
1150 /// Unfold a load from the given machineinstr if the load itself could be
1151 /// hoisted. Return the unfolded and hoistable load, or null if the load
1152 /// couldn't be unfolded or if it wouldn't be hoistable.
1153 MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
1154  // Don't unfold simple loads.
1155  if (MI->canFoldAsLoad())
1156  return nullptr;
1157 
1158  // If not, we may be able to unfold a load and hoist that.
1159  // First test whether the instruction is loading from an amenable
1160  // memory location.
1161  if (!MI->isDereferenceableInvariantLoad(AA))
1162  return nullptr;
1163 
1164  // Next determine the register class for a temporary register.
1165  unsigned LoadRegIndex;
1166  unsigned NewOpc =
1167  TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
1168  /*UnfoldLoad=*/true,
1169  /*UnfoldStore=*/false,
1170  &LoadRegIndex);
1171  if (NewOpc == 0) return nullptr;
1172  const MCInstrDesc &MID = TII->get(NewOpc);
1173  MachineFunction &MF = *MI->getParent()->getParent();
1174  const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
1175  // Ok, we're unfolding. Create a temporary register and do the unfold.
1176  unsigned Reg = MRI->createVirtualRegister(RC);
1177 
1179  bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg,
1180  /*UnfoldLoad=*/true,
1181  /*UnfoldStore=*/false, NewMIs);
1182  (void)Success;
1183  assert(Success &&
1184  "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
1185  "succeeded!");
1186  assert(NewMIs.size() == 2 &&
1187  "Unfolded a load into multiple instructions!");
1188  MachineBasicBlock *MBB = MI->getParent();
1190  MBB->insert(Pos, NewMIs[0]);
1191  MBB->insert(Pos, NewMIs[1]);
1192  // If unfolding produced a load that wasn't loop-invariant or profitable to
1193  // hoist, discard the new instructions and bail.
1194  if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
1195  NewMIs[0]->eraseFromParent();
1196  NewMIs[1]->eraseFromParent();
1197  return nullptr;
1198  }
1199 
1200  // Update register pressure for the unfolded instruction.
1201  UpdateRegPressure(NewMIs[1]);
1202 
1203  // Otherwise we successfully unfolded a load that we can hoist.
1204  MI->eraseFromParent();
1205  return NewMIs[0];
1206 }
1207 
1208 /// Initialize the CSE map with instructions that are in the current loop
1209 /// preheader that may become duplicates of instructions that are hoisted
1210 /// out of the loop.
1211 void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
1212  for (MachineInstr &MI : *BB)
1213  CSEMap[MI.getOpcode()].push_back(&MI);
1214 }
1215 
1216 /// Find an instruction amount PrevMIs that is a duplicate of MI.
1217 /// Return this instruction if it's found.
1218 const MachineInstr*
1219 MachineLICM::LookForDuplicate(const MachineInstr *MI,
1220  std::vector<const MachineInstr*> &PrevMIs) {
1221  for (const MachineInstr *PrevMI : PrevMIs)
1222  if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr)))
1223  return PrevMI;
1224 
1225  return nullptr;
1226 }
1227 
1228 /// Given a LICM'ed instruction, look for an instruction on the preheader that
1229 /// computes the same value. If it's found, do a RAU on with the definition of
1230 /// the existing instruction rather than hoisting the instruction to the
1231 /// preheader.
1232 bool MachineLICM::EliminateCSE(MachineInstr *MI,
1233  DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
1234  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1235  // the undef property onto uses.
1236  if (CI == CSEMap.end() || MI->isImplicitDef())
1237  return false;
1238 
1239  if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
1240  DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
1241 
1242  // Replace virtual registers defined by MI by their counterparts defined
1243  // by Dup.
1245  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1246  const MachineOperand &MO = MI->getOperand(i);
1247 
1248  // Physical registers may not differ here.
1249  assert((!MO.isReg() || MO.getReg() == 0 ||
1250  !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
1251  MO.getReg() == Dup->getOperand(i).getReg()) &&
1252  "Instructions with different phys regs are not identical!");
1253 
1254  if (MO.isReg() && MO.isDef() &&
1255  !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
1256  Defs.push_back(i);
1257  }
1258 
1260  for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
1261  unsigned Idx = Defs[i];
1262  unsigned Reg = MI->getOperand(Idx).getReg();
1263  unsigned DupReg = Dup->getOperand(Idx).getReg();
1264  OrigRCs.push_back(MRI->getRegClass(DupReg));
1265 
1266  if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
1267  // Restore old RCs if more than one defs.
1268  for (unsigned j = 0; j != i; ++j)
1269  MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
1270  return false;
1271  }
1272  }
1273 
1274  for (unsigned Idx : Defs) {
1275  unsigned Reg = MI->getOperand(Idx).getReg();
1276  unsigned DupReg = Dup->getOperand(Idx).getReg();
1277  MRI->replaceRegWith(Reg, DupReg);
1278  MRI->clearKillFlags(DupReg);
1279  }
1280 
1281  MI->eraseFromParent();
1282  ++NumCSEed;
1283  return true;
1284  }
1285  return false;
1286 }
1287 
1288 /// Return true if the given instruction will be CSE'd if it's hoisted out of
1289 /// the loop.
1290 bool MachineLICM::MayCSE(MachineInstr *MI) {
1291  unsigned Opcode = MI->getOpcode();
1293  CI = CSEMap.find(Opcode);
1294  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1295  // the undef property onto uses.
1296  if (CI == CSEMap.end() || MI->isImplicitDef())
1297  return false;
1298 
1299  return LookForDuplicate(MI, CI->second) != nullptr;
1300 }
1301 
1302 /// When an instruction is found to use only loop invariant operands
1303 /// that are safe to hoist, this instruction is called to do the dirty work.
1304 /// It returns true if the instruction is hoisted.
1305 bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
1306  // First check whether we should hoist this instruction.
1307  if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
1308  // If not, try unfolding a hoistable load.
1309  MI = ExtractHoistableLoad(MI);
1310  if (!MI) return false;
1311  }
1312 
1313  // Now move the instructions to the predecessor, inserting it before any
1314  // terminator instructions.
1315  DEBUG({
1316  dbgs() << "Hoisting " << *MI;
1317  if (MI->getParent()->getBasicBlock())
1318  dbgs() << " from BB#" << MI->getParent()->getNumber();
1319  if (Preheader->getBasicBlock())
1320  dbgs() << " to BB#" << Preheader->getNumber();
1321  dbgs() << "\n";
1322  });
1323 
1324  // If this is the first instruction being hoisted to the preheader,
1325  // initialize the CSE map with potential common expressions.
1326  if (FirstInLoop) {
1327  InitCSEMap(Preheader);
1328  FirstInLoop = false;
1329  }
1330 
1331  // Look for opportunity to CSE the hoisted instruction.
1332  unsigned Opcode = MI->getOpcode();
1334  CI = CSEMap.find(Opcode);
1335  if (!EliminateCSE(MI, CI)) {
1336  // Otherwise, splice the instruction to the preheader.
1337  Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
1338 
1339  // Since we are moving the instruction out of its basic block, we do not
1340  // retain its debug location. Doing so would degrade the debugging
1341  // experience and adversely affect the accuracy of profiling information.
1342  MI->setDebugLoc(DebugLoc());
1343 
1344  // Update register pressure for BBs from header to this block.
1345  UpdateBackTraceRegPressure(MI);
1346 
1347  // Clear the kill flags of any register this instruction defines,
1348  // since they may need to be live throughout the entire loop
1349  // rather than just live for part of it.
1350  for (MachineOperand &MO : MI->operands())
1351  if (MO.isReg() && MO.isDef() && !MO.isDead())
1352  MRI->clearKillFlags(MO.getReg());
1353 
1354  // Add to the CSE map.
1355  if (CI != CSEMap.end())
1356  CI->second.push_back(MI);
1357  else
1358  CSEMap[Opcode].push_back(MI);
1359  }
1360 
1361  ++NumHoisted;
1362  Changed = true;
1363 
1364  return true;
1365 }
1366 
1367 /// Get the preheader for the current loop, splitting a critical edge if needed.
1368 MachineBasicBlock *MachineLICM::getCurPreheader() {
1369  // Determine the block to which to hoist instructions. If we can't find a
1370  // suitable loop predecessor, we can't do any hoisting.
1371 
1372  // If we've tried to get a preheader and failed, don't try again.
1373  if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
1374  return nullptr;
1375 
1376  if (!CurPreheader) {
1377  CurPreheader = CurLoop->getLoopPreheader();
1378  if (!CurPreheader) {
1379  MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
1380  if (!Pred) {
1381  CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1382  return nullptr;
1383  }
1384 
1385  CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), *this);
1386  if (!CurPreheader) {
1387  CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1388  return nullptr;
1389  }
1390  }
1391  }
1392  return CurPreheader;
1393 }
MachineLoop * L
bool isImplicit() const
unsigned succ_size() const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
BitVector & set()
Definition: BitVector.h:219
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
instr_iterator instr_begin()
instr_iterator instr_end()
const std::vector< DomTreeNodeBase< NodeT > * > & getChildren() const
STATISTIC(NumFunctions,"Total number of functions")
size_t i
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:216
char & MachineLICMID
MachineLICM - This pass performs LICM on machine instructions.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
bool isDead() const
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
INITIALIZE_PASS_BEGIN(MachineLICM,"machinelicm","Machine Loop Invariant Code Motion", false, false) INITIALIZE_PASS_END(MachineLICM
void setBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
setBitsNotInMask - Add a bit to this vector for every '0' bit in Mask.
Definition: BitVector.h:495
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:270
LoopT * getParentLoop() const
Definition: LoopInfo.h:103
A debug info location.
Definition: DebugLoc.h:34
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
iterator_range< mmo_iterator > memoperands()
Definition: MachineInstr.h:365
bool canFoldAsLoad(QueryType Type=IgnoreBundle) const
Return true for instructions that can be folded as memory operands in other instructions.
Definition: MachineInstr.h:538
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:301
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI)
Return true if this machine instruction loads from global offset table or constant pool...
BlockT * getHeader() const
Definition: LoopInfo.h:102
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:53
A description of a memory reference used in the backend.
Each TargetRegisterClass has a per register weight, and weight limit which must be less than the limi...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Provide an instruction scheduling machine model to CodeGen passes.
const HexagonInstrInfo * TII
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI)
Machine Loop Invariant Code false
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:32
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:592
Reg
All possible values of the reg field in the ModR/M byte.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
static cl::opt< bool > AvoidSpeculation("avoid-speculation", cl::desc("MachineLICM should avoid speculation"), cl::init(true), cl::Hidden)
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:60
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:277
bool isKill() const
MachineBasicBlock * MBB
Base class for the actual dominator tree node.
bool isCopyLike() const
Return true if the instruction behaves like a copy.
Definition: MachineInstr.h:819
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:303
NodeT * getBlock() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:273
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:131
TargetInstrInfo - Interface to description of machine instruction set.
bool isImplicitDef() const
Definition: MachineInstr.h:788
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
static cl::opt< bool > SinkInstsToAvoidSpills("sink-insts-to-avoid-spills", cl::desc("MachineLICM should sink instructions into ""loops to avoid register spills"), cl::init(false), cl::Hidden)
unsigned const MachineRegisterInfo * MRI
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineInstrBuilder & UseMI
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:279
static cl::opt< bool > HoistCheapInsts("hoist-cheap-insts", cl::desc("MachineLICM should hoist even cheap instructions"), cl::init(false), cl::Hidden)
bool isCopy() const
Definition: MachineInstr.h:807
MCRegAliasIterator enumerates all registers aliasing Reg.
Represent the analysis usage information of a pass.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
static bool InstructionStoresToFI(const MachineInstr *MI, int FI)
Return true if instruction stores to the specified frame.
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:80
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
void initializeMachineLICMPass(PassRegistry &)
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:64
bool isSafeToMove(AliasAnalysis *AA, bool &SawStore) const
Return true if it is safe to move this instruction.
bool memoperands_empty() const
Return true if we don't have any memory operands which described the the memory access done by this i...
Definition: MachineInstr.h:363
Iterator for intrusive lists based on ilist_node.
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
bool test(unsigned Idx) const
Definition: BitVector.h:323
virtual const TargetLowering * getTargetLowering() const
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:382
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void setDebugLoc(DebugLoc dl)
Replace current source information with new such.
Special value supplied for machine level alias analysis.
Machine Loop Invariant Code static false bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop)
Test if the given loop is the outer-most loop that has a unique predecessor.
unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
#define Success
Representation of each machine instruction.
Definition: MachineInstr.h:52
Basic Alias true
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:368
bool hasOneNonDBGUse(unsigned RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug instruction using the specified regis...
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
iterator end()
Definition: DenseMap.h:69
iterator find(const KeyT &Val)
Definition: DenseMap.h:127
BlockT * getLoopPredecessor() const
If the given loop's header has exactly one unique predecessor outside the loop, return it...
Definition: LoopInfoImpl.h:131
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
machinelicm
unsigned getReg() const
getReg - Returns the register number.
Machine Loop Invariant Code Motion
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
MachineBasicBlock * SplitCriticalEdge(MachineBasicBlock *Succ, Pass &P)
Split the critical edge from this block to the given successor block, and return the newly created bl...
virtual const TargetInstrInfo * getInstrInfo() const
LLVM Value Representation.
Definition: Value.h:71
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:210
#define DEBUG(X)
Definition: Debug.h:100
IRTranslator LLVM IR MI
static bool isExitBlock(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &ExitBlocks)
Return true if the specified block is in the list.
Definition: LCSSA.cpp:64
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
A specialized PseudoSourceValue for holding FixedStack values, which must include a frame index...
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
unsigned pred_size() const
This file describes how to lower LLVM code to machine code.
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:783