LLVM  3.7.0
MachineLICM.cpp
Go to the documentation of this file.
1 //===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass performs loop invariant code motion on machine instructions. We
11 // attempt to remove as much code from the body of a loop as possible.
12 //
13 // This pass is not intended to be a replacement or a complete alternative
14 // for the LLVM-IR-level LICM pass. It is only designed to hoist simple
15 // constructs that are not exposed before lowering and instruction selection.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/CodeGen/Passes.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Support/Debug.h"
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "machine-licm"
42 
// Hidden boolean option "avoid-speculation"; enabled by default.
43 static cl::opt<bool>
44 AvoidSpeculation("avoid-speculation",
45  cl::desc("MachineLICM should avoid speculation"),
46  cl::init(true), cl::Hidden);
47 
// Hidden boolean option "hoist-cheap-insts"; disabled by default.
48 static cl::opt<bool>
49 HoistCheapInsts("hoist-cheap-insts",
50  cl::desc("MachineLICM should hoist even cheap instructions"),
51  cl::init(false), cl::Hidden);
52 
// Hidden boolean option "sink-insts-to-avoid-spills"; disabled by default.
53 static cl::opt<bool>
54 SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
55  cl::desc("MachineLICM should sink instructions into "
56  "loops to avoid register spills"),
57  cl::init(false), cl::Hidden);
58 
// Pass statistics. Within the visible part of this file only
// NumPostRAHoisted is incremented (in HoistPostRA).
59 STATISTIC(NumHoisted,
60  "Number of machine instructions hoisted out of loops");
61 STATISTIC(NumLowRP,
62  "Number of instructions hoisted in low reg pressure situation");
63 STATISTIC(NumHighLatency,
64  "Number of high latency instructions hoisted");
65 STATISTIC(NumCSEed,
66  "Number of hoisted machine instructions CSEed");
67 STATISTIC(NumPostRAHoisted,
68  "Number of machine instructions hoisted out of loops post regalloc");
69 
70 namespace {
    /// MachineLICM - Machine-function pass that moves loop-invariant machine
    /// instructions out of loops into the loop preheader. The strategy
    /// (pre- or post-register-allocation) is chosen in runOnMachineFunction
    /// from MRI->isSSA().
    ///
    /// NOTE(review): several member declarations are absent from this listing
    /// even though the visible code uses them (MRI, ExitBlocks, RegPressure,
    /// BackTrace, CSEMap). Restore them from the original file before building.
71  class MachineLICM : public MachineFunctionPass {
72  const TargetInstrInfo *TII;
73  const TargetLoweringBase *TLI;
74  const TargetRegisterInfo *TRI;
75  const MachineFrameInfo *MFI;
    // NOTE(review): listing line 76 is missing; presumably the MRI member
    // assigned in runOnMachineFunction is declared there.
77  TargetSchedModel SchedModel;
78  bool PreRegAlloc;
79 
80  // Various analyses that we use...
81  AliasAnalysis *AA; // Alias analysis info.
82  MachineLoopInfo *MLI; // Current MachineLoopInfo
83  MachineDominatorTree *DT; // Machine dominator tree for the cur loop
84 
85  // State that is updated as we process loops
86  bool Changed; // True if a loop is changed.
87  bool FirstInLoop; // True if it's the first LICM in the loop.
88  MachineLoop *CurLoop; // The current loop we are working on.
89  MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
90 
91  // Exit blocks for CurLoop.
    // NOTE(review): the ExitBlocks declaration (listing line 92) is missing.
93 
    /// isExitBlock - Return true if MBB is one of the recorded exit blocks of
    /// the current loop (linear search over ExitBlocks).
94  bool isExitBlock(const MachineBasicBlock *MBB) const {
95  return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) !=
96  ExitBlocks.end();
97  }
98 
99  // Track 'estimated' register pressure.
100  SmallSet<unsigned, 32> RegSeen;
    // NOTE(review): the RegPressure declaration (listing line 101) is missing.
102 
103  // Register pressure "limit" per register pressure set. If the pressure
104  // is higher than the limit, then it's considered high.
105  SmallVector<unsigned, 8> RegLimit;
106 
107  // Register pressure on path leading from loop preheader to current BB.
    // NOTE(review): the BackTrace declaration (listing line 108) is missing.
109 
110  // For each opcode, keep a list of potential CSE instructions.
    // NOTE(review): the CSEMap declaration (listing line 111) is missing.
112 
    // Values stored in SpeculationState.
113  enum {
114  SpeculateFalse = 0,
115  SpeculateTrue = 1,
116  SpeculateUnknown = 2
117  };
118 
119  // If a MBB does not dominate loop exiting blocks then it may not be safe
120  // to hoist loads from this block.
121  // Tri-state: 0 - false, 1 - true, 2 - unknown
122  unsigned SpeculationState;
123 
124  public:
125  static char ID; // Pass identification, replacement for typeid
    // Default construction selects the pre-regalloc mode.
    // NOTE(review): listing line 128 (constructor body) is missing.
126  MachineLICM() :
127  MachineFunctionPass(ID), PreRegAlloc(true) {
129  }
130 
    // Construct with an explicit pre-/post-regalloc mode.
    // NOTE(review): listing line 133 (constructor body) is missing.
131  explicit MachineLICM(bool PreRA) :
132  MachineFunctionPass(ID), PreRegAlloc(PreRA) {
134  }
135 
136  bool runOnMachineFunction(MachineFunction &MF) override;
137 
    // NOTE(review): the body (listing lines 139-144) is missing. The pass
    // queries MachineLoopInfo, MachineDominatorTree and AliasAnalysis in
    // runOnMachineFunction, so they are presumably requested here.
138  void getAnalysisUsage(AnalysisUsage &AU) const override {
145  }
146 
    // Drop all per-function bookkeeping containers.
147  void releaseMemory() override {
148  RegSeen.clear();
149  RegPressure.clear();
150  RegLimit.clear();
151  BackTrace.clear();
152  CSEMap.clear();
153  }
154 
155  private:
156  /// CandidateInfo - Keep track of information about hoisting candidates.
    /// MI is the candidate, Def the register it defines, and FI the frame
    /// index it reloads from (INT_MIN when ProcessMI found no stack-slot load).
157  struct CandidateInfo {
158  MachineInstr *MI;
159  unsigned Def;
160  int FI;
161  CandidateInfo(MachineInstr *mi, unsigned def, int fi)
162  : MI(mi), Def(def), FI(fi) {}
163  };
164 
165  /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
166  /// invariants out to the preheader.
167  void HoistRegionPostRA();
168 
169  /// HoistPostRA - When an instruction is found to only use loop invariant
170  /// operands and is safe to hoist, this function is called to do the
171  /// dirty work.
172  void HoistPostRA(MachineInstr *MI, unsigned Def);
173 
174  /// ProcessMI - Examine the instruction for potential LICM candidate. Also
175  /// gather register def and frame object update information.
176  void ProcessMI(MachineInstr *MI,
177  BitVector &PhysRegDefs,
178  BitVector &PhysRegClobbers,
179  SmallSet<int, 32> &StoredFIs,
180  SmallVectorImpl<CandidateInfo> &Candidates);
181 
182  /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the
183  /// current loop.
184  void AddToLiveIns(unsigned Reg);
185 
186  /// IsLICMCandidate - Returns true if the instruction may be a suitable
187  /// candidate for LICM. e.g. If the instruction is a call, then it's
188  /// obviously not safe to hoist it.
189  bool IsLICMCandidate(MachineInstr &I);
190 
191  /// IsLoopInvariantInst - Returns true if the instruction is loop
192  /// invariant. I.e., all virtual register operands are defined outside of
193  /// the loop, physical registers aren't accessed (explicitly or implicitly),
194  /// and the instruction is hoistable.
195  ///
196  bool IsLoopInvariantInst(MachineInstr &I);
197 
198  /// HasLoopPHIUse - Return true if the specified instruction is used by any
199  /// phi node in the current loop.
200  bool HasLoopPHIUse(const MachineInstr *MI) const;
201 
202  /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
203  /// and a use in the current loop, return true if the target considered
204  /// it 'high'.
205  bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
206  unsigned Reg) const;
207 
208  bool IsCheapInstruction(MachineInstr &MI) const;
209 
210  /// CanCauseHighRegPressure - Visit BBs from header to current BB,
211  /// check if hoisting an instruction of the given cost matrix can cause high
212  /// register pressure.
213  bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost,
214  bool Cheap);
215 
216  /// UpdateBackTraceRegPressure - Traverse the back trace from header to
217  /// the current block and update their register pressures to reflect the
218  /// effect of hoisting MI from the current block to the preheader.
219  void UpdateBackTraceRegPressure(const MachineInstr *MI);
220 
221  /// IsProfitableToHoist - Return true if it is potentially profitable to
222  /// hoist the given loop invariant.
223  bool IsProfitableToHoist(MachineInstr &MI);
224 
225  /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
226  /// If not then a load from this mbb may not be safe to hoist.
227  bool IsGuaranteedToExecute(MachineBasicBlock *BB);
228 
    /// EnterScope - Push the current register pressure onto BackTrace.
229  void EnterScope(MachineBasicBlock *MBB);
230 
    /// ExitScope - Pop the most recent entry from BackTrace.
231  void ExitScope(MachineBasicBlock *MBB);
232 
233  /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given
234  /// dominator tree node if it's a leaf or all of its children are done. Walk
235  /// up the dominator tree to destroy ancestors which are now done.
    // NOTE(review): the remaining parameters (listing lines 237-238) are
    // missing; the definition uses OpenChildren and ParentMap.
236  void ExitScopeIfDone(MachineDomTreeNode *Node,
239 
240  /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
241  /// blocks dominated by the specified header block, and that are in the
242  /// current loop) in depth first order w.r.t the DominatorTree. This allows
243  /// us to visit definitions before uses, allowing us to hoist a loop body in
244  /// one pass without iteration.
245  ///
246  void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
247  void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
248 
249  /// SinkIntoLoop - Sink instructions into loops if profitable. This
250  /// especially tries to prevent register spills caused by register pressure
251  /// if there is little to no overhead moving instructions into loops.
252  void SinkIntoLoop();
253 
254  /// InitRegPressure - Find all virtual register references that are liveout
255  /// of the preheader to initialize the starting "register pressure". Note
256  /// this does not count live through (livein but not used) registers.
257  void InitRegPressure(MachineBasicBlock *BB);
258 
259  /// calcRegisterCost - Calculate the additional register pressure that the
260  /// registers used in MI cause.
261  ///
262  /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
263  /// figure out which usages are live-ins.
264  /// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
265  DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
266  bool ConsiderSeen,
267  bool ConsiderUnseenAsDef);
268 
269  /// UpdateRegPressure - Update estimate of register pressure after the
270  /// specified instruction.
271  void UpdateRegPressure(const MachineInstr *MI,
272  bool ConsiderUnseenAsDef = false);
273 
274  /// ExtractHoistableLoad - Unfold a load from the given machineinstr if
275  /// the load itself could be hoisted. Return the unfolded and hoistable
276  /// load, or null if the load couldn't be unfolded or if it wouldn't
277  /// be hoistable.
278  MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
279 
280  /// LookForDuplicate - Find an instruction among PrevMIs that is a
281  /// duplicate of MI. Return this instruction if it's found.
282  const MachineInstr *LookForDuplicate(const MachineInstr *MI,
283  std::vector<const MachineInstr*> &PrevMIs);
284 
285  /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on
286  /// the preheader that computes the same value. If it's found, replace all
287  /// uses with the definition of the existing instruction rather than hoisting
288  /// the instruction to the preheader.
289  bool EliminateCSE(MachineInstr *MI,
290  DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI);
291 
292  /// MayCSE - Return true if the given instruction will be CSE'd if it's
293  /// hoisted out of the loop.
294  bool MayCSE(MachineInstr *MI);
295 
296  /// Hoist - When an instruction is found to only use loop invariant operands
297  /// and is safe to hoist, this function is called to do the dirty work.
298  /// It returns true if the instruction is hoisted.
299  bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
300 
301  /// InitCSEMap - Initialize the CSE map with instructions that are in the
302  /// current loop preheader that may become duplicates of instructions that
303  /// are hoisted out of the loop.
304  void InitCSEMap(MachineBasicBlock *BB);
305 
306  /// getCurPreheader - Get the preheader for the current loop, splitting
307  /// a critical edge if needed.
308  MachineBasicBlock *getCurPreheader();
309  };
310 } // end anonymous namespace
311 
// Storage for the pass identifier declared in the class, followed by the pass
// registration macros.
// NOTE(review): listing lines 313 and 316-319 are missing, so the registration
// below is incomplete; the second description string presumably belongs to a
// closing registration macro -- confirm against the original file.
312 char MachineLICM::ID = 0;
314 INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
315  "Machine Loop Invariant Code Motion", false, false)
320  "Machine Loop Invariant Code Motion", false, false)
321 
322 /// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
323 /// loop that has a unique predecessor.
// Returns false when CurLoop itself has no loop predecessor or when any
// enclosing loop has one; returns true otherwise.
// NOTE(review): the signature line (listing line 324) is missing. The call in
// runOnMachineFunction passes a MachineLoop* and uses the result as a bool.
325  // Check whether this loop even has a unique predecessor.
326  if (!CurLoop->getLoopPredecessor())
327  return false;
328  // Ok, now check to see if any of its outer loops do.
329  for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
330  if (L->getLoopPredecessor())
331  return false;
332  // None of them did, so this is the outermost with a unique predecessor.
333  return true;
334 }
335 
/// runOnMachineFunction - Pass entry point. Caches the target hooks and
/// analyses, then processes every loop on a worklist seeded with the top-level
/// loops. Returns the Changed flag (true once something was hoisted).
336 bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
337  if (skipOptnoneFunction(*MF.getFunction()))
338  return false;
339 
340  Changed = FirstInLoop = false;
341  const TargetSubtargetInfo &ST = MF.getSubtarget();
342  TII = ST.getInstrInfo();
343  TLI = ST.getTargetLowering();
344  TRI = ST.getRegisterInfo();
345  MFI = MF.getFrameInfo();
346  MRI = &MF.getRegInfo();
347  SchedModel.init(ST.getSchedModel(), &ST, TII);
348 
    // The mode chosen at construction is overridden here: the function being
    // in SSA form selects the pre-regalloc path.
349  PreRegAlloc = MRI->isSSA();
350 
351  if (PreRegAlloc)
352  DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
353  else
354  DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
355  DEBUG(dbgs() << MF.getName() << " ********\n");
356 
357  if (PreRegAlloc) {
358  // Estimate register pressure during pre-regalloc pass.
    // One pressure counter (zeroed) and one limit per register pressure set.
359  unsigned NumRPS = TRI->getNumRegPressureSets();
360  RegPressure.resize(NumRPS);
361  std::fill(RegPressure.begin(), RegPressure.end(), 0);
362  RegLimit.resize(NumRPS);
363  for (unsigned i = 0, e = NumRPS; i != e; ++i)
364  RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
365  }
366 
367  // Get our Loop information...
368  MLI = &getAnalysis<MachineLoopInfo>();
369  DT = &getAnalysis<MachineDominatorTree>();
370  AA = &getAnalysis<AliasAnalysis>();
371 
372  SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
373  while (!Worklist.empty()) {
374  CurLoop = Worklist.pop_back_val();
375  CurPreheader = nullptr;
376  ExitBlocks.clear();
377 
378  // If this is done before regalloc, only visit outer-most preheader-sporting
379  // loops.
    // A rejected loop is not processed itself; its sub-loops are queued instead.
380  if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
381  Worklist.append(CurLoop->begin(), CurLoop->end());
382  continue;
383  }
384 
385  CurLoop->getExitBlocks(ExitBlocks);
386 
387  if (!PreRegAlloc)
388  HoistRegionPostRA();
389  else {
390  // CSEMap is initialized for loop header when the first instruction is
391  // being hoisted.
392  MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
393  FirstInLoop = true;
394  HoistOutOfLoop(N);
395  CSEMap.clear();
396 
    // NOTE(review): listing line 397 is missing. As shown, SinkIntoLoop() runs
    // unconditionally; the missing line presumably guards it with the
    // sink-insts-to-avoid-spills option -- confirm against the original file.
398  SinkIntoLoop();
399  }
400  }
401 
402  return Changed;
403 }
404 
405 /// InstructionStoresToFI - Return true if instruction stores to the
406 /// specified frame.
// Scans MI's memory operands; only store operands that carry a pseudo source
// value are considered, and a match requires a FixedStackPseudoSourceValue
// whose frame index equals FI.
// NOTE(review): listing lines 408 (start of the for statement declaring `o`)
// and 412 (start of the if statement declaring `Value`) are missing.
407 static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
409  oe = MI->memoperands_end(); o != oe; ++o) {
410  if (!(*o)->isStore() || !(*o)->getPseudoValue())
411  continue;
413  dyn_cast<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) {
414  if (Value->getFrameIndex() == FI)
415  return true;
416  }
417  }
418  return false;
419 }
420 
421 /// ProcessMI - Examine the instruction for potential LICM candidate. Also
422 /// gather register def and frame object update information.
///
/// \param MI              Instruction to examine.
/// \param PhysRegDefs     Receives every register (and alias) defined by MI.
/// \param PhysRegClobbers Receives registers clobbered by a regmask, written
///                        by an implicit def, or defined more than once.
/// \param StoredFIs       Receives spill-slot frame indexes that MI stores to.
/// \param Candidates      Receives a CandidateInfo for MI when it qualifies.
423 void MachineLICM::ProcessMI(MachineInstr *MI,
424  BitVector &PhysRegDefs,
425  BitVector &PhysRegClobbers,
426  SmallSet<int, 32> &StoredFIs,
427  SmallVectorImpl<CandidateInfo> &Candidates) {
428  bool RuledOut = false;
429  bool HasNonInvariantUse = false;
430  unsigned Def = 0;
431  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
432  const MachineOperand &MO = MI->getOperand(i);
433  if (MO.isFI()) {
434  // Remember if the instruction stores to the frame index.
435  int FI = MO.getIndex();
436  if (!StoredFIs.count(FI) &&
437  MFI->isSpillSlotObjectIndex(FI) &&
438  InstructionStoresToFI(MI, FI))
439  StoredFIs.insert(FI);
    // Any frame-index operand is treated as a non-invariant use.
440  HasNonInvariantUse = true;
441  continue;
442  }
443 
444  // We can't hoist an instruction defining a physreg that is clobbered in
445  // the loop.
446  if (MO.isRegMask()) {
447  PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
448  continue;
449  }
450 
451  if (!MO.isReg())
452  continue;
453  unsigned Reg = MO.getReg();
454  if (!Reg)
455  continue;
    // NOTE(review): listing line 456 (the opening of this assertion) is missing.
457  "Not expecting virtual register!");
458 
459  if (!MO.isDef()) {
460  if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
461  // If it's using a non-loop-invariant register, then it's obviously not
462  // safe to hoist.
463  HasNonInvariantUse = true;
464  continue;
465  }
466 
467  if (MO.isImplicit()) {
468  for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
469  PhysRegClobbers.set(*AI);
470  if (!MO.isDead())
471  // Non-dead implicit def? This cannot be hoisted.
472  RuledOut = true;
473  // No need to check if a dead implicit def is also defined by
474  // another instruction.
475  continue;
476  }
477 
478  // FIXME: For now, avoid instructions with multiple defs, unless
479  // it's a dead implicit def.
480  if (Def)
481  RuledOut = true;
482  else
483  Def = Reg;
484 
485  // If we have already seen another instruction that defines the same
486  // register, then this is not safe. Two defs is indicated by setting a
487  // PhysRegClobbers bit.
488  for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
489  if (PhysRegDefs.test(*AS))
490  PhysRegClobbers.set(*AS);
491  PhysRegDefs.set(*AS);
492  }
493  if (PhysRegClobbers.test(Reg))
494  // MI defined register is seen defined by another instruction in
495  // the loop, it cannot be a LICM candidate.
496  RuledOut = true;
497  }
498 
499  // Only consider reloads for now and remats which do not have register
500  // operands. FIXME: Consider unfold load folding instructions.
501  if (Def && !RuledOut) {
    // FI stays INT_MIN unless the reload check below runs and sets it;
    // HoistRegionPostRA uses INT_MIN to mean "no frame index recorded".
502  int FI = INT_MIN;
503  if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
504  (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
505  Candidates.push_back(CandidateInfo(MI, Def, FI));
506  }
507 }
508 
509 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
510 /// invariants out to the preheader.
// Works in two phases: first every instruction of every block in CurLoop is
// fed to ProcessMI to collect register defs/clobbers, stored frame indexes and
// candidates; then each candidate is re-checked against that loop-wide
// information and hoisted with HoistPostRA if it is still safe.
511 void MachineLICM::HoistRegionPostRA() {
512  MachineBasicBlock *Preheader = getCurPreheader();
513  if (!Preheader)
514  return;
515 
516  unsigned NumRegs = TRI->getNumRegs();
517  BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
518  BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
519 
    // NOTE(review): the declaration of Candidates (listing line 520) is missing.
521  SmallSet<int, 32> StoredFIs;
522 
523  // Walk the entire region, count number of defs for each register, and
524  // collect potential LICM candidates.
525  const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
526  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
527  MachineBasicBlock *BB = Blocks[i];
528 
529  // If the header of the loop containing this basic block is a landing pad,
530  // then don't try to hoist instructions out of this loop.
531  const MachineLoop *ML = MLI->getLoopFor(BB);
532  if (ML && ML->getHeader()->isLandingPad()) continue;
533 
534  // Conservatively treat live-in's as an external def.
535  // FIXME: That means a reload that is reused in successor block(s) will not
536  // be LICM'ed.
    // NOTE(review): the start of this for statement (listing line 537) is missing.
538  E = BB->livein_end(); I != E; ++I) {
539  unsigned Reg = *I;
540  for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
541  PhysRegDefs.set(*AI);
542  }
543 
    // Reset the cached IsGuaranteedToExecute answer for the new block.
544  SpeculationState = SpeculateUnknown;
    // NOTE(review): the start of this for statement (listing line 545) is missing.
546  MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
547  MachineInstr *MI = &*MII;
548  ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
549  }
550  }
551 
552  // Gather the registers read / clobbered by the terminator.
553  BitVector TermRegs(NumRegs);
    // NOTE(review): the declaration of TI (listing line 554) is missing; it is
    // compared against Preheader->end() below.
555  if (TI != Preheader->end()) {
556  for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) {
557  const MachineOperand &MO = TI->getOperand(i);
558  if (!MO.isReg())
559  continue;
560  unsigned Reg = MO.getReg();
561  if (!Reg)
562  continue;
563  for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
564  TermRegs.set(*AI);
565  }
566  }
567 
568  // Now evaluate whether the potential candidates qualify.
569  // 1. Check if the candidate defined register is defined by another
570  // instruction in the loop.
571  // 2. If the candidate is a load from stack slot (always true for now),
572  // check if the slot is stored anywhere in the loop.
573  // 3. Make sure candidate def should not clobber
574  // registers read by the terminator. Similarly its def should not be
575  // clobbered by the terminator.
576  for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
    // INT_MIN is the "no frame index" marker set by ProcessMI.
577  if (Candidates[i].FI != INT_MIN &&
578  StoredFIs.count(Candidates[i].FI))
579  continue;
580 
581  unsigned Def = Candidates[i].Def;
582  if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
583  bool Safe = true;
584  MachineInstr *MI = Candidates[i].MI;
    // Uses are re-checked against the complete def/clobber sets, which now
    // cover the whole loop rather than only the instructions seen so far.
585  for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
586  const MachineOperand &MO = MI->getOperand(j);
587  if (!MO.isReg() || MO.isDef() || !MO.getReg())
588  continue;
589  unsigned Reg = MO.getReg();
590  if (PhysRegDefs.test(Reg) ||
591  PhysRegClobbers.test(Reg)) {
592  // If it's using a non-loop-invariant register, then it's obviously
593  // not safe to hoist.
594  Safe = false;
595  break;
596  }
597  }
598  if (Safe)
599  HoistPostRA(MI, Candidates[i].Def);
600  }
601  }
602 }
603 
604 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
605 /// loop, and make sure it is not killed by any instructions in the loop.
// Kill flags are cleared on use operands whose register is Reg or for which
// TRI->isSuperRegister(Reg, <operand register>) holds.
606 void MachineLICM::AddToLiveIns(unsigned Reg) {
607  const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
608  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
609  MachineBasicBlock *BB = Blocks[i];
610  if (!BB->isLiveIn(Reg))
611  BB->addLiveIn(Reg);
    // NOTE(review): the start of this for statement (listing line 612) is missing.
613  MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
614  MachineInstr *MI = &*MII;
    // Note: this inner `i`/`e` pair shadows the block-loop counters above.
615  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
616  MachineOperand &MO = MI->getOperand(i);
617  if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
618  if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
619  MO.setIsKill(false);
620  }
621  }
622  }
623 }
624 
625 /// HoistPostRA - When an instruction is found to only use loop invariant
626 /// operands that is safe to hoist, this instruction is called to do the
627 /// dirty work.
628 void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
629  MachineBasicBlock *Preheader = getCurPreheader();
630 
631  // Now move the instructions to the predecessor, inserting it before any
632  // terminator instructions.
633  DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#"
634  << MI->getParent()->getNumber() << ": " << *MI);
635 
636  // Splice the instruction to the preheader.
637  MachineBasicBlock *MBB = MI->getParent();
638  Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
639 
640  // Add register to livein list to all the BBs in the current loop since a
641  // loop invariant must be kept live throughout the whole loop. This is
642  // important to ensure later passes do not scavenge the def register.
643  AddToLiveIns(Def);
644 
645  ++NumPostRAHoisted;
646  Changed = true;
647 }
648 
649 // IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
650 // If not then a load from this mbb may not be safe to hoist.
651 bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
652  if (SpeculationState != SpeculateUnknown)
653  return SpeculationState == SpeculateFalse;
654 
655  if (BB != CurLoop->getHeader()) {
656  // Check loop exiting blocks.
657  SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
658  CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
659  for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i)
660  if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) {
661  SpeculationState = SpeculateTrue;
662  return false;
663  }
664  }
665 
666  SpeculationState = SpeculateFalse;
667  return true;
668 }
669 
670 void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
671  DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
672 
673  // Remember livein register pressure.
674  BackTrace.push_back(RegPressure);
675 }
676 
677 void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
678  DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
679  BackTrace.pop_back();
680 }
681 
682 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
683 /// dominator tree node if it's a leaf or all of its children are done. Walk
684 /// up the dominator tree to destroy ancestors which are now done.
// NOTE(review): the rest of the parameter list (listing lines 686-687) is
// missing; the body indexes OpenChildren and ParentMap by dominator-tree node.
685 void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
    // Node still has unvisited children, so its scope stays open.
688  if (OpenChildren[Node])
689  return;
690 
691  // Pop scope.
692  ExitScope(Node->getBlock());
693 
694  // Now traverse upwards to pop ancestors whose offsprings are all done.
695  while (MachineDomTreeNode *Parent = ParentMap[Node]) {
    // One more child of Parent has finished; stop climbing while others remain.
696  unsigned Left = --OpenChildren[Parent];
697  if (Left != 0)
698  break;
699  ExitScope(Parent->getBlock());
700  Node = Parent;
701  }
702 }
703 
704 /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
705 /// blocks dominated by the specified header block, and that are in the
706 /// current loop) in depth first order w.r.t the DominatorTree. This allows
707 /// us to visit definitions before uses, allowing us to hoist a loop body in
708 /// one pass without iteration.
709 ///
// The first loop only records the visit order (Scopes), each node's parent and
// its count of open children; the second loop walks Scopes and tries to hoist
// every instruction, updating the register pressure estimate for those that
// stay.
710 void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
711  MachineBasicBlock *Preheader = getCurPreheader();
712  if (!Preheader)
713  return;
714 
    // NOTE(review): listing lines 715-718 are missing; the declarations of
    // Scopes, WorkList, ParentMap and OpenChildren presumably live there.
719 
720  // Perform a DFS walk to determine the order of visit.
721  WorkList.push_back(HeaderN);
722  while (!WorkList.empty()) {
723  MachineDomTreeNode *Node = WorkList.pop_back_val();
724  assert(Node && "Null dominator tree node?");
725  MachineBasicBlock *BB = Node->getBlock();
726 
727  // If the header of the loop containing this basic block is a landing pad,
728  // then don't try to hoist instructions out of this loop.
729  const MachineLoop *ML = MLI->getLoopFor(BB);
730  if (ML && ML->getHeader()->isLandingPad())
731  continue;
732 
733  // If this subregion is not in the top level loop at all, exit.
734  if (!CurLoop->contains(BB))
735  continue;
736 
737  Scopes.push_back(Node);
738  const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
739  unsigned NumChildren = Children.size();
740 
741  // Don't hoist things out of a large switch statement. This often causes
742  // code to be hoisted that wasn't going to be executed, and increases
743  // register pressure in a situation where it's likely to matter.
    // With the child count forced to zero, none of the block's dominator-tree
    // children are queued, so they are never visited.
744  if (BB->succ_size() >= 25)
745  NumChildren = 0;
746 
747  OpenChildren[Node] = NumChildren;
748  // Add children in reverse order as then the next popped worklist node is
749  // the first child of this node. This means we ultimately traverse the
750  // DOM tree in exactly the same order as if we'd recursed.
751  for (int i = (int)NumChildren-1; i >= 0; --i) {
752  MachineDomTreeNode *Child = Children[i];
753  ParentMap[Child] = Node;
754  WorkList.push_back(Child);
755  }
756  }
757 
758  if (Scopes.size() == 0)
759  return;
760 
761  // Compute registers which are livein into the loop headers.
762  RegSeen.clear();
763  BackTrace.clear();
764  InitRegPressure(Preheader);
765 
766  // Now perform LICM.
767  for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
768  MachineDomTreeNode *Node = Scopes[i];
769  MachineBasicBlock *MBB = Node->getBlock();
770 
771  EnterScope(MBB);
772 
773  // Process the block
774  SpeculationState = SpeculateUnknown;
    // NOTE(review): the start of this for statement (listing line 775) is missing.
776  MII = MBB->begin(), E = MBB->end(); MII != E; ) {
    // Advance first: Hoist() may move MI out of this block.
777  MachineBasicBlock::iterator NextMII = MII; ++NextMII;
778  MachineInstr *MI = &*MII;
779  if (!Hoist(MI, Preheader))
780  UpdateRegPressure(MI);
781  MII = NextMII;
782  }
783 
784  // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
785  ExitScopeIfDone(Node, OpenChildren, ParentMap);
786  }
787 }
788 
/// SinkIntoLoop - Move preheader instructions into the loop when all users of
/// their single definition are copies. Each such instruction is spliced to the
/// first non-PHI position of the nearest common dominator of its users.
789 void MachineLICM::SinkIntoLoop() {
790  MachineBasicBlock *Preheader = getCurPreheader();
791  if (!Preheader)
792  return;
793 
    // NOTE(review): the declaration of Candidates (listing line 794) is missing.
795  for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
796  I != Preheader->instr_end(); ++I) {
797  // We need to ensure that we can safely move this instruction into the loop.
798  // As such, it must not have side-effects, e.g. such as a call has.
799  if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(I))
800  Candidates.push_back(I);
801  }
802 
803  for (MachineInstr *I : Candidates) {
    // Only instructions whose operand 0 is a register def with exactly one
    // definition are considered.
804  const MachineOperand &MO = I->getOperand(0);
805  if (!MO.isDef() || !MO.isReg() || !MO.getReg())
806  continue;
807  if (!MRI->hasOneDef(MO.getReg()))
808  continue;
809  bool CanSink = true;
    // B accumulates the nearest common dominator of all user blocks.
810  MachineBasicBlock *B = nullptr;
811  for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
812  // FIXME: Come up with a proper cost model that estimates whether sinking
813  // the instruction (and thus possibly executing it on every loop
814  // iteration) is more expensive than a register.
815  // For now assumes that copies are cheap and thus almost always worth it.
816  if (!MI.isCopy()) {
817  CanSink = false;
818  break;
819  }
820  if (!B) {
821  B = MI.getParent();
822  continue;
823  }
824  B = DT->findNearestCommonDominator(B, MI.getParent());
825  if (!B) {
826  CanSink = false;
827  break;
828  }
829  }
    // Nothing to do when there are no users, sinking was ruled out, or the
    // destination is the preheader itself.
830  if (!CanSink || !B || B == Preheader)
831  continue;
832  B->splice(B->getFirstNonPHI(), Preheader, I);
833  }
834 }
835 
836 static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
837  return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
838 }
839 
840 /// InitRegPressure - Find all virtual register references that are liveout of
841 /// the preheader to initialize the starting "register pressure". Note this
842 /// does not count live through (livein but not used) registers.
843 void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
    // Start from zero; this also runs on every recursive call, before the
    // predecessor is scanned.
844  std::fill(RegPressure.begin(), RegPressure.end(), 0);
845 
846  // If the preheader has only a single predecessor and it ends with a
847  // fallthrough or an unconditional branch, then scan its predecessor for live
848  // defs as well. This happens whenever the preheader is created by splitting
849  // the critical edge from the loop predecessor to the loop header.
850  if (BB->pred_size() == 1) {
851  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    // NOTE(review): the declaration of Cond (listing line 852) is missing.
853  if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
854  InitRegPressure(*BB->pred_begin());
855  }
856 
    // Accumulate the pressure contribution of every instruction in BB.
857  for (const MachineInstr &MI : *BB)
858  UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true);
859 }
860 
861 /// UpdateRegPressure - Update estimate of register pressure after the
862 /// specified instruction.
863 void MachineLICM::UpdateRegPressure(const MachineInstr *MI,
864  bool ConsiderUnseenAsDef) {
865  auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
866  for (const auto &RPIdAndCost : Cost) {
867  unsigned Class = RPIdAndCost.first;
868  if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
869  RegPressure[Class] = 0;
870  else
871  RegPressure[Class] += RPIdAndCost.second;
872  }
873 }
874 
/// calcRegisterCost - Compute, per register pressure set, the pressure change
/// caused by the explicit register operands of MI. Defs add the register
/// class weight; a killed use of an already-seen register subtracts it; an
/// unseen, non-killed use adds it when ConsiderUnseenAsDef is set.
// NOTE(review): listing lines 875 (return type), 878 (declaration of Cost) and
// 886 (the condition guarding the `continue` after Reg is read) are missing.
876 MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
877  bool ConsiderUnseenAsDef) {
    // IMPLICIT_DEF instructions contribute nothing.
879  if (MI->isImplicitDef())
880  return Cost;
881  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
882  const MachineOperand &MO = MI->getOperand(i);
883  if (!MO.isReg() || MO.isImplicit())
884  continue;
885  unsigned Reg = MO.getReg();
887  continue;
888 
889  // FIXME: It seems bad to use RegSeen only for some of these calculations.
    // isNew is true only when ConsiderSeen is set and Reg was not yet in RegSeen.
890  bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;
891  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
892 
893  RegClassWeight W = TRI->getRegClassWeight(RC);
894  int RCCost = 0;
895  if (MO.isDef())
896  RCCost = W.RegWeight;
897  else {
898  bool isKill = isOperandKill(MO, MRI);
899  if (isNew && !isKill && ConsiderUnseenAsDef)
900  // Haven't seen this, it must be a livein.
901  RCCost = W.RegWeight;
902  else if (!isNew && isKill)
903  RCCost = -W.RegWeight;
904  }
905  if (RCCost == 0)
906  continue;
    // Apply the cost to every pressure set of the register class; the list is
    // terminated by -1.
907  const int *PS = TRI->getRegClassPressureSets(RC);
908  for (; *PS != -1; ++PS) {
909  if (Cost.find(*PS) == Cost.end())
910  Cost[*PS] = RCCost;
911  else
912  Cost[*PS] += RCCost;
913  }
914  }
915  return Cost;
916 }
917 
918 /// isLoadFromGOTOrConstantPool - Return true if this machine instruction
919 /// loads from global offset table or constant pool.
921  assert (MI.mayLoad() && "Expected MI that loads!");
923  E = MI.memoperands_end(); I != E; ++I) {
924  if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) {
925  if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool())
926  return true;
927  }
928  }
929  return false;
930 }
931 
932 /// IsLICMCandidate - Returns true if the instruction may be a suitable
933 /// candidate for LICM. e.g. If the instruction is a call, then it's obviously
934 /// not safe to hoist it.
935 bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
936  // Check if it's safe to move the instruction.
937  bool DontMoveAcrossStore = true;
938  if (!I.isSafeToMove(AA, DontMoveAcrossStore))
939  return false;
940 
941  // If it is load then check if it is guaranteed to execute by making sure that
942  // it dominates all exiting blocks. If it doesn't, then there is a path out of
943  // the loop which does not execute this load, so we can't hoist it. Loads
944  // from constant memory are not safe to speculate all the time, for example
945  // indexed load from a jump table.
946  // Stores and side effects are already checked by isSafeToMove.
947  if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) &&
948  !IsGuaranteedToExecute(I.getParent()))
949  return false;
950 
951  return true;
952 }
953 
954 /// IsLoopInvariantInst - Returns true if the instruction is loop
955 /// invariant. I.e., all virtual register operands are defined outside of the
956 /// loop, physical registers aren't accessed explicitly, and there are no side
957 /// effects that aren't captured by the operands or other flags.
958 ///
959 bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
960  if (!IsLICMCandidate(I))
961  return false;
962 
963  // The instruction is loop invariant if all of its operands are.
964  for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
965  const MachineOperand &MO = I.getOperand(i);
966 
967  if (!MO.isReg())
968  continue;
969 
970  unsigned Reg = MO.getReg();
971  if (Reg == 0) continue;
972 
973  // Don't hoist an instruction that uses or defines a physical register.
974  if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
975  if (MO.isUse()) {
976  // If the physreg has no defs anywhere, it's just an ambient register
977  // and we can freely move its uses. Alternatively, if it's allocatable,
978  // it could get allocated to something with a def during allocation.
979  if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent()))
980  return false;
981  // Otherwise it's safe to move.
982  continue;
983  } else if (!MO.isDead()) {
984  // A def that isn't dead. We can't move it.
985  return false;
986  } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
987  // If the reg is live into the loop, we can't hoist an instruction
988  // which would clobber it.
989  return false;
990  }
991  }
992 
993  if (!MO.isUse())
994  continue;
995 
996  assert(MRI->getVRegDef(Reg) &&
997  "Machine instr not mapped for this vreg?!");
998 
999  // If the loop contains the definition of an operand, then the instruction
1000  // isn't loop invariant.
1001  if (CurLoop->contains(MRI->getVRegDef(Reg)))
1002  return false;
1003  }
1004 
1005  // If we got this far, the instruction is loop invariant!
1006  return true;
1007 }
1008 
1009 
1010 /// HasLoopPHIUse - Return true if the specified instruction is used by a
1011 /// phi node and hoisting it could cause a copy to be inserted.
1012 bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
1014  do {
1015  MI = Work.pop_back_val();
1016  for (const MachineOperand &MO : MI->operands()) {
1017  if (!MO.isReg() || !MO.isDef())
1018  continue;
1019  unsigned Reg = MO.getReg();
1020  if (!TargetRegisterInfo::isVirtualRegister(Reg))
1021  continue;
1022  for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
1023  // A PHI may cause a copy to be inserted.
1024  if (UseMI.isPHI()) {
1025  // A PHI inside the loop causes a copy because the live range of Reg is
1026  // extended across the PHI.
1027  if (CurLoop->contains(&UseMI))
1028  return true;
1029  // A PHI in an exit block can cause a copy to be inserted if the PHI
1030  // has multiple predecessors in the loop with different values.
1031  // For now, approximate by rejecting all exit blocks.
1032  if (isExitBlock(UseMI.getParent()))
1033  return true;
1034  continue;
1035  }
1036  // Look past copies as well.
1037  if (UseMI.isCopy() && CurLoop->contains(&UseMI))
1038  Work.push_back(&UseMI);
1039  }
1040  }
1041  } while (!Work.empty());
1042  return false;
1043 }
1044 
1045 /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
1046 /// and an use in the current loop, return true if the target considered
1047 /// it 'high'.
1048 bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
1049  unsigned DefIdx, unsigned Reg) const {
1050  if (MRI->use_nodbg_empty(Reg))
1051  return false;
1052 
1053  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
1054  if (UseMI.isCopyLike())
1055  continue;
1056  if (!CurLoop->contains(UseMI.getParent()))
1057  continue;
1058  for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) {
1059  const MachineOperand &MO = UseMI.getOperand(i);
1060  if (!MO.isReg() || !MO.isUse())
1061  continue;
1062  unsigned MOReg = MO.getReg();
1063  if (MOReg != Reg)
1064  continue;
1065 
1066  if (TII->hasHighOperandLatency(SchedModel, MRI, &MI, DefIdx, &UseMI, i))
1067  return true;
1068  }
1069 
1070  // Only look at the first in loop use.
1071  break;
1072  }
1073 
1074  return false;
1075 }
1076 
1077 /// IsCheapInstruction - Return true if the instruction is marked "cheap" or
1078 /// the operand latency between its def and a use is one or less.
1079 bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
1080  if (TII->isAsCheapAsAMove(&MI) || MI.isCopyLike())
1081  return true;
1082 
1083  bool isCheap = false;
1084  unsigned NumDefs = MI.getDesc().getNumDefs();
1085  for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
1086  MachineOperand &DefMO = MI.getOperand(i);
1087  if (!DefMO.isReg() || !DefMO.isDef())
1088  continue;
1089  --NumDefs;
1090  unsigned Reg = DefMO.getReg();
1091  if (TargetRegisterInfo::isPhysicalRegister(Reg))
1092  continue;
1093 
1094  if (!TII->hasLowDefLatency(SchedModel, &MI, i))
1095  return false;
1096  isCheap = true;
1097  }
1098 
1099  return isCheap;
1100 }
1101 
1102 /// CanCauseHighRegPressure - Visit BBs from header to current BB, check
1103 /// if hoisting an instruction of the given cost matrix can cause high
1104 /// register pressure.
1105 bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
1106  bool CheapInstr) {
1107  for (const auto &RPIdAndCost : Cost) {
1108  if (RPIdAndCost.second <= 0)
1109  continue;
1110 
1111  unsigned Class = RPIdAndCost.first;
1112  int Limit = RegLimit[Class];
1113 
1114  // Don't hoist cheap instructions if they would increase register pressure,
1115  // even if we're under the limit.
1116  if (CheapInstr && !HoistCheapInsts)
1117  return true;
1118 
1119  for (const auto &RP : BackTrace)
1120  if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
1121  return true;
1122  }
1123 
1124  return false;
1125 }
1126 
1127 /// UpdateBackTraceRegPressure - Traverse the back trace from header to the
1128 /// current block and update their register pressures to reflect the effect
1129 /// of hoisting MI from the current block to the preheader.
1130 void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
1131  // First compute the 'cost' of the instruction, i.e. its contribution
1132  // to register pressure.
1133  auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
1134  /*ConsiderUnseenAsDef=*/false);
1135 
1136  // Update register pressure of blocks from loop header to current block.
1137  for (auto &RP : BackTrace)
1138  for (const auto &RPIdAndCost : Cost)
1139  RP[RPIdAndCost.first] += RPIdAndCost.second;
1140 }
1141 
1142 /// IsProfitableToHoist - Return true if it is potentially profitable to hoist
1143 /// the given loop invariant.
1144 bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
1145  if (MI.isImplicitDef())
1146  return true;
1147 
1148  // Besides removing computation from the loop, hoisting an instruction has
1149  // these effects:
1150  //
1151  // - The value defined by the instruction becomes live across the entire
1152  // loop. This increases register pressure in the loop.
1153  //
1154  // - If the value is used by a PHI in the loop, a copy will be required for
1155  // lowering the PHI after extending the live range.
1156  //
1157  // - When hoisting the last use of a value in the loop, that value no longer
1158  // needs to be live in the loop. This lowers register pressure in the loop.
1159 
1160  bool CheapInstr = IsCheapInstruction(MI);
1161  bool CreatesCopy = HasLoopPHIUse(&MI);
1162 
1163  // Don't hoist a cheap instruction if it would create a copy in the loop.
1164  if (CheapInstr && CreatesCopy) {
1165  DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
1166  return false;
1167  }
1168 
1169  // Rematerializable instructions should always be hoisted since the register
1170  // allocator can just pull them down again when needed.
1171  if (TII->isTriviallyReMaterializable(&MI, AA))
1172  return true;
1173 
1174  // FIXME: If there are long latency loop-invariant instructions inside the
1175  // loop at this point, why didn't the optimizer's LICM hoist them?
1176  for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
1177  const MachineOperand &MO = MI.getOperand(i);
1178  if (!MO.isReg() || MO.isImplicit())
1179  continue;
1180  unsigned Reg = MO.getReg();
1181  if (!TargetRegisterInfo::isVirtualRegister(Reg))
1182  continue;
1183  if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
1184  DEBUG(dbgs() << "Hoist High Latency: " << MI);
1185  ++NumHighLatency;
1186  return true;
1187  }
1188  }
1189 
1190  // Estimate register pressure to determine whether to LICM the instruction.
1191  // In low register pressure situation, we can be more aggressive about
1192  // hoisting. Also, favors hoisting long latency instructions even in
1193  // moderately high pressure situation.
1194  // Cheap instructions will only be hoisted if they don't increase register
1195  // pressure at all.
1196  auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false,
1197  /*ConsiderUnseenAsDef=*/false);
1198 
1199  // Visit BBs from header to current BB, if hoisting this doesn't cause
1200  // high register pressure, then it's safe to proceed.
1201  if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
1202  DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
1203  ++NumLowRP;
1204  return true;
1205  }
1206 
1207  // Don't risk increasing register pressure if it would create copies.
1208  if (CreatesCopy) {
1209  DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
1210  return false;
1211  }
1212 
1213  // Do not "speculate" in high register pressure situation. If an
1214  // instruction is not guaranteed to be executed in the loop, it's best to be
1215  // conservative.
1216  if (AvoidSpeculation &&
1217  (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
1218  DEBUG(dbgs() << "Won't speculate: " << MI);
1219  return false;
1220  }
1221 
1222  // High register pressure situation, only hoist if the instruction is going
1223  // to be remat'ed.
1224  if (!TII->isTriviallyReMaterializable(&MI, AA) &&
1225  !MI.isInvariantLoad(AA)) {
1226  DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
1227  return false;
1228  }
1229 
1230  return true;
1231 }
1232 
1233 MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
1234  // Don't unfold simple loads.
1235  if (MI->canFoldAsLoad())
1236  return nullptr;
1237 
1238  // If not, we may be able to unfold a load and hoist that.
1239  // First test whether the instruction is loading from an amenable
1240  // memory location.
1241  if (!MI->isInvariantLoad(AA))
1242  return nullptr;
1243 
1244  // Next determine the register class for a temporary register.
1245  unsigned LoadRegIndex;
1246  unsigned NewOpc =
1247  TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
1248  /*UnfoldLoad=*/true,
1249  /*UnfoldStore=*/false,
1250  &LoadRegIndex);
1251  if (NewOpc == 0) return nullptr;
1252  const MCInstrDesc &MID = TII->get(NewOpc);
1253  if (MID.getNumDefs() != 1) return nullptr;
1254  MachineFunction &MF = *MI->getParent()->getParent();
1255  const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
1256  // Ok, we're unfolding. Create a temporary register and do the unfold.
1257  unsigned Reg = MRI->createVirtualRegister(RC);
1258 
1260  bool Success =
1261  TII->unfoldMemoryOperand(MF, MI, Reg,
1262  /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
1263  NewMIs);
1264  (void)Success;
1265  assert(Success &&
1266  "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
1267  "succeeded!");
1268  assert(NewMIs.size() == 2 &&
1269  "Unfolded a load into multiple instructions!");
1270  MachineBasicBlock *MBB = MI->getParent();
1272  MBB->insert(Pos, NewMIs[0]);
1273  MBB->insert(Pos, NewMIs[1]);
1274  // If unfolding produced a load that wasn't loop-invariant or profitable to
1275  // hoist, discard the new instructions and bail.
1276  if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
1277  NewMIs[0]->eraseFromParent();
1278  NewMIs[1]->eraseFromParent();
1279  return nullptr;
1280  }
1281 
1282  // Update register pressure for the unfolded instruction.
1283  UpdateRegPressure(NewMIs[1]);
1284 
1285  // Otherwise we successfully unfolded a load that we can hoist.
1286  MI->eraseFromParent();
1287  return NewMIs[0];
1288 }
1289 
1290 void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
1291  for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
1292  const MachineInstr *MI = &*I;
1293  unsigned Opcode = MI->getOpcode();
1294  CSEMap[Opcode].push_back(MI);
1295  }
1296 }
1297 
1298 const MachineInstr*
1299 MachineLICM::LookForDuplicate(const MachineInstr *MI,
1300  std::vector<const MachineInstr*> &PrevMIs) {
1301  for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
1302  const MachineInstr *PrevMI = PrevMIs[i];
1303  if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : nullptr)))
1304  return PrevMI;
1305  }
1306  return nullptr;
1307 }
1308 
1309 bool MachineLICM::EliminateCSE(MachineInstr *MI,
1310  DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
1311  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1312  // the undef property onto uses.
1313  if (CI == CSEMap.end() || MI->isImplicitDef())
1314  return false;
1315 
1316  if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
1317  DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
1318 
1319  // Replace virtual registers defined by MI by their counterparts defined
1320  // by Dup.
1322  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1323  const MachineOperand &MO = MI->getOperand(i);
1324 
1325  // Physical registers may not differ here.
1326  assert((!MO.isReg() || MO.getReg() == 0 ||
1327  !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
1328  MO.getReg() == Dup->getOperand(i).getReg()) &&
1329  "Instructions with different phys regs are not identical!");
1330 
1331  if (MO.isReg() && MO.isDef() &&
1332  !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
1333  Defs.push_back(i);
1334  }
1335 
1337  for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
1338  unsigned Idx = Defs[i];
1339  unsigned Reg = MI->getOperand(Idx).getReg();
1340  unsigned DupReg = Dup->getOperand(Idx).getReg();
1341  OrigRCs.push_back(MRI->getRegClass(DupReg));
1342 
1343  if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
1344  // Restore old RCs if more than one defs.
1345  for (unsigned j = 0; j != i; ++j)
1346  MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
1347  return false;
1348  }
1349  }
1350 
1351  for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
1352  unsigned Idx = Defs[i];
1353  unsigned Reg = MI->getOperand(Idx).getReg();
1354  unsigned DupReg = Dup->getOperand(Idx).getReg();
1355  MRI->replaceRegWith(Reg, DupReg);
1356  MRI->clearKillFlags(DupReg);
1357  }
1358 
1359  MI->eraseFromParent();
1360  ++NumCSEed;
1361  return true;
1362  }
1363  return false;
1364 }
1365 
1366 /// MayCSE - Return true if the given instruction will be CSE'd if it's
1367 /// hoisted out of the loop.
1368 bool MachineLICM::MayCSE(MachineInstr *MI) {
1369  unsigned Opcode = MI->getOpcode();
1371  CI = CSEMap.find(Opcode);
1372  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1373  // the undef property onto uses.
1374  if (CI == CSEMap.end() || MI->isImplicitDef())
1375  return false;
1376 
1377  return LookForDuplicate(MI, CI->second) != nullptr;
1378 }
1379 
1380 /// Hoist - When an instruction is found to use only loop invariant operands
1381 /// that are safe to hoist, this instruction is called to do the dirty work.
1382 ///
1383 bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
1384  // First check whether we should hoist this instruction.
1385  if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
1386  // If not, try unfolding a hoistable load.
1387  MI = ExtractHoistableLoad(MI);
1388  if (!MI) return false;
1389  }
1390 
1391  // Now move the instructions to the predecessor, inserting it before any
1392  // terminator instructions.
1393  DEBUG({
1394  dbgs() << "Hoisting " << *MI;
1395  if (Preheader->getBasicBlock())
1396  dbgs() << " to MachineBasicBlock "
1397  << Preheader->getName();
1398  if (MI->getParent()->getBasicBlock())
1399  dbgs() << " from MachineBasicBlock "
1400  << MI->getParent()->getName();
1401  dbgs() << "\n";
1402  });
1403 
1404  // If this is the first instruction being hoisted to the preheader,
1405  // initialize the CSE map with potential common expressions.
1406  if (FirstInLoop) {
1407  InitCSEMap(Preheader);
1408  FirstInLoop = false;
1409  }
1410 
1411  // Look for opportunity to CSE the hoisted instruction.
1412  unsigned Opcode = MI->getOpcode();
1414  CI = CSEMap.find(Opcode);
1415  if (!EliminateCSE(MI, CI)) {
1416  // Otherwise, splice the instruction to the preheader.
1417  Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
1418 
1419  // Update register pressure for BBs from header to this block.
1420  UpdateBackTraceRegPressure(MI);
1421 
1422  // Clear the kill flags of any register this instruction defines,
1423  // since they may need to be live throughout the entire loop
1424  // rather than just live for part of it.
1425  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1426  MachineOperand &MO = MI->getOperand(i);
1427  if (MO.isReg() && MO.isDef() && !MO.isDead())
1428  MRI->clearKillFlags(MO.getReg());
1429  }
1430 
1431  // Add to the CSE map.
1432  if (CI != CSEMap.end())
1433  CI->second.push_back(MI);
1434  else
1435  CSEMap[Opcode].push_back(MI);
1436  }
1437 
1438  ++NumHoisted;
1439  Changed = true;
1440 
1441  return true;
1442 }
1443 
1444 MachineBasicBlock *MachineLICM::getCurPreheader() {
1445  // Determine the block to which to hoist instructions. If we can't find a
1446  // suitable loop predecessor, we can't do any hoisting.
1447 
1448  // If we've tried to get a preheader and failed, don't try again.
1449  if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
1450  return nullptr;
1451 
1452  if (!CurPreheader) {
1453  CurPreheader = CurLoop->getLoopPreheader();
1454  if (!CurPreheader) {
1455  MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
1456  if (!Pred) {
1457  CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1458  return nullptr;
1459  }
1460 
1461  CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
1462  if (!CurPreheader) {
1463  CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1464  return nullptr;
1465  }
1466  }
1467  }
1468  return CurPreheader;
1469 }
bool isImplicit() const
unsigned succ_size() const
const MachineFunction * getParent() const
getParent - Return the MachineFunction containing this basic block.
BitVector & set()
Definition: BitVector.h:218
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
instr_iterator instr_begin()
instr_iterator instr_end()
STATISTIC(NumFunctions,"Total number of functions")
int getNumber() const
getNumber - MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a M...
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:191
char & MachineLICMID
MachineLICM - This pass performs LICM on machine instructions.
std::vector< unsigned >::const_iterator livein_iterator
iterator getFirstTerminator()
getFirstTerminator - returns an iterator to the first terminator instruction of this basic block...
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:138
bool isDead() const
static bool isVirtualRegister(unsigned Reg)
isVirtualRegister - Return true if the specified register number is in the virtual register namespace...
INITIALIZE_PASS_BEGIN(MachineLICM,"machinelicm","Machine Loop Invariant Code Motion", false, false) INITIALIZE_PASS_END(MachineLICM
void addLiveIn(unsigned Reg)
Adds the specified register as a live in.
void setBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
setBitsNotInMask - Add a bit to this vector for every '0' bit in Mask.
Definition: BitVector.h:493
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:264
LoopT * getParentLoop() const
Definition: LoopInfo.h:97
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
bool canFoldAsLoad(QueryType Type=IgnoreBundle) const
Return true for instructions that can be folded as memory operands in other instructions.
Definition: MachineInstr.h:512
Instructions::iterator instr_iterator
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:295
BlockT * getHeader() const
Definition: LoopInfo.h:96
livein_iterator livein_begin() const
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:70
MachineMemOperand - A description of a memory reference used in the backend.
Each TargetRegisterClass has a per register weight, and weight limit which must be less than the limi...
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const override
isLoadFromStackSlot - If the specified machine instruction is a direct load from a stack slot...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Provide an instruction scheduling machine model to CodeGen passes.
const HexagonInstrInfo * TII
T LLVM_ATTRIBUTE_UNUSED_RESULT pop_back_val()
Definition: SmallVector.h:406
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI)
Machine Loop Invariant Code false
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:75
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APInt.h:33
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:566
Reg
All possible values of the reg field in the ModR/M byte.
MachineBasicBlock * SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P)
SplitCriticalEdge - Split the critical edge from this block to the given successor block...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
static cl::opt< bool > AvoidSpeculation("avoid-speculation", cl::desc("MachineLICM should avoid speculation"), cl::init(true), cl::Hidden)
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:271
bool isKill() const
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Definition: SmallVector.h:57
Base class for the actual dominator tree node.
bool isCopyLike() const
Return true if the instruction behaves like a copy.
Definition: MachineInstr.h:790
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:301
static bool isLoadFromGOTOrConstantPool(MachineInstr &MI)
isLoadFromGOTOrConstantPool - Return true if this machine instruction loads from global offset table ...
const BasicBlock * getBasicBlock() const
getBasicBlock - Return the LLVM basic block that this instance corresponded to originally.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:267
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:120
TargetInstrInfo - Interface to description of machine instruction set.
bool isImplicitDef() const
Definition: MachineInstr.h:759
mmo_iterator memoperands_end() const
Definition: MachineInstr.h:341
bundle_iterator< MachineInstr, instr_iterator > iterator
#define true
Definition: ConvertUTF.c:66
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:325
static cl::opt< bool > SinkInstsToAvoidSpills("sink-insts-to-avoid-spills", cl::desc("MachineLICM should sink instructions into ""loops to avoid register spills"), cl::init(false), cl::Hidden)
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
livein_iterator livein_end() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:32
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:273
static cl::opt< bool > HoistCheapInsts("hoist-cheap-insts", cl::desc("MachineLICM should hoist even cheap instructions"), cl::init(false), cl::Hidden)
bool isCopy() const
Definition: MachineInstr.h:778
MCRegAliasIterator enumerates all registers aliasing Reg.
Represent the analysis usage information of a pass.
static bool InstructionStoresToFI(const MachineInstr *MI, int FI)
InstructionStoresToFI - Return true if instruction stores to the specified frame. ...
const std::vector< DomTreeNodeBase< NodeT > * > & getChildren() const
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:69
bool isInvariantLoad(AliasAnalysis *AA) const
Return true if this instruction is loading from a location whose value is invariant across the functi...
#define INITIALIZE_AG_DEPENDENCY(depName)
Definition: PassSupport.h:72
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
void initializeMachineLICMPass(PassRegistry &)
void setIsKill(bool Val=true)
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:53
bool isSafeToMove(AliasAnalysis *AA, bool &SawStore) const
Return true if it is safe to move this instruction.
const uint32_t * getRegMask() const
getRegMask - Returns a bit mask of registers preserved by this RegMask operand.
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
bool test(unsigned Idx) const
Definition: BitVector.h:322
virtual const TargetLowering * getTargetLowering() const
MachineFrameInfo * getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
StringRef getName() const
getName - Return the name of the corresponding LLVM basic block, or "(null)".
PseudoSourceValue - Special value supplied for machine level alias analysis.
Machine Loop Invariant Code static false bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop)
LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most loop that has a unique pred...
NodeT * getBlock() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
#define Success
Representation of each machine instruction.
Definition: MachineInstr.h:51
static bool isPhysicalRegister(unsigned Reg)
isPhysicalRegister - Return true if the specified register number is in the physical register namespa...
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
bool isLiveIn(unsigned Reg) const
isLiveIn - Return true if the specified register is in the live in set.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
bool hasOneNonDBGUse(unsigned RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug instruction using the specified regis...
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
iterator end()
Definition: DenseMap.h:68
iterator find(const KeyT &Val)
Definition: DenseMap.h:124
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
machinelicm
unsigned getReg() const
getReg - Returns the register number.
Machine Loop Invariant Code Motion
virtual const TargetInstrInfo * getInstrInfo() const
LLVM Value Representation.
Definition: Value.h:69
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:185
#define DEBUG(X)
Definition: Debug.h:92
static bool isExitBlock(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &ExitBlocks)
Return true if the specified block is in the list.
Definition: LCSSA.cpp:51
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
FixedStackPseudoSourceValue - A specialized PseudoSourceValue for holding FixedStack values...
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
unsigned pred_size() const
This file describes how to lower LLVM code to machine code.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:340