LCOV - code coverage report
Current view: top level - lib/CodeGen - MachineLICM.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 532 546 97.4 %
Date: 2017-09-14 15:23:50 Functions: 39 39 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This pass performs loop invariant code motion on machine instructions. We
      11             : // attempt to remove as much code from the body of a loop as possible.
      12             : //
      13             : // This pass is not intended to be a replacement or a complete alternative
      14             : // for the LLVM-IR-level LICM pass. It is only designed to hoist simple
      15             : // constructs that are not exposed before lowering and instruction selection.
      16             : //
      17             : //===----------------------------------------------------------------------===//
      18             : 
      19             : #include "llvm/ADT/DenseMap.h"
      20             : #include "llvm/ADT/SmallSet.h"
      21             : #include "llvm/ADT/Statistic.h"
      22             : #include "llvm/Analysis/AliasAnalysis.h"
      23             : #include "llvm/CodeGen/MachineDominators.h"
      24             : #include "llvm/CodeGen/MachineFrameInfo.h"
      25             : #include "llvm/CodeGen/MachineLoopInfo.h"
      26             : #include "llvm/CodeGen/MachineMemOperand.h"
      27             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      28             : #include "llvm/CodeGen/Passes.h"
      29             : #include "llvm/CodeGen/PseudoSourceValue.h"
      30             : #include "llvm/CodeGen/TargetSchedule.h"
      31             : #include "llvm/Support/CommandLine.h"
      32             : #include "llvm/Support/Debug.h"
      33             : #include "llvm/Support/raw_ostream.h"
      34             : #include "llvm/Target/TargetInstrInfo.h"
      35             : #include "llvm/Target/TargetLowering.h"
      36             : #include "llvm/Target/TargetMachine.h"
      37             : #include "llvm/Target/TargetRegisterInfo.h"
      38             : #include "llvm/Target/TargetSubtargetInfo.h"
      39             : using namespace llvm;
      40             : 
      41             : #define DEBUG_TYPE "machinelicm"
      42             : 
      43             : static cl::opt<bool>
      44       72306 : AvoidSpeculation("avoid-speculation",
      45      216918 :                  cl::desc("MachineLICM should avoid speculation"),
      46      289224 :                  cl::init(true), cl::Hidden);
      47             : 
      48             : static cl::opt<bool>
      49       72306 : HoistCheapInsts("hoist-cheap-insts",
      50      216918 :                 cl::desc("MachineLICM should hoist even cheap instructions"),
      51      289224 :                 cl::init(false), cl::Hidden);
      52             : 
      53             : static cl::opt<bool>
      54       72306 : SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
      55      216918 :                        cl::desc("MachineLICM should sink instructions into "
      56             :                                 "loops to avoid register spills"),
      57      289224 :                        cl::init(false), cl::Hidden);
      58             : 
      59             : STATISTIC(NumHoisted,
      60             :           "Number of machine instructions hoisted out of loops");
      61             : STATISTIC(NumLowRP,
      62             :           "Number of instructions hoisted in low reg pressure situation");
      63             : STATISTIC(NumHighLatency,
      64             :           "Number of high latency instructions hoisted");
      65             : STATISTIC(NumCSEed,
      66             :           "Number of hoisted machine instructions CSEed");
      67             : STATISTIC(NumPostRAHoisted,
      68             :           "Number of machine instructions hoisted out of loops post regalloc");
      69             : 
      70             : namespace {
      71      257768 :   class MachineLICM : public MachineFunctionPass {
      72             :     const TargetInstrInfo *TII;
      73             :     const TargetLoweringBase *TLI;
      74             :     const TargetRegisterInfo *TRI;
      75             :     const MachineFrameInfo *MFI;
      76             :     MachineRegisterInfo *MRI;
      77             :     TargetSchedModel SchedModel;
      78             :     bool PreRegAlloc;
      79             : 
      80             :     // Various analyses that we use...
      81             :     AliasAnalysis        *AA;      // Alias analysis info.
      82             :     MachineLoopInfo      *MLI;     // Current MachineLoopInfo
      83             :     MachineDominatorTree *DT;      // Machine dominator tree for the cur loop
      84             : 
      85             :     // State that is updated as we process loops
      86             :     bool         Changed;          // True if a loop is changed.
      87             :     bool         FirstInLoop;      // True if it's the first LICM in the loop.
      88             :     MachineLoop *CurLoop;          // The current loop we are working on.
      89             :     MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
      90             : 
      91             :     // Exit blocks for CurLoop.
      92             :     SmallVector<MachineBasicBlock*, 8> ExitBlocks;
      93             : 
      94             :     bool isExitBlock(const MachineBasicBlock *MBB) const {
      95         510 :       return is_contained(ExitBlocks, MBB);
      96             :     }
      97             : 
      98             :     // Track 'estimated' register pressure.
      99             :     SmallSet<unsigned, 32> RegSeen;
     100             :     SmallVector<unsigned, 8> RegPressure;
     101             : 
     102             :     // Register pressure "limit" per register pressure set. If the pressure
     103             :     // is higher than the limit, then it's considered high.
     104             :     SmallVector<unsigned, 8> RegLimit;
     105             : 
     106             :     // Register pressure on path leading from loop preheader to current BB.
     107             :     SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
     108             : 
     109             :     // For each opcode, keep a list of potential CSE instructions.
     110             :     DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
     111             : 
     112             :     enum {
     113             :       SpeculateFalse   = 0,
     114             :       SpeculateTrue    = 1,
     115             :       SpeculateUnknown = 2
     116             :     };
     117             : 
     118             :     // If a MBB does not dominate loop exiting blocks then it may not safe
     119             :     // to hoist loads from this block.
     120             :     // Tri-state: 0 - false, 1 - true, 2 - unknown
     121             :     unsigned SpeculationState;
     122             : 
     123             :   public:
     124             :     static char ID; // Pass identification, replacement for typeid
     125       32418 :     MachineLICM() :
     126      226926 :       MachineFunctionPass(ID), PreRegAlloc(true) {
     127       32418 :         initializeMachineLICMPass(*PassRegistry::getPassRegistry());
     128       32418 :       }
     129             : 
     130             :     explicit MachineLICM(bool PreRA) :
     131             :       MachineFunctionPass(ID), PreRegAlloc(PreRA) {
     132             :         initializeMachineLICMPass(*PassRegistry::getPassRegistry());
     133             :       }
     134             : 
     135             :     bool runOnMachineFunction(MachineFunction &MF) override;
     136             : 
     137       32314 :     void getAnalysisUsage(AnalysisUsage &AU) const override {
     138       32314 :       AU.addRequired<MachineLoopInfo>();
     139       32314 :       AU.addRequired<MachineDominatorTree>();
     140       32314 :       AU.addRequired<AAResultsWrapperPass>();
     141       32314 :       AU.addPreserved<MachineLoopInfo>();
     142       32314 :       AU.addPreserved<MachineDominatorTree>();
     143       32314 :       MachineFunctionPass::getAnalysisUsage(AU);
     144       32314 :     }
     145             : 
     146      285036 :     void releaseMemory() override {
     147      570072 :       RegSeen.clear();
     148      570072 :       RegPressure.clear();
     149      570072 :       RegLimit.clear();
     150      285036 :       BackTrace.clear();
     151      285036 :       CSEMap.clear();
     152      285036 :     }
     153             : 
     154             :   private:
     155             :     /// Keep track of information about hoisting candidates.
     156             :     struct CandidateInfo {
     157             :       MachineInstr *MI;
     158             :       unsigned      Def;
     159             :       int           FI;
     160             :       CandidateInfo(MachineInstr *mi, unsigned def, int fi)
     161         664 :         : MI(mi), Def(def), FI(fi) {}
     162             :     };
     163             : 
     164             :     void HoistRegionPostRA();
     165             : 
     166             :     void HoistPostRA(MachineInstr *MI, unsigned Def);
     167             : 
     168             :     void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
     169             :                    BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs,
     170             :                    SmallVectorImpl<CandidateInfo> &Candidates);
     171             : 
     172             :     void AddToLiveIns(unsigned Reg);
     173             : 
     174             :     bool IsLICMCandidate(MachineInstr &I);
     175             : 
     176             :     bool IsLoopInvariantInst(MachineInstr &I);
     177             : 
     178             :     bool HasLoopPHIUse(const MachineInstr *MI) const;
     179             : 
     180             :     bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
     181             :                                unsigned Reg) const;
     182             : 
     183             :     bool IsCheapInstruction(MachineInstr &MI) const;
     184             : 
     185             :     bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost,
     186             :                                  bool Cheap);
     187             : 
     188             :     void UpdateBackTraceRegPressure(const MachineInstr *MI);
     189             : 
     190             :     bool IsProfitableToHoist(MachineInstr &MI);
     191             : 
     192             :     bool IsGuaranteedToExecute(MachineBasicBlock *BB);
     193             : 
     194             :     void EnterScope(MachineBasicBlock *MBB);
     195             : 
     196             :     void ExitScope(MachineBasicBlock *MBB);
     197             : 
     198             :     void ExitScopeIfDone(
     199             :         MachineDomTreeNode *Node,
     200             :         DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
     201             :         DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
     202             : 
     203             :     void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
     204             : 
     205             :     void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
     206             : 
     207             :     void SinkIntoLoop();
     208             : 
     209             :     void InitRegPressure(MachineBasicBlock *BB);
     210             : 
     211             :     DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
     212             :                                              bool ConsiderSeen,
     213             :                                              bool ConsiderUnseenAsDef);
     214             : 
     215             :     void UpdateRegPressure(const MachineInstr *MI,
     216             :                            bool ConsiderUnseenAsDef = false);
     217             : 
     218             :     MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
     219             : 
     220             :     const MachineInstr *
     221             :     LookForDuplicate(const MachineInstr *MI,
     222             :                      std::vector<const MachineInstr *> &PrevMIs);
     223             : 
     224             :     bool EliminateCSE(
     225             :         MachineInstr *MI,
     226             :         DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI);
     227             : 
     228             :     bool MayCSE(MachineInstr *MI);
     229             : 
     230             :     bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
     231             : 
     232             :     void InitCSEMap(MachineBasicBlock *BB);
     233             : 
     234             :     MachineBasicBlock *getCurPreheader();
     235             :   };
     236             : } // end anonymous namespace
     237             : 
     238             : char MachineLICM::ID = 0;
     239             : char &llvm::MachineLICMID = MachineLICM::ID;
     240       20212 : INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE,
     241             :                       "Machine Loop Invariant Code Motion", false, false)
     242       20212 : INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
     243       20212 : INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
     244       20212 : INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
     245      248453 : INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
     246             :                     "Machine Loop Invariant Code Motion", false, false)
     247             : 
     248             : /// Test if the given loop is the outer-most loop that has a unique predecessor.
     249        6054 : static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
     250             :   // Check whether this loop even has a unique predecessor.
     251        6054 :   if (!CurLoop->getLoopPredecessor())
     252             :     return false;
     253             :   // Ok, now check to see if any of its outer loops do.
     254        6016 :   for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
     255           2 :     if (L->getLoopPredecessor())
     256             :       return false;
     257             :   // None of them did, so this is the outermost with a unique predecessor.
     258             :   return true;
     259             : }
     260             : 
     261      284988 : bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
     262      284988 :   if (skipFunction(*MF.getFunction()))
     263             :     return false;
     264             : 
     265      284873 :   Changed = FirstInLoop = false;
     266      284873 :   const TargetSubtargetInfo &ST = MF.getSubtarget();
     267      284873 :   TII = ST.getInstrInfo();
     268      284873 :   TLI = ST.getTargetLowering();
     269      284873 :   TRI = ST.getRegisterInfo();
     270      284873 :   MFI = &MF.getFrameInfo();
     271      284873 :   MRI = &MF.getRegInfo();
     272      284873 :   SchedModel.init(ST.getSchedModel(), &ST, TII);
     273             : 
     274      569746 :   PreRegAlloc = MRI->isSSA();
     275             : 
     276             :   if (PreRegAlloc)
     277             :     DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
     278             :   else
     279             :     DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
     280             :   DEBUG(dbgs() << MF.getName() << " ********\n");
     281             : 
     282      284873 :   if (PreRegAlloc) {
     283             :     // Estimate register pressure during pre-regalloc pass.
     284      150120 :     unsigned NumRPS = TRI->getNumRegPressureSets();
     285      150120 :     RegPressure.resize(NumRPS);
     286      600480 :     std::fill(RegPressure.begin(), RegPressure.end(), 0);
     287      150120 :     RegLimit.resize(NumRPS);
     288     3773303 :     for (unsigned i = 0, e = NumRPS; i != e; ++i)
     289     7246366 :       RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
     290             :   }
     291             : 
     292             :   // Get our Loop information...
     293      284873 :   MLI = &getAnalysis<MachineLoopInfo>();
     294      284873 :   DT  = &getAnalysis<MachineDominatorTree>();
     295      569746 :   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
     296             : 
     297      854619 :   SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
     298      296958 :   while (!Worklist.empty()) {
     299       12085 :     CurLoop = Worklist.pop_back_val();
     300       12085 :     CurPreheader = nullptr;
     301       24170 :     ExitBlocks.clear();
     302             : 
     303             :     // If this is done before regalloc, only visit outer-most preheader-sporting
     304             :     // loops.
     305       12125 :     if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
     306         120 :       Worklist.append(CurLoop->begin(), CurLoop->end());
     307          40 :       continue;
     308             :     }
     309             : 
     310       12045 :     CurLoop->getExitBlocks(ExitBlocks);
     311             : 
     312       12045 :     if (!PreRegAlloc)
     313        6031 :       HoistRegionPostRA();
     314             :     else {
     315             :       // CSEMap is initialized for loop header when the first instruction is
     316             :       // being hoisted.
     317       12028 :       MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
     318        6014 :       FirstInLoop = true;
     319        6014 :       HoistOutOfLoop(N);
     320        6014 :       CSEMap.clear();
     321             : 
     322        6014 :       if (SinkInstsToAvoidSpills)
     323           1 :         SinkIntoLoop();
     324             :     }
     325             :   }
     326             : 
     327      284873 :   return Changed;
     328             : }
     329             : 
     330             : /// Return true if instruction stores to the specified frame.
     331        4410 : static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
     332             :   // If we lost memory operands, conservatively assume that the instruction
     333             :   // writes to all slots.
     334        4410 :   if (MI->memoperands_empty())
     335             :     return true;
     336        7523 :   for (const MachineMemOperand *MemOp : MI->memoperands()) {
     337       13170 :     if (!MemOp->isStore() || !MemOp->getPseudoValue())
     338        3136 :       continue;
     339             :     if (const FixedStackPseudoSourceValue *Value =
     340        2508 :         dyn_cast<FixedStackPseudoSourceValue>(MemOp->getPseudoValue())) {
     341        1254 :       if (Value->getFrameIndex() == FI)
     342             :         return true;
     343             :     }
     344             :   }
     345             :   return false;
     346             : }
     347             : 
     348             : /// Examine the instruction for potentai LICM candidate. Also
     349             : /// gather register def and frame object update information.
     350      303412 : void MachineLICM::ProcessMI(MachineInstr *MI,
     351             :                             BitVector &PhysRegDefs,
     352             :                             BitVector &PhysRegClobbers,
     353             :                             SmallSet<int, 32> &StoredFIs,
     354             :                             SmallVectorImpl<CandidateInfo> &Candidates) {
     355      303412 :   bool RuledOut = false;
     356      303412 :   bool HasNonInvariantUse = false;
     357      303412 :   unsigned Def = 0;
     358     1673920 :   for (const MachineOperand &MO : MI->operands()) {
     359     1370508 :     if (MO.isFI()) {
     360             :       // Remember if the instruction stores to the frame index.
     361       24233 :       int FI = MO.getIndex();
     362       46321 :       if (!StoredFIs.count(FI) &&
     363       50731 :           MFI->isSpillSlotObjectIndex(FI) &&
     364        4410 :           InstructionStoresToFI(MI, FI))
     365        1277 :         StoredFIs.insert(FI);
     366       24233 :       HasNonInvariantUse = true;
     367       24233 :       continue;
     368             :     }
     369             : 
     370             :     // We can't hoist an instruction defining a physreg that is clobbered in
     371             :     // the loop.
     372     1362295 :     if (MO.isRegMask()) {
     373       32040 :       PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
     374       16020 :       continue;
     375             :     }
     376             : 
     377     1330255 :     if (!MO.isReg())
     378      455522 :       continue;
     379      874733 :     unsigned Reg = MO.getReg();
     380      874733 :     if (!Reg)
     381      220043 :       continue;
     382             :     assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
     383             :            "Not expecting virtual register!");
     384             : 
     385     1023953 :     if (!MO.isDef()) {
     386      492159 :       if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
     387             :         // If it's using a non-loop-invariant register, then it's obviously not
     388             :         // safe to hoist.
     389             :         HasNonInvariantUse = true;
     390      369263 :       continue;
     391             :     }
     392             : 
     393      285427 :     if (MO.isImplicit()) {
     394     1165398 :       for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
     395      845410 :         PhysRegClobbers.set(*AI);
     396      159994 :       if (!MO.isDead())
     397             :         // Non-dead implicit def? This cannot be hoisted.
     398       58772 :         RuledOut = true;
     399             :       // No need to check if a dead implicit def is also defined by
     400             :       // another instruction.
     401      159994 :       continue;
     402             :     }
     403             : 
     404             :     // FIXME: For now, avoid instructions with multiple defs, unless
     405             :     // it's a dead implicit def.
     406      125433 :     if (Def)
     407             :       RuledOut = true;
     408             :     else
     409      124283 :       Def = Reg;
     410             : 
     411             :     // If we have already seen another instruction that defines the same
     412             :     // register, then this is not safe.  Two defs is indicated by setting a
     413             :     // PhysRegClobbers bit.
     414     1676390 :     for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
     415     1425524 :       if (PhysRegDefs.test(*AS))
     416      648204 :         PhysRegClobbers.set(*AS);
     417     1425524 :       PhysRegDefs.set(*AS);
     418             :     }
     419      125433 :     if (PhysRegClobbers.test(Reg))
     420             :       // MI defined register is seen defined by another instruction in
     421             :       // the loop, it cannot be a LICM candidate.
     422      114552 :       RuledOut = true;
     423             :   }
     424             : 
     425             :   // Only consider reloads for now and remats which do not have register
     426             :   // operands. FIXME: Consider unfold load folding instructions.
     427      303412 :   if (Def && !RuledOut) {
     428        9150 :     int FI = INT_MIN;
     429       17752 :     if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
     430        8970 :         (TII->isLoadFromStackSlot(*MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
     431        1328 :       Candidates.push_back(CandidateInfo(MI, Def, FI));
     432             :   }
     433      303412 : }
     434             : 
     435             : /// Walk the specified region of the CFG and hoist loop invariants out to the
     436             : /// preheader.
     437        6031 : void MachineLICM::HoistRegionPostRA() {
     438        6031 :   MachineBasicBlock *Preheader = getCurPreheader();
     439        6031 :   if (!Preheader)
     440          57 :     return;
     441             : 
     442        5974 :   unsigned NumRegs = TRI->getNumRegs();
     443       11948 :   BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
     444       11948 :   BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
     445             : 
     446       11948 :   SmallVector<CandidateInfo, 32> Candidates;
     447       11948 :   SmallSet<int, 32> StoredFIs;
     448             : 
     449             :   // Walk the entire region, count number of defs for each register, and
     450             :   // collect potential LICM candidates.
     451       11948 :   const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
     452       49351 :   for (MachineBasicBlock *BB : Blocks) {
     453             :     // If the header of the loop containing this basic block is a landing pad,
     454             :     // then don't try to hoist instructions out of this loop.
     455       50910 :     const MachineLoop *ML = MLI->getLoopFor(BB);
     456       50910 :     if (ML && ML->getHeader()->isEHPad()) continue;
     457             : 
     458             :     // Conservatively treat live-in's as an external def.
     459             :     // FIXME: That means a reload that're reused in successor block(s) will not
     460             :     // be LICM'ed.
     461      170611 :     for (const auto &LI : BB->liveins()) {
     462     1673082 :       for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI)
     463     1433680 :         PhysRegDefs.set(*AI);
     464             :     }
     465             : 
     466       25455 :     SpeculationState = SpeculateUnknown;
     467      708644 :     for (MachineInstr &MI : *BB)
     468      303412 :       ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
     469             :   }
     470             : 
     471             :   // Gather the registers read / clobbered by the terminator.
     472       11948 :   BitVector TermRegs(NumRegs);
     473        5974 :   MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
     474       11948 :   if (TI != Preheader->end()) {
     475        2790 :     for (const MachineOperand &MO : TI->operands()) {
     476         986 :       if (!MO.isReg())
     477         923 :         continue;
     478          63 :       unsigned Reg = MO.getReg();
     479          63 :       if (!Reg)
     480          21 :         continue;
     481         252 :       for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
     482         168 :         TermRegs.set(*AI);
     483             :     }
     484             :   }
     485             : 
     486             :   // Now evaluate whether the potential candidates qualify.
     487             :   // 1. Check if the candidate defined register is defined by another
     488             :   //    instruction in the loop.
     489             :   // 2. If the candidate is a load from stack slot (always true for now),
     490             :   //    check if the slot is stored anywhere in the loop.
     491             :   // 3. Make sure candidate def should not clobber
     492             :   //    registers read by the terminator. Similarly its def should not be
     493             :   //    clobbered by the terminator.
     494       18586 :   for (CandidateInfo &Candidate : Candidates) {
     495         803 :     if (Candidate.FI != INT_MIN &&
     496         116 :         StoredFIs.count(Candidate.FI))
     497          23 :       continue;
     498             : 
     499         641 :     unsigned Def = Candidate.Def;
     500         702 :     if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
     501          61 :       bool Safe = true;
     502          61 :       MachineInstr *MI = Candidate.MI;
     503         307 :       for (const MachineOperand &MO : MI->operands()) {
     504         624 :         if (!MO.isReg() || MO.isDef() || !MO.getReg())
     505         246 :           continue;
     506           1 :         unsigned Reg = MO.getReg();
     507           1 :         if (PhysRegDefs.test(Reg) ||
     508           0 :             PhysRegClobbers.test(Reg)) {
     509             :           // If it's using a non-loop-invariant register, then it's obviously
     510             :           // not safe to hoist.
     511             :           Safe = false;
     512             :           break;
     513             :         }
     514             :       }
     515          61 :       if (Safe)
     516          60 :         HoistPostRA(MI, Candidate.Def);
     517             :     }
     518             :   }
     519             : }
     520             : 
     521             : /// Add register 'Reg' to the livein sets of BBs in the current loop, and make
     522             : /// sure it is not killed by any instructions in the loop.
     523          60 : void MachineLICM::AddToLiveIns(unsigned Reg) {
     524         120 :   const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
     525         378 :   for (MachineBasicBlock *BB : Blocks) {
     526         138 :     if (!BB->isLiveIn(Reg))
     527         106 :       BB->addLiveIn(Reg);
     528        4056 :     for (MachineInstr &MI : *BB) {
     529        9557 :       for (MachineOperand &MO : MI.operands()) {
     530       11571 :         if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
     531        2191 :         if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
     532             :           MO.setIsKill(false);
     533             :       }
     534             :     }
     535             :   }
     536          60 : }
     537             : 
     538             : /// When an instruction is found to only use loop invariant operands that is
     539             : /// safe to hoist, this instruction is called to do the dirty work.
     540          60 : void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
     541          60 :   MachineBasicBlock *Preheader = getCurPreheader();
     542             : 
     543             :   // Now move the instructions to the predecessor, inserting it before any
     544             :   // terminator instructions.
     545             :   DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#"
     546             :                << MI->getParent()->getNumber() << ": " << *MI);
     547             : 
     548             :   // Splice the instruction to the preheader.
     549          60 :   MachineBasicBlock *MBB = MI->getParent();
     550          60 :   Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
     551             : 
     552             :   // Add register to livein list to all the BBs in the current loop since a
     553             :   // loop invariant must be kept live throughout the whole loop. This is
     554             :   // important to ensure later passes do not scavenge the def register.
     555          60 :   AddToLiveIns(Def);
     556             : 
     557          60 :   ++NumPostRAHoisted;
     558          60 :   Changed = true;
     559          60 : }
     560             : 
     561             : /// Check if this mbb is guaranteed to execute. If not then a load from this mbb
     562             : /// may not be safe to hoist.
     563        7256 : bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
     564        7256 :   if (SpeculationState != SpeculateUnknown)
     565        3725 :     return SpeculationState == SpeculateFalse;
     566             : 
     567        7062 :   if (BB != CurLoop->getHeader()) {
     568             :     // Check loop exiting blocks.
     569        3428 :     SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
     570        3312 :     CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
     571       10551 :     for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
     572        7622 :       if (!DT->dominates(BB, CurrentLoopExitingBlock)) {
     573        3196 :         SpeculationState = SpeculateTrue;
     574        6392 :         return false;
     575             :       }
     576             :   }
     577             : 
     578         335 :   SpeculationState = SpeculateFalse;
     579         335 :   return true;
     580             : }
     581             : 
     582             : void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
     583             :   DEBUG(dbgs() << "Entering BB#" << MBB->getNumber() << '\n');
     584             : 
     585             :   // Remember livein register pressure.
     586       24638 :   BackTrace.push_back(RegPressure);
     587             : }
     588             : 
     589             : void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
     590             :   DEBUG(dbgs() << "Exiting BB#" << MBB->getNumber() << '\n');
     591       21756 :   BackTrace.pop_back();
     592             : }
     593             : 
     594             : /// Destroy scope for the MBB that corresponds to the given dominator tree node
     595             : /// if its a leaf or all of its children are done. Walk up the dominator tree to
     596             : /// destroy ancestors which are now done.
     597       24638 : void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
     598             :                 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
     599             :                 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
     600       24638 :   if (OpenChildren[Node])
     601             :     return;
     602             : 
     603             :   // Pop scope.
     604        7855 :   ExitScope(Node->getBlock());
     605             : 
     606             :   // Now traverse upwards to pop ancestors whose offsprings are all done.
     607       10878 :   while (MachineDomTreeNode *Parent = ParentMap[Node]) {
     608        9680 :     unsigned Left = --OpenChildren[Parent];
     609        9680 :     if (Left != 0)
     610             :       break;
     611        6046 :     ExitScope(Parent->getBlock());
     612        3023 :     Node = Parent;
     613        3023 :   }
     614             : }
     615             : 
     616             : /// Walk the specified loop in the CFG (defined by all blocks dominated by the
     617             : /// specified header block, and that are in the current loop) in depth first
     618             : /// order w.r.t the DominatorTree. This allows us to visit definitions before
     619             : /// uses, allowing us to hoist a loop body in one pass without iteration.
     620             : ///
     621        6014 : void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
     622        6014 :   MachineBasicBlock *Preheader = getCurPreheader();
     623        6014 :   if (!Preheader)
     624          18 :     return;
     625             : 
     626       11992 :   SmallVector<MachineDomTreeNode*, 32> Scopes;
     627       11992 :   SmallVector<MachineDomTreeNode*, 8> WorkList;
     628       11992 :   DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
     629       11992 :   DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
     630             : 
     631             :   // Perform a DFS walk to determine the order of visit.
     632        5996 :   WorkList.push_back(HeaderN);
     633       43282 :   while (!WorkList.empty()) {
     634       37286 :     MachineDomTreeNode *Node = WorkList.pop_back_val();
     635             :     assert(Node && "Null dominator tree node?");
     636       37286 :     MachineBasicBlock *BB = Node->getBlock();
     637             : 
     638             :     // If the header of the loop containing this basic block is a landing pad,
     639             :     // then don't try to hoist instructions out of this loop.
     640       74572 :     const MachineLoop *ML = MLI->getLoopFor(BB);
     641       49297 :     if (ML && ML->getHeader()->isEHPad())
     642       12649 :       continue;
     643             : 
     644             :     // If this subregion is not in the top level loop at all, exit.
     645       87217 :     if (!CurLoop->contains(BB))
     646       12647 :       continue;
     647             : 
     648       24638 :     Scopes.push_back(Node);
     649       24638 :     const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
     650       24638 :     unsigned NumChildren = Children.size();
     651             : 
     652             :     // Don't hoist things out of a large switch statement.  This often causes
     653             :     // code to be hoisted that wasn't going to be executed, and increases
     654             :     // register pressure in a situation where it's likely to matter.
     655       24638 :     if (BB->succ_size() >= 25)
     656           0 :       NumChildren = 0;
     657             : 
     658       24638 :     OpenChildren[Node] = NumChildren;
     659             :     // Add children in reverse order as then the next popped worklist node is
     660             :     // the first child of this node.  This means we ultimately traverse the
     661             :     // DOM tree in exactly the same order as if we'd recursed.
     662       55928 :     for (int i = (int)NumChildren-1; i >= 0; --i) {
     663       62580 :       MachineDomTreeNode *Child = Children[i];
     664       31290 :       ParentMap[Child] = Node;
     665       31290 :       WorkList.push_back(Child);
     666             :     }
     667             :   }
     668             : 
     669        5996 :   if (Scopes.size() == 0)
     670           0 :     return;
     671             : 
     672             :   // Compute registers which are livein into the loop headers.
     673       11992 :   RegSeen.clear();
     674        5996 :   BackTrace.clear();
     675        5996 :   InitRegPressure(Preheader);
     676             : 
     677             :   // Now perform LICM.
     678       42626 :   for (MachineDomTreeNode *Node : Scopes) {
     679       24638 :     MachineBasicBlock *MBB = Node->getBlock();
     680             : 
     681       49276 :     EnterScope(MBB);
     682             : 
     683             :     // Process the block
     684       24638 :     SpeculationState = SpeculateUnknown;
     685      364327 :     for (MachineBasicBlock::iterator
     686      438241 :          MII = MBB->begin(), E = MBB->end(); MII != E; ) {
     687      728654 :       MachineBasicBlock::iterator NextMII = MII; ++NextMII;
     688      364327 :       MachineInstr *MI = &*MII;
     689      364327 :       if (!Hoist(MI, Preheader))
     690      329903 :         UpdateRegPressure(MI);
     691      364327 :       MII = NextMII;
     692             :     }
     693             : 
     694             :     // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
     695       24638 :     ExitScopeIfDone(Node, OpenChildren, ParentMap);
     696             :   }
     697             : }
     698             : 
     699             : /// Sink instructions into loops if profitable. This especially tries to prevent
     700             : /// register spills caused by register pressure if there is little to no
     701             : /// overhead moving instructions into loops.
     702           1 : void MachineLICM::SinkIntoLoop() {
     703           1 :   MachineBasicBlock *Preheader = getCurPreheader();
     704           1 :   if (!Preheader)
     705           0 :     return;
     706             : 
     707           2 :   SmallVector<MachineInstr *, 8> Candidates;
     708           1 :   for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
     709           9 :        I != Preheader->instr_end(); ++I) {
     710             :     // We need to ensure that we can safely move this instruction into the loop.
     711             :     // As such, it must not have side-effects, e.g. such as a call has.
     712          14 :     if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I))
     713           5 :       Candidates.push_back(&*I);
     714             :   }
     715             : 
     716           8 :   for (MachineInstr *I : Candidates) {
     717           5 :     const MachineOperand &MO = I->getOperand(0);
     718          10 :     if (!MO.isDef() || !MO.isReg() || !MO.getReg())
     719           0 :       continue;
     720           5 :     if (!MRI->hasOneDef(MO.getReg()))
     721           0 :       continue;
     722           5 :     bool CanSink = true;
     723           5 :     MachineBasicBlock *B = nullptr;
     724          25 :     for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
     725             :       // FIXME: Come up with a proper cost model that estimates whether sinking
     726             :       // the instruction (and thus possibly executing it on every loop
     727             :       // iteration) is more expensive than a register.
     728             :       // For now assumes that copies are cheap and thus almost always worth it.
     729           5 :       if (!MI.isCopy()) {
     730             :         CanSink = false;
     731             :         break;
     732             :       }
     733          10 :       if (!B) {
     734           5 :         B = MI.getParent();
     735           5 :         continue;
     736             :       }
     737           0 :       B = DT->findNearestCommonDominator(B, MI.getParent());
     738           0 :       if (!B) {
     739             :         CanSink = false;
     740             :         break;
     741             :       }
     742             :     }
     743           5 :     if (!CanSink || !B || B == Preheader)
     744           0 :       continue;
     745           5 :     B->splice(B->getFirstNonPHI(), Preheader, I);
     746             :   }
     747             : }
     748             : 
     749             : static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
     750      317238 :   return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
     751             : }
     752             : 
     753             : /// Find all virtual register references that are liveout of the preheader to
     754             : /// initialize the starting "register pressure". Note this does not count live
     755             : /// through (livein but not used) registers.
     756        8898 : void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
     757       35592 :   std::fill(RegPressure.begin(), RegPressure.end(), 0);
     758             : 
     759             :   // If the preheader has only a single predecessor and it ends with a
     760             :   // fallthrough or an unconditional branch, then scan its predecessor for live
     761             :   // defs as well. This happens whenever the preheader is created by splitting
     762             :   // the critical edge from the loop predecessor to the loop header.
     763        8898 :   if (BB->pred_size() == 1) {
     764        3489 :     MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
     765        6978 :     SmallVector<MachineOperand, 4> Cond;
     766        3489 :     if (!TII->analyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
     767        2902 :       InitRegPressure(*BB->pred_begin());
     768             :   }
     769             : 
     770      213568 :   for (const MachineInstr &MI : *BB)
     771       88988 :     UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true);
     772        8898 : }
     773             : 
     774             : /// Update estimate of register pressure after the specified instruction.
     775      419532 : void MachineLICM::UpdateRegPressure(const MachineInstr *MI,
     776             :                                     bool ConsiderUnseenAsDef) {
     777      839064 :   auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
     778     1655075 :   for (const auto &RPIdAndCost : Cost) {
     779      396479 :     unsigned Class = RPIdAndCost.first;
     780      792958 :     if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
     781           0 :       RegPressure[Class] = 0;
     782             :     else
     783      792958 :       RegPressure[Class] += RPIdAndCost.second;
     784             :   }
     785      419532 : }
     786             : 
     787             : /// Calculate the additional register pressure that the registers used in MI
     788             : /// cause.
     789             : ///
     790             : /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
     791             : /// figure out which usages are live-ins.
     792             : /// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
     793             : DenseMap<unsigned, int>
     794      445693 : MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
     795             :                               bool ConsiderUnseenAsDef) {
     796      445693 :   DenseMap<unsigned, int> Cost;
     797      445693 :   if (MI->isImplicitDef())
     798             :     return Cost;
     799     2366437 :   for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
     800     2957354 :     const MachineOperand &MO = MI->getOperand(i);
     801     2366871 :     if (!MO.isReg() || MO.isImplicit())
     802     1763793 :       continue;
     803      888194 :     unsigned Reg = MO.getReg();
     804     1776388 :     if (!TargetRegisterInfo::isVirtualRegister(Reg))
     805      372415 :       continue;
     806             : 
     807             :     // FIXME: It seems bad to use RegSeen only for some of these calculations.
     808      515779 :     bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;
     809     1031558 :     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
     810             : 
     811      515779 :     RegClassWeight W = TRI->getRegClassWeight(RC);
     812      515779 :     int RCCost = 0;
     813      515779 :     if (MO.isDef())
     814      198541 :       RCCost = W.RegWeight;
     815             :     else {
     816      634476 :       bool isKill = isOperandKill(MO, MRI);
     817      317238 :       if (isNew && !isKill && ConsiderUnseenAsDef)
     818             :         // Haven't seen this, it must be a livein.
     819        2405 :         RCCost = W.RegWeight;
     820      314833 :       else if (!isNew && isKill)
     821      105047 :         RCCost = -W.RegWeight;
     822             :     }
     823      305993 :     if (RCCost == 0)
     824      210412 :       continue;
     825      305367 :     const int *PS = TRI->getRegClassPressureSets(RC);
     826     1353659 :     for (; *PS != -1; ++PS) {
     827     1048292 :       if (Cost.find(*PS) == Cost.end())
     828      853852 :         Cost[*PS] = RCCost;
     829             :       else
     830      194440 :         Cost[*PS] += RCCost;
     831             :     }
     832             :   }
     833             :   return Cost;
     834             : }
     835             : 
     836             : /// Return true if this machine instruction loads from global offset table or
     837             : /// constant pool.
     838        2355 : static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
     839             :   assert (MI.mayLoad() && "Expected MI that loads!");
     840             : 
     841             :   // If we lost memory operands, conservatively assume that the instruction
     842             :   // reads from everything..
     843        2355 :   if (MI.memoperands_empty())
     844             :     return true;
     845             : 
     846        2483 :   for (MachineMemOperand *MemOp : MI.memoperands())
     847        2251 :     if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
     848        2251 :       if (PSV->isGOT() || PSV->isConstantPool())
     849             :         return true;
     850             : 
     851             :   return false;
     852             : }
     853             : 
     854             : /// Returns true if the instruction may be a suitable candidate for LICM.
     855             : /// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
     856      366113 : bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
     857             :   // Check if it's safe to move the instruction.
     858      366113 :   bool DontMoveAcrossStore = true;
     859      366113 :   if (!I.isSafeToMove(AA, DontMoveAcrossStore))
     860             :     return false;
     861             : 
     862             :   // If it is load then check if it is guaranteed to execute by making sure that
     863             :   // it dominates all exiting blocks. If it doesn't, then there is a path out of
     864             :   // the loop which does not execute this load, so we can't hoist it. Loads
     865             :   // from constant memory are not safe to speculate all the time, for example
     866             :   // indexed load from a jump table.
     867             :   // Stores and side effects are already checked by isSafeToMove.
     868      185540 :   if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) &&
     869         124 :       !IsGuaranteedToExecute(I.getParent()))
     870             :     return false;
     871             : 
     872             :   return true;
     873             : }
     874             : 
     875             : /// Returns true if the instruction is loop invariant.
     876             : /// I.e., all virtual register operands are defined outside of the loop,
     877             : /// physical registers aren't accessed explicitly, and there are no side
     878             : /// effects that aren't captured by the operands or other flags.
     879             : ///
     880      364989 : bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
     881      364989 :   if (!IsLICMCandidate(I))
     882             :     return false;
     883             : 
     884             :   // The instruction is loop invariant if all of its operands are.
     885      599192 :   for (const MachineOperand &MO : I.operands()) {
     886      556007 :     if (!MO.isReg())
     887      156504 :       continue;
     888             : 
     889      399503 :     unsigned Reg = MO.getReg();
     890      399503 :     if (Reg == 0) continue;
     891             : 
     892             :     // Don't hoist an instruction that uses or defines a physical register.
     893      344109 :     if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
     894      166281 :       if (MO.isUse()) {
     895             :         // If the physreg has no defs anywhere, it's just an ambient register
     896             :         // and we can freely move its uses. Alternatively, if it's allocatable,
     897             :         // it could get allocated to something with a def during allocation.
     898             :         // However, if the physreg is known to always be caller saved/restored
     899             :         // then this use is safe to hoist.
     900      101681 :         if (!MRI->isConstantPhysReg(Reg) &&
     901       45133 :             !(TRI->isCallerPreservedPhysReg(Reg, *I.getParent()->getParent())))
     902             :             return false;
     903             :         // Otherwise it's safe to move.
     904       11420 :         continue;
     905       98313 :       } else if (!MO.isDead()) {
     906             :         // A def that isn't dead. We can't move it.
     907             :         return false;
     908      128984 :       } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
     909             :         // If the reg is live into the loop, we can't hoist an instruction
     910             :         // which would clobber it.
     911             :         return false;
     912             :       }
     913             :     }
     914             : 
     915      253740 :     if (!MO.isUse())
     916      176583 :       continue;
     917             : 
     918             :     assert(MRI->getVRegDef(Reg) &&
     919             :            "Machine instr not mapped for this vreg?!");
     920             : 
     921             :     // If the loop contains the definition of an operand, then the instruction
     922             :     // isn't loop invariant.
     923      154314 :     if (CurLoop->contains(MRI->getVRegDef(Reg)))
     924             :       return false;
     925             :   }
     926             : 
     927             :   // If we got this far, the instruction is loop invariant!
     928             :   return true;
     929             : }
     930             : 
     931             : 
     932             : /// Return true if the specified instruction is used by a phi node and hoisting
     933             : /// it could cause a copy to be inserted.
     934       42116 : bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
     935       42116 :   SmallVector<const MachineInstr*, 8> Work(1, MI);
     936             :   do {
     937       62381 :     MI = Work.pop_back_val();
     938      350197 :     for (const MachineOperand &MO : MI->operands()) {
     939      386329 :       if (!MO.isReg() || !MO.isDef())
     940      164097 :         continue;
     941       63553 :       unsigned Reg = MO.getReg();
     942       63553 :       if (!TargetRegisterInfo::isVirtualRegister(Reg))
     943       20816 :         continue;
     944      230048 :       for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
     945             :         // A PHI may cause a copy to be inserted.
     946       49811 :         if (UseMI.isPHI()) {
     947             :           // A PHI inside the loop causes a copy because the live range of Reg is
     948             :           // extended across the PHI.
     949        4440 :           if (CurLoop->contains(&UseMI))
     950             :             return true;
     951             :           // A PHI in an exit block can cause a copy to be inserted if the PHI
     952             :           // has multiple predecessors in the loop with different values.
     953             :           // For now, approximate by rejecting all exit blocks.
     954         510 :           if (isExitBlock(UseMI.getParent()))
     955             :             return true;
     956           5 :           continue;
     957             :         }
     958             :         // Look past copies as well.
     959       90458 :         if (UseMI.isCopy() && CurLoop->contains(&UseMI))
     960       20296 :           Work.push_back(&UseMI);
     961             :       }
     962             :     }
     963       60166 :   } while (!Work.empty());
     964             :   return false;
     965             : }
     966             : 
     967             : /// Compute operand latency between a def of 'Reg' and an use in the current
     968             : /// loop, return true if the target considered it high.
     969       10316 : bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
     970             :                                         unsigned DefIdx, unsigned Reg) const {
     971       20632 :   if (MRI->use_nodbg_empty(Reg))
     972             :     return false;
     973             : 
     974       55624 :   for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
     975       10628 :     if (UseMI.isCopyLike())
     976        6837 :       continue;
     977        7769 :     if (!CurLoop->contains(UseMI.getParent()))
     978         187 :       continue;
     979       21879 :     for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) {
     980       36588 :       const MachineOperand &MO = UseMI.getOperand(i);
     981       39158 :       if (!MO.isReg() || !MO.isUse())
     982        7781 :         continue;
     983       10513 :       unsigned MOReg = MO.getReg();
     984       17295 :       if (MOReg != Reg)
     985        6782 :         continue;
     986             : 
     987        3731 :       if (TII->hasHighOperandLatency(SchedModel, MRI, MI, DefIdx, UseMI, i))
     988          19 :         return true;
     989             :     }
     990             : 
     991             :     // Only look at the first in loop use.
     992             :     break;
     993             :   }
     994             : 
     995       10297 :   return false;
     996             : }
     997             : 
     998             : /// Return true if the instruction is marked "cheap" or the operand latency
     999             : /// between its def and a use is one or less.
    1000       42110 : bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
    1001       71399 :   if (TII->isAsCheapAsAMove(MI) || MI.isCopyLike())
    1002             :     return true;
    1003             : 
    1004       29289 :   bool isCheap = false;
    1005       58578 :   unsigned NumDefs = MI.getDesc().getNumDefs();
    1006       29595 :   for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
    1007       58578 :     MachineOperand &DefMO = MI.getOperand(i);
    1008       58578 :     if (!DefMO.isReg() || !DefMO.isDef())
    1009           0 :       continue;
    1010       29289 :     --NumDefs;
    1011       29289 :     unsigned Reg = DefMO.getReg();
    1012       29289 :     if (TargetRegisterInfo::isPhysicalRegister(Reg))
    1013           0 :       continue;
    1014             : 
    1015       29289 :     if (!TII->hasLowDefLatency(SchedModel, MI, i))
    1016             :       return false;
    1017             :     isCheap = true;
    1018             :   }
    1019             : 
    1020             :   return isCheap;
    1021             : }
    1022             : 
    1023             : /// Visit BBs from header to current BB, check if hoisting an instruction of the
    1024             : /// given cost matrix can cause high register pressure.
    1025       10294 : bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
    1026             :                                           bool CheapInstr) {
    1027       20588 :   for (const auto &RPIdAndCost : Cost) {
    1028       11784 :     if (RPIdAndCost.second <= 0)
    1029        1869 :       continue;
    1030             : 
    1031        9915 :     unsigned Class = RPIdAndCost.first;
    1032       19830 :     int Limit = RegLimit[Class];
    1033             : 
    1034             :     // Don't hoist cheap instructions if they would increase register pressure,
    1035             :     // even if we're under the limit.
    1036       10970 :     if (CheapInstr && !HoistCheapInsts)
    1037        7416 :       return true;
    1038             : 
    1039       47708 :     for (const auto &RP : BackTrace)
    1040       54978 :       if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
    1041             :         return true;
    1042             :   }
    1043             : 
    1044        2878 :   return false;
    1045             : }
    1046             : 
    1047             : /// Traverse the back trace from header to the current block and update their
    1048             : /// register pressures to reflect the effect of hoisting MI from the current
    1049             : /// block to the preheader.
    1050       15867 : void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
    1051             :   // First compute the 'cost' of the instruction, i.e. its contribution
    1052             :   // to register pressure.
    1053             :   auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
    1054       31734 :                                /*ConsiderUnseenAsDef=*/false);
    1055             : 
    1056             :   // Update register pressure of blocks from loop header to current block.
    1057      186667 :   for (auto &RP : BackTrace)
    1058      559714 :     for (const auto &RPIdAndCost : Cost)
    1059      285032 :       RP[RPIdAndCost.first] += RPIdAndCost.second;
    1060       15867 : }
    1061             : 
    1062             : /// Return true if it is potentially profitable to hoist the given loop
    1063             : /// invariant.
    1064       43179 : bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
    1065       43179 :   if (MI.isImplicitDef())
    1066             :     return true;
    1067             : 
    1068             :   // Besides removing computation from the loop, hoisting an instruction has
    1069             :   // these effects:
    1070             :   //
    1071             :   // - The value defined by the instruction becomes live across the entire
    1072             :   //   loop. This increases register pressure in the loop.
    1073             :   //
    1074             :   // - If the value is used by a PHI in the loop, a copy will be required for
    1075             :   //   lowering the PHI after extending the live range.
    1076             :   //
    1077             :   // - When hoisting the last use of a value in the loop, that value no longer
    1078             :   //   needs to be live in the loop. This lowers register pressure in the loop.
    1079             : 
    1080       42110 :   bool CheapInstr = IsCheapInstruction(MI);
    1081       42110 :   bool CreatesCopy = HasLoopPHIUse(&MI);
    1082             : 
    1083             :   // Don't hoist a cheap instruction if it would create a copy in the loop.
    1084       42110 :   if (CheapInstr && CreatesCopy) {
    1085             :     DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
    1086             :     return false;
    1087             :   }
    1088             : 
    1089             :   // Rematerializable instructions should always be hoisted since the register
    1090             :   // allocator can just pull them down again when needed.
    1091       40739 :   if (TII->isTriviallyReMaterializable(MI, AA))
    1092             :     return true;
    1093             : 
    1094             :   // FIXME: If there are long latency loop-invariant instructions inside the
    1095             :   // loop at this point, why didn't the optimizer's LICM hoist them?
    1096       74539 :   for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
    1097      107864 :     const MachineOperand &MO = MI.getOperand(i);
    1098       89881 :     if (!MO.isReg() || MO.isImplicit())
    1099       17983 :       continue;
    1100       35949 :     unsigned Reg = MO.getReg();
    1101       35949 :     if (!TargetRegisterInfo::isVirtualRegister(Reg))
    1102       15181 :       continue;
    1103       20768 :     if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
    1104             :       DEBUG(dbgs() << "Hoist High Latency: " << MI);
    1105             :       ++NumHighLatency;
    1106             :       return true;
    1107             :     }
    1108             :   }
    1109             : 
    1110             :   // Estimate register pressure to determine whether to LICM the instruction.
    1111             :   // In low register pressure situation, we can be more aggressive about
    1112             :   // hoisting. Also, favors hoisting long latency instructions even in
    1113             :   // moderately high pressure situation.
    1114             :   // Cheap instructions will only be hoisted if they don't increase register
    1115             :   // pressure at all.
    1116             :   auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false,
    1117       10294 :                                /*ConsiderUnseenAsDef=*/false);
    1118             : 
    1119             :   // Visit BBs from header to current BB, if hoisting this doesn't cause
    1120             :   // high register pressure, then it's safe to proceed.
    1121       10294 :   if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
    1122             :     DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
    1123             :     ++NumLowRP;
    1124             :     return true;
    1125             :   }
    1126             : 
    1127             :   // Don't risk increasing register pressure if it would create copies.
    1128        7416 :   if (CreatesCopy) {
    1129             :     DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
    1130             :     return false;
    1131             :   }
    1132             : 
    1133             :   // Do not "speculate" in high register pressure situation. If an
    1134             :   // instruction is not guaranteed to be executed in the loop, it's best to be
    1135             :   // conservative.
    1136       14264 :   if (AvoidSpeculation &&
    1137       20946 :       (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
    1138             :     DEBUG(dbgs() << "Won't speculate: " << MI);
    1139             :     return false;
    1140             :   }
    1141             : 
    1142             :   // High register pressure situation, only hoist if the instruction is going
    1143             :   // to be remat'ed.
    1144        4978 :   if (!TII->isTriviallyReMaterializable(MI, AA) &&
    1145        2489 :       !MI.isDereferenceableInvariantLoad(AA)) {
    1146             :     DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
    1147             :     return false;
    1148             :   }
    1149             : 
    1150             :   return true;
    1151             : }
    1152             : 
    1153             : /// Unfold a load from the given machineinstr if the load itself could be
    1154             : /// hoisted. Return the unfolded and hoistable load, or null if the load
    1155             : /// couldn't be unfolded or if it wouldn't be hoistable.
    1156      330544 : MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
    1157             :   // Don't unfold simple loads.
    1158      330544 :   if (MI->canFoldAsLoad())
    1159             :     return nullptr;
    1160             : 
    1161             :   // If not, we may be able to unfold a load and hoist that.
    1162             :   // First test whether the instruction is loading from an amenable
    1163             :   // memory location.
    1164      308306 :   if (!MI->isDereferenceableInvariantLoad(AA))
    1165             :     return nullptr;
    1166             : 
    1167             :   // Next determine the register class for a temporary register.
    1168             :   unsigned LoadRegIndex;
    1169             :   unsigned NewOpc =
    1170        1416 :     TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
    1171             :                                     /*UnfoldLoad=*/true,
    1172             :                                     /*UnfoldStore=*/false,
    1173        1416 :                                     &LoadRegIndex);
    1174         708 :   if (NewOpc == 0) return nullptr;
    1175        1308 :   const MCInstrDesc &MID = TII->get(NewOpc);
    1176         654 :   MachineFunction &MF = *MI->getParent()->getParent();
    1177         654 :   const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
    1178             :   // Ok, we're unfolding. Create a temporary register and do the unfold.
    1179         654 :   unsigned Reg = MRI->createVirtualRegister(RC);
    1180             : 
    1181         654 :   SmallVector<MachineInstr *, 2> NewMIs;
    1182         654 :   bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg,
    1183             :                                           /*UnfoldLoad=*/true,
    1184         654 :                                           /*UnfoldStore=*/false, NewMIs);
    1185             :   (void)Success;
    1186             :   assert(Success &&
    1187             :          "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
    1188             :          "succeeded!");
    1189             :   assert(NewMIs.size() == 2 &&
    1190             :          "Unfolded a load into multiple instructions!");
    1191         654 :   MachineBasicBlock *MBB = MI->getParent();
    1192         654 :   MachineBasicBlock::iterator Pos = MI;
    1193        1308 :   MBB->insert(Pos, NewMIs[0]);
    1194        1308 :   MBB->insert(Pos, NewMIs[1]);
    1195             :   // If unfolding produced a load that wasn't loop-invariant or profitable to
    1196             :   // hoist, discard the new instructions and bail.
    1197        1298 :   if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
    1198          13 :     NewMIs[0]->eraseFromParent();
    1199          13 :     NewMIs[1]->eraseFromParent();
    1200          13 :     return nullptr;
    1201             :   }
    1202             : 
    1203             :   // Update register pressure for the unfolded instruction.
    1204         641 :   UpdateRegPressure(NewMIs[1]);
    1205             : 
    1206             :   // Otherwise we successfully unfolded a load that we can hoist.
    1207         641 :   MI->eraseFromParent();
    1208         641 :   return NewMIs[0];
    1209             : }
    1210             : 
    1211             : /// Initialize the CSE map with instructions that are in the current loop
    1212             : /// preheader that may become duplicates of instructions that are hoisted
    1213             : /// out of the loop.
    1214        3053 : void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
    1215       72672 :   for (MachineInstr &MI : *BB)
    1216       90690 :     CSEMap[MI.getOpcode()].push_back(&MI);
    1217        3053 : }
    1218             : 
    1219             : /// Find an instruction amount PrevMIs that is a duplicate of MI.
    1220             : /// Return this instruction if it's found.
    1221             : const MachineInstr*
    1222       35816 : MachineLICM::LookForDuplicate(const MachineInstr *MI,
    1223             :                               std::vector<const MachineInstr*> &PrevMIs) {
    1224      518575 :   for (const MachineInstr *PrevMI : PrevMIs)
    1225      395907 :     if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr)))
    1226             :       return PrevMI;
    1227             : 
    1228             :   return nullptr;
    1229             : }
    1230             : 
    1231             : /// Given a LICM'ed instruction, look for an instruction on the preheader that
    1232             : /// computes the same value. If it's found, do a RAU on with the definition of
    1233             : /// the existing instruction rather than hoisting the instruction to the
    1234             : /// preheader.
    1235       34424 : bool MachineLICM::EliminateCSE(MachineInstr *MI,
    1236             :           DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
    1237             :   // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
    1238             :   // the undef property onto uses.
    1239      133340 :   if (CI == CSEMap.end() || MI->isImplicitDef())
    1240             :     return false;
    1241             : 
    1242       29359 :   if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
    1243             :     DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
    1244             : 
    1245             :     // Replace virtual registers defined by MI by their counterparts defined
    1246             :     // by Dup.
    1247       37114 :     SmallVector<unsigned, 2> Defs;
    1248       97118 :     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    1249      157122 :       const MachineOperand &MO = MI->getOperand(i);
    1250             : 
    1251             :       // Physical registers may not differ here.
    1252             :       assert((!MO.isReg() || MO.getReg() == 0 ||
    1253             :               !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
    1254             :               MO.getReg() == Dup->getOperand(i).getReg()) &&
    1255             :              "Instructions with different phys regs are not identical!");
    1256             : 
    1257      146107 :       if (MO.isReg() && MO.isDef() &&
    1258       38602 :           !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
    1259       18557 :         Defs.push_back(i);
    1260             :     }
    1261             : 
    1262       37114 :     SmallVector<const TargetRegisterClass*, 2> OrigRCs;
    1263       55671 :     for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
    1264       37114 :       unsigned Idx = Defs[i];
    1265       37114 :       unsigned Reg = MI->getOperand(Idx).getReg();
    1266       37114 :       unsigned DupReg = Dup->getOperand(Idx).getReg();
    1267       37114 :       OrigRCs.push_back(MRI->getRegClass(DupReg));
    1268             : 
    1269       37114 :       if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
    1270             :         // Restore old RCs if more than one defs.
    1271           0 :         for (unsigned j = 0; j != i; ++j)
    1272           0 :           MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
    1273             :         return false;
    1274             :       }
    1275             :     }
    1276             : 
    1277       74228 :     for (unsigned Idx : Defs) {
    1278       37114 :       unsigned Reg = MI->getOperand(Idx).getReg();
    1279       37114 :       unsigned DupReg = Dup->getOperand(Idx).getReg();
    1280       18557 :       MRI->replaceRegWith(Reg, DupReg);
    1281       18557 :       MRI->clearKillFlags(DupReg);
    1282             :     }
    1283             : 
    1284       18557 :     MI->eraseFromParent();
    1285       18557 :     ++NumCSEed;
    1286       18557 :     return true;
    1287             :   }
    1288             :   return false;
    1289             : }
    1290             : 
    1291             : /// Return true if the given instruction will be CSE'd if it's hoisted out of
    1292             : /// the loop.
    1293        6682 : bool MachineLICM::MayCSE(MachineInstr *MI) {
    1294       13364 :   unsigned Opcode = MI->getOpcode();
    1295             :   DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
    1296        6682 :     CI = CSEMap.find(Opcode);
    1297             :   // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
    1298             :   // the undef property onto uses.
    1299       26503 :   if (CI == CSEMap.end() || MI->isImplicitDef())
    1300             :     return false;
    1301             : 
    1302        6457 :   return LookForDuplicate(MI, CI->second) != nullptr;
    1303             : }
    1304             : 
    1305             : /// When an instruction is found to use only loop invariant operands
    1306             : /// that are safe to hoist, this instruction is called to do the dirty work.
    1307             : /// It returns true if the instruction is hoisted.
    1308      364327 : bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
    1309             :   // First check whether we should hoist this instruction.
    1310      364327 :   if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
    1311             :     // If not, try unfolding a hoistable load.
    1312      330544 :     MI = ExtractHoistableLoad(MI);
    1313      330544 :     if (!MI) return false;
    1314             :   }
    1315             : 
    1316             :   // Now move the instructions to the predecessor, inserting it before any
    1317             :   // terminator instructions.
    1318             :   DEBUG({
    1319             :       dbgs() << "Hoisting " << *MI;
    1320             :       if (MI->getParent()->getBasicBlock())
    1321             :         dbgs() << " from BB#" << MI->getParent()->getNumber();
    1322             :       if (Preheader->getBasicBlock())
    1323             :         dbgs() << " to BB#" << Preheader->getNumber();
    1324             :       dbgs() << "\n";
    1325             :     });
    1326             : 
    1327             :   // If this is the first instruction being hoisted to the preheader,
    1328             :   // initialize the CSE map with potential common expressions.
    1329       34424 :   if (FirstInLoop) {
    1330        3053 :     InitCSEMap(Preheader);
    1331        3053 :     FirstInLoop = false;
    1332             :   }
    1333             : 
    1334             :   // Look for opportunity to CSE the hoisted instruction.
    1335       68848 :   unsigned Opcode = MI->getOpcode();
    1336             :   DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
    1337       34424 :     CI = CSEMap.find(Opcode);
    1338       34424 :   if (!EliminateCSE(MI, CI)) {
    1339             :     // Otherwise, splice the instruction to the preheader.
    1340       15867 :     Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
    1341             : 
    1342             :     // Since we are moving the instruction out of its basic block, we do not
    1343             :     // retain its debug location. Doing so would degrade the debugging
    1344             :     // experience and adversely affect the accuracy of profiling information.
    1345       63468 :     MI->setDebugLoc(DebugLoc());
    1346             : 
    1347             :     // Update register pressure for BBs from header to this block.
    1348       15867 :     UpdateBackTraceRegPressure(MI);
    1349             : 
    1350             :     // Clear the kill flags of any register this instruction defines,
    1351             :     // since they may need to be live throughout the entire loop
    1352             :     // rather than just live for part of it.
    1353       86846 :     for (MachineOperand &MO : MI->operands())
    1354      131616 :       if (MO.isReg() && MO.isDef() && !MO.isDead())
    1355       15870 :         MRI->clearKillFlags(MO.getReg());
    1356             : 
    1357             :     // Add to the CSE map.
    1358       47601 :     if (CI != CSEMap.end())
    1359       23022 :       CI->second.push_back(MI);
    1360             :     else
    1361       13068 :       CSEMap[Opcode].push_back(MI);
    1362             :   }
    1363             : 
    1364       34424 :   ++NumHoisted;
    1365       34424 :   Changed = true;
    1366             : 
    1367       34424 :   return true;
    1368             : }
    1369             : 
    1370             : /// Get the preheader for the current loop, splitting a critical edge if needed.
    1371       12106 : MachineBasicBlock *MachineLICM::getCurPreheader() {
    1372             :   // Determine the block to which to hoist instructions. If we can't find a
    1373             :   // suitable loop predecessor, we can't do any hoisting.
    1374             : 
    1375             :   // If we've tried to get a preheader and failed, don't try again.
    1376       12106 :   if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
    1377             :     return nullptr;
    1378             : 
    1379       12106 :   if (!CurPreheader) {
    1380       12045 :     CurPreheader = CurLoop->getLoopPreheader();
    1381       12045 :     if (!CurPreheader) {
    1382         270 :       MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
    1383         270 :       if (!Pred) {
    1384          39 :         CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
    1385          39 :         return nullptr;
    1386             :       }
    1387             : 
    1388         462 :       CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), *this);
    1389         231 :       if (!CurPreheader) {
    1390          36 :         CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
    1391          36 :         return nullptr;
    1392             :       }
    1393             :     }
    1394             :   }
    1395       12031 :   return CurPreheader;
    1396      216918 : }

Generated by: LCOV version 1.13