LCOV - code coverage report
Current view: top level - lib/CodeGen - MachineCSE.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 288 293 98.3 %
Date: 2017-09-14 15:23:50 Functions: 19 19 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This pass performs global common subexpression elimination on machine
      11             : // instructions using a scoped hash table based value numbering scheme. It
      12             : // must be run while the machine function is still in SSA form.
      13             : //
      14             : //===----------------------------------------------------------------------===//
      15             : 
      16             : #include "llvm/ADT/DenseMap.h"
      17             : #include "llvm/ADT/ScopedHashTable.h"
      18             : #include "llvm/ADT/SmallPtrSet.h"
      19             : #include "llvm/ADT/SmallSet.h"
      20             : #include "llvm/ADT/SmallVector.h"
      21             : #include "llvm/ADT/Statistic.h"
      22             : #include "llvm/Analysis/AliasAnalysis.h"
      23             : #include "llvm/CodeGen/MachineBasicBlock.h"
      24             : #include "llvm/CodeGen/MachineDominators.h"
      25             : #include "llvm/CodeGen/MachineFunction.h"
      26             : #include "llvm/CodeGen/MachineFunctionPass.h"
      27             : #include "llvm/CodeGen/MachineInstr.h"
      28             : #include "llvm/CodeGen/MachineOperand.h"
      29             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      30             : #include "llvm/CodeGen/Passes.h"
      31             : #include "llvm/MC/MCInstrDesc.h"
      32             : #include "llvm/MC/MCRegisterInfo.h"
      33             : #include "llvm/Pass.h"
      34             : #include "llvm/Support/Allocator.h"
      35             : #include "llvm/Support/Debug.h"
      36             : #include "llvm/Support/RecyclingAllocator.h"
      37             : #include "llvm/Support/raw_ostream.h"
      38             : #include "llvm/Target/TargetInstrInfo.h"
      39             : #include "llvm/Target/TargetOpcodes.h"
      40             : #include "llvm/Target/TargetRegisterInfo.h"
      41             : #include "llvm/Target/TargetSubtargetInfo.h"
      42             : #include <cassert>
      43             : #include <iterator>
      44             : #include <utility>
      45             : #include <vector>
      46             : 
      47             : using namespace llvm;
      48             : 
      49             : #define DEBUG_TYPE "machine-cse"
      50             : 
      51             : STATISTIC(NumCoalesces, "Number of copies coalesced");
      52             : STATISTIC(NumCSEs,      "Number of common subexpression eliminated");
      53             : STATISTIC(NumPhysCSEs,
      54             :           "Number of physreg referencing common subexpr eliminated");
      55             : STATISTIC(NumCrossBBCSEs,
      56             :           "Number of cross-MBB physreg referencing CS eliminated");
      57             : STATISTIC(NumCommutes,  "Number of copies coalesced after commuting");
      58             : 
      59             : namespace {
      60             : 
      61       67704 :   class MachineCSE : public MachineFunctionPass {
      62             :     const TargetInstrInfo *TII;
      63             :     const TargetRegisterInfo *TRI;
      64             :     AliasAnalysis *AA;
      65             :     MachineDominatorTree *DT;
      66             :     MachineRegisterInfo *MRI;
      67             : 
      68             :   public:
      69             :     static char ID; // Pass identification
      70             : 
      71       68128 :     MachineCSE() : MachineFunctionPass(ID) {
      72       17032 :       initializeMachineCSEPass(*PassRegistry::getPassRegistry());
      73       17032 :     }
      74             : 
      75             :     bool runOnMachineFunction(MachineFunction &MF) override;
      76             : 
      77       16980 :     void getAnalysisUsage(AnalysisUsage &AU) const override {
      78       16980 :       AU.setPreservesCFG();
      79       16980 :       MachineFunctionPass::getAnalysisUsage(AU);
      80       16980 :       AU.addRequired<AAResultsWrapperPass>();
      81       33960 :       AU.addPreservedID(MachineLoopInfoID);
      82       16980 :       AU.addRequired<MachineDominatorTree>();
      83       16980 :       AU.addPreserved<MachineDominatorTree>();
      84       16980 :     }
      85             : 
      86      150327 :     void releaseMemory() override {
      87      150327 :       ScopeMap.clear();
      88      300654 :       Exps.clear();
      89      150327 :     }
      90             : 
      91             :   private:
      92             :     using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
      93             :                             ScopedHashTableVal<MachineInstr *, unsigned>>;
      94             :     using ScopedHTType =
      95             :         ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
      96             :                         AllocatorTy>;
      97             :     using ScopeType = ScopedHTType::ScopeTy;
      98             : 
      99             :     unsigned LookAheadLimit = 0;
     100             :     DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
     101             :     ScopedHTType VNT;
     102             :     SmallVector<MachineInstr *, 64> Exps;
     103             :     unsigned CurrVN = 0;
     104             : 
     105             :     bool PerformTrivialCopyPropagation(MachineInstr *MI,
     106             :                                        MachineBasicBlock *MBB);
     107             :     bool isPhysDefTriviallyDead(unsigned Reg,
     108             :                                 MachineBasicBlock::const_iterator I,
     109             :                                 MachineBasicBlock::const_iterator E) const;
     110             :     bool hasLivePhysRegDefUses(const MachineInstr *MI,
     111             :                                const MachineBasicBlock *MBB,
     112             :                                SmallSet<unsigned,8> &PhysRefs,
     113             :                                SmallVectorImpl<unsigned> &PhysDefs,
     114             :                                bool &PhysUseDef) const;
     115             :     bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
     116             :                           SmallSet<unsigned,8> &PhysRefs,
     117             :                           SmallVectorImpl<unsigned> &PhysDefs,
     118             :                           bool &NonLocal) const;
     119             :     bool isCSECandidate(MachineInstr *MI);
     120             :     bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
     121             :                            MachineInstr *CSMI, MachineInstr *MI);
     122             :     void EnterScope(MachineBasicBlock *MBB);
     123             :     void ExitScope(MachineBasicBlock *MBB);
     124             :     bool ProcessBlock(MachineBasicBlock *MBB);
     125             :     void ExitScopeIfDone(MachineDomTreeNode *Node,
     126             :                          DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
     127             :     bool PerformCSE(MachineDomTreeNode *Node);
     128             :   };
     129             : 
     130             : } // end anonymous namespace
     131             : 
     132             : char MachineCSE::ID = 0;
     133             : 
     134             : char &llvm::MachineCSEID = MachineCSE::ID;
     135             : 
     136       20212 : INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE,
     137             :                       "Machine Common Subexpression Elimination", false, false)
     138       20212 : INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
     139       20212 : INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
     140      202295 : INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE,
     141             :                     "Machine Common Subexpression Elimination", false, false)
     142             : 
     143             : /// The source register of a COPY machine instruction can be propagated to all
     144             : /// its users, and this propagation could increase the probability of finding
     145             : /// common subexpressions. If the COPY has only one user, the COPY itself can
     146             : /// be removed.
     147     1217127 : bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
     148             :                                                MachineBasicBlock *MBB) {
     149     1217127 :   bool Changed = false;
     150     6955422 :   for (MachineOperand &MO : MI->operands()) {
     151     9398586 :     if (!MO.isReg() || !MO.isUse())
     152             :       continue;
     153     2250223 :     unsigned Reg = MO.getReg();
     154     2250223 :     if (!TargetRegisterInfo::isVirtualRegister(Reg))
     155             :       continue;
     156     1445081 :     bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
     157     1445081 :     MachineInstr *DefMI = MRI->getVRegDef(Reg);
     158     1445081 :     if (!DefMI->isCopy())
     159             :       continue;
     160      473265 :     unsigned SrcReg = DefMI->getOperand(1).getReg();
     161      473265 :     if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
     162             :       continue;
     163      536826 :     if (DefMI->getOperand(0).getSubReg())
     164             :       continue;
     165             :     // FIXME: We should trivially coalesce subregister copies to expose CSE
     166             :     // opportunities on instructions with truncated operands (see
     167             :     // cse-add-with-overflow.ll). This can be done here as follows:
     168             :     // if (SrcSubReg)
     169             :     //  RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
     170             :     //                                     SrcSubReg);
     171             :     // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
     172             :     //
     173             :     // The 2-addr pass has been updated to handle coalesced subregs. However,
     174             :     // some machine-specific code still can't handle it.
     175             :     // To handle it properly we also need a way find a constrained subregister
     176             :     // class given a super-reg class and subreg index.
     177      536802 :     if (DefMI->getOperand(1).getSubReg())
     178             :       continue;
     179      237118 :     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
     180      118559 :     if (!MRI->constrainRegClass(SrcReg, RC))
     181             :       continue;
     182             :     DEBUG(dbgs() << "Coalescing: " << *DefMI);
     183             :     DEBUG(dbgs() << "***     to: " << *MI);
     184             :     // Propagate SrcReg of copies to MI.
     185       66629 :     MO.setReg(SrcReg);
     186       66629 :     MRI->clearKillFlags(SrcReg);
     187             :     // Coalesce single use copies.
     188       66629 :     if (OnlyOneUse) {
     189       55061 :       DefMI->eraseFromParent();
     190             :       ++NumCoalesces;
     191             :     }
     192             :     Changed = true;
     193             :   }
     194             : 
     195     1217127 :   return Changed;
     196             : }
     197             : 
     198             : bool
     199       27891 : MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
     200             :                                    MachineBasicBlock::const_iterator I,
     201             :                                    MachineBasicBlock::const_iterator E) const {
     202       27891 :   unsigned LookAheadLeft = LookAheadLimit;
     203       46020 :   while (LookAheadLeft) {
     204             :     // Skip over dbg_value's.
     205       45994 :     I = skipDebugInstructionsForward(I, E);
     206             : 
     207       45994 :     if (I == E)
     208             :       // Reached end of block, we don't know if register is dead or not.
     209             :       return false;
     210             : 
     211       45933 :     bool SeenDef = false;
     212      212086 :     for (const MachineOperand &MO : I->operands()) {
     213      147314 :       if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
     214             :         SeenDef = true;
     215      206952 :       if (!MO.isReg() || !MO.getReg())
     216             :         continue;
     217      146873 :       if (!TRI->regsOverlap(MO.getReg(), Reg))
     218             :         continue;
     219       27979 :       if (MO.isUse())
     220             :         // Found a use!
     221             :         return false;
     222             :       SeenDef = true;
     223             :     }
     224       18964 :     if (SeenDef)
     225             :       // See a def of Reg (or an alias) before encountering any use, it's
     226             :       // trivially dead.
     227             :       return true;
     228             : 
     229       18129 :     --LookAheadLeft;
     230             :     ++I;
     231             :   }
     232             :   return false;
     233             : }
     234             : 
     235             : /// hasLivePhysRegDefUses - Return true if the specified instruction read/write
     236             : /// physical registers (except for dead defs of physical registers). It also
     237             : /// returns the physical register def by reference if it's the only one and the
     238             : /// instruction does not uses a physical register.
     239      446589 : bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
     240             :                                        const MachineBasicBlock *MBB,
     241             :                                        SmallSet<unsigned,8> &PhysRefs,
     242             :                                        SmallVectorImpl<unsigned> &PhysDefs,
     243             :                                        bool &PhysUseDef) const{
     244             :   // First, add all uses to PhysRefs.
     245     2712502 :   for (const MachineOperand &MO : MI->operands()) {
     246     3578859 :     if (!MO.isReg() || MO.isDef())
     247     1710937 :       continue;
     248      554976 :     unsigned Reg = MO.getReg();
     249      554976 :     if (!Reg)
     250      154250 :       continue;
     251      400726 :     if (TargetRegisterInfo::isVirtualRegister(Reg))
     252       56981 :       continue;
     253             :     // Reading constant physregs is ok.
     254      343745 :     if (!MRI->isConstantPhysReg(Reg))
     255     3065612 :       for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
     256     1216930 :         PhysRefs.insert(*AI);
     257             :   }
     258             : 
     259             :   // Next, collect all defs into PhysDefs.  If any is already in PhysRefs
     260             :   // (which currently contains only uses), set the PhysUseDef flag.
     261      446589 :   PhysUseDef = false;
     262      893178 :   MachineBasicBlock::const_iterator I = MI; I = std::next(I);
     263     2712502 :   for (const MachineOperand &MO : MI->operands()) {
     264     5086802 :     if (!MO.isReg() || !MO.isDef())
     265     3143164 :       continue;
     266      757970 :     unsigned Reg = MO.getReg();
     267      757970 :     if (!Reg)
     268           0 :       continue;
     269     1515940 :     if (TargetRegisterInfo::isVirtualRegister(Reg))
     270      127278 :       continue;
     271             :     // Check against PhysRefs even if the def is "dead".
     272      630692 :     if (PhysRefs.count(Reg))
     273      300797 :       PhysUseDef = true;
     274             :     // If the def is dead, it's ok. But the def may not marked "dead". That's
     275             :     // common since this pass is run before livevariables. We can scan
     276             :     // forward a few instructions and check if it is obviously dead.
     277      658583 :     if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
     278       27056 :       PhysDefs.push_back(Reg);
     279             :   }
     280             : 
     281             :   // Finally, add all defs to PhysRefs as well.
     282      920234 :   for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
     283      146918 :     for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI)
     284       32875 :       PhysRefs.insert(*AI);
     285             : 
     286      446589 :   return !PhysRefs.empty();
     287             : }
     288             : 
     289       40974 : bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
     290             :                                   SmallSet<unsigned,8> &PhysRefs,
     291             :                                   SmallVectorImpl<unsigned> &PhysDefs,
     292             :                                   bool &NonLocal) const {
     293             :   // For now conservatively returns false if the common subexpression is
     294             :   // not in the same basic block as the given instruction. The only exception
     295             :   // is if the common subexpression is in the sole predecessor block.
     296       40974 :   const MachineBasicBlock *MBB = MI->getParent();
     297       40974 :   const MachineBasicBlock *CSMBB = CSMI->getParent();
     298             : 
     299       40974 :   bool CrossMBB = false;
     300       40974 :   if (CSMBB != MBB) {
     301       21851 :     if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
     302             :       return false;
     303             : 
     304        3131 :     for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
     305         994 :       if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
     306             :         // Avoid extending live range of physical registers if they are
     307             :         //allocatable or reserved.
     308             :         return false;
     309             :     }
     310             :     CrossMBB = true;
     311             :   }
     312       56216 :   MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
     313       28108 :   MachineBasicBlock::const_iterator E = MI;
     314       28108 :   MachineBasicBlock::const_iterator EE = CSMBB->end();
     315       28108 :   unsigned LookAheadLeft = LookAheadLimit;
     316      694590 :   while (LookAheadLeft) {
     317             :     // Skip over dbg_value's.
     318     2756251 :     while (I != E && I != EE && I->isDebugValue())
     319             :       ++I;
     320             : 
     321      694186 :     if (I == EE) {
     322             :       assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
     323             :       (void)CrossMBB;
     324         220 :       CrossMBB = false;
     325         220 :       NonLocal = true;
     326         220 :       I = MBB->begin();
     327         220 :       EE = MBB->end();
     328             :       continue;
     329             :     }
     330             : 
     331      693746 :     if (I == E)
     332             :       return true;
     333             : 
     334     3964821 :     for (const MachineOperand &MO : I->operands()) {
     335             :       // RegMasks go on instructions like calls that clobber lots of physregs.
     336             :       // Don't attempt to CSE across such an instruction.
     337     2611264 :       if (MO.isRegMask())
     338       21033 :         return false;
     339     6528439 :       if (!MO.isReg() || !MO.isDef())
     340     2583685 :         continue;
     341      647288 :       unsigned MOReg = MO.getReg();
     342     1914311 :       if (TargetRegisterInfo::isVirtualRegister(MOReg))
     343             :         continue;
     344       27553 :       if (PhysRefs.count(MOReg))
     345             :         return false;
     346             :     }
     347             : 
     348      666262 :     --LookAheadLeft;
     349             :     ++I;
     350             :   }
     351             : 
     352             :   return false;
     353             : }
     354             : 
     355     4342544 : bool MachineCSE::isCSECandidate(MachineInstr *MI) {
     356    21129257 :   if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
     357     8391606 :       MI->isInlineAsm() || MI->isDebugValue())
     358             :     return false;
     359             : 
     360             :   // Ignore copies.
     361     2984878 :   if (MI->isCopyLike())
     362             :     return false;
     363             : 
     364             :   // Ignore stuff that we obviously can't move.
     365     9448722 :   if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
     366     1899909 :       MI->hasUnmodeledSideEffects())
     367             :     return false;
     368             : 
     369     1870947 :   if (MI->mayLoad()) {
     370             :     // Okay, this instruction does a load. As a refinement, we allow the target
     371             :     // to decide whether the loaded value is actually a constant. If so, we can
     372             :     // actually use it as a load.
     373      320398 :     if (!MI->isDereferenceableInvariantLoad(AA))
     374             :       // FIXME: we should be able to hoist loads with no other side effects if
     375             :       // there are no other instructions which can change memory in this loop.
     376             :       // This is a trivial form of alias analysis.
     377             :       return false;
     378             :   }
     379             : 
     380             :   // Ignore stack guard loads, otherwise the register that holds CSEed value may
     381             :   // be spilled and get loaded back with corrupted data.
     382     3326734 :   if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
     383             :     return false;
     384             : 
     385             :   return true;
     386             : }
     387             : 
     388             : /// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
     389             : /// common expression that defines Reg.
     390      105190 : bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
     391             :                                    MachineInstr *CSMI, MachineInstr *MI) {
     392             :   // FIXME: Heuristics that works around the lack the live range splitting.
     393             : 
     394             :   // If CSReg is used at all uses of Reg, CSE should not increase register
     395             :   // pressure of CSReg.
     396      105190 :   bool MayIncreasePressure = true;
     397      210380 :   if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
     398      105190 :       TargetRegisterInfo::isVirtualRegister(Reg)) {
     399      105190 :     MayIncreasePressure = false;
     400      210380 :     SmallPtrSet<MachineInstr*, 8> CSUses;
     401     4678386 :     for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
     402     1454272 :       CSUses.insert(&MI);
     403             :     }
     404      421145 :     for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
     405      105087 :       if (!CSUses.count(&MI)) {
     406             :         MayIncreasePressure = true;
     407             :         break;
     408             :       }
     409             :     }
     410             :   }
     411      105190 :   if (!MayIncreasePressure) return true;
     412             : 
     413             :   // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
     414             :   // an immediate predecessor. We don't want to increase register pressure and
     415             :   // end up causing other computation to be spilled.
     416      104843 :   if (TII->isAsCheapAsAMove(*MI)) {
     417       27023 :     MachineBasicBlock *CSBB = CSMI->getParent();
     418       27023 :     MachineBasicBlock *BB = MI->getParent();
     419       27023 :     if (CSBB != BB && !CSBB->isSuccessor(BB))
     420             :       return false;
     421             :   }
     422             : 
     423             :   // Heuristics #2: If the expression doesn't not use a vr and the only use
     424             :   // of the redundant computation are copies, do not cse.
     425       87212 :   bool HasVRegUse = false;
     426      456200 :   for (const MachineOperand &MO : MI->operands()) {
     427      799639 :     if (MO.isReg() && MO.isUse() &&
     428      320322 :         TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
     429             :       HasVRegUse = true;
     430             :       break;
     431             :     }
     432             :   }
     433       87212 :   if (!HasVRegUse) {
     434       65158 :     bool HasNonCopyUse = false;
     435      347580 :     for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
     436             :       // Ignore copies.
     437             :       if (!MI.isCopyLike()) {
     438             :         HasNonCopyUse = true;
     439             :         break;
     440             :       }
     441             :     }
     442       65158 :     if (!HasNonCopyUse)
     443             :       return false;
     444             :   }
     445             : 
     446             :   // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
     447             :   // it unless the defined value is already used in the BB of the new use.
     448       49150 :   bool HasPHI = false;
     449       49150 :   SmallPtrSet<MachineBasicBlock*, 4> CSBBs;
     450     3917478 :   for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
     451     1256676 :     HasPHI |= MI.isPHI();
     452     1256676 :     CSBBs.insert(MI.getParent());
     453             :   }
     454             : 
     455       49150 :   if (!HasPHI)
     456             :     return true;
     457         436 :   return CSBBs.count(MI->getParent());
     458             : }
     459             : 
     460      295468 : void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
     461             :   DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
     462      590936 :   ScopeType *Scope = new ScopeType(VNT);
     463      590936 :   ScopeMap[MBB] = Scope;
     464      295468 : }
     465             : 
     466      295468 : void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
     467             :   DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
     468      295468 :   DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
     469             :   assert(SI != ScopeMap.end());
     470      295468 :   delete SI->second;
     471      590936 :   ScopeMap.erase(SI);
     472      295468 : }
     473             : 
     474      295468 : bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
     475      295468 :   bool Changed = false;
     476             : 
     477      590936 :   SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
     478      590936 :   SmallVector<unsigned, 2> ImplicitDefsToUpdate;
     479      590936 :   SmallVector<unsigned, 2> ImplicitDefs;
     480     5228948 :   for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
     481     4342544 :     MachineInstr *MI = &*I;
     482     4342544 :     ++I;
     483             : 
     484     4342544 :     if (!isCSECandidate(MI))
     485     6910510 :       continue;
     486             : 
     487     3326210 :     bool FoundCSE = VNT.count(MI);
     488             :     if (!FoundCSE) {
     489             :       // Using trivial copy propagation to find more CSE opportunities.
     490     1217127 :       if (PerformTrivialCopyPropagation(MI, MBB)) {
     491       47289 :         Changed = true;
     492             : 
     493             :         // After coalescing MI itself may become a copy.
     494       94578 :         if (MI->isCopyLike())
     495           0 :           continue;
     496             : 
     497             :         // Try again to see if CSE is possible.
     498       94578 :         FoundCSE = VNT.count(MI);
     499             :       }
     500             :     }
     501             : 
     502             :     // Commute commutable instructions.
     503     1663105 :     bool Commuted = false;
     504     2879623 :     if (!FoundCSE && MI->isCommutable()) {
     505      186540 :       if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) {
     506      153814 :         Commuted = true;
     507      307628 :         FoundCSE = VNT.count(NewMI);
     508      153814 :         if (NewMI != MI) {
     509             :           // New instruction. It doesn't need to be kept.
     510           0 :           NewMI->eraseFromParent();
     511           0 :           Changed = true;
     512      153814 :         } else if (!FoundCSE)
     513             :           // MI was changed but it didn't help, commute it back!
     514      153812 :           (void)TII->commuteInstruction(*MI);
     515             :       }
     516             :     }
     517             : 
     518             :     // If the instruction defines physical registers and the values *may* be
     519             :     // used, then it's not safe to replace it with a common subexpression.
     520             :     // It's also not safe if the instruction uses physical registers.
     521     1663105 :     bool CrossMBBPhysDef = false;
     522     1774578 :     SmallSet<unsigned, 8> PhysRefs;
     523     1774578 :     SmallVector<unsigned, 2> PhysDefs;
     524     1663105 :     bool PhysUseDef = false;
     525     1663105 :     if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
     526             :                                           PhysDefs, PhysUseDef)) {
     527      341567 :       FoundCSE = false;
     528             : 
     529             :       // ... Unless the CS is local or is in the sole predecessor block
     530             :       // and it also defines the physical register which is not clobbered
     531             :       // in between and the physical register uses were not clobbered.
     532             :       // This can never be the case if the instruction both uses and
     533             :       // defines the same physical register, which was detected above.
     534      341567 :       if (!PhysUseDef) {
     535       40974 :         unsigned CSVN = VNT.lookup(MI);
     536       81948 :         MachineInstr *CSMI = Exps[CSVN];
     537       40974 :         if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
     538             :           FoundCSE = true;
     539             :       }
     540             :     }
     541             : 
     542     2873170 :     if (!FoundCSE) {
     543     3103264 :       VNT.insert(MI, CurrVN++);
     544     1551632 :       Exps.push_back(MI);
     545     1551632 :       continue;
     546             :     }
     547             : 
     548             :     // Found a common subexpression, eliminate it.
     549      111473 :     unsigned CSVN = VNT.lookup(MI);
     550      222946 :     MachineInstr *CSMI = Exps[CSVN];
     551             :     DEBUG(dbgs() << "Examining: " << *MI);
     552             :     DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
     553             : 
     554             :     // Check if it's profitable to perform this CSE.
     555      111473 :     bool DoCSE = true;
     556      111473 :     unsigned NumDefs = MI->getDesc().getNumDefs() +
     557      222946 :                        MI->getDesc().getNumImplicitDefs();
     558             : 
     559      168900 :     for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
     560      226984 :       MachineOperand &MO = MI->getOperand(i);
     561      227895 :       if (!MO.isReg() || !MO.isDef())
     562        9700 :         continue;
     563      112094 :       unsigned OldReg = MO.getReg();
     564      224188 :       unsigned NewReg = CSMI->getOperand(i).getReg();
     565             : 
     566             :       // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
     567             :       // we should make sure it is not dead at CSMI.
     568      113137 :       if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
     569          24 :         ImplicitDefsToUpdate.push_back(i);
     570             : 
     571             :       // Keep track of implicit defs of CSMI and MI, to clear possibly
     572             :       // made-redundant kill flags.
     573      112855 :       if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
     574         282 :         ImplicitDefs.push_back(OldReg);
     575             : 
     576      118998 :       if (OldReg == NewReg) {
     577        6904 :         --NumDefs;
     578        6904 :         continue;
     579             :       }
     580             : 
     581             :       assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
     582             :              TargetRegisterInfo::isVirtualRegister(NewReg) &&
     583             :              "Do not CSE physical register defs!");
     584             : 
     585      105190 :       if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
     586             :         DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
     587             :         DoCSE = false;
     588       56065 :         break;
     589             :       }
     590             : 
     591             :       // Don't perform CSE if the result of the old instruction cannot exist
     592             :       // within the register class of the new instruction.
     593       98256 :       const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg);
     594       49128 :       if (!MRI->constrainRegClass(NewReg, OldRC)) {
     595             :         DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n");
     596             :         DoCSE = false;
     597             :         break;
     598             :       }
     599             : 
     600       98250 :       CSEPairs.push_back(std::make_pair(OldReg, NewReg));
     601       49125 :       --NumDefs;
     602             :     }
     603             : 
     604             :     // Actually perform the elimination.
     605             :     if (DoCSE) {
     606      215349 :       for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
     607       49125 :         unsigned OldReg = CSEPair.first;
     608       49125 :         unsigned NewReg = CSEPair.second;
     609             :         // OldReg may have been unused but is used now, clear the Dead flag
     610       49125 :         MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
     611             :         assert(Def != nullptr && "CSEd register has no unique definition?");
     612       49125 :         Def->clearRegisterDeads(NewReg);
     613             :         // Replace with NewReg and clear kill flags which may be wrong now.
     614       49125 :         MRI->replaceRegWith(OldReg, NewReg);
     615       49125 :         MRI->clearKillFlags(NewReg);
     616             :       }
     617             : 
     618             :       // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
     619             :       // we should make sure it is not dead at CSMI.
     620      166248 :       for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
     621          72 :         CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
     622             : 
     623             :       // Go through implicit defs of CSMI and MI, and clear the kill flags on
     624             :       // their uses in all the instructions between CSMI and MI.
     625             :       // We might have made some of the kill flags redundant, consider:
     626             :       //   subs  ... %NZCV<imp-def>        <- CSMI
     627             :       //   csinc ... %NZCV<imp-use,kill>   <- this kill flag isn't valid anymore
     628             :       //   subs  ... %NZCV<imp-def>        <- MI, to be eliminated
     629             :       //   csinc ... %NZCV<imp-use,kill>
     630             :       // Since we eliminated MI, and reused a register imp-def'd by CSMI
     631             :       // (here %NZCV), that register, if it was killed before MI, should have
     632             :       // that kill flag removed, because it's lifetime was extended.
     633       55408 :       if (CSMI->getParent() == MI->getParent()) {
     634      909644 :         for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
     635     2683225 :           for (auto ImplicitDef : ImplicitDefs)
     636         319 :             if (MachineOperand *MO = II->findRegisterUseOperand(
     637           0 :                     ImplicitDef, /*isKill=*/true, TRI))
     638             :               MO->setIsKill(false);
     639             :       } else {
     640             :         // If the instructions aren't in the same BB, bail out and clear the
     641             :         // kill flag on all uses of the imp-def'd register.
     642      143365 :         for (auto ImplicitDef : ImplicitDefs)
     643         154 :           MRI->clearKillFlags(ImplicitDef);
     644             :       }
     645             : 
     646       55408 :       if (CrossMBBPhysDef) {
     647             :         // Add physical register defs now coming in from a predecessor to MBB
     648             :         // livein list.
     649         315 :         while (!PhysDefs.empty()) {
     650         154 :           unsigned LiveIn = PhysDefs.pop_back_val();
     651         154 :           if (!MBB->isLiveIn(LiveIn))
     652         154 :             MBB->addLiveIn(LiveIn);
     653             :         }
     654             :         ++NumCrossBBCSEs;
     655             :       }
     656             : 
     657       55408 :       MI->eraseFromParent();
     658       55408 :       ++NumCSEs;
     659       55408 :       if (!PhysRefs.empty())
     660             :         ++NumPhysCSEs;
     661             :       if (Commuted)
     662             :         ++NumCommutes;
     663       55408 :       Changed = true;
     664             :     } else {
     665      112130 :       VNT.insert(MI, CurrVN++);
     666       56065 :       Exps.push_back(MI);
     667             :     }
     668      111473 :     CSEPairs.clear();
     669      111473 :     ImplicitDefsToUpdate.clear();
     670      111473 :     ImplicitDefs.clear();
     671             :   }
     672             : 
     673      590936 :   return Changed;
     674             : }
     675             : 
     676             : /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
     677             : /// dominator tree node if its a leaf or all of its children are done. Walk
     678             : /// up the dominator tree to destroy ancestors which are now done.
     679             : void
     680      295468 : MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
     681             :                         DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
     682      295468 :   if (OpenChildren[Node])
     683             :     return;
     684             : 
     685             :   // Pop scope.
     686      221225 :   ExitScope(Node->getBlock());
     687             : 
     688             :   // Now traverse upwards to pop ancestors whose offsprings are all done.
     689      295468 :   while (MachineDomTreeNode *Parent = Node->getIDom()) {
     690      145223 :     unsigned Left = --OpenChildren[Parent];
     691      145223 :     if (Left != 0)
     692             :       break;
     693       74243 :     ExitScope(Parent->getBlock());
     694       74243 :     Node = Parent;
     695       74243 :   }
     696             : }
     697             : 
     698      150245 : bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
     699      300490 :   SmallVector<MachineDomTreeNode*, 32> Scopes;
     700      300490 :   SmallVector<MachineDomTreeNode*, 8> WorkList;
     701      300490 :   DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
     702             : 
     703      150245 :   CurrVN = 0;
     704             : 
     705             :   // Perform a DFS walk to determine the order of visit.
     706      150245 :   WorkList.push_back(Node);
     707             :   do {
     708      295468 :     Node = WorkList.pop_back_val();
     709      295468 :     Scopes.push_back(Node);
     710      590936 :     const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
     711      590936 :     OpenChildren[Node] = Children.size();
     712     1327095 :     for (MachineDomTreeNode *Child : Children)
     713      145223 :       WorkList.push_back(Child);
     714      295468 :   } while (!WorkList.empty());
     715             : 
     716             :   // Now perform CSE.
     717      150245 :   bool Changed = false;
     718      746203 :   for (MachineDomTreeNode *Node : Scopes) {
     719      295468 :     MachineBasicBlock *MBB = Node->getBlock();
     720      295468 :     EnterScope(MBB);
     721      295468 :     Changed |= ProcessBlock(MBB);
     722             :     // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
     723      295468 :     ExitScopeIfDone(Node, OpenChildren);
     724             :   }
     725             : 
     726      300490 :   return Changed;
     727             : }
     728             : 
     729      150303 : bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
     730      150303 :   if (skipFunction(*MF.getFunction()))
     731             :     return false;
     732             : 
     733      150245 :   TII = MF.getSubtarget().getInstrInfo();
     734      150245 :   TRI = MF.getSubtarget().getRegisterInfo();
     735      150245 :   MRI = &MF.getRegInfo();
     736      300490 :   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
     737      150245 :   DT = &getAnalysis<MachineDominatorTree>();
     738      150245 :   LookAheadLimit = TII->getMachineCSELookAheadLimit();
     739      300490 :   return PerformCSE(DT->getRootNode());
     740             : }

Generated by: LCOV version 1.13