LCOV - code coverage report
Current view: top level - lib/CodeGen - MachineCombiner.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 192 193 99.5 %
Date: 2017-09-14 15:23:50 Functions: 18 18 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // The machine combiner pass uses machine trace metrics to ensure the combined
      11             : // instructions do not lengthen the critical path or the resource depth.
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "llvm/ADT/DenseMap.h"
      15             : #include "llvm/ADT/Statistic.h"
      16             : #include "llvm/CodeGen/MachineDominators.h"
      17             : #include "llvm/CodeGen/MachineFunction.h"
      18             : #include "llvm/CodeGen/MachineFunctionPass.h"
      19             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      20             : #include "llvm/CodeGen/MachineLoopInfo.h"
      21             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      22             : #include "llvm/CodeGen/MachineTraceMetrics.h"
      23             : #include "llvm/CodeGen/Passes.h"
      24             : #include "llvm/CodeGen/TargetSchedule.h"
      25             : #include "llvm/Support/CommandLine.h"
      26             : #include "llvm/Support/Debug.h"
      27             : #include "llvm/Support/raw_ostream.h"
      28             : #include "llvm/Target/TargetInstrInfo.h"
      29             : #include "llvm/Target/TargetRegisterInfo.h"
      30             : #include "llvm/Target/TargetSubtargetInfo.h"
      31             : 
      32             : using namespace llvm;
      33             : 
      34             : #define DEBUG_TYPE "machine-combiner"
      35             : 
      36             : STATISTIC(NumInstCombined, "Number of machineinst combined");
      37             : 
      38             : static cl::opt<unsigned>
      39       72306 : inc_threshold("machine-combiner-inc-threshold", cl::Hidden,
      40      216918 :               cl::desc("Incremental depth computation will be used for basic "
      41      289224 :                        "blocks with more instructions."), cl::init(500));
      42             : 
      43             : namespace {
      44       24789 : class MachineCombiner : public MachineFunctionPass {
      45             :   const TargetInstrInfo *TII;
      46             :   const TargetRegisterInfo *TRI;
      47             :   MCSchedModel SchedModel;
      48             :   MachineRegisterInfo *MRI;
      49             :   MachineLoopInfo *MLI; // Current MachineLoopInfo
      50             :   MachineTraceMetrics *Traces;
      51             :   MachineTraceMetrics::Ensemble *MinInstr;
      52             : 
      53             :   TargetSchedModel TSchedModel;
      54             : 
      55             :   /// True if optimizing for code size.
      56             :   bool OptSize;
      57             : 
      58             : public:
      59             :   static char ID;
      60        8301 :   MachineCombiner() : MachineFunctionPass(ID) {
      61        8301 :     initializeMachineCombinerPass(*PassRegistry::getPassRegistry());
      62        8301 :   }
      63             :   void getAnalysisUsage(AnalysisUsage &AU) const override;
      64             :   bool runOnMachineFunction(MachineFunction &MF) override;
      65        8292 :   StringRef getPassName() const override { return "Machine InstCombiner"; }
      66             : 
      67             : private:
      68             :   bool doSubstitute(unsigned NewSize, unsigned OldSize);
      69             :   bool combineInstructions(MachineBasicBlock *);
      70             :   MachineInstr *getOperandDef(const MachineOperand &MO);
      71             :   unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
      72             :                     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
      73             :                     MachineTraceMetrics::Trace BlockTrace);
      74             :   unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
      75             :                       MachineTraceMetrics::Trace BlockTrace);
      76             :   bool
      77             :   improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
      78             :                           MachineTraceMetrics::Trace BlockTrace,
      79             :                           SmallVectorImpl<MachineInstr *> &InsInstrs,
      80             :                           SmallVectorImpl<MachineInstr *> &DelInstrs,
      81             :                           DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
      82             :                           MachineCombinerPattern Pattern, bool SlackIsAccurate);
      83             :   bool preservesResourceLen(MachineBasicBlock *MBB,
      84             :                             MachineTraceMetrics::Trace BlockTrace,
      85             :                             SmallVectorImpl<MachineInstr *> &InsInstrs,
      86             :                             SmallVectorImpl<MachineInstr *> &DelInstrs);
      87             :   void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
      88             :                      SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
      89             : };
      90             : }
      91             : 
      92             : char MachineCombiner::ID = 0;
      93             : char &llvm::MachineCombinerID = MachineCombiner::ID;
      94             : 
      95       20212 : INITIALIZE_PASS_BEGIN(MachineCombiner, DEBUG_TYPE,
      96             :                       "Machine InstCombiner", false, false)
      97       20212 : INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
      98       20212 : INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
      99      176102 : INITIALIZE_PASS_END(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner",
     100             :                     false, false)
     101             : 
     102        8277 : void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
     103        8277 :   AU.setPreservesCFG();
     104        8277 :   AU.addPreserved<MachineDominatorTree>();
     105        8277 :   AU.addRequired<MachineLoopInfo>();
     106        8277 :   AU.addPreserved<MachineLoopInfo>();
     107        8277 :   AU.addRequired<MachineTraceMetrics>();
     108        8277 :   AU.addPreserved<MachineTraceMetrics>();
     109        8277 :   MachineFunctionPass::getAnalysisUsage(AU);
     110        8277 : }
     111             : 
     112        4077 : MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
     113        4077 :   MachineInstr *DefInstr = nullptr;
     114             :   // We need a virtual register definition.
     115        8154 :   if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
     116        4077 :     DefInstr = MRI->getUniqueVRegDef(MO.getReg());
     117             :   // PHI's have no depth etc.
     118        4077 :   if (DefInstr && DefInstr->isPHI())
     119             :     DefInstr = nullptr;
     120        4077 :   return DefInstr;
     121             : }
     122             : 
     123             : /// Computes depth of instructions in vector \InsInstr.
     124             : ///
     125             : /// \param InsInstrs is a vector of machine instructions
     126             : /// \param InstrIdxForVirtReg is a dense map of virtual register to index
     127             : /// of defining machine instruction in \p InsInstrs
     128             : /// \param BlockTrace is a trace of machine instructions
     129             : ///
     130             : /// \returns Depth of last instruction in \InsInstrs ("NewRoot")
     131             : unsigned
     132        1359 : MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
     133             :                           DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
     134             :                           MachineTraceMetrics::Trace BlockTrace) {
     135        2718 :   SmallVector<unsigned, 16> InstrDepth;
     136             :   assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
     137             :          "Missing machine model\n");
     138             : 
     139             :   // For each instruction in the new sequence compute the depth based on the
     140             :   // operands. Use the trace information when possible. For new operands which
     141             :   // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
     142        6780 :   for (auto *InstrPtr : InsInstrs) { // for each Use
     143        2703 :     unsigned IDepth = 0;
     144             :     DEBUG(dbgs() << "NEW INSTR ";
     145             :           InstrPtr->print(dbgs(), TII);
     146             :           dbgs() << "\n";);
     147       11243 :     for (const MachineOperand &MO : InstrPtr->operands()) {
     148             :       // Check for virtual register operand.
     149       17080 :       if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
     150        3119 :         continue;
     151        8124 :       if (!MO.isUse())
     152             :         continue;
     153        5421 :       unsigned DepthOp = 0;
     154        5421 :       unsigned LatencyOp = 0;
     155             :       DenseMap<unsigned, unsigned>::iterator II =
     156        5421 :           InstrIdxForVirtReg.find(MO.getReg());
     157       10842 :       if (II != InstrIdxForVirtReg.end()) {
     158             :         // Operand is new virtual register not in trace
     159             :         assert(II->second < InstrDepth.size() && "Bad Index");
     160        2688 :         MachineInstr *DefInstr = InsInstrs[II->second];
     161             :         assert(DefInstr &&
     162             :                "There must be a definition for a new virtual register");
     163        2688 :         DepthOp = InstrDepth[II->second];
     164        4032 :         LatencyOp = TSchedModel.computeOperandLatency(
     165        1344 :             DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
     166        1344 :             InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
     167             :       } else {
     168        4077 :         MachineInstr *DefInstr = getOperandDef(MO);
     169        4077 :         if (DefInstr) {
     170        8118 :           DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth;
     171       12177 :           LatencyOp = TSchedModel.computeOperandLatency(
     172        4059 :               DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
     173        4059 :               InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
     174             :         }
     175             :       }
     176       10842 :       IDepth = std::max(IDepth, DepthOp + LatencyOp);
     177             :     }
     178        2703 :     InstrDepth.push_back(IDepth);
     179             :   }
     180        2718 :   unsigned NewRootIdx = InsInstrs.size() - 1;
     181        4077 :   return InstrDepth[NewRootIdx];
     182             : }
     183             : 
     184             : /// Computes instruction latency as max of latency of defined operands.
     185             : ///
     186             : /// \param Root is a machine instruction that could be replaced by NewRoot.
     187             : /// It is used to compute a more accurate latency information for NewRoot in
     188             : /// case there is a dependent instruction in the same trace (\p BlockTrace)
     189             : /// \param NewRoot is the instruction for which the latency is computed
     190             : /// \param BlockTrace is a trace of machine instructions
     191             : ///
     192             : /// \returns Latency of \p NewRoot
     193          15 : unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
     194             :                                      MachineTraceMetrics::Trace BlockTrace) {
     195             :   assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
     196             :          "Missing machine model\n");
     197             : 
     198             :   // Check each definition in NewRoot and compute the latency
     199          15 :   unsigned NewRootLatency = 0;
     200             : 
     201          75 :   for (const MachineOperand &MO : NewRoot->operands()) {
     202             :     // Check for virtual register operand.
     203         120 :     if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
     204          45 :       continue;
     205          60 :     if (!MO.isDef())
     206          45 :       continue;
     207             :     // Get the first instruction that uses MO
     208          30 :     MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
     209          30 :     RI++;
     210          15 :     MachineInstr *UseMO = RI->getParent();
     211          15 :     unsigned LatencyOp = 0;
     212          15 :     if (UseMO && BlockTrace.isDepInTrace(*Root, *UseMO)) {
     213          42 :       LatencyOp = TSchedModel.computeOperandLatency(
     214          14 :           NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
     215          14 :           UseMO->findRegisterUseOperandIdx(MO.getReg()));
     216             :     } else {
     217           1 :       LatencyOp = TSchedModel.computeInstrLatency(NewRoot);
     218             :     }
     219          15 :     NewRootLatency = std::max(NewRootLatency, LatencyOp);
     220             :   }
     221          15 :   return NewRootLatency;
     222             : }
     223             : 
     224             : /// The combiner's goal may differ based on which pattern it is attempting
     225             : /// to optimize.
     226             : enum class CombinerObjective {
     227             :   MustReduceDepth, // The data dependency chain must be improved.
     228             :   Default          // The critical path must not be lengthened.
     229             : };
     230             : 
     231             : static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
     232             :   // TODO: If C++ ever gets a real enum class, make this part of the
     233             :   // MachineCombinerPattern class.
     234        1359 :   switch (P) {
     235             :   case MachineCombinerPattern::REASSOC_AX_BY:
     236             :   case MachineCombinerPattern::REASSOC_AX_YB:
     237             :   case MachineCombinerPattern::REASSOC_XA_BY:
     238             :   case MachineCombinerPattern::REASSOC_XA_YB:
     239             :     return CombinerObjective::MustReduceDepth;
     240             :   default:
     241             :     return CombinerObjective::Default;
     242             :   }
     243             : }
     244             : 
     245             : /// The DAGCombine code sequence ends in MI (Machine Instruction) Root.
     246             : /// The new code sequence ends in MI NewRoot. A necessary condition for the new
     247             : /// sequence to replace the old sequence is that it cannot lengthen the critical
     248             : /// path. The definition of "improve" may be restricted by specifying that the
     249             : /// new path improves the data dependency chain (MustReduceDepth).
     250        1359 : bool MachineCombiner::improvesCriticalPathLen(
     251             :     MachineBasicBlock *MBB, MachineInstr *Root,
     252             :     MachineTraceMetrics::Trace BlockTrace,
     253             :     SmallVectorImpl<MachineInstr *> &InsInstrs,
     254             :     SmallVectorImpl<MachineInstr *> &DelInstrs,
     255             :     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
     256             :     MachineCombinerPattern Pattern,
     257             :     bool SlackIsAccurate) {
     258             :   assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
     259             :          "Missing machine model\n");
     260             :   // NewRoot is the last instruction in the \p InsInstrs vector.
     261        2718 :   unsigned NewRootIdx = InsInstrs.size() - 1;
     262        2718 :   MachineInstr *NewRoot = InsInstrs[NewRootIdx];
     263             : 
     264             :   // Get depth and latency of NewRoot and Root.
     265        1359 :   unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
     266        2718 :   unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
     267             : 
     268             :   DEBUG(dbgs() << "DEPENDENCE DATA FOR " << *Root << "\n";
     269             :         dbgs() << " NewRootDepth: " << NewRootDepth << "\n";
     270             :         dbgs() << " RootDepth: " << RootDepth << "\n");
     271             : 
     272             :   // For a transform such as reassociation, the cost equation is
     273             :   // conservatively calculated so that we must improve the depth (data
     274             :   // dependency cycles) in the critical path to proceed with the transform.
     275             :   // Being conservative also protects against inaccuracies in the underlying
     276             :   // machine trace metrics and CPU models.
     277        1359 :   if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth)
     278        1344 :     return NewRootDepth < RootDepth;
     279             : 
     280             :   // A more flexible cost calculation for the critical path includes the slack
     281             :   // of the original code sequence. This may allow the transform to proceed
     282             :   // even if the instruction depths (data dependency cycles) become worse.
     283             : 
     284          15 :   unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
     285          15 :   unsigned RootLatency = 0;
     286             : 
     287          75 :   for (auto I : DelInstrs)
     288          30 :     RootLatency += TSchedModel.computeInstrLatency(I);
     289             : 
     290          15 :   unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
     291          15 :   unsigned NewCycleCount = NewRootDepth + NewRootLatency;
     292          30 :   unsigned OldCycleCount = RootDepth + RootLatency +
     293          15 :                            (SlackIsAccurate ? RootSlack : 0);
     294             :   DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n";
     295             :         dbgs() << " RootLatency: " << RootLatency << "\n";
     296             :         dbgs() << " RootSlack: " << RootSlack << " SlackIsAccurate="
     297             :                << SlackIsAccurate << "\n";
     298             :         dbgs() << " NewRootDepth + NewRootLatency = "
     299             :                << NewCycleCount << "\n";
     300             :         dbgs() << " RootDepth + RootLatency + RootSlack = "
     301             :                << OldCycleCount << "\n";
     302             :         );
     303             : 
     304          15 :   return NewCycleCount <= OldCycleCount;
     305             : }
     306             : 
     307             : /// helper routine to convert instructions into SC
     308         552 : void MachineCombiner::instr2instrSC(
     309             :     SmallVectorImpl<MachineInstr *> &Instrs,
     310             :     SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) {
     311        2745 :   for (auto *InstrPtr : Instrs) {
     312        2178 :     unsigned Opc = InstrPtr->getOpcode();
     313        2178 :     unsigned Idx = TII->get(Opc).getSchedClass();
     314        2178 :     const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx);
     315        1089 :     InstrsSC.push_back(SC);
     316             :   }
     317         552 : }
     318             : 
     319             : /// True when the new instructions do not increase resource length
     320         306 : bool MachineCombiner::preservesResourceLen(
     321             :     MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
     322             :     SmallVectorImpl<MachineInstr *> &InsInstrs,
     323             :     SmallVectorImpl<MachineInstr *> &DelInstrs) {
     324         306 :   if (!TSchedModel.hasInstrSchedModel())
     325             :     return true;
     326             : 
     327             :   // Compute current resource length
     328             : 
     329             :   //ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
     330         276 :   SmallVector <const MachineBasicBlock *, 1> MBBarr;
     331         276 :   MBBarr.push_back(MBB);
     332         828 :   unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr);
     333             : 
     334             :   // Deal with SC rather than Instructions.
     335         552 :   SmallVector<const MCSchedClassDesc *, 16> InsInstrsSC;
     336         552 :   SmallVector<const MCSchedClassDesc *, 16> DelInstrsSC;
     337             : 
     338         276 :   instr2instrSC(InsInstrs, InsInstrsSC);
     339         276 :   instr2instrSC(DelInstrs, DelInstrsSC);
     340             : 
     341         276 :   ArrayRef<const MCSchedClassDesc *> MSCInsArr = makeArrayRef(InsInstrsSC);
     342         276 :   ArrayRef<const MCSchedClassDesc *> MSCDelArr = makeArrayRef(DelInstrsSC);
     343             : 
     344             :   // Compute new resource length.
     345             :   unsigned ResLenAfterCombine =
     346         276 :       BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr);
     347             : 
     348             :   DEBUG(dbgs() << "RESOURCE DATA: \n";
     349             :         dbgs() << " resource len before: " << ResLenBeforeCombine
     350             :                << " after: " << ResLenAfterCombine << "\n";);
     351             : 
     352         276 :   return ResLenAfterCombine <= ResLenBeforeCombine;
     353             : }
     354             : 
     355             : /// \returns true when new instruction sequence should be generated
     356             : /// independent if it lengthens critical path or not
     357             : bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
     358        1702 :   if (OptSize && (NewSize < OldSize))
     359             :     return true;
     360        1662 :   if (!TSchedModel.hasInstrSchedModelOrItineraries())
     361             :     return true;
     362             :   return false;
     363             : }
     364             : 
     365             : /// Inserts InsInstrs and deletes DelInstrs. Incrementally updates instruction
     366             : /// depths if requested.
     367             : ///
     368             : /// \param MBB basic block to insert instructions in
     369             : /// \param MI current machine instruction
     370             : /// \param InsInstrs new instructions to insert in \p MBB
     371             : /// \param DelInstrs instruction to delete from \p MBB
     372             : /// \param MinInstr is a pointer to the machine trace information
     373             : /// \param RegUnits set of live registers, needed to compute instruction depths
     374             : /// \param IncrementalUpdate if true, compute instruction depths incrementally,
     375             : ///                          otherwise invalidate the trace
     376         673 : static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
     377             :                                      SmallVector<MachineInstr *, 16> InsInstrs,
     378             :                                      SmallVector<MachineInstr *, 16> DelInstrs,
     379             :                                      MachineTraceMetrics::Ensemble *MinInstr,
     380             :                                      SparseSet<LiveRegUnit> &RegUnits,
     381             :                                      bool IncrementalUpdate) {
     382        3147 :   for (auto *InstrPtr : InsInstrs)
     383        2256 :     MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr);
     384             : 
     385        3365 :   for (auto *InstrPtr : DelInstrs) {
     386        1346 :     InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
     387             :     // Erase all LiveRegs defined by the removed instruction
     388        2876 :     for (auto I = RegUnits.begin(); I != RegUnits.end(); ) {
     389         184 :       if (I->MI == InstrPtr)
     390           0 :         I = RegUnits.erase(I);
     391             :       else
     392         184 :         I++;
     393             :     }
     394             :   }
     395             : 
     396         673 :   if (IncrementalUpdate)
     397         560 :     for (auto *InstrPtr : InsInstrs)
     398         224 :       MinInstr->updateDepth(MBB, *InstrPtr, RegUnits);
     399             :   else
     400         561 :     MinInstr->invalidate(MBB);
     401             : 
     402         673 :   NumInstCombined++;
     403         673 : }
     404             : 
     405             : /// Substitute a slow code sequence with a faster one by
     406             : /// evaluating instruction combining pattern.
     407             : /// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction
     408             : /// combining based on machine trace metrics. Only combine a sequence of
     409             : /// instructions  when this neither lengthens the critical path nor increases
     410             : /// resource pressure. When optimizing for codesize always combine when the new
     411             : /// sequence is shorter.
     412      216085 : bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
     413      216085 :   bool Changed = false;
     414             :   DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
     415             : 
     416      216085 :   bool IncrementalUpdate = false;
     417      216085 :   auto BlockIter = MBB->begin();
     418      216085 :   auto LastUpdate = BlockIter;
     419             :   // Check if the block is in a loop.
     420      432170 :   const MachineLoop *ML = MLI->getLoopFor(MBB);
     421      216085 :   if (!MinInstr)
     422       86660 :     MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
     423             : 
     424      432170 :   SparseSet<LiveRegUnit> RegUnits;
     425      216085 :   RegUnits.setUniverse(TRI->getNumRegUnits());
     426             : 
     427     6637744 :   while (BlockIter != MBB->end()) {
     428     9308361 :     auto &MI = *BlockIter++;
     429             : 
     430             :     DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";);
     431     3103888 :     SmallVector<MachineCombinerPattern, 16> Patterns;
     432             :     // The motivating example is:
     433             :     //
     434             :     //     MUL  Other        MUL_op1 MUL_op2  Other
     435             :     //      \    /               \      |    /
     436             :     //      ADD/SUB      =>        MADD/MSUB
     437             :     //      (=Root)                (=NewRoot)
     438             : 
     439             :     // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is
     440             :     // usually beneficial for code size it unfortunately can hurt performance
     441             :     // when the ADD is on the critical path, but the MUL is not. With the
     442             :     // substitution the MUL becomes part of the critical path (in form of the
     443             :     // MADD) and can lengthen it on architectures where the MADD latency is
     444             :     // longer than the ADD latency.
     445             :     //
     446             :     // For each instruction we check if it can be the root of a combiner
     447             :     // pattern. Then for each pattern the new code sequence in form of MI is
     448             :     // generated and evaluated. When the efficiency criteria (don't lengthen
     449             :     // critical path, don't use more resources) is met the new sequence gets
     450             :     // hooked up into the basic block before the old sequence is removed.
     451             :     //
     452             :     // The algorithm does not try to evaluate all patterns and pick the best.
     453             :     // This is only an artificial restriction though. In practice there is
     454             :     // mostly one pattern, and getMachineCombinerPatterns() can order patterns
     455             :     // based on an internal cost heuristic.
     456             : 
     457     3102787 :     if (!TII->getMachineCombinerPatterns(MI, Patterns))
     458     3101686 :       continue;
     459             : 
     460        4357 :     for (auto P : Patterns) {
     461        2780 :       SmallVector<MachineInstr *, 16> InsInstrs;
     462        2780 :       SmallVector<MachineInstr *, 16> DelInstrs;
     463        2780 :       DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
     464        3454 :       TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
     465        1727 :                                       InstrIdxForVirtReg);
     466        1727 :       unsigned NewInstCount = InsInstrs.size();
     467        1727 :       unsigned OldInstCount = DelInstrs.size();
     468             :       // Found pattern, but did not generate alternative sequence.
     469             :       // This can happen e.g. when an immediate could not be materialized
     470             :       // in a single instruction.
     471        1727 :       if (!NewInstCount)
     472           1 :         continue;
     473             : 
     474        1726 :       bool SubstituteAlways = false;
     475        1726 :       if (ML && TII->isThroughputPattern(P))
     476             :         SubstituteAlways = true;
     477             : 
     478        1726 :       if (IncrementalUpdate) {
     479             :         // Update depths since the last incremental update.
     480         134 :         MinInstr->updateDepths(LastUpdate, std::next(BlockIter), RegUnits);
     481          67 :         LastUpdate = BlockIter;
     482             :       }
     483             : 
     484             :       // Substitute when we optimize for codesize and the new sequence has
     485             :       // fewer instructions OR
     486             :       // the new sequence neither lengthens the critical path nor increases
     487             :       // resource pressure.
     488        3085 :       if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount)) {
     489        1835 :         insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
     490             :                                  RegUnits, IncrementalUpdate);
     491             :         // Eagerly stop after the first pattern fires.
     492         367 :         Changed = true;
     493        1040 :         break;
     494             :       } else {
     495             :         // For big basic blocks, we only compute the full trace the first time
     496             :         // we hit this. We do not invalidate the trace, but instead update the
     497             :         // instruction depths incrementally.
     498             :         // NOTE: Only the instruction depths up to MI are accurate. All other
     499             :         // trace information is not updated.
     500        1359 :         MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
     501        1359 :         Traces->verifyAnalysis();
     502        1359 :         if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs,
     503             :                                     InstrIdxForVirtReg, P,
     504        3024 :                                     !IncrementalUpdate) &&
     505         306 :             preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) {
     506         612 :           if (MBB->size() > inc_threshold)
     507             :             // Use incremental depth updates for basic blocks above treshold
     508         112 :             IncrementalUpdate = true;
     509             : 
     510        1530 :           insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
     511             :                                    RegUnits, IncrementalUpdate);
     512             : 
     513             :           // Eagerly stop after the first pattern fires.
     514         306 :           Changed = true;
     515         306 :           break;
     516             :         }
     517             :         // Cleanup instructions of the alternative code sequence. There is no
     518             :         // use for them.
     519        1053 :         MachineFunction *MF = MBB->getParent();
     520        8424 :         for (auto *InstrPtr : InsInstrs)
     521        2106 :           MF->DeleteMachineInstr(InstrPtr);
     522             :       }
     523        1053 :       InstrIdxForVirtReg.clear();
     524             :     }
     525             :   }
     526             : 
     527      216085 :   if (Changed && IncrementalUpdate)
     528          77 :     Traces->invalidate(MBB);
     529      216085 :   return Changed;
     530             : }
     531             : 
     532       86660 : bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
     533       86660 :   const TargetSubtargetInfo &STI = MF.getSubtarget();
     534       86660 :   TII = STI.getInstrInfo();
     535       86660 :   TRI = STI.getRegisterInfo();
     536       86660 :   SchedModel = STI.getSchedModel();
     537       86660 :   TSchedModel.init(SchedModel, &STI, TII);
     538       86660 :   MRI = &MF.getRegInfo();
     539       86660 :   MLI = &getAnalysis<MachineLoopInfo>();
     540       86660 :   Traces = &getAnalysis<MachineTraceMetrics>();
     541       86660 :   MinInstr = nullptr;
     542       86660 :   OptSize = MF.getFunction()->optForSize();
     543             : 
     544             :   DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
     545       86660 :   if (!TII->useMachineCombiner()) {
     546             :     DEBUG(dbgs() << "  Skipping pass: Target does not support machine combiner\n");
     547             :     return false;
     548             :   }
     549             : 
     550       86660 :   bool Changed = false;
     551             : 
     552             :   // Try to combine instructions.
     553      476065 :   for (auto &MBB : MF)
     554      216085 :     Changed |= combineInstructions(&MBB);
     555             : 
     556             :   return Changed;
     557      216918 : }

Generated by: LCOV version 1.13