LCOV - code coverage report
Current view: top level - lib/CodeGen - ExecutionDepsFix.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 330 340 97.1 %
Date: 2017-09-14 15:23:50 Functions: 21 21 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- ExecutionDepsFix.cpp - Fix execution dependecy issues ----*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : 
      10             : #include "llvm/CodeGen/ExecutionDepsFix.h"
      11             : 
      12             : #include "llvm/ADT/PostOrderIterator.h"
      13             : #include "llvm/ADT/iterator_range.h"
      14             : #include "llvm/CodeGen/LivePhysRegs.h"
      15             : #include "llvm/CodeGen/MachineFunctionPass.h"
      16             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      17             : #include "llvm/CodeGen/RegisterClassInfo.h"
      18             : #include "llvm/Support/Allocator.h"
      19             : #include "llvm/Support/Debug.h"
      20             : #include "llvm/Support/raw_ostream.h"
      21             : #include "llvm/Target/TargetInstrInfo.h"
      22             : #include "llvm/Target/TargetSubtargetInfo.h"
      23             : 
      24             : using namespace llvm;
      25             : 
      26             : #define DEBUG_TYPE "execution-deps-fix"
      27             : 
      28             : /// Translate TRI register number to a list of indices into our smaller tables
      29             : /// of interesting registers.
      30             : iterator_range<SmallVectorImpl<int>::const_iterator>
      31     3717783 : ExecutionDepsFix::regIndices(unsigned Reg) const {
      32             :   assert(Reg < AliasMap.size() && "Invalid register");
      33     7435566 :   const auto &Entry = AliasMap[Reg];
      34    14871132 :   return make_range(Entry.begin(), Entry.end());
      35             : }
      36             : 
      37      414976 : DomainValue *ExecutionDepsFix::alloc(int domain) {
      38      829952 :   DomainValue *dv = Avail.empty() ?
      39      517362 :                       new(Allocator.Allocate()) DomainValue :
      40      657498 :                       Avail.pop_back_val();
      41      414976 :   if (domain >= 0)
      42      321252 :     dv->addDomain(domain);
      43             :   assert(dv->Refs == 0 && "Reference count wasn't cleared");
      44             :   assert(!dv->Next && "Chained DomainValue shouldn't have been recycled");
      45      414976 :   return dv;
      46             : }
      47             : 
      48             : /// Release a reference to DV.  When the last reference is released,
      49             : /// collapse if needed.
      50     2116425 : void ExecutionDepsFix::release(DomainValue *DV) {
      51     2946369 :   while (DV) {
      52             :     assert(DV->Refs && "Bad DomainValue");
      53      921366 :     if (--DV->Refs)
      54             :       return;
      55             : 
      56             :     // There are no more DV references. Collapse any contained instructions.
      57      829246 :     if (DV->AvailableDomains && !DV->isCollapsed())
      58       25018 :       collapse(DV, DV->getFirstDomain());
      59             : 
      60      414972 :     DomainValue *Next = DV->Next;
      61      829944 :     DV->clear();
      62      414972 :     Avail.push_back(DV);
      63             :     // Also release the next DomainValue in the chain.
      64      414972 :     DV = Next;
      65             :   }
      66             : }
      67             : 
      68             : /// Follow the chain of dead DomainValues until a live DomainValue is reached.
      69             : /// Update the referenced pointer when necessary.
      70     7387520 : DomainValue *ExecutionDepsFix::resolve(DomainValue *&DVRef) {
      71     7387520 :   DomainValue *DV = DVRef;
      72     7387520 :   if (!DV || !DV->Next)
      73             :     return DV;
      74             : 
      75             :   // DV has a chain. Find the end.
      76          86 :   do DV = DV->Next;
      77          86 :   while (DV->Next);
      78             : 
      79             :   // Update DVRef to point to DV.
      80         170 :   retain(DV);
      81          85 :   release(DVRef);
      82          85 :   DVRef = DV;
      83          85 :   return DV;
      84             : }
      85             : 
      86             : /// Set LiveRegs[rx] = dv, updating reference counts.
      87      920583 : void ExecutionDepsFix::setLiveReg(int rx, DomainValue *dv) {
      88             :   assert(unsigned(rx) < NumRegs && "Invalid index");
      89             :   assert(LiveRegs && "Must enter basic block first.");
      90             : 
      91      920583 :   if (LiveRegs[rx].Value == dv)
      92             :     return;
      93      920583 :   if (LiveRegs[rx].Value)
      94       13597 :     release(LiveRegs[rx].Value);
      95     1841166 :   LiveRegs[rx].Value = retain(dv);
      96             : }
      97             : 
      98             : // Kill register rx, recycle or collapse any DomainValue.
      99      384560 : void ExecutionDepsFix::kill(int rx) {
     100             :   assert(unsigned(rx) < NumRegs && "Invalid index");
     101             :   assert(LiveRegs && "Must enter basic block first.");
     102      384560 :   if (!LiveRegs[rx].Value)
     103             :     return;
     104             : 
     105      291668 :   release(LiveRegs[rx].Value);
     106      291668 :   LiveRegs[rx].Value = nullptr;
     107             : }
     108             : 
     109             : /// Force register rx into domain.
     110      720432 : void ExecutionDepsFix::force(int rx, unsigned domain) {
     111             :   assert(unsigned(rx) < NumRegs && "Invalid index");
     112             :   assert(LiveRegs && "Must enter basic block first.");
     113      720432 :   if (DomainValue *dv = LiveRegs[rx].Value) {
     114      412026 :     if (dv->isCollapsed())
     115      331780 :       dv->addDomain(domain);
     116      160492 :     else if (dv->hasDomain(domain))
     117       80188 :       collapse(dv, domain);
     118             :     else {
     119             :       // This is an incompatible open DomainValue. Collapse it to whatever and
     120             :       // force the new value into domain. This costs a domain crossing.
     121         116 :       collapse(dv, dv->getFirstDomain());
     122             :       assert(LiveRegs[rx].Value && "Not live after collapse?");
     123          58 :       LiveRegs[rx].Value->addDomain(domain);
     124             :     }
     125             :   } else {
     126             :     // Set up basic collapsed DomainValue.
     127      308406 :     setLiveReg(rx, alloc(domain));
     128             :   }
     129      720432 : }
     130             : 
     131             : /// Collapse open DomainValue into given domain. If there are multiple
     132             : /// registers using dv, they each get a unique collapsed DomainValue.
     133       93022 : void ExecutionDepsFix::collapse(DomainValue *dv, unsigned domain) {
     134             :   assert(dv->hasDomain(domain) && "Cannot collapse");
     135             : 
     136             :   // Collapse all the instructions.
     137      372208 :   while (!dv->Instrs.empty())
     138      279186 :     TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain);
     139      186044 :   dv->setSingleDomain(domain);
     140             : 
     141             :   // If there are multiple users, give them new, unique DomainValues.
     142       93022 :   if (LiveRegs && dv->Refs > 1)
     143      410280 :     for (unsigned rx = 0; rx != NumRegs; ++rx)
     144      201984 :       if (LiveRegs[rx].Value == dv)
     145       12846 :         setLiveReg(rx, alloc(domain));
     146       93022 : }
     147             : 
     148             : /// All instructions and registers in B are moved to A, and B is released.
     149        4951 : bool ExecutionDepsFix::merge(DomainValue *A, DomainValue *B) {
     150             :   assert(!A->isCollapsed() && "Cannot merge into collapsed");
     151             :   assert(!B->isCollapsed() && "Cannot merge from collapsed");
     152        4951 :   if (A == B)
     153             :     return true;
     154             :   // Restrict to the domains that A and B have in common.
     155        1396 :   unsigned common = A->getCommonDomains(B->AvailableDomains);
     156         698 :   if (!common)
     157             :     return false;
     158         698 :   A->AvailableDomains = common;
     159        2094 :   A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
     160             : 
     161             :   // Clear the old DomainValue so we won't try to swizzle instructions twice.
     162         698 :   B->clear();
     163             :   // All uses of B are referred to A.
     164        1396 :   B->Next = retain(A);
     165             : 
     166       23034 :   for (unsigned rx = 0; rx != NumRegs; ++rx) {
     167             :     assert(LiveRegs && "no space allocated for live registers");
     168       22336 :     if (LiveRegs[rx].Value == B)
     169         751 :       setLiveReg(rx, A);
     170             :   }
     171             :   return true;
     172             : }
     173             : 
     174             : /// Set up LiveRegs by merging predecessor live-out values.
     175      225958 : void ExecutionDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
     176             :   // Reset instruction counter in each basic block.
     177      225958 :   CurInstr = 0;
     178             : 
     179             :   // Set up UndefReads to track undefined register reads.
     180      451916 :   UndefReads.clear();
     181      451916 :   LiveRegSet.clear();
     182             : 
     183             :   // Set up LiveRegs to represent registers entering MBB.
     184      225958 :   if (!LiveRegs)
     185      225958 :     LiveRegs = new LiveReg[NumRegs];
     186             : 
     187             :   // Default values are 'nothing happened a long time ago'.
     188    14687270 :   for (unsigned rx = 0; rx != NumRegs; ++rx) {
     189     7230656 :     LiveRegs[rx].Value = nullptr;
     190     7230656 :     LiveRegs[rx].Def = -(1 << 20);
     191             :   }
     192             : 
     193             :   // This is the entry block.
     194      225958 :   if (MBB->pred_empty()) {
     195      260922 :     for (const auto &LI : MBB->liveins()) {
     196      208840 :       for (int rx : regIndices(LI.PhysReg)) {
     197             :         // Treat function live-ins as if they were defined just before the first
     198             :         // instruction.  Usually, function arguments are set up immediately
     199             :         // before the call.
     200       74294 :         LiveRegs[rx].Def = -1;
     201             :       }
     202             :     }
     203             :     DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n");
     204             :     return;
     205             :   }
     206             : 
     207             :   // Try to coalesce live-out registers from predecessors.
     208             :   for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
     209      396745 :        pe = MBB->pred_end(); pi != pe; ++pi) {
     210      233975 :     auto fi = MBBInfos.find(*pi);
     211             :     assert(fi != MBBInfos.end() &&
     212             :            "Should have pre-allocated MBBInfos for all MBBs");
     213      233975 :     LiveReg *Incoming = fi->second.OutRegs;
     214             :     // Incoming is null if this is a backedge from a BB
     215             :     // we haven't processed yet
     216      233975 :     if (Incoming == nullptr) {
     217        3115 :       continue;
     218             :     }
     219             : 
     220    15005900 :     for (unsigned rx = 0; rx != NumRegs; ++rx) {
     221             :       // Use the most recent predecessor def for each register.
     222    14775040 :       LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, Incoming[rx].Def);
     223             : 
     224     7387520 :       DomainValue *pdv = resolve(Incoming[rx].Value);
     225     7387520 :       if (!pdv)
     226     6679867 :         continue;
     227     1204592 :       if (!LiveRegs[rx].Value) {
     228      496939 :         setLiveReg(rx, pdv);
     229      496939 :         continue;
     230             :       }
     231             : 
     232             :       // We have a live DomainValue from more than one predecessor.
     233      421428 :       if (LiveRegs[rx].Value->isCollapsed()) {
     234             :         // We are already collapsed, but predecessor is not. Force it.
     235      412048 :         unsigned Domain = LiveRegs[rx].Value->getFirstDomain();
     236      206291 :         if (!pdv->isCollapsed() && pdv->hasDomain(Domain))
     237         267 :           collapse(pdv, Domain);
     238      206024 :         continue;
     239             :       }
     240             : 
     241             :       // Currently open, merge in predecessor.
     242        4690 :       if (!pdv->isCollapsed())
     243        4448 :         merge(LiveRegs[rx].Value, pdv);
     244             :       else
     245         484 :         force(rx, pdv->getFirstDomain());
     246             :     }
     247             :   }
     248             :   DEBUG(
     249             :       dbgs() << "BB#" << MBB->getNumber()
     250             :              << (!isBlockDone(MBB) ? ": incomplete\n" : ": all preds known\n"));
     251             : }
     252             : 
     253      225958 : void ExecutionDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) {
     254             :   assert(LiveRegs && "Must enter basic block first.");
     255      451916 :   LiveReg *OldOutRegs = MBBInfos[MBB].OutRegs;
     256             :   // Save register clearances at end of MBB - used by enterBasicBlock().
     257      451916 :   MBBInfos[MBB].OutRegs = LiveRegs;
     258             : 
     259             :   // While processing the basic block, we kept `Def` relative to the start
     260             :   // of the basic block for convenience. However, future use of this information
     261             :   // only cares about the clearance from the end of the block, so adjust
     262             :   // everything to be relative to the end of the basic block.
     263     7456614 :   for (unsigned i = 0, e = NumRegs; i != e; ++i)
     264     7230656 :     LiveRegs[i].Def -= CurInstr;
     265      225958 :   if (OldOutRegs) {
     266             :     // This must be the second pass.
     267             :     // Release all the DomainValues instead of keeping them.
     268     1384713 :     for (unsigned i = 0, e = NumRegs; i != e; ++i)
     269     1342752 :       release(OldOutRegs[i].Value);
     270       41961 :     delete[] OldOutRegs;
     271             :   }
     272      225958 :   LiveRegs = nullptr;
     273      225958 : }
     274             : 
     275     2153733 : bool ExecutionDepsFix::visitInstr(MachineInstr *MI) {
     276             :   // Update instructions with explicit execution domains.
     277     2153733 :   std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI);
     278     2153733 :   if (DomP.first) {
     279      458349 :     if (DomP.second)
     280      227984 :       visitSoftInstr(MI, DomP.second);
     281             :     else
     282      230365 :       visitHardInstr(MI, DomP.first);
     283             :   }
     284             : 
     285     2153733 :   return !DomP.first;
     286             : }
     287             : 
     288             : /// \brief Helps avoid false dependencies on undef registers by updating the
     289             : /// machine instructions' undef operand to use a register that the instruction
     290             : /// is truly dependent on, or use a register with clearance higher than Pref.
     291             : /// Returns true if it was able to find a true dependency, thus not requiring
     292             : /// a dependency breaking instruction regardless of clearance.
     293        1126 : bool ExecutionDepsFix::pickBestRegisterForUndef(MachineInstr *MI,
     294             :                                                 unsigned OpIdx, unsigned Pref) {
     295        2252 :   MachineOperand &MO = MI->getOperand(OpIdx);
     296             :   assert(MO.isUndef() && "Expected undef machine operand");
     297             : 
     298        1126 :   unsigned OriginalReg = MO.getReg();
     299             : 
     300             :   // Update only undef operands that are mapped to one register.
     301        3378 :   if (AliasMap[OriginalReg].size() != 1)
     302             :     return false;
     303             : 
     304             :   // Get the undef operand's register class
     305             :   const TargetRegisterClass *OpRC =
     306        1126 :       TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF);
     307             : 
     308             :   // If the instruction has a true dependency, we can hide the false depdency
     309             :   // behind it.
     310        4780 :   for (MachineOperand &CurrMO : MI->operands()) {
     311       14765 :     if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() ||
     312        2610 :         !OpRC->contains(CurrMO.getReg()))
     313             :       continue;
     314             :     // We found a true dependency - replace the undef register with the true
     315             :     // dependency.
     316         124 :     MO.setReg(CurrMO.getReg());
     317         124 :     return true;
     318             :   }
     319             : 
     320             :   // Go over all registers in the register class and find the register with
     321             :   // max clearance or clearance higher than Pref.
     322        1002 :   unsigned MaxClearance = 0;
     323        1002 :   unsigned MaxClearanceReg = OriginalReg;
     324        1002 :   ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC);
     325        5043 :   for (auto Reg : Order) {
     326             :     assert(AliasMap[Reg].size() == 1 &&
     327             :            "Reg is expected to be mapped to a single index");
     328        4031 :     int RCrx = *regIndices(Reg).begin();
     329        4031 :     unsigned Clearance = CurInstr - LiveRegs[RCrx].Def;
     330        4031 :     if (Clearance <= MaxClearance)
     331        1911 :       continue;
     332        2120 :     MaxClearance = Clearance;
     333        2120 :     MaxClearanceReg = Reg;
     334             : 
     335        2120 :     if (MaxClearance > Pref)
     336             :       break;
     337             :   }
     338             : 
     339             :   // Update the operand if we found a register with better clearance.
     340        1002 :   if (MaxClearanceReg != OriginalReg)
     341         887 :     MO.setReg(MaxClearanceReg);
     342             : 
     343             :   return false;
     344             : }
     345             : 
     346             : /// \brief Return true to if it makes sense to break dependence on a partial def
     347             : /// or undef use.
     348        1473 : bool ExecutionDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
     349             :                                              unsigned Pref) {
     350        2946 :   unsigned reg = MI->getOperand(OpIdx).getReg();
     351        1694 :   for (int rx : regIndices(reg)) {
     352        1483 :     unsigned Clearance = CurInstr - LiveRegs[rx].Def;
     353             :     DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
     354             : 
     355        1483 :     if (Pref > Clearance) {
     356             :       DEBUG(dbgs() << ": Break dependency.\n");
     357             :       continue;
     358             :     }
     359             :     DEBUG(dbgs() << ": OK .\n");
     360             :     return false;
     361             :   }
     362             :   return true;
     363             : }
     364             : 
     365             : // Update def-ages for registers defined by MI.
     366             : // If Kill is set, also kill off DomainValues clobbered by the defs.
     367             : //
     368             : // Also break dependencies on partial defs and undef uses.
     369     2557084 : void ExecutionDepsFix::processDefs(MachineInstr *MI, bool breakDependency,
     370             :                                    bool Kill) {
     371             :   assert(!MI->isDebugValue() && "Won't process debug values");
     372             : 
     373             :   // Break dependence on undef uses. Do this before updating LiveRegs below.
     374             :   unsigned OpNum;
     375     2557084 :   if (breakDependency) {
     376     2153734 :     unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
     377     2153734 :     if (Pref) {
     378        1126 :       bool HadTrueDependency = pickBestRegisterForUndef(MI, OpNum, Pref);
     379             :       // We don't need to bother trying to break a dependency if this
     380             :       // instruction has a true dependency on that register through another
     381             :       // operand - we'll have to wait for it to be available regardless.
     382        1126 :       if (!HadTrueDependency && shouldBreakDependence(MI, OpNum, Pref))
     383          30 :         UndefReads.push_back(std::make_pair(MI, OpNum));
     384             :     }
     385             :   }
     386     2557084 :   const MCInstrDesc &MCID = MI->getDesc();
     387     3935226 :   for (unsigned i = 0,
     388     7671252 :          e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
     389     3935226 :          i != e; ++i) {
     390     2756284 :     MachineOperand &MO = MI->getOperand(i);
     391     1378142 :     if (!MO.isReg())
     392       53711 :       continue;
     393     1324431 :     if (MO.isUse())
     394       84535 :       continue;
     395     1670102 :     for (int rx : regIndices(MO.getReg())) {
     396             :       // This instruction explicitly defines rx.
     397             :       DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr
     398             :                    << '\t' << *MI);
     399             : 
     400      430206 :       if (breakDependency) {
     401             :         // Check clearance before partial register updates.
     402             :         // Call breakDependence before setting LiveRegs[rx].Def.
     403      380221 :         unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI);
     404      380221 :         if (Pref && shouldBreakDependence(MI, i, Pref))
     405         201 :           TII->breakPartialRegDependency(*MI, i, TRI);
     406             :       }
     407             : 
     408             :       // How many instructions since rx was last written?
     409      430206 :       LiveRegs[rx].Def = CurInstr;
     410             : 
     411             :       // Kill off domains redefined by generic instructions.
     412      430206 :       if (Kill)
     413       42948 :         kill(rx);
     414             :     }
     415             :   }
     416     2557084 :   ++CurInstr;
     417     2557084 : }
     418             : 
     419             : /// \break Break false dependencies on undefined register reads.
     420             : ///
     421             : /// Walk the block backward computing precise liveness. This is expensive, so we
     422             : /// only do it on demand. Note that the occurrence of undefined register reads
     423             : /// that should be broken is very rare, but when they occur we may have many in
     424             : /// a single block.
     425      183997 : void ExecutionDepsFix::processUndefReads(MachineBasicBlock *MBB) {
     426      367994 :   if (UndefReads.empty())
     427             :     return;
     428             : 
     429             :   // Collect this block's live out register units.
     430          20 :   LiveRegSet.init(*TRI);
     431             :   // We do not need to care about pristine registers as they are just preserved
     432             :   // but not actually used in the function.
     433          10 :   LiveRegSet.addLiveOutsNoPristines(*MBB);
     434             : 
     435          20 :   MachineInstr *UndefMI = UndefReads.back().first;
     436          20 :   unsigned OpIdx = UndefReads.back().second;
     437             : 
     438         188 :   for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) {
     439             :     // Update liveness, including the current instruction's defs.
     440          79 :     LiveRegSet.stepBackward(I);
     441             : 
     442          79 :     if (UndefMI == &I) {
     443          30 :       if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg()))
     444           9 :         TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI);
     445             : 
     446          10 :       UndefReads.pop_back();
     447          20 :       if (UndefReads.empty())
     448          10 :         return;
     449             : 
     450           0 :       UndefMI = UndefReads.back().first;
     451           0 :       OpIdx = UndefReads.back().second;
     452             :     }
     453             :   }
     454             : }
     455             : 
     456             : // A hard instruction only works in one domain. All input registers will be
     457             : // forced into that domain.
     458      318752 : void ExecutionDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
     459             :   // Collapse all uses.
     460     1678642 :   for (unsigned i = mi->getDesc().getNumDefs(),
     461      637504 :                 e = mi->getDesc().getNumOperands(); i != e; ++i) {
     462     2082276 :     MachineOperand &mo = mi->getOperand(i);
     463     1041138 :     if (!mo.isReg()) continue;
     464     1270643 :     for (int rx : regIndices(mo.getReg())) {
     465      480219 :       force(rx, domain);
     466             :     }
     467             :   }
     468             : 
     469             :   // Kill all defs and force them.
     470      888240 :   for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
     471      501472 :     MachineOperand &mo = mi->getOperand(i);
     472      250736 :     if (!mo.isReg()) continue;
     473      490707 :     for (int rx : regIndices(mo.getReg())) {
     474      239971 :       kill(rx);
     475      239971 :       force(rx, domain);
     476             :     }
     477             :   }
     478      318752 : }
     479             : 
     480             : // A soft instruction can be changed to work in other domains given by mask.
     481      227984 : void ExecutionDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
     482             :   // Bitmask of available domains for this instruction after taking collapsed
     483             :   // operands into account.
     484      227984 :   unsigned available = mask;
     485             : 
     486             :   // Scan the explicit use operands for incoming domains.
     487      367581 :   SmallVector<int, 4> used;
     488      227984 :   if (LiveRegs)
     489     1608192 :     for (unsigned i = mi->getDesc().getNumDefs(),
     490      455968 :                   e = mi->getDesc().getNumOperands(); i != e; ++i) {
     491     2304448 :       MachineOperand &mo = mi->getOperand(i);
     492     1152224 :       if (!mo.isReg()) continue;
     493      919266 :       for (int rx : regIndices(mo.getReg())) {
     494      165017 :         DomainValue *dv = LiveRegs[rx].Value;
     495      165017 :         if (dv == nullptr)
     496       18339 :           continue;
     497             :         // Bitmask of domains that dv and available have in common.
     498      293356 :         unsigned common = dv->getCommonDomains(available);
     499             :         // Is it possible to use this collapsed register for free?
     500      146678 :         if (dv->isCollapsed()) {
     501             :           // Restrict available domains to the ones in common with the operand.
     502             :           // If there are no common domains, we must pay the cross-domain
     503             :           // penalty for this operand.
     504       98155 :           if (common) available = common;
     505       48523 :         } else if (common)
     506             :           // Open DomainValue is compatible, save it for merging.
     507       48523 :           used.push_back(rx);
     508             :         else
     509             :           // Open DomainValue is not compatible with instruction. It is useless
     510             :           // now.
     511           0 :           kill(rx);
     512             :       }
     513             :     }
     514             : 
     515             :   // If the collapsed operands force a single domain, propagate the collapse.
     516      227984 :   if (isPowerOf2_32(available)) {
     517       88387 :     unsigned domain = countTrailingZeros(available);
     518       88387 :     TII->setExecutionDomain(*mi, domain);
     519       88387 :     visitHardInstr(mi, domain);
     520       88387 :     return;
     521             :   }
     522             : 
     523             :   // Kill off any remaining uses that don't match available, and build a list of
     524             :   // incoming DomainValues that we want to merge.
     525      279194 :   SmallVector<const LiveReg *, 4> Regs;
     526      653048 :   for (int rx : used) {
     527             :     assert(LiveRegs && "no space allocated for live registers");
     528       47330 :     const LiveReg &LR = LiveRegs[rx];
     529             :     // This useless DomainValue could have been missed above.
     530       94660 :     if (!LR.Value->getCommonDomains(available)) {
     531           0 :       kill(rx);
     532           0 :       continue;
     533             :     }
     534             :     // Sorted insertion.
     535      141990 :     auto I = std::upper_bound(Regs.begin(), Regs.end(), &LR,
     536             :                               [](const LiveReg *LHS, const LiveReg *RHS) {
     537             :                                 return LHS->Def < RHS->Def;
     538       94660 :                               });
     539       47330 :     Regs.insert(I, &LR);
     540             :   }
     541             : 
     542             :   // doms are now sorted in order of appearance. Try to merge them all, giving
     543             :   // priority to the latest ones.
     544             :   DomainValue *dv = nullptr;
     545      186927 :   while (!Regs.empty()) {
     546       93203 :     if (!dv) {
     547       45873 :       dv = Regs.pop_back_val()->Value;
     548             :       // Force the first dv to match the current instruction.
     549       91746 :       dv->AvailableDomains = dv->getCommonDomains(available);
     550             :       assert(dv->AvailableDomains && "Domain should have been filtered");
     551       45873 :       continue;
     552             :     }
     553             : 
     554        1457 :     DomainValue *Latest = Regs.pop_back_val()->Value;
     555             :     // Skip already merged values.
     556        1457 :     if (Latest == dv || Latest->Next)
     557         954 :       continue;
     558         503 :     if (merge(dv, Latest))
     559         503 :       continue;
     560             : 
     561             :     // If latest didn't merge, it is useless now. Kill all registers using it.
     562           0 :     for (int i : used) {
     563             :       assert(LiveRegs && "no space allocated for live registers");
     564           0 :       if (LiveRegs[i].Value == Latest)
     565           0 :         kill(i);
     566             :     }
     567             :   }
     568             : 
     569             :   // dv is the DomainValue we are going to use for this instruction.
     570      139597 :   if (!dv) {
     571       93724 :     dv = alloc();
     572       93724 :     dv->AvailableDomains = available;
     573             :   }
     574      139597 :   dv->Instrs.push_back(mi);
     575             : 
     576             :   // Finally set all defs and non-collapsed uses to dv. We must iterate through
     577             :   // all the operators, including imp-def ones.
     578      938249 :   for (MachineInstr::mop_iterator ii = mi->operands_begin(),
     579      279194 :                                   ee = mi->operands_end();
     580      938249 :                                   ii != ee; ++ii) {
     581      798652 :     MachineOperand &mo = *ii;
     582      798652 :     if (!mo.isReg()) continue;
     583      704092 :     for (int rx : regIndices(mo.getReg())) {
     584      285331 :       if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) {
     585      101641 :         kill(rx);
     586      101641 :         setLiveReg(rx, dv);
     587             :       }
     588             :     }
     589             :   }
     590             : }
     591             : 
     592      225958 : void ExecutionDepsFix::processBasicBlock(MachineBasicBlock *MBB,
     593             :                                          bool PrimaryPass) {
     594      225958 :   enterBasicBlock(MBB);
     595             :   // If this block is not done, it makes little sense to make any decisions
     596             :   // based on clearance information. We need to make a second pass anyway,
     597             :   // and by then we'll have better information, so we can avoid doing the work
     598             :   // to try and break dependencies now.
     599      225958 :   bool breakDependency = isBlockDone(MBB);
     600     6049658 :   for (MachineInstr &MI : *MBB) {
     601     2572913 :     if (!MI.isDebugValue()) {
     602     2557084 :       bool Kill = false;
     603     2557084 :       if (PrimaryPass)
     604     2153733 :         Kill = visitInstr(&MI);
     605     2557084 :       processDefs(&MI, breakDependency, Kill);
     606             :     }
     607             :   }
     608      225958 :   if (breakDependency)
     609      183997 :     processUndefReads(MBB);
     610      225958 :   leaveBasicBlock(MBB);
     611      225958 : }
     612             : 
     613     1101770 : bool ExecutionDepsFix::isBlockDone(MachineBasicBlock *MBB) {
     614     2959862 :   return MBBInfos[MBB].PrimaryCompleted &&
     615     3218593 :          MBBInfos[MBB].IncomingCompleted == MBBInfos[MBB].PrimaryIncoming &&
     616     2914307 :          MBBInfos[MBB].IncomingProcessed == MBB->pred_size();
     617             : }
     618             : 
     619       79719 : bool ExecutionDepsFix::runOnMachineFunction(MachineFunction &mf) {
     620       79719 :   if (skipFunction(*mf.getFunction()))
     621             :     return false;
     622       79667 :   MF = &mf;
     623       79667 :   TII = MF->getSubtarget().getInstrInfo();
     624       79667 :   TRI = MF->getSubtarget().getRegisterInfo();
     625       79667 :   RegClassInfo.runOnMachineFunction(mf);
     626       79667 :   LiveRegs = nullptr;
     627             :   assert(NumRegs == RC->getNumRegs() && "Bad regclass");
     628             : 
     629             :   DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: "
     630             :                << TRI->getRegClassName(RC) << " **********\n");
     631             : 
     632             :   // If no relevant registers are used in the function, we can skip it
     633             :   // completely.
     634       79667 :   bool anyregs = false;
     635       79667 :   const MachineRegisterInfo &MRI = mf.getRegInfo();
     636      798494 :   for (unsigned Reg : *RC) {
     637      622681 :     if (MRI.isPhysRegUsed(Reg)) {
     638             :       anyregs = true;
     639             :       break;
     640             :     }
     641             :   }
     642       79667 :   if (!anyregs) return false;
     643             : 
     644             :   // Initialize the AliasMap on the first use.
     645      126376 :   if (AliasMap.empty()) {
     646             :     // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and
     647             :     // therefore the LiveRegs array.
     648        5067 :     AliasMap.resize(TRI->getNumRegs());
     649      172278 :     for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
     650     1881928 :       for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true);
     651     3277424 :            AI.isValid(); ++AI)
     652     3115280 :         AliasMap[*AI].push_back(i);
     653             :   }
     654             : 
     655             :   // Initialize the MMBInfos
     656      373561 :   for (auto &MBB : mf) {
     657      183997 :     MBBInfo InitialInfo;
     658      551991 :     MBBInfos.insert(std::make_pair(&MBB, InitialInfo));
     659             :   }
     660             : 
     661             :   /*
     662             :    *  We want to visit every instruction in every basic block in order to update
     663             :    *  it's execution domain or break any false dependencies. However, for the
     664             :    *  dependency breaking, we need to know clearances from all predecessors
     665             :    *  (including any backedges). One way to do so would be to do two complete
     666             :    *  passes over all basic blocks/instructions, the first for recording
     667             :    *  clearances, the second to break the dependencies. However, for functions
     668             :    *  without backedges, or functions with a lot of straight-line code, and
     669             :    *  a small loop, that would be a lot of unnecessary work (since only the
     670             :    *  BBs that are part of the loop require two passes). As an example,
     671             :    *  consider the following loop.
     672             :    *
     673             :    *
     674             :    *     PH -> A -> B (xmm<Undef> -> xmm<Def>) -> C -> D -> EXIT
     675             :    *           ^                                  |
     676             :    *           +----------------------------------+
     677             :    *
     678             :    *  The iteration order is as follows:
     679             :    *  Naive: PH A B C D A' B' C' D'
     680             :    *  Optimized: PH A B C A' B' C' D
     681             :    *
     682             :    *  Note that we avoid processing D twice, because we can entirely process
     683             :    *  the predecessors before getting to D. We call a block that is ready
     684             :    *  for its second round of processing `done` (isBlockDone). Once we finish
     685             :    *  processing some block, we update the counters in MBBInfos and re-process
     686             :    *  any successors that are now done.
     687             :    */
     688             : 
     689      189564 :   MachineBasicBlock *Entry = &*MF->begin();
     690       63188 :   ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry);
     691      126376 :   SmallVector<MachineBasicBlock *, 4> Workqueue;
     692             :   for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
     693      310373 :          MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
     694      183997 :     MachineBasicBlock *MBB = *MBBI;
     695             :     // N.B: IncomingProcessed and IncomingCompleted were already updated while
     696             :     // processing this block's predecessors.
     697      367994 :     MBBInfos[MBB].PrimaryCompleted = true;
     698      551991 :     MBBInfos[MBB].PrimaryIncoming = MBBInfos[MBB].IncomingProcessed;
     699      183997 :     bool Primary = true;
     700      183997 :     Workqueue.push_back(MBB);
     701      635913 :     while (!Workqueue.empty()) {
     702      451916 :       MachineBasicBlock *ActiveMBB = &*Workqueue.back();
     703      225958 :       Workqueue.pop_back();
     704      225958 :       processBasicBlock(ActiveMBB, Primary);
     705      225958 :       bool Done = isBlockDone(ActiveMBB);
     706      686402 :       for (auto *Succ : ActiveMBB->successors()) {
     707      234486 :         if (!isBlockDone(Succ)) {
     708      231371 :           if (Primary) {
     709      344306 :             MBBInfos[Succ].IncomingProcessed++;
     710             :           }
     711      231371 :           if (Done) {
     712      338076 :             MBBInfos[Succ].IncomingCompleted++;
     713             :           }
     714      231371 :           if (isBlockDone(Succ)) {
     715       41961 :             Workqueue.push_back(Succ);
     716             :           }
     717             :         }
     718             :       }
     719      225958 :       Primary = false;
     720             :     }
     721             :   }
     722             : 
     723             :   // We need to go through again and finalize any blocks that are not done yet.
     724             :   // This is possible if blocks have dead predecessors, so we didn't visit them
     725             :   // above.
     726             :   for (ReversePostOrderTraversal<MachineBasicBlock *>::rpo_iterator
     727       63188 :            MBBI = RPOT.begin(),
     728             :            MBBE = RPOT.end();
     729      247185 :        MBBI != MBBE; ++MBBI) {
     730      183997 :     MachineBasicBlock *MBB = *MBBI;
     731      183997 :     if (!isBlockDone(MBB)) {
     732           0 :       processBasicBlock(MBB, false);
     733             :       // Don't update successors here. We'll get to them anyway through this
     734             :       // loop.
     735             :     }
     736             :   }
     737             : 
     738             :   // Clear the LiveOuts vectors and collapse any remaining DomainValues.
     739             :   for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
     740      310373 :          MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
     741      367994 :     auto FI = MBBInfos.find(*MBBI);
     742      551991 :     if (FI == MBBInfos.end() || !FI->second.OutRegs)
     743           0 :       continue;
     744     6071901 :     for (unsigned i = 0, e = NumRegs; i != e; ++i)
     745     5887904 :       if (FI->second.OutRegs[i].Value)
     746      468323 :         release(FI->second.OutRegs[i].Value);
     747      183997 :     delete[] FI->second.OutRegs;
     748             :   }
     749       63188 :   MBBInfos.clear();
     750      126376 :   UndefReads.clear();
     751      126376 :   Avail.clear();
     752       63188 :   Allocator.DestroyAll();
     753             : 
     754       63188 :   return false;
     755             : }

Generated by: LCOV version 1.13