LCOV - code coverage report
Current view: top level - lib/CodeGen - MachineSink.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 244 315 77.5 %
Date: 2018-10-20 13:21:21 Functions: 27 30 90.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- MachineSink.cpp - Sinking for machine instructions -----------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This pass moves instructions into successor blocks when possible, so that
      11             : // they aren't executed on paths where their results aren't needed.
      12             : //
      13             : // This pass is not intended to be a replacement or a complete alternative
      14             : // for an LLVM-IR-level sinking pass. It is only designed to sink simple
      15             : // constructs that are not exposed before lowering and instruction selection.
      16             : //
      17             : //===----------------------------------------------------------------------===//
      18             : 
      19             : #include "llvm/ADT/SetVector.h"
      20             : #include "llvm/ADT/SmallSet.h"
      21             : #include "llvm/ADT/SmallVector.h"
      22             : #include "llvm/ADT/SparseBitVector.h"
      23             : #include "llvm/ADT/Statistic.h"
      24             : #include "llvm/Analysis/AliasAnalysis.h"
      25             : #include "llvm/CodeGen/MachineBasicBlock.h"
      26             : #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
      27             : #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
      28             : #include "llvm/CodeGen/MachineDominators.h"
      29             : #include "llvm/CodeGen/MachineFunction.h"
      30             : #include "llvm/CodeGen/MachineFunctionPass.h"
      31             : #include "llvm/CodeGen/MachineInstr.h"
      32             : #include "llvm/CodeGen/MachineLoopInfo.h"
      33             : #include "llvm/CodeGen/MachineOperand.h"
      34             : #include "llvm/CodeGen/MachinePostDominators.h"
      35             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      36             : #include "llvm/CodeGen/TargetInstrInfo.h"
      37             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      38             : #include "llvm/CodeGen/TargetSubtargetInfo.h"
      39             : #include "llvm/IR/BasicBlock.h"
      40             : #include "llvm/IR/LLVMContext.h"
      41             : #include "llvm/IR/DebugInfoMetadata.h"
      42             : #include "llvm/Pass.h"
      43             : #include "llvm/Support/BranchProbability.h"
      44             : #include "llvm/Support/CommandLine.h"
      45             : #include "llvm/Support/Debug.h"
      46             : #include "llvm/Support/raw_ostream.h"
      47             : #include <algorithm>
      48             : #include <cassert>
      49             : #include <cstdint>
      50             : #include <map>
      51             : #include <utility>
      52             : #include <vector>
      53             : 
      54             : using namespace llvm;
      55             : 
      56             : #define DEBUG_TYPE "machine-sink"
      57             : 
      58             : static cl::opt<bool>
      59             : SplitEdges("machine-sink-split",
      60             :            cl::desc("Split critical edges during machine sinking"),
      61             :            cl::init(true), cl::Hidden);
      62             : 
      63             : static cl::opt<bool>
      64             : UseBlockFreqInfo("machine-sink-bfi",
      65             :            cl::desc("Use block frequency info to find successors to sink"),
      66             :            cl::init(true), cl::Hidden);
      67             : 
      68             : static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
      69             :     "machine-sink-split-probability-threshold",
      70             :     cl::desc(
      71             :         "Percentage threshold for splitting single-instruction critical edge. "
      72             :         "If the branch threshold is higher than this threshold, we allow "
      73             :         "speculative execution of up to 1 instruction to avoid branching to "
      74             :         "splitted critical edge"),
      75             :     cl::init(40), cl::Hidden);
      76             : 
      77             : STATISTIC(NumSunk,      "Number of machine instructions sunk");
      78             : STATISTIC(NumSplit,     "Number of critical edges split");
      79             : STATISTIC(NumCoalesces, "Number of copies coalesced");
      80             : STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
      81             : 
      82             : namespace {
      83             : 
      84             :   class MachineSinking : public MachineFunctionPass {
      85             :     const TargetInstrInfo *TII;
      86             :     const TargetRegisterInfo *TRI;
      87             :     MachineRegisterInfo  *MRI;     // Machine register information
      88             :     MachineDominatorTree *DT;      // Machine dominator tree
      89             :     MachinePostDominatorTree *PDT; // Machine post dominator tree
      90             :     MachineLoopInfo *LI;
      91             :     const MachineBlockFrequencyInfo *MBFI;
      92             :     const MachineBranchProbabilityInfo *MBPI;
      93             :     AliasAnalysis *AA;
      94             : 
      95             :     // Remember which edges have been considered for breaking.
      96             :     SmallSet<std::pair<MachineBasicBlock*, MachineBasicBlock*>, 8>
      97             :     CEBCandidates;
      98             :     // Remember which edges we are about to split.
      99             :     // This is different from CEBCandidates since those edges
     100             :     // will be split.
     101             :     SetVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ToSplit;
     102             : 
     103             :     SparseBitVector<> RegsToClearKillFlags;
     104             : 
     105             :     using AllSuccsCache =
     106             :         std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;
     107             : 
     108             :   public:
     109             :     static char ID; // Pass identification
     110             : 
     111       20208 :     MachineSinking() : MachineFunctionPass(ID) {
     112       20208 :       initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
     113       20208 :     }
     114             : 
     115             :     bool runOnMachineFunction(MachineFunction &MF) override;
     116             : 
     117       20044 :     void getAnalysisUsage(AnalysisUsage &AU) const override {
     118       20044 :       AU.setPreservesCFG();
     119       20044 :       MachineFunctionPass::getAnalysisUsage(AU);
     120             :       AU.addRequired<AAResultsWrapperPass>();
     121             :       AU.addRequired<MachineDominatorTree>();
     122             :       AU.addRequired<MachinePostDominatorTree>();
     123             :       AU.addRequired<MachineLoopInfo>();
     124             :       AU.addRequired<MachineBranchProbabilityInfo>();
     125             :       AU.addPreserved<MachineDominatorTree>();
     126             :       AU.addPreserved<MachinePostDominatorTree>();
     127             :       AU.addPreserved<MachineLoopInfo>();
     128       20044 :       if (UseBlockFreqInfo)
     129             :         AU.addRequired<MachineBlockFrequencyInfo>();
     130       20044 :     }
     131             : 
     132      197763 :     void releaseMemory() override {
     133             :       CEBCandidates.clear();
     134      197763 :     }
     135             : 
     136             :   private:
     137             :     bool ProcessBlock(MachineBasicBlock &MBB);
     138             :     bool isWorthBreakingCriticalEdge(MachineInstr &MI,
     139             :                                      MachineBasicBlock *From,
     140             :                                      MachineBasicBlock *To);
     141             : 
     142             :     /// Postpone the splitting of the given critical
     143             :     /// edge (\p From, \p To).
     144             :     ///
     145             :     /// We do not split the edges on the fly. Indeed, this invalidates
     146             :     /// the dominance information and thus triggers a lot of updates
     147             :     /// of that information underneath.
     148             :     /// Instead, we postpone all the splits after each iteration of
     149             :     /// the main loop. That way, the information is at least valid
     150             :     /// for the lifetime of an iteration.
     151             :     ///
     152             :     /// \return True if the edge is marked as toSplit, false otherwise.
     153             :     /// False can be returned if, for instance, this is not profitable.
     154             :     bool PostponeSplitCriticalEdge(MachineInstr &MI,
     155             :                                    MachineBasicBlock *From,
     156             :                                    MachineBasicBlock *To,
     157             :                                    bool BreakPHIEdge);
     158             :     bool SinkInstruction(MachineInstr &MI, bool &SawStore,
     159             : 
     160             :                          AllSuccsCache &AllSuccessors);
     161             :     bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
     162             :                                  MachineBasicBlock *DefMBB,
     163             :                                  bool &BreakPHIEdge, bool &LocalUse) const;
     164             :     MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
     165             :                bool &BreakPHIEdge, AllSuccsCache &AllSuccessors);
     166             :     bool isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
     167             :                               MachineBasicBlock *MBB,
     168             :                               MachineBasicBlock *SuccToSinkTo,
     169             :                               AllSuccsCache &AllSuccessors);
     170             : 
     171             :     bool PerformTrivialForwardCoalescing(MachineInstr &MI,
     172             :                                          MachineBasicBlock *MBB);
     173             : 
     174             :     SmallVector<MachineBasicBlock *, 4> &
     175             :     GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
     176             :                            AllSuccsCache &AllSuccessors) const;
     177             :   };
     178             : 
     179             : } // end anonymous namespace
     180             : 
     181             : char MachineSinking::ID = 0;
     182             : 
     183             : char &llvm::MachineSinkingID = MachineSinking::ID;
     184             : 
     185       31780 : INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
     186             :                       "Machine code sinking", false, false)
     187       31780 : INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
     188       31780 : INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
     189       31780 : INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
     190       31780 : INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
     191      105355 : INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE,
     192             :                     "Machine code sinking", false, false)
     193             : 
     194           0 : bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
     195             :                                                      MachineBasicBlock *MBB) {
     196           0 :   if (!MI.isCopy())
     197           0 :     return false;
     198             : 
     199           0 :   unsigned SrcReg = MI.getOperand(1).getReg();
     200           0 :   unsigned DstReg = MI.getOperand(0).getReg();
     201           0 :   if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
     202           0 :       !TargetRegisterInfo::isVirtualRegister(DstReg) ||
     203           0 :       !MRI->hasOneNonDBGUse(SrcReg))
     204           0 :     return false;
     205             : 
     206           0 :   const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
     207             :   const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
     208           0 :   if (SRC != DRC)
     209           0 :     return false;
     210             : 
     211           0 :   MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
     212             :   if (DefMI->isCopyLike())
     213           0 :     return false;
     214             :   LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
     215             :   LLVM_DEBUG(dbgs() << "*** to: " << MI);
     216           0 :   MRI->replaceRegWith(DstReg, SrcReg);
     217           0 :   MI.eraseFromParent();
     218             : 
     219             :   // Conservatively, clear any kill flags, since it's possible that they are no
     220             :   // longer correct.
     221           0 :   MRI->clearKillFlags(SrcReg);
     222             : 
     223             :   ++NumCoalesces;
     224           0 :   return true;
     225             : }
     226             : 
     227             : /// AllUsesDominatedByBlock - Return true if all uses of the specified register
     228             : /// occur in blocks dominated by the specified block. If any use is in the
     229             : /// definition block, then return false since it is never legal to move def
     230             : /// after uses.
     231             : bool
     232           0 : MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
     233             :                                         MachineBasicBlock *MBB,
     234             :                                         MachineBasicBlock *DefMBB,
     235             :                                         bool &BreakPHIEdge,
     236             :                                         bool &LocalUse) const {
     237             :   assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
     238             :          "Only makes sense for vregs");
     239             : 
     240             :   // Ignore debug uses because debug info doesn't affect the code.
     241           0 :   if (MRI->use_nodbg_empty(Reg))
     242           0 :     return true;
     243             : 
     244             :   // BreakPHIEdge is true if all the uses are in the successor MBB being sunken
     245             :   // into and they are all PHI nodes. In this case, machine-sink must break
     246             :   // the critical edge first. e.g.
     247             :   //
     248             :   // %bb.1: derived from LLVM BB %bb4.preheader
     249             :   //   Predecessors according to CFG: %bb.0
     250             :   //     ...
     251             :   //     %reg16385 = DEC64_32r %reg16437, implicit-def dead %eflags
     252             :   //     ...
     253             :   //     JE_4 <%bb.37>, implicit %eflags
     254             :   //   Successors according to CFG: %bb.37 %bb.2
     255             :   //
     256             :   // %bb.2: derived from LLVM BB %bb.nph
     257             :   //   Predecessors according to CFG: %bb.0 %bb.1
     258             :   //     %reg16386 = PHI %reg16434, %bb.0, %reg16385, %bb.1
     259           0 :   BreakPHIEdge = true;
     260           0 :   for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
     261           0 :     MachineInstr *UseInst = MO.getParent();
     262           0 :     unsigned OpNo = &MO - &UseInst->getOperand(0);
     263           0 :     MachineBasicBlock *UseBlock = UseInst->getParent();
     264           0 :     if (!(UseBlock == MBB && UseInst->isPHI() &&
     265           0 :           UseInst->getOperand(OpNo+1).getMBB() == DefMBB)) {
     266           0 :       BreakPHIEdge = false;
     267           0 :       break;
     268             :     }
     269             :   }
     270           0 :   if (BreakPHIEdge)
     271           0 :     return true;
     272             : 
     273           0 :   for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
     274             :     // Determine the block of the use.
     275           0 :     MachineInstr *UseInst = MO.getParent();
     276           0 :     unsigned OpNo = &MO - &UseInst->getOperand(0);
     277           0 :     MachineBasicBlock *UseBlock = UseInst->getParent();
     278             :     if (UseInst->isPHI()) {
     279             :       // PHI nodes use the operand in the predecessor block, not the block with
     280             :       // the PHI.
     281           0 :       UseBlock = UseInst->getOperand(OpNo+1).getMBB();
     282           0 :     } else if (UseBlock == DefMBB) {
     283           0 :       LocalUse = true;
     284           0 :       return false;
     285             :     }
     286             : 
     287             :     // Check that it dominates.
     288           0 :     if (!DT->dominates(MBB, UseBlock))
     289           0 :       return false;
     290             :   }
     291             : 
     292             :   return true;
     293             : }
     294             : 
     295      197746 : bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
     296      197746 :   if (skipFunction(MF.getFunction()))
     297             :     return false;
     298             : 
     299             :   LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n");
     300             : 
     301      197560 :   TII = MF.getSubtarget().getInstrInfo();
     302      197560 :   TRI = MF.getSubtarget().getRegisterInfo();
     303      197560 :   MRI = &MF.getRegInfo();
     304      197560 :   DT = &getAnalysis<MachineDominatorTree>();
     305      197560 :   PDT = &getAnalysis<MachinePostDominatorTree>();
     306      197560 :   LI = &getAnalysis<MachineLoopInfo>();
     307      197560 :   MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
     308      197560 :   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
     309      197560 :   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
     310             : 
     311             :   bool EverMadeChange = false;
     312             : 
     313             :   while (true) {
     314             :     bool MadeChange = false;
     315             : 
     316             :     // Process all basic blocks.
     317             :     CEBCandidates.clear();
     318             :     ToSplit.clear();
     319     1059627 :     for (auto &MBB: MF)
     320      852066 :       MadeChange |= ProcessBlock(MBB);
     321             : 
     322             :     // If we have anything we marked as toSplit, split it now.
     323      236524 :     for (auto &Pair : ToSplit) {
     324       28963 :       auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this);
     325       28963 :       if (NewSucc != nullptr) {
     326             :         LLVM_DEBUG(dbgs() << " *** Splitting critical edge: "
     327             :                           << printMBBReference(*Pair.first) << " -- "
     328             :                           << printMBBReference(*NewSucc) << " -- "
     329             :                           << printMBBReference(*Pair.second) << '\n');
     330             :         MadeChange = true;
     331             :         ++NumSplit;
     332             :       } else
     333             :         LLVM_DEBUG(dbgs() << " *** Not legal to break critical edge\n");
     334             :     }
     335             :     // If this iteration over the code changed anything, keep iterating.
     336      207561 :     if (!MadeChange) break;
     337             :     EverMadeChange = true;
     338             :   }
     339             : 
     340             :   // Now clear any kill flags for recorded registers.
     341      197560 :   for (auto I : RegsToClearKillFlags)
     342       23578 :     MRI->clearKillFlags(I);
     343             :   RegsToClearKillFlags.clear();
     344             : 
     345      197559 :   return EverMadeChange;
     346             : }
     347             : 
     348      852066 : bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
     349             :   // Can't sink anything out of a block that has less than two successors.
     350      852066 :   if (MBB.succ_size() <= 1 || MBB.empty()) return false;
     351             : 
     352             :   // Don't bother sinking code out of unreachable blocks. In addition to being
     353             :   // unprofitable, it can also lead to infinite looping, because in an
     354             :   // unreachable loop there may be nowhere to stop.
     355      596134 :   if (!DT->isReachableFromEntry(&MBB)) return false;
     356             : 
     357             :   bool MadeChange = false;
     358             : 
     359             :   // Cache all successors, sorted by frequency info and loop depth.
     360             :   AllSuccsCache AllSuccessors;
     361             : 
     362             :   // Walk the basic block bottom-up.  Remember if we saw a store.
     363      298067 :   MachineBasicBlock::iterator I = MBB.end();
     364             :   --I;
     365      298067 :   bool ProcessedBegin, SawStore = false;
     366             :   do {
     367             :     MachineInstr &MI = *I;  // The instruction to sink.
     368             : 
     369             :     // Predecrement I (if it's not begin) so that it isn't invalidated by
     370             :     // sinking.
     371             :     ProcessedBegin = I == MBB.begin();
     372     3936471 :     if (!ProcessedBegin)
     373             :       --I;
     374             : 
     375             :     if (MI.isDebugInstr())
     376             :       continue;
     377             : 
     378     3686156 :     bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
     379     3686156 :     if (Joined) {
     380             :       MadeChange = true;
     381             :       continue;
     382             :     }
     383             : 
     384     3685625 :     if (SinkInstruction(MI, SawStore, AllSuccessors)) {
     385             :       ++NumSunk;
     386             :       MadeChange = true;
     387             :     }
     388             : 
     389             :     // If we just processed the first instruction in the block, we're done.
     390     3936471 :   } while (!ProcessedBegin);
     391             : 
     392             :   return MadeChange;
     393             : }
     394             : 
     395       39513 : bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
     396             :                                                  MachineBasicBlock *From,
     397             :                                                  MachineBasicBlock *To) {
     398             :   // FIXME: Need much better heuristics.
     399             : 
     400             :   // If the pass has already considered breaking this edge (during this pass
     401             :   // through the function), then let's go ahead and break it. This means
     402             :   // sinking multiple "cheap" instructions into the same block.
     403       39513 :   if (!CEBCandidates.insert(std::make_pair(From, To)).second)
     404             :     return true;
     405             : 
     406       35043 :   if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
     407             :     return true;
     408             : 
     409        5816 :   if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <=
     410             :       BranchProbability(SplitEdgeProbabilityThreshold, 100))
     411         892 :     return true;
     412             : 
     413             :   // MI is cheap, we probably don't want to break the critical edge for it.
     414             :   // However, if this would allow some definitions of its source operands
     415             :   // to be sunk then it's probably worth it.
     416       14010 :   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     417        9905 :     const MachineOperand &MO = MI.getOperand(i);
     418        9905 :     if (!MO.isReg() || !MO.isUse())
     419             :       continue;
     420        2539 :     unsigned Reg = MO.getReg();
     421        2539 :     if (Reg == 0)
     422             :       continue;
     423             : 
     424             :     // We don't move live definitions of physical registers,
     425             :     // so sinking their uses won't enable any opportunities.
     426        2510 :     if (TargetRegisterInfo::isPhysicalRegister(Reg))
     427             :       continue;
     428             : 
     429             :     // If this instruction is the only user of a virtual register,
     430             :     // check if breaking the edge will enable sinking
     431             :     // both this instruction and the defining instruction.
     432        2489 :     if (MRI->hasOneNonDBGUse(Reg)) {
     433             :       // If the definition resides in same MBB,
     434             :       // claim it's likely we can sink these together.
     435             :       // If definition resides elsewhere, we aren't
     436             :       // blocking it from being sunk so don't break the edge.
     437         859 :       MachineInstr *DefMI = MRI->getVRegDef(Reg);
     438         859 :       if (DefMI->getParent() == MI.getParent())
     439             :         return true;
     440             :     }
     441             :   }
     442             : 
     443             :   return false;
     444             : }
     445             : 
     446       39513 : bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
     447             :                                                MachineBasicBlock *FromBB,
     448             :                                                MachineBasicBlock *ToBB,
     449             :                                                bool BreakPHIEdge) {
     450       39513 :   if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
     451             :     return false;
     452             : 
     453             :   // Avoid breaking back edge. From == To means backedge for single BB loop.
     454       35408 :   if (!SplitEdges || FromBB == ToBB)
     455             :     return false;
     456             : 
     457             :   // Check for backedges of more "complex" loops.
     458      141251 :   if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) &&
     459       35027 :       LI->isLoopHeader(ToBB))
     460             :     return false;
     461             : 
     462             :   // It's not always legal to break critical edges and sink the computation
     463             :   // to the edge.
     464             :   //
     465             :   // %bb.1:
     466             :   // v1024
     467             :   // Beq %bb.3
     468             :   // <fallthrough>
     469             :   // %bb.2:
     470             :   // ... no uses of v1024
     471             :   // <fallthrough>
     472             :   // %bb.3:
     473             :   // ...
     474             :   //       = v1024
     475             :   //
     476             :   // If %bb.1 -> %bb.3 edge is broken and computation of v1024 is inserted:
     477             :   //
     478             :   // %bb.1:
     479             :   // ...
     480             :   // Bne %bb.2
     481             :   // %bb.4:
     482             :   // v1024 =
     483             :   // B %bb.3
     484             :   // %bb.2:
     485             :   // ... no uses of v1024
     486             :   // <fallthrough>
     487             :   // %bb.3:
     488             :   // ...
     489             :   //       = v1024
     490             :   //
     491             :   // This is incorrect since v1024 is not computed along the %bb.1->%bb.2->%bb.3
     492             :   // flow. We need to ensure the new basic block where the computation is
     493             :   // sunk to dominates all the uses.
     494             :   // It's only legal to break critical edge and sink the computation to the
     495             :   // new block if all the predecessors of "To", except for "From", are
     496             :   // not dominated by "From". Given SSA property, this means these
     497             :   // predecessors are dominated by "To".
     498             :   //
     499             :   // There is no need to do this check if all the uses are PHI nodes. PHI
     500             :   // sources are only defined on the specific predecessor edges.
     501       32658 :   if (!BreakPHIEdge) {
     502             :     for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(),
     503         723 :            E = ToBB->pred_end(); PI != E; ++PI) {
     504         720 :       if (*PI == FromBB)
     505             :         continue;
     506         872 :       if (!DT->dominates(ToBB, *PI))
     507             :         return false;
     508             :     }
     509             :   }
     510             : 
     511       32249 :   ToSplit.insert(std::make_pair(FromBB, ToBB));
     512             : 
     513       32249 :   return true;
     514             : }
     515             : 
     516             : /// isProfitableToSinkTo - Return true if it is profitable to sink MI.
     517      230523 : bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
     518             :                                           MachineBasicBlock *MBB,
     519             :                                           MachineBasicBlock *SuccToSinkTo,
     520             :                                           AllSuccsCache &AllSuccessors) {
     521             :   assert (SuccToSinkTo && "Invalid SinkTo Candidate BB");
     522             : 
     523      230523 :   if (MBB == SuccToSinkTo)
     524             :     return false;
     525             : 
     526             :   // It is profitable if SuccToSinkTo does not post dominate current block.
     527      225002 :   if (!PDT->dominates(SuccToSinkTo, MBB))
     528             :     return true;
     529             : 
     530             :   // It is profitable to sink an instruction from a deeper loop to a shallower
     531             :   // loop, even if the latter post-dominates the former (PR21115).
     532       42685 :   if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo))
     533             :     return true;
     534             : 
     535             :   // Check if only use in post dominated block is PHI instruction.
     536             :   bool NonPHIUse = false;
     537       87249 :   for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) {
     538       49784 :     MachineBasicBlock *UseBlock = UseInst.getParent();
     539       49784 :     if (UseBlock == SuccToSinkTo && !UseInst.isPHI())
     540             :       NonPHIUse = true;
     541             :   }
     542       37465 :   if (!NonPHIUse)
     543             :     return true;
     544             : 
     545             :   // If SuccToSinkTo post dominates then also it may be profitable if MI
     546             :   // can further profitably sinked into another block in next round.
     547       10116 :   bool BreakPHIEdge = false;
     548             :   // FIXME - If finding successor is compile time expensive then cache results.
     549       10116 :   if (MachineBasicBlock *MBB2 =
     550       10116 :           FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
     551           0 :     return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);
     552             : 
     553             :   // If SuccToSinkTo is final destination and it is a post dominator of current
     554             :   // block then it is not profitable to sink MI into SuccToSinkTo block.
     555             :   return false;
     556             : }
     557             : 
     558             : /// Get the sorted sequence of successors for this MachineBasicBlock, possibly
     559             : /// computing it if it was not already cached.
     560             : SmallVector<MachineBasicBlock *, 4> &
     561      955498 : MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
     562             :                                        AllSuccsCache &AllSuccessors) const {
     563             :   // Do we have the sorted successors in cache ?
     564             :   auto Succs = AllSuccessors.find(MBB);
     565      955498 :   if (Succs != AllSuccessors.end())
     566      677969 :     return Succs->second;
     567             : 
     568             :   SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->succ_begin(),
     569      277529 :                                                MBB->succ_end());
     570             : 
     571             :   // Handle cases where sinking can happen but where the sink point isn't a
     572             :   // successor. For example:
     573             :   //
     574             :   //   x = computation
     575             :   //   if () {} else {}
     576             :   //   use x
     577             :   //
     578             :   const std::vector<MachineDomTreeNode *> &Children =
     579      277529 :     DT->getNode(MBB)->getChildren();
     580      857609 :   for (const auto &DTChild : Children)
     581             :     // DomTree children of MBB that have MBB as immediate dominator are added.
     582     1152412 :     if (DTChild->getIDom()->getBlock() == MI.getParent() &&
     583             :         // Skip MBBs already added to the AllSuccs vector above.
     584      572332 :         !MBB->isSuccessor(DTChild->getBlock()))
     585       87738 :       AllSuccs.push_back(DTChild->getBlock());
     586             : 
     587             :   // Sort Successors according to their loop depth or block frequency info.
     588             :   std::stable_sort(
     589             :       AllSuccs.begin(), AllSuccs.end(),
     590             :       [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
     591             :         uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
     592             :         uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
     593             :         bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
     594             :         return HasBlockFreq ? LHSFreq < RHSFreq
     595             :                             : LI->getLoopDepth(L) < LI->getLoopDepth(R);
     596             :       });
     597             : 
     598      277529 :   auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));
     599             : 
     600      277529 :   return it.first->second;
     601             : }
     602             : 
     603             : /// FindSuccToSinkTo - Find a successor to sink this instruction to.
     604             : MachineBasicBlock *
     605     1639220 : MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
     606             :                                  bool &BreakPHIEdge,
     607             :                                  AllSuccsCache &AllSuccessors) {
     608             :   assert (MBB && "Invalid MachineBasicBlock!");
     609             : 
     610             :   // Loop over all the operands of the specified instruction.  If there is
     611             :   // anything we can't handle, bail out.
     612             : 
     613             :   // SuccToSinkTo - This is the successor to sink this instruction to, once we
     614             :   // decide.
     615             :   MachineBasicBlock *SuccToSinkTo = nullptr;
     616     4473452 :   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     617     4360919 :     const MachineOperand &MO = MI.getOperand(i);
     618     4360919 :     if (!MO.isReg()) continue;  // Ignore non-register operands.
     619             : 
     620     3252488 :     unsigned Reg = MO.getReg();
     621     3252488 :     if (Reg == 0) continue;
     622             : 
     623     3037178 :     if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
     624     1758897 :       if (MO.isUse()) {
     625             :         // If the physreg has no defs anywhere, it's just an ambient register
     626             :         // and we can freely move its uses. Alternatively, if it's allocatable,
     627             :         // it could get allocated to something with a def during allocation.
     628      392095 :         if (!MRI->isConstantPhysReg(Reg))
     629             :           return nullptr;
     630     1366802 :       } else if (!MO.isDead()) {
     631             :         // A def that isn't dead. We can't move it.
     632             :         return nullptr;
     633             :       }
     634             :     } else {
     635             :       // Virtual register uses are always safe to sink.
     636     1278281 :       if (MO.isUse()) continue;
     637             : 
     638             :       // If it's not safe to move defs of the register class, then abort.
     639     1912656 :       if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
     640             :         return nullptr;
     641             : 
     642             :       // Virtual register defs can only be sunk if all their uses are in blocks
     643             :       // dominated by one of the successors.
     644      955515 :       if (SuccToSinkTo) {
     645             :         // If a previous operand picked a block to sink to, then this operand
     646             :         // must be sinkable to the same block.
     647          17 :         bool LocalUse = false;
     648          17 :         if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB,
     649             :                                      BreakPHIEdge, LocalUse))
     650          13 :           return nullptr;
     651             : 
     652           4 :         continue;
     653             :       }
     654             : 
     655             :       // Otherwise, we should look at all the successors and decide which one
     656             :       // we should sink to. If we have reliable block frequency information
     657             :       // (frequency != 0) available, give successors with smaller frequencies
     658             :       // higher priority, otherwise prioritize smaller loop depths.
     659      192201 :       for (MachineBasicBlock *SuccBlock :
     660     2103197 :            GetAllSortedSuccessors(MI, MBB, AllSuccessors)) {
     661     1130282 :         bool LocalUse = false;
     662     1130282 :         if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
     663             :                                     BreakPHIEdge, LocalUse)) {
     664             :           SuccToSinkTo = SuccBlock;
     665      230523 :           break;
     666             :         }
     667      899759 :         if (LocalUse)
     668             :           // Def is used locally, it's never safe to move this def.
     669      707558 :           return nullptr;
     670             :       }
     671             : 
     672             :       // If we couldn't find a block to sink to, ignore this instruction.
     673      247940 :       if (!SuccToSinkTo)
     674             :         return nullptr;
     675      230523 :       if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo, AllSuccessors))
     676             :         return nullptr;
     677             :     }
     678             :   }
     679             : 
     680             :   // It is not possible to sink an instruction into its own block.  This can
     681             :   // happen with loops.
     682      112533 :   if (MBB == SuccToSinkTo)
     683             :     return nullptr;
     684             : 
     685             :   // It's not safe to sink instructions to EH landing pad. Control flow into
     686             :   // landing pad is implicitly defined.
     687      112533 :   if (SuccToSinkTo && SuccToSinkTo->isEHPad())
     688       13591 :     return nullptr;
     689             : 
     690             :   return SuccToSinkTo;
     691             : }
     692             : 
     693             : /// Return true if MI is likely to be usable as a memory operation by the
     694             : /// implicit null check optimization.
     695             : ///
     696             : /// This is a "best effort" heuristic, and should not be relied upon for
     697             : /// correctness.  This returning true does not guarantee that the implicit null
     698             : /// check optimization is legal over MI, and this returning false does not
     699             : /// guarantee MI cannot possibly be used to do a null check.
     700     1629108 : static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
     701             :                                              const TargetInstrInfo *TII,
     702             :                                              const TargetRegisterInfo *TRI) {
     703             :   using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate;
     704             : 
     705     1629108 :   auto *MBB = MI.getParent();
     706     1629108 :   if (MBB->pred_size() != 1)
     707             :     return false;
     708             : 
     709      966645 :   auto *PredMBB = *MBB->pred_begin();
     710      966645 :   auto *PredBB = PredMBB->getBasicBlock();
     711             : 
     712             :   // Frontends that don't use implicit null checks have no reason to emit
     713             :   // branches with make.implicit metadata, and this function should always
     714             :   // return false for them.
     715     1907397 :   if (!PredBB ||
     716      966504 :       !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit))
     717      966634 :     return false;
     718             : 
     719             :   unsigned BaseReg;
     720             :   int64_t Offset;
     721          11 :   if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
     722             :     return false;
     723             : 
     724           8 :   if (!(MI.mayLoad() && !MI.isPredicable()))
     725           0 :     return false;
     726             : 
     727             :   MachineBranchPredicate MBP;
     728           4 :   if (TII->analyzeBranchPredicate(*PredMBB, MBP, false))
     729             :     return false;
     730             : 
     731           4 :   return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
     732           4 :          (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
     733           4 :           MBP.Predicate == MachineBranchPredicate::PRED_EQ) &&
     734           4 :          MBP.LHS.getReg() == BaseReg;
     735             : }
     736             : 
     737             : /// Sink an instruction and its associated debug instructions.
     738       62959 : static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
     739             :                         MachineBasicBlock::iterator InsertPos) {
     740             :   // Collect matching debug values.
     741             :   SmallVector<MachineInstr *, 2> DbgValuesToSink;
     742       62959 :   MI.collectDebugValues(DbgValuesToSink);
     743             : 
     744             :   // If we cannot find a location to use (merge with), then we erase the debug
     745             :   // location to prevent debug-info driven tools from potentially reporting
     746             :   // wrong location information.
     747       62959 :   if (!SuccToSinkTo.empty() && InsertPos != SuccToSinkTo.end())
     748      122648 :     MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(),
     749             :                                                  InsertPos->getDebugLoc()));
     750             :   else
     751        3270 :     MI.setDebugLoc(DebugLoc());
     752             : 
     753             :   // Move the instruction.
     754       62959 :   MachineBasicBlock *ParentBlock = MI.getParent();
     755             :   SuccToSinkTo.splice(InsertPos, ParentBlock, MI,
     756             :                       ++MachineBasicBlock::iterator(MI));
     757             : 
     758             :   // Move previously adjacent debug value instructions to the insert position.
     759        7130 :   for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(),
     760             :                                                  DBE = DbgValuesToSink.end();
     761       70089 :        DBI != DBE; ++DBI) {
     762        7130 :     MachineInstr *DbgMI = *DBI;
     763             :     SuccToSinkTo.splice(InsertPos, ParentBlock, DbgMI,
     764             :                         ++MachineBasicBlock::iterator(DbgMI));
     765             :   }
     766       62959 : }
     767             : 
     768             : /// SinkInstruction - Determine whether it is safe to sink the specified machine
     769             : /// instruction out of its current block into a successor.
     770     3685625 : bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
     771             :                                      AllSuccsCache &AllSuccessors) {
     772             :   // Don't sink instructions that the target prefers not to sink.
     773     3685625 :   if (!TII->shouldSink(MI))
     774             :     return false;
     775             : 
     776             :   // Check if it's safe to move the instruction.
     777     3685615 :   if (!MI.isSafeToMove(AA, SawStore))
     778             :     return false;
     779             : 
     780             :   // Convergent operations may not be made control-dependent on additional
     781             :   // values.
     782     1629198 :   if (MI.isConvergent())
     783             :     return false;
     784             : 
     785             :   // Don't break implicit null checks.  This is a performance heuristic, and not
     786             :   // required for correctness.
     787     1629108 :   if (SinkingPreventsImplicitNullCheck(MI, TII, TRI))
     788             :     return false;
     789             : 
     790             :   // FIXME: This should include support for sinking instructions within the
     791             :   // block they are currently in to shorten the live ranges.  We often get
     792             :   // instructions sunk into the top of a large block, but it would be better to
     793             :   // also sink them down before their first use in the block.  This xform has to
     794             :   // be careful not to *increase* register pressure though, e.g. sinking
     795             :   // "x = y + z" down if it kills y and z would increase the live ranges of y
     796             :   // and z and only shrink the live range of x.
     797             : 
     798     1629104 :   bool BreakPHIEdge = false;
     799     1629104 :   MachineBasicBlock *ParentBlock = MI.getParent();
     800             :   MachineBasicBlock *SuccToSinkTo =
     801     1629104 :       FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge, AllSuccessors);
     802             : 
     803             :   // If there are no outputs, it must have side-effects.
     804     1629104 :   if (!SuccToSinkTo)
     805             :     return false;
     806             : 
     807             :   // If the instruction to move defines a dead physical register which is live
     808             :   // when leaving the basic block, don't move it because it could turn into a
     809             :   // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
     810      558071 :   for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
     811      459202 :     const MachineOperand &MO = MI.getOperand(I);
     812      459202 :     if (!MO.isReg()) continue;
     813      318152 :     unsigned Reg = MO.getReg();
     814      318152 :     if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
     815       36316 :     if (SuccToSinkTo->isLiveIn(Reg))
     816             :       return false;
     817             :   }
     818             : 
     819             :   LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo);
     820             : 
     821             :   // If the block has multiple predecessors, this is a critical edge.
     822             :   // Decide if we can sink along it or need to break the edge.
     823       98869 :   if (SuccToSinkTo->pred_size() > 1) {
     824             :     // We cannot sink a load across a critical edge - there may be stores in
     825             :     // other code paths.
     826             :     bool TryBreak = false;
     827       43801 :     bool store = true;
     828       43801 :     if (!MI.isSafeToMove(AA, store)) {
     829             :       LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
     830             :       TryBreak = true;
     831             :     }
     832             : 
     833             :     // We don't want to sink across a critical edge if we don't dominate the
     834             :     // successor. We could be introducing calculations to new code paths.
     835       86618 :     if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
     836             :       LLVM_DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
     837             :       TryBreak = true;
     838             :     }
     839             : 
     840             :     // Don't sink instructions into a loop.
     841       28876 :     if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
     842             :       LLVM_DEBUG(dbgs() << " *** NOTE: Loop header found\n");
     843             :       TryBreak = true;
     844             :     }
     845             : 
     846             :     // Otherwise we are OK with sinking along a critical edge.
     847       43793 :     if (!TryBreak)
     848             :       LLVM_DEBUG(dbgs() << "Sinking along critical edge.\n");
     849             :     else {
     850             :       // Mark this edge as to be split.
     851             :       // If the edge can actually be split, the next iteration of the main loop
     852             :       // will sink MI in the newly created block.
     853             :       bool Status =
     854       15425 :         PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
     855             :       if (!Status)
     856             :         LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
     857             :                              "break critical edge\n");
     858             :       // The instruction will not be sunk this time.
     859       15425 :       return false;
     860             :     }
     861             :   }
     862             : 
     863       83444 :   if (BreakPHIEdge) {
     864             :     // BreakPHIEdge is true if all the uses are in the successor MBB being
     865             :     // sunken into and they are all PHI nodes. In this case, machine-sink must
     866             :     // break the critical edge first.
     867       24088 :     bool Status = PostponeSplitCriticalEdge(MI, ParentBlock,
     868             :                                             SuccToSinkTo, BreakPHIEdge);
     869             :     if (!Status)
     870             :       LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
     871             :                            "break critical edge\n");
     872             :     // The instruction will not be sunk this time.
     873       24088 :     return false;
     874             :   }
     875             : 
     876             :   // Determine where to insert into. Skip phi nodes.
     877             :   MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
     878       64552 :   while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
     879             :     ++InsertPos;
     880             : 
     881       59356 :   performSink(MI, *SuccToSinkTo, InsertPos);
     882             : 
     883             :   // Conservatively, clear any kill flags, since it's possible that they are no
     884             :   // longer correct.
     885             :   // Note that we have to clear the kill flags for any register this instruction
     886             :   // uses as we may sink over another instruction which currently kills the
     887             :   // used registers.
     888      319985 :   for (MachineOperand &MO : MI.operands()) {
     889      260629 :     if (MO.isReg() && MO.isUse())
     890      118016 :       RegsToClearKillFlags.set(MO.getReg()); // Remember to clear kill flags.
     891             :   }
     892             : 
     893             :   return true;
     894             : }
     895             : 
     896             : //===----------------------------------------------------------------------===//
     897             : // This pass is not intended to be a replacement or a complete alternative
     898             : // for the pre-ra machine sink pass. It is only designed to sink COPY
     899             : // instructions which should be handled after RA.
     900             : //
     901             : // This pass sinks COPY instructions into a successor block, if the COPY is not
     902             : // used in the current block and the COPY is live-in to a single successor
     903             : // (i.e., doesn't require the COPY to be duplicated).  This avoids executing the
     904             : // copy on paths where their results aren't needed.  This also exposes
     905             : // additional opportunites for dead copy elimination and shrink wrapping.
     906             : //
     907             : // These copies were either not handled by or are inserted after the MachineSink
     908             : // pass. As an example of the former case, the MachineSink pass cannot sink
     909             : // COPY instructions with allocatable source registers; for AArch64 these type
     910             : // of copy instructions are frequently used to move function parameters (PhyReg)
     911             : // into virtual registers in the entry block.
     912             : //
     913             : // For the machine IR below, this pass will sink %w19 in the entry into its
     914             : // successor (%bb.1) because %w19 is only live-in in %bb.1.
     915             : // %bb.0:
     916             : //   %wzr = SUBSWri %w1, 1
     917             : //   %w19 = COPY %w0
     918             : //   Bcc 11, %bb.2
     919             : // %bb.1:
     920             : //   Live Ins: %w19
     921             : //   BL @fun
     922             : //   %w0 = ADDWrr %w0, %w19
     923             : //   RET %w0
     924             : // %bb.2:
     925             : //   %w0 = COPY %wzr
     926             : //   RET %w0
     927             : // As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be
     928             : // able to see %bb.0 as a candidate.
     929             : //===----------------------------------------------------------------------===//
     930             : namespace {
     931             : 
     932             : class PostRAMachineSinking : public MachineFunctionPass {
     933             : public:
     934             :   bool runOnMachineFunction(MachineFunction &MF) override;
     935             : 
     936             :   static char ID;
     937       19695 :   PostRAMachineSinking() : MachineFunctionPass(ID) {}
     938       19548 :   StringRef getPassName() const override { return "PostRA Machine Sink"; }
     939             : 
     940       19532 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
     941       19532 :     AU.setPreservesCFG();
     942       19532 :     MachineFunctionPass::getAnalysisUsage(AU);
     943       19532 :   }
     944             : 
     945       19535 :   MachineFunctionProperties getRequiredProperties() const override {
     946       19535 :     return MachineFunctionProperties().set(
     947       19535 :         MachineFunctionProperties::Property::NoVRegs);
     948             :   }
     949             : 
     950             : private:
     951             :   /// Track which register units have been modified and used.
     952             :   LiveRegUnits ModifiedRegUnits, UsedRegUnits;
     953             : 
     954             :   /// Sink Copy instructions unused in the same block close to their uses in
     955             :   /// successors.
     956             :   bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF,
     957             :                      const TargetRegisterInfo *TRI, const TargetInstrInfo *TII);
     958             : };
     959             : } // namespace
     960             : 
     961             : char PostRAMachineSinking::ID = 0;
     962             : char &llvm::PostRAMachineSinkingID = PostRAMachineSinking::ID;
     963             : 
     964       85147 : INITIALIZE_PASS(PostRAMachineSinking, "postra-machine-sink",
     965             :                 "PostRA Machine Sink", false, false)
     966             : 
     967       15125 : static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, unsigned Reg,
     968             :                                   const TargetRegisterInfo *TRI) {
     969             :   LiveRegUnits LiveInRegUnits(*TRI);
     970       15125 :   LiveInRegUnits.addLiveIns(MBB);
     971       15125 :   return !LiveInRegUnits.available(Reg);
     972             : }
     973             : 
     974             : static MachineBasicBlock *
     975        8165 : getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
     976             :                       const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
     977             :                       unsigned Reg, const TargetRegisterInfo *TRI) {
     978             :   // Try to find a single sinkable successor in which Reg is live-in.
     979             :   MachineBasicBlock *BB = nullptr;
     980       17334 :   for (auto *SI : SinkableBBs) {
     981       11368 :     if (aliasWithRegsInLiveIn(*SI, Reg, TRI)) {
     982             :       // If BB is set here, Reg is live-in to at least two sinkable successors,
     983             :       // so quit.
     984        9163 :       if (BB)
     985        2199 :         return nullptr;
     986             :       BB = SI;
     987             :     }
     988             :   }
     989             :   // Reg is not live-in to any sinkable successors.
     990        5966 :   if (!BB)
     991             :     return nullptr;
     992             : 
     993             :   // Check if any register aliased with Reg is live-in in other successors.
     994       12333 :   for (auto *SI : CurBB.successors()) {
     995        8712 :     if (!SinkableBBs.count(SI) && aliasWithRegsInLiveIn(*SI, Reg, TRI))
     996             :       return nullptr;
     997             :   }
     998             :   return BB;
     999             : }
    1000             : 
    1001             : static MachineBasicBlock *
    1002        8147 : getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
    1003             :                       const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
    1004             :                       ArrayRef<unsigned> DefedRegsInCopy,
    1005             :                       const TargetRegisterInfo *TRI) {
    1006             :   MachineBasicBlock *SingleBB = nullptr;
    1007       11768 :   for (auto DefReg : DefedRegsInCopy) {
    1008             :     MachineBasicBlock *BB =
    1009        8165 :         getSingleLiveInSuccBB(CurBB, SinkableBBs, DefReg, TRI);
    1010        8165 :     if (!BB || (SingleBB && SingleBB != BB))
    1011             :       return nullptr;
    1012             :     SingleBB = BB;
    1013             :   }
    1014             :   return SingleBB;
    1015             : }
    1016             : 
    1017        3603 : static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB,
    1018             :                            SmallVectorImpl<unsigned> &UsedOpsInCopy,
    1019             :                            LiveRegUnits &UsedRegUnits,
    1020             :                            const TargetRegisterInfo *TRI) {
    1021        7208 :   for (auto U : UsedOpsInCopy) {
    1022        3605 :     MachineOperand &MO = MI->getOperand(U);
    1023        3605 :     unsigned SrcReg = MO.getReg();
    1024        3605 :     if (!UsedRegUnits.available(SrcReg)) {
    1025        1114 :       MachineBasicBlock::iterator NI = std::next(MI->getIterator());
    1026       10557 :       for (MachineInstr &UI : make_range(NI, CurBB.end())) {
    1027        9475 :         if (UI.killsRegister(SrcReg, TRI)) {
    1028          32 :           UI.clearRegisterKills(SrcReg, TRI);
    1029             :           MO.setIsKill(true);
    1030             :           break;
    1031             :         }
    1032             :       }
    1033             :     }
    1034             :   }
    1035        3603 : }
    1036             : 
    1037        3603 : static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
    1038             :                          SmallVectorImpl<unsigned> &UsedOpsInCopy,
    1039             :                          SmallVectorImpl<unsigned> &DefedRegsInCopy) {
    1040        3603 :   MachineFunction &MF = *SuccBB->getParent();
    1041        3603 :   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    1042        7224 :   for (unsigned DefReg : DefedRegsInCopy)
    1043       27069 :     for (MCSubRegIterator S(DefReg, TRI, true); S.isValid(); ++S)
    1044       19827 :       SuccBB->removeLiveIn(*S);
    1045        7208 :   for (auto U : UsedOpsInCopy) {
    1046        3605 :     unsigned Reg = MI->getOperand(U).getReg();
    1047        3605 :     if (!SuccBB->isLiveIn(Reg))
    1048             :       SuccBB->addLiveIn(Reg);
    1049             :   }
    1050        3603 : }
    1051             : 
    1052       17199 : static bool hasRegisterDependency(MachineInstr *MI,
    1053             :                                   SmallVectorImpl<unsigned> &UsedOpsInCopy,
    1054             :                                   SmallVectorImpl<unsigned> &DefedRegsInCopy,
    1055             :                                   LiveRegUnits &ModifiedRegUnits,
    1056             :                                   LiveRegUnits &UsedRegUnits) {
    1057             :   bool HasRegDependency = false;
    1058       35574 :   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    1059       27427 :     MachineOperand &MO = MI->getOperand(i);
    1060       27427 :     if (!MO.isReg())
    1061           0 :       continue;
    1062       27427 :     unsigned Reg = MO.getReg();
    1063       27427 :     if (!Reg)
    1064             :       continue;
    1065       27427 :     if (MO.isDef()) {
    1066       17247 :       if (!ModifiedRegUnits.available(Reg) || !UsedRegUnits.available(Reg)) {
    1067             :         HasRegDependency = true;
    1068        9052 :         break;
    1069             :       }
    1070       10178 :       DefedRegsInCopy.push_back(Reg);
    1071             : 
    1072             :       // FIXME: instead of isUse(), readsReg() would be a better fix here,
    1073             :       // For example, we can ignore modifications in reg with undef. However,
    1074             :       // it's not perfectly clear if skipping the internal read is safe in all
    1075             :       // other targets.
    1076             :     } else if (MO.isUse()) {
    1077       10180 :       if (!ModifiedRegUnits.available(Reg)) {
    1078             :         HasRegDependency = true;
    1079             :         break;
    1080             :       }
    1081        8197 :       UsedOpsInCopy.push_back(i);
    1082             :     }
    1083             :   }
    1084       17199 :   return HasRegDependency;
    1085             : }
    1086             : 
    1087           0 : bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
    1088             :                                          MachineFunction &MF,
    1089             :                                          const TargetRegisterInfo *TRI,
    1090             :                                          const TargetInstrInfo *TII) {
    1091             :   SmallPtrSet<MachineBasicBlock *, 2> SinkableBBs;
    1092             :   // FIXME: For now, we sink only to a successor which has a single predecessor
    1093             :   // so that we can directly sink COPY instructions to the successor without
    1094             :   // adding any new block or branch instruction.
    1095           0 :   for (MachineBasicBlock *SI : CurBB.successors())
    1096           0 :     if (!SI->livein_empty() && SI->pred_size() == 1)
    1097           0 :       SinkableBBs.insert(SI);
    1098             : 
    1099           0 :   if (SinkableBBs.empty())
    1100           0 :     return false;
    1101             : 
    1102             :   bool Changed = false;
    1103             : 
    1104             :   // Track which registers have been modified and used between the end of the
    1105             :   // block and the current instruction.
    1106             :   ModifiedRegUnits.clear();
    1107             :   UsedRegUnits.clear();
    1108             : 
    1109           0 :   for (auto I = CurBB.rbegin(), E = CurBB.rend(); I != E;) {
    1110             :     MachineInstr *MI = &*I;
    1111             :     ++I;
    1112             : 
    1113           0 :     if (MI->isDebugInstr())
    1114           0 :       continue;
    1115             : 
    1116             :     // Do not move any instruction across function call.
    1117           0 :     if (MI->isCall())
    1118           0 :       return false;
    1119             : 
    1120           0 :     if (!MI->isCopy() || !MI->getOperand(0).isRenamable()) {
    1121           0 :       LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
    1122             :                                         TRI);
    1123           0 :       continue;
    1124             :     }
    1125             : 
    1126             :     // Track the operand index for use in Copy.
    1127             :     SmallVector<unsigned, 2> UsedOpsInCopy;
    1128             :     // Track the register number defed in Copy.
    1129             :     SmallVector<unsigned, 2> DefedRegsInCopy;
    1130             : 
    1131             :     // Don't sink the COPY if it would violate a register dependency.
    1132           0 :     if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
    1133           0 :                               ModifiedRegUnits, UsedRegUnits)) {
    1134           0 :       LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
    1135             :                                         TRI);
    1136           0 :       continue;
    1137             :     }
    1138             :     assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) &&
    1139             :            "Unexpect SrcReg or DefReg");
    1140             :     MachineBasicBlock *SuccBB =
    1141           0 :         getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI);
    1142             :     // Don't sink if we cannot find a single sinkable successor in which Reg
    1143             :     // is live-in.
    1144           0 :     if (!SuccBB) {
    1145           0 :       LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
    1146             :                                         TRI);
    1147           0 :       continue;
    1148             :     }
    1149             :     assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
    1150             :            "Unexpected predecessor");
    1151             : 
    1152             :     // Clear the kill flag if SrcReg is killed between MI and the end of the
    1153             :     // block.
    1154           0 :     clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
    1155           0 :     MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
    1156           0 :     performSink(*MI, *SuccBB, InsertPos);
    1157           0 :     updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
    1158             : 
    1159             :     Changed = true;
    1160             :     ++NumPostRACopySink;
    1161             :   }
    1162             :   return Changed;
    1163             : }
    1164             : 
    1165      193954 : bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) {
    1166             :   bool Changed = false;
    1167      193954 :   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    1168      193954 :   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
    1169             : 
    1170      193954 :   ModifiedRegUnits.init(*TRI);
    1171      193954 :   UsedRegUnits.init(*TRI);
    1172      651975 :   for (auto &BB : MF)
    1173      458022 :     Changed |= tryToSinkCopy(BB, MF, TRI, TII);
    1174             : 
    1175      193953 :   return Changed;
    1176             : }

Generated by: LCOV version 1.13